diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog index 6317ade5ad19..675f9b537661 100644 --- a/Documentation/ABI/testing/sysfs-class-watchdog +++ b/Documentation/ABI/testing/sysfs-class-watchdog @@ -72,3 +72,37 @@ Description: It is a read/write file. When read, the currently assigned pretimeout governor is returned. When written, it sets the pretimeout governor. + +What: /sys/class/watchdog/watchdog1/access_cs0 +Date: August 2019 +Contact: Ivan Mikhaylov , + Alexander Amelkin +Description: + It is a read/write file. This attribute exists only if the + system has booted from the alternate flash chip due to + expiration of a watchdog timer of AST2400/AST2500 when + the alternate boot function was enabled with 'aspeed,alt-boot' + devicetree option for that watchdog or with an appropriate + h/w strapping (for WDT2 only). + + At alternate flash the 'access_cs0' sysfs node provides: + ast2400: a way to get access to the primary SPI flash + chip at CS0 after booting from the alternate + chip at CS1. + ast2500: a way to restore the normal address mapping + from (CS0->CS1, CS1->CS0) to (CS0->CS0, + CS1->CS1). + + Clearing the boot code selection and timeout counter also + resets the chip select line mapping to its initial state. When + the SoC is in normal mapping state (i.e. booted from CS0), + clearing those bits does nothing for both versions of the SoC. + For alternate boot mode (booted from CS1 due to wdt2 + expiration) the behavior differs as described above. + + This option can be used with wdt2 (watchdog1) only. + + When read, the current status of the boot code selection is + shown. When written with any non-zero value, it clears + the boot code selection and the timeout counter, which results + in a chip select reset for AST2400/AST2500.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt index 991728cb46cb..c8501530173c 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt @@ -6,6 +6,8 @@ Required properties: - "mediatek,mt7622-pwm": found on mt7622 SoC. - "mediatek,mt7623-pwm": found on mt7623 SoC. - "mediatek,mt7628-pwm": found on mt7628 SoC. + - "mediatek,mt7629-pwm", "mediatek,mt7622-pwm": found on mt7629 SoC. + - "mediatek,mt8516-pwm": found on mt8516 SoC. - reg: physical base address and length of the controller's registers. - #pwm-cells: must be 2. See pwm.txt in this directory for a description of the cell format. diff --git a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt new file mode 100644 index 000000000000..16fa5a096206 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt @@ -0,0 +1,40 @@ +Spreadtrum PWM controller + +Spreadtrum SoCs PWM controller provides 4 PWM channels. + +Required properties: +- compatible : Should be "sprd,ums512-pwm". +- reg: Physical base address and length of the controller's registers. +- clocks: The phandle and specifier referencing the controller's clocks. +- clock-names: Should contain following entries: + "pwmn": used to derive the functional clock for PWM channel n (n range: 0 ~ 3). + "enablen": for PWM channel n enable clock (n range: 0 ~ 3). +- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of + the cells format. + +Optional properties: +- assigned-clocks: Reference to the PWM clock entries. +- assigned-clock-parents: The phandle of the parent clock of PWM clock. 
+ +Example: + pwms: pwm@32260000 { + compatible = "sprd,ums512-pwm"; + reg = <0 0x32260000 0 0x10000>; + clock-names = "pwm0", "enable0", + "pwm1", "enable1", + "pwm2", "enable2", + "pwm3", "enable3"; + clocks = <&aon_clk CLK_PWM0>, <&aonapb_gate CLK_PWM0_EB>, + <&aon_clk CLK_PWM1>, <&aonapb_gate CLK_PWM1_EB>, + <&aon_clk CLK_PWM2>, <&aonapb_gate CLK_PWM2_EB>, + <&aon_clk CLK_PWM3>, <&aonapb_gate CLK_PWM3_EB>; + assigned-clocks = <&aon_clk CLK_PWM0>, + <&aon_clk CLK_PWM1>, + <&aon_clk CLK_PWM2>, + <&aon_clk CLK_PWM3>; + assigned-clock-parents = <&ext_26m>, + <&ext_26m>, + <&ext_26m>, + <&ext_26m>; + #pwm-cells = <2>; + }; diff --git a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt index 04cbb90a5d3e..28f2cbaf1702 100644 --- a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt @@ -23,6 +23,7 @@ Required properties: Optional property: - little-endian : If present, the TMU registers are little endian. If absent, the default is big endian. +- clocks : the clock for clocking the TMU silicon. 
Example: diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml new file mode 100644 index 000000000000..3a54f58683a0 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/allwinner,sun4i-a10-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Allwinner A10 Watchdog Device Tree Bindings + +allOf: + - $ref: "watchdog.yaml#" + +maintainers: + - Chen-Yu Tsai + - Maxime Ripard + +properties: + compatible: + oneOf: + - const: allwinner,sun4i-a10-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-a64-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-h6-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,suniv-f1c100s-wdt + - const: allwinner,sun4i-a10-wdt + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +unevaluatedProperties: false + +examples: + - | + wdt: watchdog@1c20c90 { + compatible = "allwinner,sun4i-a10-wdt"; + reg = <0x01c20c90 0x10>; + interrupts = <24>; + clocks = <&osc24M>; + timeout-sec = <10>; + }; + +... 
diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt index c5077a1f5cb3..d78d4a8fb868 100644 --- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt @@ -4,6 +4,7 @@ Required properties: - compatible: must be one of: - "aspeed,ast2400-wdt" - "aspeed,ast2500-wdt" + - "aspeed,ast2600-wdt" - reg: physical base address of the controller and length of memory mapped region diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt new file mode 100644 index 000000000000..f902508d6cac --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt @@ -0,0 +1,22 @@ +* Freescale i.MX7ULP Watchdog Timer (WDT) Controller + +Required properties: +- compatible : Should be "fsl,imx7ulp-wdt" +- reg : Should contain WDT registers location and length +- interrupts : Should contain WDT interrupt +- clocks: Should contain a phandle pointing to the gated peripheral clock. 
+ +Optional properties: +- timeout-sec : Contains the watchdog timeout in seconds + +Examples: + +wdog1: watchdog@403d0000 { + compatible = "fsl,imx7ulp-wdt"; + reg = <0x403d0000 0x10000>; + interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&pcc2 IMX7ULP_CLK_WDG1>; + assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>; + assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; + timeout-sec = <40>; +}; diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt deleted file mode 100644 index e65198d82a2b..000000000000 --- a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt +++ /dev/null @@ -1,22 +0,0 @@ -Allwinner SoCs Watchdog timer - -Required properties: - -- compatible : should be one of - "allwinner,sun4i-a10-wdt" - "allwinner,sun6i-a31-wdt" - "allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt" - "allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt" - "allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt" -- reg : Specifies base physical address and size of the registers. - -Optional properties: -- timeout-sec : Contains the watchdog timeout in seconds - -Example: - -wdt: watchdog@1c20c90 { - compatible = "allwinner,sun4i-a10-wdt"; - reg = <0x01c20c90 0x10>; - timeout-sec = <10>; -}; diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml new file mode 100644 index 000000000000..187bf6cb62bf --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/watchdog.yaml @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/watchdog.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Watchdog Generic Bindings + +maintainers: + - Guenter Roeck + - Wim Van Sebroeck + +description: | + This document describes generic bindings which can be used to + describe watchdog devices in a device tree.
+ +properties: + $nodename: + pattern: "^watchdog(@.*|-[0-9a-f])?$" + + timeout-sec: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Contains the watchdog timeout in seconds. + +... diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index fd2bcf99cda0..2c3a9f761205 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -37,3 +37,13 @@ filesystem implementations. journalling fscrypt fsverity + +Filesystems +=========== + +Documentation for filesystem implementations. + +.. toctree:: + :maxdepth: 2 + + virtiofs diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst new file mode 100644 index 000000000000..4f338e3cb3f7 --- /dev/null +++ b/Documentation/filesystems/virtiofs.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================== +virtiofs: virtio-fs host<->guest shared file system +=================================================== + +- Copyright (C) 2019 Red Hat, Inc. + +Introduction +============ +The virtiofs file system for Linux implements a driver for the paravirtualized +VIRTIO "virtio-fs" device for guest<->host file system sharing. It allows a +guest to mount a directory that has been exported on the host. + +Guests often require access to files residing on the host or remote systems. +Use cases include making files available to new guests during installation, +booting from a root file system located on the host, persistent storage for +stateless or ephemeral guests, and sharing a directory between guests. + +Although it is possible to use existing network file systems for some of these +tasks, they require configuration steps that are hard to automate and they +expose the storage network to the guest. The virtio-fs device was designed to +solve these problems by providing file system access without networking. 
+ +Furthermore the virtio-fs device takes advantage of the co-location of the +guest and host to increase performance and provide semantics that are not +possible with network file systems. + +Usage +===== +Mount file system with tag ``myfs`` on ``/mnt``: + +.. code-block:: sh + + guest# mount -t virtiofs myfs /mnt + +Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU +and the virtiofsd daemon. + +Internals +========= +Since the virtio-fs device uses the FUSE protocol for file system requests, the +virtiofs file system for Linux is integrated closely with the FUSE file system +client. The guest acts as the FUSE client while the host acts as the FUSE +server. The /dev/fuse interface between the kernel and userspace is replaced +with the virtio-fs device interface. + +FUSE requests are placed into a virtqueue and processed by the host. The +response portion of the buffer is filled in by the host and the guest handles +the request completion. + +Mapping /dev/fuse to virtqueues requires solving differences in semantics +between /dev/fuse and virtqueues. Each time the /dev/fuse device is read, the +FUSE client may choose which request to transfer, making it possible to +prioritize certain requests over others. Virtqueues have queue semantics and +it is not possible to change the order of requests that have been enqueued. +This is especially important if the virtqueue becomes full since it is then +impossible to add high priority requests. In order to address this difference, +the virtio-fs device uses a "hiprio" virtqueue specifically for requests that +have priority over normal requests. 
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index 136f1eef3712..4833904d32a5 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -5309,3 +5309,16 @@ Architectures: x86 This capability indicates that KVM supports paravirtualized Hyper-V IPI send hypercalls: HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. +8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH + +Architectures: x86 + +This capability indicates that KVM running on top of Hyper-V hypervisor +enables Direct TLB flush for its guests meaning that TLB flush +hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. +Due to the different ABI for hypercall parameters between Hyper-V and +KVM, enabling this capability effectively disables all hypercall +handling by KVM (as some KVM hypercalls may be mistakenly treated as TLB +flush hypercalls by Hyper-V) so userspace should disable KVM identification +in CPUID and only expose Hyper-V identification. In this case, the guest +thinks it's running on Hyper-V and only uses Hyper-V hypercalls. diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index a3985cc5aeda..223c99361a30 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -301,15 +301,6 @@ ixp4xx_wdt: ------------------------------------------------- -ks8695_wdt: - wdt_time: - Watchdog time in seconds. (default=5) - nowayout: - Watchdog cannot be stopped once started - (default=kernel config parameter) - -------------------------------------------------- - machzwd: nowayout: Watchdog cannot be stopped once started @@ -375,16 +366,6 @@ nic7018_wdt: ------------------------------------------------- -nuc900_wdt: - heartbeat: - Watchdog heartbeats in seconds.
- (default = 15) - nowayout: - Watchdog cannot be stopped once started - (default=kernel config parameter) - -------------------------------------------------- - omap_wdt: timer_margin: initial watchdog timeout (in seconds) diff --git a/MAINTAINERS b/MAINTAINERS index a97f1be63b9d..857611c746c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9060,6 +9060,7 @@ F: include/keys/trusted.h KEYS/KEYRINGS: M: David Howells +M: Jarkko Sakkinen L: keyrings@vger.kernel.org S: Maintained F: Documentation/security/keys/core.rst @@ -13245,9 +13246,11 @@ F: drivers/media/rc/pwm-ir-tx.c PWM SUBSYSTEM M: Thierry Reding +R: Uwe Kleine-König L: linux-pwm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git +Q: https://patchwork.ozlabs.org/project/linux-pwm/list/ F: Documentation/driver-api/pwm.rst F: Documentation/devicetree/bindings/pwm/ F: include/linux/pwm.h @@ -13256,6 +13259,7 @@ F: drivers/video/backlight/pwm_bl.c F: include/linux/pwm_backlight.h F: drivers/gpio/gpio-mvebu.c F: Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +K: pwm_(config|apply_state|ops) PXA GPIO DRIVER M: Robert Jarzmik @@ -16071,6 +16075,7 @@ THERMAL M: Zhang Rui M: Eduardo Valentin R: Daniel Lezcano +R: Amit Kucheria L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git @@ -17275,6 +17280,18 @@ S: Supported F: drivers/s390/virtio/ F: arch/s390/include/uapi/asm/virtio-ccw.h +VIRTIO FILE SYSTEM +M: Vivek Goyal +M: Stefan Hajnoczi +M: Miklos Szeredi +L: virtualization@lists.linux-foundation.org +L: linux-fsdevel@vger.kernel.org +W: https://virtio-fs.gitlab.io/ +S: Supported +F: fs/fuse/virtio_fs.c +F: include/uapi/linux/virtio_fs.h +F: Documentation/filesystems/virtiofs.rst + VIRTIO GPU DRIVER M: David Airlie M: Gerd Hoffmann diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 
6bbd4ae2beb0..4cf35b09c0ec 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -123,7 +123,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, dtb_passed = r6; if (r7) - strncpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); + strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); } #endif @@ -131,10 +131,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, #ifndef CONFIG_CMDLINE_FORCE if (cmdline_passed[0]) - strncpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB else - strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif #endif diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 42b5ec223100..afa43c7ea369 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -13,6 +13,7 @@ aliases { serial0 = &uart0; serial1 = &uart1; + ethernet0 = &eth0; }; chosen { @@ -60,7 +61,6 @@ }; }; cpu2: cpu@2 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -84,7 +84,6 @@ }; }; cpu3: cpu@3 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -108,7 +107,6 @@ }; }; cpu4: cpu@4 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -230,6 +228,24 @@ #size-cells = <0>; status = "disabled"; }; + pwm0: pwm@10020000 { + compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; + reg = <0x0 0x10020000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <42 43 44 45>; + clocks = <&prci PRCI_CLK_TLCLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + pwm1: pwm@10021000
{ + compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; + reg = <0x0 0x10021000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <46 47 48 49>; + clocks = <&prci PRCI_CLK_TLCLK>; + #pwm-cells = <3>; + status = "disabled"; + }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 93d68cbd64fe..104d334511cd 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -85,3 +85,11 @@ reg = <0>; }; }; + +&pwm0 { + status = "okay"; +}; + +&pwm1 { + status = "okay"; +}; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3efff552a261..420a0dbef386 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -54,6 +57,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y @@ -61,6 +65,7 @@ CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -73,7 +78,12 @@ CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_MMC=y CONFIG_MMC_SPI=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -86,6 +96,7 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y 
CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 7da93e494445..87ee6e62b64b 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -54,11 +57,13 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -69,7 +74,12 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_SIFIVE_PLIC=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -83,6 +93,7 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index c60123f018f5..7255f2d8395b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -83,6 +83,18 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define FIXADDR_TOP 
VMALLOC_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then @@ -424,18 +436,6 @@ extern void *dtb_early_va; extern void setup_bootmem(void); extern void paging_init(void); -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define FIXADDR_TOP VMALLOC_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 74ccfd464071..da7aa88113c2 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -166,9 +166,13 @@ ENTRY(handle_exception) move a0, sp /* pt_regs */ tail do_IRQ 1: - /* Exceptions run with interrupts enabled */ + /* Exceptions run with interrupts enabled or disabled + depending on the state of sstatus.SR_SPIE */ + andi t0, s1, SR_SPIE + beqz t0, 1f csrs CSR_SSTATUS, SR_SIE +1: /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 15a9189f91ad..72f89b7590dd 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -63,6 +63,11 @@ _start_kernel: li t0, SR_FS csrc CSR_SSTATUS, t0 +#ifdef CONFIG_SMP + li t0, CONFIG_NR_CPUS + bgeu a0, t0, .Lsecondary_park +#endif + /* Pick one hart to run the main boot sequence */ la a3, hart_lottery li a2, 1 @@ -154,9 +159,6 @@ relocate: .Lsecondary_start: #ifdef CONFIG_SMP - li a1, CONFIG_NR_CPUS - bgeu a0, a1, .Lsecondary_park - /* Set trap vector to spin forever to help debug */ la a3, .Lsecondary_park csrw 
CSR_STVEC, a3 diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 3836760d7aaf..b18cd6c8e8fb 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -206,3 +206,4 @@ void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); } +EXPORT_SYMBOL_GPL(smp_send_reschedule); diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 541a2b885814..9dd1f2e64db1 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -9,6 +9,7 @@ #include unsigned long riscv_timebase; +EXPORT_SYMBOL_GPL(riscv_timebase); void __init time_init(void) { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 7a2705694f5b..7741e211f7f5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -180,7 +180,15 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) @@ -524,14 +532,24 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ } __packed; +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + /* Define virtual processor assist page structure. 
*/ struct hv_vp_assist_page { __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; __u64 current_nested_vmcs; } __packed; @@ -882,4 +900,7 @@ struct hv_tlb_flush_ex { u64 gva_list[]; } __packed; +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a3a3ec73fa2f..23edf56cf577 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -320,6 +320,7 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; bool unsync; + u8 mmu_valid_gen; bool mmio_cached; /* @@ -335,7 +336,6 @@ struct kvm_mmu_page { int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ - unsigned long mmu_valid_gen; DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 @@ -844,6 +844,8 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + + struct hv_partition_assist_pg *hv_pa_pg; }; enum kvm_irqchip_mode { @@ -857,12 +859,13 @@ struct kvm_arch { unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; - unsigned long mmu_valid_gen; + u8 mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. 
*/ struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -1213,6 +1216,7 @@ struct kvm_x86_ops { bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1312,18 +1316,42 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops->skip_emulated_instruction() implementations. + * + * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace.
+ * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. + */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); @@ -1506,7 +1534,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void __noreturn kvm_spurious_fault(void); +asmlinkage void kvm_spurious_fault(void); /* * Hardware virtualization extension instructions may fault if a @@ -1514,24 +1542,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void); * Usually after catching the fault we just panic; during reboot * instead the instruction is ignored.
*/ -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ +#define __kvm_handle_fault_on_reboot(insn) \ "666: \n\t" \ insn "\n\t" \ "jmp 668f \n\t" \ "667: \n\t" \ "call kvm_spurious_fault \n\t" \ "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n\t" \ - "700: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting\n\t" \ - "je 667b \n\t" \ - "jmp 668b \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 700b) - -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") + _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..6ece8561ba66 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -52,6 +52,7 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, }; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b15e6465870f..1835767aa335 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -110,6 +111,7 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 #define VMX_MISC_ZERO_LEN_INS 0x40000000 +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..2e8a30f06c74 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -75,6 +75,7 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 
0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f01950aa7fae..3eb8411ab60e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -86,6 +86,8 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -144,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 32b4dc9030aa..c222f283b456 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,6 +17,12 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +u32 get_umwait_control_msr(void) +{ + return umwait_control_cached; +} +EXPORT_SYMBOL_GPL(get_umwait_control_msr); + /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index dd5985eb61b4..63316036f85a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function, case 7: case 0xb: case 0xd: + case 0xf: + case 0x10: + case 0x12: case 0x14: + case 0x17: + case 0x18: + case 0x1f: case 0x8000001d: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; break; @@ -360,7 +366,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index fff790a3f4ee..23ff65504d7e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,6 +23,7 @@ #include "ioapic.h" #include "hyperv.h" +#include #include #include #include @@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) .vector = stimer->config.apic_vector }; - return !kvm_apic_set_irq(vcpu, &irq, NULL); + if (lapic_in_kernel(vcpu)) + return !kvm_apic_set_irq(vcpu, &irq, NULL); + return 0; } static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) @@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; - ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + + /* + * Direct Synthetic timers only make sense with in-kernel + * LAPIC + */ + if (lapic_in_kernel(vcpu)) + ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; break; @@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= 
HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - + if (!cpu_smt_possible()) + ent->eax |= HV_X64_NO_NONARCH_CORESHARING; /* * Default number of spinlock retry attempts, matches * HyperV 2016. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8675458c2205..3a3a6854dcca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -65,7 +65,9 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul -#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 +static bool lapic_timer_advance_dynamic __read_mostly; +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1485,26 +1487,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 ns; + /* Do not adjust for tiny fluctuations or large random spikes. 
*/ + if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || + abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) + return; + /* too early */ if (advance_expire_delta < 0) { ns = -advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns -= min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } else { /* too late */ ns = advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns += min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - apic->lapic_timer.timer_advance_adjust_done = true; - if (unlikely(timer_advance_ns > 5000)) { + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; - } apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -1524,7 +1525,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) + if (lapic_timer_advance_dynamic) adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } @@ -2302,13 +2303,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; + lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; - apic->lapic_timer.timer_advance_adjust_done = true; + lapic_timer_advance_dynamic = false; } - /* * APIC is created enabled. 
This will prevent kvm_lapic_set_base from * thinking that APIC state has changed. diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 50053d2b8b7b..2aad7e226fc0 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -35,7 +35,6 @@ struct kvm_timer { s64 advance_expire_delta; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; - bool timer_advance_adjust_done; }; struct kvm_lapic { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a10af9c87f8a..5269aa057dfa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -403,8 +403,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, mask |= (gpa & shadow_nonpresent_or_rsvd_mask) << shadow_nonpresent_or_rsvd_mask_len; - page_header(__pa(sptep))->mmio_cached = true; - trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); } @@ -2103,6 +2101,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct * depends on valid pages being added to the head of the list. See * comments in kvm_zap_obsolete_pages(). 
*/ + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2252,7 +2251,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ + if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ @@ -2311,7 +2310,8 @@ static void mmu_audit_disable(void) { } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { - return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); + return sp->role.invalid || + unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -2538,7 +2538,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (level > PT_PAGE_TABLE_LEVEL && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } - sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); @@ -2753,7 +2752,12 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); - if (!sp->role.invalid) + /* + * Obsolete pages cannot be used on any vCPUs, see the comment + * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also + * treats invalid shadow pages as being obsolete. + */ + if (!is_obsolete_sp(kvm, sp)) kvm_reload_remote_mmus(kvm); } @@ -5383,7 +5387,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; - enum emulation_result er; bool direct = vcpu->arch.mmu->direct_map; /* With shadow page tables, fault_address contains a GVA or nGPA. 
*/ @@ -5450,19 +5453,8 @@ emulate: return 1; } - er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); - - switch (er) { - case EMULATE_DONE: - return 1; - case EMULATE_USER_EXIT: - ++vcpu->stat.mmio_exits; - /* fall through */ - case EMULATE_FAIL: - return 0; - default: - BUG(); - } + return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -5684,12 +5676,11 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) return ret; } - +#define BATCH_ZAP_PAGES 10 static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; - LIST_HEAD(invalid_list); - int ign; + int nr_zapped, batch = 0; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5702,46 +5693,39 @@ restart: break; /* - * Do not repeatedly zap a root page to avoid unnecessary - * KVM_REQ_MMU_RELOAD, otherwise we may not be able to - * progress: - * vcpu 0 vcpu 1 - * call vcpu_enter_guest(): - * 1): handle KVM_REQ_MMU_RELOAD - * and require mmu-lock to - * load mmu - * repeat: - * 1): zap root page and - * send KVM_REQ_MMU_RELOAD - * - * 2): if (cond_resched_lock(mmu-lock)) - * - * 2): hold mmu-lock and load mmu - * - * 3): see KVM_REQ_MMU_RELOAD bit - * on vcpu->requests is set - * then return 1 to call - * vcpu_enter_guest() again. - * goto repeat; - * - * Since we are reversely walking the list and the invalid - * list will be moved to the head, skip the invalid page - * can help us to avoid the infinity list walking. + * Skip invalid pages with a non-zero root count, zapping pages + * with a non-zero root count will never succeed, i.e. the page + * will get thrown back on active_mmu_pages and we'll get stuck + * in an infinite loop. 
*/ - if (sp->role.invalid) + if (sp->role.invalid && sp->root_count) continue; - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - kvm_mmu_commit_zap_page(kvm, &invalid_list); - cond_resched_lock(&kvm->mmu_lock); + /* + * No need to flush the TLB since we're only zapping shadow + * pages with an obsolete generation number and all vCPUS have + * loaded a new root, i.e. the shadow pages being zapped cannot + * be in active use by the guest. + */ + if (batch >= BATCH_ZAP_PAGES && + cond_resched_lock(&kvm->mmu_lock)) { + batch = 0; goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) + if (__kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { + batch += nr_zapped; goto restart; + } } - kvm_mmu_commit_zap_page(kvm, &invalid_list); + /* + * Trigger a remote TLB flush before freeing the page tables to ensure + * KVM is not in the middle of a lockless shadow page table walk, which + * may reference the pages. + */ + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); } /* @@ -5755,13 +5739,39 @@ restart: */ static void kvm_mmu_zap_all_fast(struct kvm *kvm) { + lockdep_assert_held(&kvm->slots_lock); + spin_lock(&kvm->mmu_lock); - kvm->arch.mmu_valid_gen++; + trace_kvm_mmu_zap_all_fast(kvm); + + /* + * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is + * held for the entire duration of zapping obsolete pages, it's + * impossible for there to be multiple invalid generations associated + * with *valid* shadow pages at any given time, i.e. there is exactly + * one valid generation and (at most) one invalid generation. + */ + kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; + + /* + * Notify all vcpus to reload its shadow page table and flush TLB. + * Then all vcpus will switch to new shadow page table with the new + * mmu_valid_gen. + * + * Note: we need to do this under the protection of mmu_lock, + * otherwise, vcpu would purge shadow page but miss tlb flush. 
+ */ + kvm_reload_remote_mmus(kvm); kvm_zap_obsolete_pages(kvm); spin_unlock(&kvm->mmu_lock); } +static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +{ + return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); +} + static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) @@ -5959,7 +5969,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); -static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) +void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); @@ -5968,14 +5978,10 @@ static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) spin_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (mmio_only && !sp->mmio_cached) - continue; if (sp->role.invalid && sp->root_count) continue; - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { - WARN_ON_ONCE(mmio_only); + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; - } if (cond_resched_lock(&kvm->mmu_lock)) goto restart; } @@ -5984,11 +5990,6 @@ restart: spin_unlock(&kvm->mmu_lock); } -void kvm_mmu_zap_all(struct kvm *kvm) -{ - return __kvm_mmu_zap_all(kvm, false); -} - void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); @@ -6010,7 +6011,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) */ if (unlikely(gen == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); - __kvm_mmu_zap_all(kvm, true); + kvm_mmu_zap_all_fast(kvm); } } @@ -6041,16 +6042,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) * want to shrink a VM that only started to populate its MMU * anyway. 
*/ - if (!kvm->arch.n_used_mmu_pages) + if (!kvm->arch.n_used_mmu_pages && + !kvm_has_zapped_obsolete_pages(kvm)) continue; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + if (kvm_has_zapped_obsolete_pages(kvm)) { + kvm_mmu_commit_zap_page(kvm, + &kvm->arch.zapped_obsolete_pages); + goto unlock; + } + if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); +unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index d8001b4bca05..7ca8831c7d1a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -8,16 +8,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(__u8, mmu_valid_gen) \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) -#define KVM_MMU_PAGE_ASSIGN(sp) \ - __entry->gfn = sp->gfn; \ - __entry->role = sp->role.word; \ - __entry->root_count = sp->root_count; \ +#define KVM_MMU_PAGE_ASSIGN(sp) \ + __entry->mmu_valid_gen = sp->mmu_valid_gen; \ + __entry->gfn = sp->gfn; \ + __entry->role = sp->role.word; \ + __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ @@ -29,8 +31,9 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \ + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ + __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.gpte_is_8_bytes ? 
8 : 4, \ role.quadrant, \ @@ -279,6 +282,27 @@ TRACE_EVENT( ) ); +TRACE_EVENT( + kvm_mmu_zap_all_fast, + TP_PROTO(struct kvm *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(__u8, mmu_valid_gen) + __field(unsigned int, mmu_used_pages) + ), + + TP_fast_assign( + __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; + __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + ), + + TP_printk("kvm-mmu-valid-gen %u used_pages %x", + __entry->mmu_valid_gen, __entry->mmu_used_pages + ) +); + + TRACE_EVENT( check_mmio_spte, TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 04fe21849b6e..f8ecb6df5106 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -777,17 +777,18 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->next_rip = svm->vmcb->control.next_rip; } - if (!svm->next_rip) - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); - - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); - - kvm_rip_write(vcpu, svm->next_rip); + if (!svm->next_rip) { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } else { + if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) + pr_err("%s: ip 0x%lx next 0x%llx\n", + __func__, kvm_rip_read(vcpu), svm->next_rip); + kvm_rip_write(vcpu, svm->next_rip); + } svm_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } static void svm_queue_exception(struct kvm_vcpu *vcpu) @@ -1539,6 +1540,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RDPRU); set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { @@ -2768,17 +2770,18 @@ static int gp_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 error_code = 
svm->vmcb->control.exit_info_1; - int er; WARN_ON_ONCE(!enable_vmware_backdoor); - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) + /* + * VMware backdoor emulation on #GP interception only handles IN{S}, + * OUT{S}, and RDPMC, none of which generate a non-zero error code. + */ + if (error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } static bool is_erratum_383(void) @@ -2876,7 +2879,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3830,6 +3833,12 @@ static int xsetbv_interception(struct vcpu_svm *svm) return 1; } +static int rdpru_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; @@ -3883,24 +3892,15 @@ static int task_switch_interception(struct vcpu_svm *svm) int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE) - goto fail; + if (!skip_emulated_instruction(&svm->vcpu)) + return 0; } if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) == EMULATE_FAIL) - goto fail; - - return 1; - -fail: - svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - svm->vcpu.run->internal.ndata = 0; - return 0; + return kvm_task_switch(&svm->vcpu, 
tss_selector, int_vec, reason, + has_error_code, error_code); } static int cpuid_interception(struct vcpu_svm *svm) @@ -3921,7 +3921,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3929,13 +3929,12 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); } static int rsm_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction_from_buffer(&svm->vcpu, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2); } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4724,7 +4723,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = kvm_emulate_instruction(&svm->vcpu, 0); } return ret; @@ -4791,6 +4790,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, + [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, @@ -7099,13 +7099,6 @@ failed: return ret; } -static int nested_enable_evmcs(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version) -{ - /* Intel-only feature */ - return -ENODEV; -} - static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { unsigned long cr4 = 
kvm_read_cr4(vcpu); @@ -7311,7 +7304,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = nested_enable_evmcs, + .nested_enable_evmcs = NULL, .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7aa69716d516 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static inline bool vmx_waitpkg_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + static inline bool cpu_has_vmx_tsc_scaling(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 39a24eec8884..07ebf6882a45 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) + vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; vp_ap->enlighten_vmentry = 1; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a10cd351940..41abc62c9a8a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -198,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); } +static inline bool vmx_control_verify(u32 control, u32 low, u32 high) +{ + return fixed_bits_valid(control, low, high); +} + +static inline u64 vmx_control_msr(u32 low, u32 high) +{ + return low | ((u64)high << 32); +} + static void 
vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); @@ -866,16 +876,34 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, return 0; } +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); + + return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; +} + /* * Load guest's/host's msr at nested entry/exit. * return 0 for success, entry index for failure. + * + * One of the failure modes for MSR load/store is when a list exceeds the + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch + * as possible, process all valid entries before failing rather than precheck + * for a capacity violation. */ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + goto fail; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, sizeof(e))) { pr_debug_ratelimited( @@ -906,8 +934,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) u64 data; u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + return -EINVAL; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, 2 * sizeof(u32))) { @@ -1013,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) return vmx->nested.vpid02 ? 
vmx->nested.vpid02 : vmx->vpid; } - -static inline bool vmx_control_verify(u32 control, u32 low, u32 high) -{ - return fixed_bits_valid(control, low, high); -} - -static inline u64 vmx_control_msr(u32 low, u32 high) -{ - return low | ((u64)high << 32); -} - static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) { superset &= mask; @@ -2089,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); @@ -2642,8 +2664,23 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; +#ifdef CONFIG_X86_64 + ia32e = !!(vcpu->arch.efer & EFER_LMA); +#else + ia32e = false; +#endif + + if (ia32e) { + if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || + CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + return -EINVAL; + } else { + if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || + CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || + CC((vmcs12->host_rip) >> 32)) + return -EINVAL; + } if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || @@ -2662,7 +2699,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu))) + CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; #endif @@ -5441,6 +5479,10 @@ 
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_ENCLS: /* SGX is never exposed to L1 */ return false; + case EXIT_REASON_UMWAIT: + case EXIT_REASON_TPAUSE: + return nested_cpu_has2(vmcs12, + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); default: return true; } diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 2200fb698dd0..45eaedee2ac0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -11,8 +11,13 @@ #include "vmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) -#define __ex_clear(x, reg) \ - ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) + +asmlinkage void vmread_error(unsigned long field, bool fault); +void vmwrite_error(unsigned long field, unsigned long value); +void vmclear_error(struct vmcs *vmcs, u64 phys_addr); +void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); +void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); +void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); static __always_inline void vmcs_check16(unsigned long field) { @@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (__ex_clear("vmread %1, %0", "%k0") - : "=r"(value) : "r"(field)); + asm volatile("1: vmread %2, %1\n\t" + ".byte 0x3e\n\t" /* branch taken hint */ + "ja 3f\n\t" + "mov %2, %%" _ASM_ARG1 "\n\t" + "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t" + "2: call vmread_error\n\t" + "xor %k1, %k1\n\t" + "3:\n\t" + + ".pushsection .fixup, \"ax\"\n\t" + "4: mov %2, %%" _ASM_ARG1 "\n\t" + "mov $1, %%" _ASM_ARG2 "\n\t" + "jmp 2b\n\t" + ".popsection\n\t" + _ASM_EXTABLE(1b, 4b) + : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc"); return value; } @@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) return __vmcs_readl(field); } -static noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", - field, value, 
vmcs_read32(VM_INSTRUCTION_ERROR)); - dump_stack(); -} +#define vmx_asm1(insn, op1, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) + +#define vmx_asm2(insn, op1, op2, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1, op2 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { - bool error; - - asm volatile (__ex("vmwrite %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(field), "rm"(value)); - if (unlikely(error)) - vmwrite_error(field, value); + vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value); } static __always_inline void vmcs_write16(unsigned long field, u16 value) @@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) static inline void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; - asm volatile (__ex("vmclear %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", - vmcs, phys_addr); + vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr); } static inline void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex("vmptrld %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", - vmcs, phys_addr); + vmx_asm1(vmptrld, "m"(phys_addr), 
vmcs, phys_addr); } static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) @@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; - bool error; - asm volatile (__ex("invvpid %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva); } static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) @@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) struct { u64 eptp, gpa; } operand = {eptp, gpa}; - bool error; - asm volatile (__ex("invept %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4a99be1fae4e..d4575ffb3cec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -343,6 +343,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit void vmx_vmexit(void); +#define vmx_insn_failed(fmt...) 
\ +do { \ + WARN_ONCE(1, fmt); \ + pr_warn_ratelimited(fmt); \ +} while (0) + +asmlinkage void vmread_error(unsigned long field, bool fault) +{ + if (fault) + kvm_spurious_fault(); + else + vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); +} + +noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) +{ + vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + ext, vpid, gva); +} + +noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) +{ + vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + ext, eptp, gpa); +} + static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* @@ -486,6 +528,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm) return hv_remote_flush_tlb_with_range(kvm, NULL); } +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in current code and so all + * evmcs in a single VM share the same assist page. 
+ */ + if (!*p_hv_pa_pg) + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + + if (!*p_hv_pa_pg) + return -ENOMEM; + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (unsigned long)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + return 0; +} + #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -1472,27 +1539,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -/* - * Returns an int to be compatible with SVM implementation (which can fail). - * Do not use directly, use skip_emulated_instruction() instead. - */ -static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. 
+ */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; -} - -static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - (void)__skip_emulated_instruction(vcpu); + return 1; } static void vmx_clear_hlt(struct kvm_vcpu *vcpu) @@ -1527,8 +1599,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) int inc_eip = 0; if (kvm_exception_is_soft(nr)) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); return; } @@ -1700,6 +1771,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1873,6 +1950,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -2290,6 +2377,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, 
SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -4026,6 +4114,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } } + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + vmx->secondary_exec_control = exec_control; } @@ -4160,6 +4265,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); vmx->hv_deadline_tsc = -1; @@ -4277,8 +4384,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) int inc_eip = 0; if (vcpu->arch.interrupt.soft) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -4314,8 +4420,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); return; } @@ -4442,7 +4547,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. 
*/ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0)) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -4493,7 +4598,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; - enum emulation_result er; vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; @@ -4510,13 +4614,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. + */ + if (error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } /* @@ -4558,7 +4666,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -4613,7 +4721,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -4687,7 +4795,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return 
kvm_emulate_instruction(vcpu, 0); } static int handle_cr(struct kvm_vcpu *vcpu) @@ -4903,7 +5011,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -4937,20 +5045,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } -static int handle_xsaves(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - -static int handle_xrstors(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -4970,7 +5064,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -5039,23 +5133,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && type != INTR_TYPE_EXT_INTR && type != INTR_TYPE_NMI_INTR)) - skip_emulated_instruction(vcpu); - - if (kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason, - has_error_code, error_code) == EMULATE_FAIL) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + WARN_ON(!skip_emulated_instruction(vcpu)); /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? */ - - return 1; + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, + reason, has_error_code, error_code); } static int handle_ept_violation(struct kvm_vcpu *vcpu) @@ -5114,21 +5200,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - /* - * Doing kvm_skip_emulated_instruction() depends on undefined - * behavior: Intel's manual doesn't mandate - * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG - * occurs and while on real hardware it was observed to be set, - * other hypervisors (namely Hyper-V) don't set it, we end up - * advancing IP with some random value. Disable fast mmio when - * running nested and keep it for real hardware in hope that - * VM_EXIT_INSTRUCTION_LEN will always be set correctly. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return kvm_skip_emulated_instruction(vcpu); - else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == - EMULATE_DONE; + return kvm_skip_emulated_instruction(vcpu); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -5147,8 +5219,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err = EMULATE_DONE; - int ret = 1; bool intr_window_requested; unsigned count = 130; @@ -5169,41 +5239,35 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = kvm_emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; - ret = 0; - goto out; - } - - if (err != EMULATE_DONE) - goto emulation_error; + if (!kvm_emulate_instruction(vcpu, 0)) + return 0; if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) - goto emulation_error; + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + 
KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_vcpu_halt(vcpu); - goto out; + return kvm_vcpu_halt(vcpu); } + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code. + */ if (signal_pending(current)) - goto out; + return 1; + if (need_resched()) schedule(); } -out: - return ret; - -emulation_error: - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; + return 1; } static void grow_ple_window(struct kvm_vcpu *vcpu) @@ -5474,6 +5538,14 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } +static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) +{ + kvm_skip_emulated_instruction(vcpu); + WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", + vmcs_read32(VM_EXIT_REASON)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -5525,13 +5597,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_xsaves, - [EXIT_REASON_XRSTORS] = handle_xrstors, + [EXIT_REASON_XSAVES] = handle_unexpected_vmexit, + [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, + [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit, + [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit, }; static const int kvm_vmx_max_exit_handlers = @@ -6362,6 +6436,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6456,6 +6547,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); @@ -6511,6 +6603,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. 
Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -6578,6 +6673,7 @@ static struct kvm *vmx_vm_alloc(void) static void vmx_vm_free(struct kvm *kvm) { + kfree(kvm->arch.hyperv.hv_pa_pg); vfree(to_kvm_vmx(kvm)); } @@ -7706,7 +7802,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = __skip_emulated_instruction, + .skip_emulated_instruction = skip_emulated_instruction, .set_interrupt_shadow = vmx_set_interrupt_shadow, .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, @@ -7837,6 +7933,7 @@ static void vmx_exit(void) if (!vp_ap) continue; + vp_ap->nested_control.features.directhypercall = 0; vp_ap->current_nested_vmcs = 0; vp_ap->enlighten_vmentry = 0; } @@ -7876,6 +7973,11 @@ static int __init vmx_init(void) pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); static_branch_enable(&enable_evmcs); } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + } else { enlightened_vmcs = false; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 64d5a4890aa9..bee16687dc0b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 get_umwait_control_msr(void); + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -211,6 +213,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u32 msr_ia32_umwait_control; u32 secondary_exec_control; @@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c 
b/arch/x86/kvm/x86.c index dfd641243568..0ed07d8d2caa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -360,7 +360,8 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base); asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ - BUG(); + if (!kvm_rebooting) + BUG(); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); @@ -1145,6 +1146,44 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, + MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, + MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, + MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, + MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, + MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, + MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, + MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19, + MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21, + MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23, + MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25, + MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27, + MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29, + MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + 
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, + MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, + MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, + MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, + MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, + MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, + MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19, + MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21, + MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23, + MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25, + MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27, + MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29, + MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31, }; static unsigned num_msrs_to_save; @@ -3169,7 +3208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: - case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: case KVM_CAP_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: @@ -3246,6 +3284,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_x86_ops->get_nested_state ? 
kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + r = kvm_x86_ops->enable_direct_tlbflush != NULL; + break; + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: + r = kvm_x86_ops->nested_enable_evmcs != NULL; + break; default: break; } @@ -4019,6 +4063,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = -EFAULT; } return r; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + if (!kvm_x86_ops->enable_direct_tlbflush) + return -ENOTTY; + + return kvm_x86_ops->enable_direct_tlbflush(vcpu); default: return -EINVAL; @@ -5051,6 +5100,11 @@ static void kvm_init_msr_list(void) u32 dummy[2]; unsigned i, j; + BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, + "Please update the fixed PMCs in msrs_to_save[]"); + BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32, + "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]"); + for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) continue; @@ -5389,7 +5443,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); int handle_ud(struct kvm_vcpu *vcpu) { int emul_type = EMULTYPE_TRAP_UD; - enum emulation_result er; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; @@ -5398,15 +5451,10 @@ int handle_ud(struct kvm_vcpu *vcpu) sig, sizeof(sig), &e) == 0 && memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); - emul_type = 0; + emul_type = EMULTYPE_TRAP_UD_FORCED; } - er = kvm_emulate_instruction(vcpu, emul_type); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + return kvm_emulate_instruction(vcpu, emul_type); } EXPORT_SYMBOL_GPL(handle_ud); @@ -6228,7 +6276,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int 
irq, int inc_eip) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; @@ -6240,37 +6288,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ctxt->_eip = ctxt->eip + inc_eip; ret = emulate_int_real(ctxt, irq); - if (ret != X86EMUL_CONTINUE) - return EMULATE_FAIL; - - ctxt->eip = ctxt->_eip; - kvm_rip_write(vcpu, ctxt->eip); - kvm_set_rflags(vcpu, ctxt->eflags); - - return EMULATE_DONE; + if (ret != X86EMUL_CONTINUE) { + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } else { + ctxt->eip = ctxt->_eip; + kvm_rip_write(vcpu, ctxt->eip); + kvm_set_rflags(vcpu, ctxt->eflags); + } } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { - int r = EMULATE_DONE; - ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_VMWARE_GP) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + + if (emulation_type & EMULTYPE_SKIP) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + kvm_queue_exception(vcpu, UD_VECTOR); if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_USER_EXIT; + return 0; } - kvm_queue_exception(vcpu, UD_VECTOR); - - return r; + return 1; } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, @@ -6425,7 +6479,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) +static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -6434,10 +6488,10 @@ static void 
kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return 0; } + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return 1; } int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -6446,7 +6500,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r; r = kvm_x86_ops->skip_emulated_instruction(vcpu); - if (unlikely(r != EMULATE_DONE)) + if (unlikely(!r)) return 0; /* @@ -6458,8 +6512,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) * that sets the TF flag". */ if (unlikely(rflags & X86_EFLAGS_TF)) - kvm_vcpu_do_singlestep(vcpu, &r); - return r == EMULATE_DONE; + r = kvm_vcpu_do_singlestep(vcpu); + return r; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -6478,7 +6532,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; + *r = 0; return true; } } @@ -6494,7 +6548,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); - *r = EMULATE_DONE; + *r = 1; return true; } } @@ -6578,11 +6632,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, trace_kvm_emulate_insn_start(vcpu); ++vcpu->stat.insn_emulation; if (r != EMULATION_OK) { - if (emulation_type & EMULTYPE_TRAP_UD) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_TRAP_UD) || + (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; if (ctxt->have_exception) { /* * #UD should result in just EMULATION_FAILED, and 
trap-like @@ -6591,28 +6648,32 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || exception_type(ctxt->exception.vector) == EXCPT_TRAP); inject_emulated_exception(vcpu); - return EMULATE_DONE; + return 1; } - if (emulation_type & EMULTYPE_SKIP) - return EMULATE_FAIL; return handle_emulation_failure(vcpu, emulation_type); } } - if ((emulation_type & EMULTYPE_VMWARE) && - !is_vmware_backdoor_opcode(ctxt)) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_VMWARE_GP) && + !is_vmware_backdoor_opcode(ctxt)) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + /* + * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks + * for kvm_skip_emulated_instruction(). The caller is responsible for + * updating interruptibility state and injecting single-step #DBs. + */ if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); - kvm_x86_ops->set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } if (retry_instruction(ctxt, cr2, emulation_type)) - return EMULATE_DONE; + return 1; /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ @@ -6628,18 +6689,18 @@ restart: r = x86_emulate_insn(ctxt); if (r == EMULATION_INTERCEPTED) - return EMULATE_DONE; + return 1; if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { - r = EMULATE_DONE; + r = 1; if (inject_emulated_exception(vcpu)) return r; } else if (vcpu->arch.pio.count) { @@ -6650,16 +6711,18 @@ restart: writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } - r = EMULATE_USER_EXIT; + r = 0; } else if (vcpu->mmio_needed) { + ++vcpu->stat.mmio_exits; + if (!vcpu->mmio_is_write) writeback = 
false; - r = EMULATE_USER_EXIT; + r = 0; vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else - r = EMULATE_DONE; + r = 1; if (writeback) { unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); @@ -6668,8 +6731,8 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) - kvm_vcpu_do_singlestep(vcpu, &r); + if (r && ctxt->tf) + r = kvm_vcpu_do_singlestep(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -8263,12 +8326,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return r; } static int complete_emulated_pio(struct kvm_vcpu *vcpu) @@ -8636,14 +8698,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - - if (ret) - return EMULATE_FAIL; + if (ret) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - return EMULATE_DONE; + return 1; } EXPORT_SYMBOL_GPL(kvm_task_switch); @@ -9361,6 +9426,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -9690,8 +9756,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Scan sptes if dirty 
logging has been stopped, dropping those * which can be collapsed into a single large-page spte. Later * page faults will create the large-page sptes. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() */ - if ((change != KVM_MR_DELETE) && + if (change == KVM_MR_FLAGS_ONLY && (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_zap_collapsible_sptes(kvm, new); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b5274e2a53cf..dbf7442a822b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) } void kvm_set_pending_timer(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 get_kvmclock_ns(struct kvm *kvm); diff --git a/block/blk-flush.c b/block/blk-flush.c index aedd9320e605..1eec9cbe5a0a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -214,6 +214,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); + + if (!refcount_dec_and_test(&flush_rq->ref)) { + fq->rq_status = error; + spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + return; + } + + if (fq->rq_status != BLK_STS_OK) + error = fq->rq_status; + hctx = flush_rq->mq_hctx; if (!q->elevator) { blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 3b39deb8b9f8..2a3db80c1dce 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -529,8 +529,8 @@ struct iocg_wake_ctx { static const struct ioc_params autop[] = { 
[AUTOP_HDD] = { .qos = { - [QOS_RLAT] = 50000, /* 50ms */ - [QOS_WLAT] = 50000, + [QOS_RLAT] = 250000, /* 250ms */ + [QOS_WLAT] = 250000, [QOS_MIN] = VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, @@ -1343,7 +1343,7 @@ static void ioc_timer_fn(struct timer_list *timer) u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; u32 missed_ppm[2], rq_wait_pct; u64 period_vtime; - int i; + int prev_busy_level, i; /* how were the latencies during the period? */ ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct); @@ -1407,7 +1407,8 @@ static void ioc_timer_fn(struct timer_list *timer) * comparing vdone against period start. If lagging behind * IOs from past periods, don't increase vrate. */ - if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) && + if ((ppm_rthr != MILLION || ppm_wthr != MILLION) && + !atomic_read(&iocg_to_blkg(iocg)->use_delay) && time_after64(vtime, vdone) && time_after64(vtime, now.vnow - MAX_LAGGING_PERIODS * period_vtime) && @@ -1531,26 +1532,29 @@ skip_surplus_transfers: * and experiencing shortages but not surpluses, we're too stingy * and should increase vtime rate. 
*/ + prev_busy_level = ioc->busy_level; if (rq_wait_pct > RQ_WAIT_BUSY_PCT || missed_ppm[READ] > ppm_rthr || missed_ppm[WRITE] > ppm_wthr) { ioc->busy_level = max(ioc->busy_level, 0); ioc->busy_level++; - } else if (nr_lagging) { - ioc->busy_level = max(ioc->busy_level, 0); - } else if (nr_shortages && !nr_surpluses && - rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && + } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 && missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) { - ioc->busy_level = min(ioc->busy_level, 0); - ioc->busy_level--; + /* take action iff there is contention */ + if (nr_shortages && !nr_lagging) { + ioc->busy_level = min(ioc->busy_level, 0); + /* redistribute surpluses first */ + if (!nr_surpluses) + ioc->busy_level--; + } } else { ioc->busy_level = 0; } ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); - if (ioc->busy_level) { + if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { u64 vrate = atomic64_read(&ioc->vtime_rate); u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; @@ -1592,6 +1596,10 @@ skip_surplus_transfers: atomic64_set(&ioc->vtime_rate, vrate); ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP( ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); + } else if (ioc->busy_level != prev_busy_level || nr_lagging) { + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), + &missed_ppm, rq_wait_pct, nr_lagging, + nr_shortages, nr_surpluses); } ioc_refresh_params(ioc, false); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c9d183d6c499..ca22afd47b3d 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - lockdep_assert_held(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i); diff --git a/block/blk-mq.c b/block/blk-mq.c 
index 29275f5a996f..6e3b15f70cd7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -918,7 +918,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - if (refcount_dec_and_test(&rq->ref)) + + if (is_flush_rq(rq, hctx)) + rq->end_io(rq, 0); + else if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); return true; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b82736c781c5..46f5198be017 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -482,7 +482,6 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, blk_mq_quiesce_queue(q); wbt_set_min_lat(q, val); - wbt_update_limits(q); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); @@ -989,13 +988,11 @@ int blk_register_queue(struct gendisk *disk) blk_mq_debugfs_register(q); } - /* - * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator - * switch won't happen at all. - */ + mutex_lock(&q->sysfs_lock); if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); blk_trace_remove_sysfs(dev); @@ -1005,7 +1002,6 @@ int blk_register_queue(struct gendisk *disk) has_elevator = true; } - mutex_lock(&q->sysfs_lock); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); wbt_enable_default(q); blk_throtl_register_queue(q); @@ -1062,12 +1058,10 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - /* - * q->kobj has been removed, so it is safe to check if elevator - * exists without holding q->sysfs_lock. 
- */ + mutex_lock(&q->sysfs_lock); if (q->elevator) elv_unregister_queue(q); + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/block/blk.h b/block/blk.h index ed347f7a97b1..47fba9362e60 100644 --- a/block/blk.h +++ b/block/blk.h @@ -19,6 +19,7 @@ struct blk_flush_queue { unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; + blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; @@ -47,6 +48,12 @@ static inline void __blk_get_queue(struct request_queue *q) kobject_get(&q->kobj); } +static inline bool +is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx) +{ + return hctx->fq->flush_rq == req; +} + struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); @@ -194,6 +201,8 @@ void elv_unregister_queue(struct request_queue *q); static inline void elevator_exit(struct request_queue *q, struct elevator_queue *e) { + lockdep_assert_held(&q->sysfs_lock); + blk_mq_sched_free_requests(q); __elevator_exit(q, e); } diff --git a/block/elevator.c b/block/elevator.c index bba10e83478a..5437059c9261 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent) if (uevent) kobject_uevent(&e->kobj, KOBJ_ADD); - mutex_lock(&q->sysfs_lock); e->registered = 1; - mutex_unlock(&q->sysfs_lock); } return error; } @@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q) kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); - mutex_lock(&q->sysfs_lock); e->registered = 0; /* Re-enable throttling in case elevator disabled it */ wbt_enable_default(q); - mutex_unlock(&q->sysfs_lock); } } @@ -590,32 +586,11 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock); if 
(q->elevator) { - if (q->elevator->registered) { - mutex_unlock(&q->sysfs_lock); - - /* - * Concurrent elevator switch can't happen becasue - * sysfs write is always exclusively on same file. - * - * Also the elevator queue won't be freed after - * sysfs_lock is released becasue kobject_del() in - * blk_unregister_queue() waits for completion of - * .store & .show on its attributes. - */ + if (q->elevator->registered) elv_unregister_queue(q); - mutex_lock(&q->sysfs_lock); - } ioc_clear_queue(q); elevator_exit(q, q->elevator); - - /* - * sysfs_lock may be dropped, so re-check if queue is - * unregistered. If yes, don't switch to new elevator - * any more - */ - if (!blk_queue_registered(q)) - return 0; } ret = blk_mq_init_sched(q, new_e); @@ -623,11 +598,7 @@ int elevator_switch_mq(struct request_queue *q, goto out; if (new_e) { - mutex_unlock(&q->sysfs_lock); - ret = elv_register_queue(q, true); - - mutex_lock(&q->sysfs_lock); if (ret) { elevator_exit(q, q->elevator); goto out; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 869d47f89599..6c0687694341 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -694,7 +694,7 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, } static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct mvebu_pwm *mvpwm = to_mvebu_pwm(chip); struct mvebu_gpio_chip *mvchip = mvpwm->mvchip; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 61bd10310604..5803fcbae22f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -948,6 +948,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: + case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); else @@ -956,7 
+957,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block break; case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: - case AMD_IP_BLOCK_TYPE_SDMA: ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bdf849da32e4..264677ab248a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1012,11 +1012,16 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + /* Navi12 */ + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + {0, 0, 0} }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7850084a05e3..60655834d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - fence_ctx = job->base.s_fence->scheduled.context; + fence_ctx = job->base.s_fence ? 
+ job->base.s_fence->scheduled.context : 0; } else { vm = NULL; fence_ctx = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0e2ec608530b..f6147528be64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -677,6 +677,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; + if (info->read_mmr_reg.count > 128) + return -EINVAL; + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); if (!regs) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index db28823891ac..638c821611ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -70,6 +70,11 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin"); MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi14_me.bin"); @@ -594,7 +599,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; + char wks[10]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -607,12 +613,16 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); + memset(wks, 0, sizeof(wks)); switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; break; case CHIP_NAVI14: chip_name = "navi14"; + if (!(adev->pdev->device == 0x7340 && + 
adev->pdev->revision != 0x00)) + snprintf(wks, sizeof(wks), "_wks"); break; case CHIP_NAVI12: chip_name = "navi12"; @@ -621,7 +631,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); if (err) goto out; @@ -632,7 +642,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); if (err) goto out; @@ -643,7 +653,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); if (err) goto out; @@ -708,7 +718,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.rlc.is_rlc_v2_1) gfx_v10_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -719,7 +729,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_mec2.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 83d45f98a461..dcadc73bffd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1650,7 +1650,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - case CHIP_RENOIR: gfx_v9_0_init_lbpw(adev); break; case CHIP_VEGA20: @@ -3026,7 +3025,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - case CHIP_RENOIR: if (amdgpu_lbpw == 0) gfx_v9_0_enable_lbpw(adev, false); else diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ff18b3a57892..78452cf0115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1889,8 +1889,9 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); if (!amdgpu_sriov_vf(adev)) @@ -1917,8 +1918,9 @@ static int sdma_v4_0_hw_fini(void *handle) sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs - && adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == 
CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 4a5951036927..c44723c267c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -493,7 +493,15 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control) } /* Restore clock gating */ - smu_v11_0_i2c_set_clock_gating(control, true); + + /* + * TODO Reenabling clock gating seems to break subsequent SMU operation + * on the I2C bus. My guess is that SMU doesn't disable clock gating like + * we do here before working with the bus. So for now just don't restore + * it but later work with SMU to see if they have this issue and can + * update their code appropriately + */ + /* smu_v11_0_i2c_set_clock_gating(control, true); */ } diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index a8cf82d46109..901fe3590165 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -694,10 +694,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x003f8000, 0x8f6f896f, 0x88776f77, 0x8a6eff6e, 0x023f8000, 0xb9eef807, - 0xb970f812, 0xb971f813, - 0x8ff08870, 0xf4051bb8, + 0xb97af812, 0xb97bf813, + 0x8ffa887a, 0xf4051bbd, 0xfa000000, 0xbf8cc07f, - 0xf4051c38, 0xfa000008, + 0xf4051ebd, 0xfa000008, 0xbf8cc07f, 0x87ee6e6e, 0xbf840001, 0xbe80206e, 0xb971f803, 0x8771ff71, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 35986219ce5f..cdaa523ce6be 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -187,12 +187,12 @@ L_FETCH_2ND_TRAP: // Read second-level TBA/TMA from first-level TMA and jump if available. 
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS - s_getreg_b32 ttmp4, hwreg(HW_REG_SHADER_TMA_LO) - s_getreg_b32 ttmp5, hwreg(HW_REG_SHADER_TMA_HI) - s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 - s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) + s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) - s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e1b09bb432bd..8cab6da512a0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2113,6 +2113,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) } static const struct backlight_ops amdgpu_dm_backlight_ops = { + .options = BL_CORE_SUSPENDRESUME, .get_brightness = amdgpu_dm_backlight_get_brightness, .update_status = amdgpu_dm_backlight_update_status, }; @@ -2384,6 +2385,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; + if (adev->asic_type == CHIP_RENOIR) + dm->dc->debug.disable_stutter = true; return 0; fail: @@ -5770,8 +5773,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * change FB pitch, DCC state, rotation or mirroing. 
*/ bundle->flip_addrs[planes_count].flip_immediate = - (crtc->state->pageflip_flags & - DRM_MODE_PAGE_FLIP_ASYNC) != 0 && + crtc->state->async_flip && acrtc_state->update_type == UPDATE_TYPE_FAST; timestamp_ns = ktime_get_ns(); @@ -6348,7 +6350,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_enable_crtc_interrupts(dev, state, true); for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) - if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) + if (new_crtc_state->async_flip) wait_for_vblank = false; /* update planes when needed per crtc*/ diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 383f4f8db8f4..9b2cb57bf2ba 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -708,6 +708,10 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev) { + /* for dali, the highest voltage level we want is 0 */ + if (ASICREV_IS_DALI(hw_internal_rev)) + return 0; + /* we are ok with all levels */ return 4; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 5cc3acccda2a..b1e657e137a9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -98,11 +98,14 @@ uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context) struct dc_stream_state *stream = context->streams[j]; uint32_t vertical_blank_in_pixels = 0; uint32_t vertical_blank_time = 0; + uint32_t vertical_total_min = stream->timing.v_total; + struct dc_crtc_timing_adjust adjust = stream->adjust; + if (adjust.v_total_max != adjust.v_total_min) + vertical_total_min = adjust.v_total_min; vertical_blank_in_pixels = stream->timing.h_total * - (stream->timing.v_total + 
(vertical_total_min - stream->timing.v_addressable); - vertical_blank_time = vertical_blank_in_pixels * 10000 / stream->timing.pix_clk_100hz; @@ -171,6 +174,10 @@ void dce11_pplib_apply_display_requirements( struct dc_state *context) { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; pp_display_cfg->all_displays_in_sync = context->bw_ctx.bw.dce.all_displays_in_sync; @@ -183,8 +190,20 @@ void dce11_pplib_apply_display_requirements( pp_display_cfg->cpu_pstate_separation_time = context->bw_ctx.bw.dce.blackout_recovery_time_us; - pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz - / MEMORY_TYPE_MULTIPLIER_CZ; + /* + * TODO: determine whether the bandwidth has reached memory's limitation + * , then change minimum memory clock based on real-time bandwidth + * limitation. + */ + if (ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { + pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, + (uint32_t) div64_s64( + div64_s64(dc->bw_vbios->high_yclk.value, + memory_type_multiplier), 10000)); + } else { + pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz + / memory_type_multiplier; + } pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box( dc, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 1488ffddf4e3..31b698bf9cfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -148,7 +148,7 @@ static void dce_mi_program_pte_vm( pte->min_pte_before_flip_horiz_scan; REG_UPDATE(GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, - GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0xff); + GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0x7f); 
REG_UPDATE_3(DVMM_PTE_CONTROL, DVMM_PAGE_WIDTH, page_width, @@ -157,7 +157,7 @@ static void dce_mi_program_pte_vm( REG_UPDATE_2(DVMM_PTE_ARB_CONTROL, DVMM_PTE_REQ_PER_CHUNK, pte->pte_req_per_chunk, - DVMM_MAX_PTE_REQ_OUTSTANDING, 0xff); + DVMM_MAX_PTE_REQ_OUTSTANDING, 0x7f); } static void program_urgency_watermark( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index afc61055eca1..1787b9bf800a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -1091,6 +1091,7 @@ struct resource_pool *dce100_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index c66fe170e1e8..318e9c2e2ca8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1462,6 +1462,7 @@ struct resource_pool *dce110_create_resource_pool( if (construct(num_virtual_links, dc, pool, asic_id)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 3ac4c7e73050..83e1878161c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -987,6 +987,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels_with_latency mem_clks = {0}; struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; struct dm_pp_clock_levels clks = {0}; + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO 
PPLIB: after PPLIB implement, * then remove old way @@ -1026,12 +1030,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + clks.clocks_in_khz[0] * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, + clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, + clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, 1000); return; @@ -1067,12 +1071,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select @@ -1338,6 +1342,7 @@ struct resource_pool *dce112_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 7d08154e9662..8b85e5274bba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -847,6 +847,8 @@ static void 
bw_calcs_data_update_from_pplib(struct dc *dc) int i; unsigned int clk; unsigned int latency; + /*original logic in dal3*/ + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; /*do system clock*/ if (!dm_pp_get_clock_levels_by_type_with_latency( @@ -905,13 +907,16 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): * YCLK = UMACLK*m_memoryTypeMultiplier */ + if (dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; + dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select @@ -1203,6 +1208,7 @@ struct resource_pool *dce120_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 5a89e462e7cc..59305e411a66 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1570,6 +1570,7 @@ struct resource_pool *dcn10_create_resource_pool( if (construct(init_data->num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c 
index 3ca5139f1273..de182185fe1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -23,6 +23,8 @@ * */ +#include + #include "dm_services.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c index 34485d9de78a..8572678f8d4f 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c @@ -35,12 +35,10 @@ #include "hw_factory_dcn21.h" - #include "dcn/dcn_2_1_0_offset.h" #include "dcn/dcn_2_1_0_sh_mask.h" #include "renoir_ip_offset.h" - #include "reg_helper.h" #include "../hpd_regs.h" /* begin ********************* @@ -136,6 +134,39 @@ static const struct ddc_sh_mask ddc_mask[] = { DDC_MASK_SH_LIST_DCN2(_MASK, 6) }; +#include "../generic_regs.h" + +/* set field name */ +#define SF_GENERIC(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define generic_regs(id) \ +{\ + GENERIC_REG_LIST(id)\ +} + +static const struct generic_registers generic_regs[] = { + generic_regs(A), +}; + +static const struct generic_sh_mask generic_shift[] = { + GENERIC_MASK_SH_LIST(__SHIFT, A), +}; + +static const struct generic_sh_mask generic_mask[] = { + GENERIC_MASK_SH_LIST(_MASK, A), +}; + +static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en) +{ + struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin); + + generic->regs = &generic_regs[en]; + generic->shifts = &generic_shift[en]; + generic->masks = &generic_mask[en]; + generic->base.regs = &generic_regs[en].gpio; +} + static void define_ddc_registers( struct hw_gpio_pin *pin, uint32_t en) @@ -181,7 +212,8 @@ static const struct hw_factory_funcs funcs = { .get_hpd_pin = dal_hw_hpd_get_pin, .get_generic_pin = dal_hw_generic_get_pin, .define_hpd_registers = define_hpd_registers, - .define_ddc_registers = 
define_ddc_registers + .define_ddc_registers = define_ddc_registers, + .define_generic_registers = define_generic_registers }; /* * dal_hw_factory_dcn10_init diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c index ad7c43746291..fbb58fb8c318 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c @@ -58,7 +58,6 @@ #define SF_HPD(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix - /* macros to expend register list macro defined in HW object header file * end *********************/ @@ -71,7 +70,7 @@ static bool offset_to_id( { switch (offset) { /* GENERIC */ - case REG(DC_GENERICA): + case REG(DC_GPIO_GENERIC_A): *id = GPIO_ID_GENERIC; switch (mask) { case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK: diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 1cc1c8ce633b..bef224bf803e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -31,6 +31,8 @@ #include "dm_pp_smu.h" #define MEMORY_TYPE_MULTIPLIER_CZ 4 +#define MEMORY_TYPE_HBM 2 + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 1f16892f0add..1be6c44fd32f 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -137,10 +137,13 @@ #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF +#define PICASSO_15D8_REV_E3 0xE3 +#define PICASSO_15D8_REV_E4 0xE4 + #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) -#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) 
&& (eChipRev < 0xF0)) - +#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < PICASSO_15D8_REV_E3)) +#define ASICREV_IS_DALI(eChipRev) ((eChipRev >= PICASSO_15D8_REV_E3) && (eChipRev < RAVEN1_F0)) #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) diff --git a/drivers/gpu/drm/amd/include/renoir_ip_offset.h b/drivers/gpu/drm/amd/include/renoir_ip_offset.h index 554714c8e000..094648cac392 100644 --- a/drivers/gpu/drm/amd/include/renoir_ip_offset.h +++ b/drivers/gpu/drm/amd/include/renoir_ip_offset.h @@ -155,7 +155,7 @@ static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0x0243FC00, 0x00DC0000 { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE MP1_BASE ={ { { { 0x00016200, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } }, +static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index fa636cb462c1..fa8ad7db2b3a 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1531,6 +1531,7 @@ static int pp_asic_reset_mode_2(void *handle) static int pp_smu_i2c_bus_access(void *handle, bool acquire) { struct pp_hwmgr *hwmgr = handle; + int ret = 0; if (!hwmgr || !hwmgr->pm_en) return -EINVAL; @@ -1540,7 +1541,11 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire) return -EINVAL; } - return hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire); + mutex_lock(&hwmgr->smu_lock); + ret = hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire); + mutex_unlock(&hwmgr->smu_lock); + + return ret; } static const struct amd_pm_funcs pp_dpm_funcs = { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 
22f3c60d380f..33960fb38a5d 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -354,6 +354,9 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, case AMD_IP_BLOCK_TYPE_GFX: ret = smu_gfx_off_control(smu, gate); break; + case AMD_IP_BLOCK_TYPE_SDMA: + ret = smu_powergate_sdma(smu, gate); + break; default: break; } diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 2a6da546fb55..e62bfba51562 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -177,12 +177,82 @@ static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock, } +static int renoir_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, char *buf) +{ + int i, size = 0, ret = 0; + uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + SmuMetrics_t metrics = {0}; + + if (!clk_table || clk_type >= SMU_CLK_COUNT) + return -EINVAL; + + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, + (void *)&metrics, false); + if (ret) + return ret; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + /* retirve table returned paramters unit is MHz */ + cur_value = metrics.ClockFrequency[CLOCK_GFXCLK]; + ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max); + if (!ret) { + /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ + if (cur_value == max) + i = 2; + else if (cur_value == min) + i = 0; + else + i = 1; + + size += sprintf(buf + size, "0: %uMhz %s\n", min, + i == 0 ? "*" : ""); + size += sprintf(buf + size, "1: %uMhz %s\n", + i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK, + i == 1 ? "*" : ""); + size += sprintf(buf + size, "2: %uMhz %s\n", max, + i == 2 ? 
"*" : ""); + } + return size; + case SMU_SOCCLK: + count = NUM_SOCCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_SOCCLK]; + break; + case SMU_MCLK: + count = NUM_MEMCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_UMCCLK]; + break; + case SMU_DCEFCLK: + count = NUM_DCFCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_DCFCLK]; + break; + case SMU_FCLK: + count = NUM_FCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_FCLK]; + break; + default: + return -EINVAL; + } + + for (i = 0; i < count; i++) { + GET_DPM_CUR_FREQ(clk_table, clk_type, i, value); + size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + cur_value == value ? "*" : ""); + } + + return size; +} + static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_msg_index = renoir_get_smu_msg_index, .get_smu_table_index = renoir_get_smu_table_index, .tables_init = renoir_tables_init, .set_power_state = NULL, .get_dpm_uclk_limited = renoir_get_dpm_uclk_limited, + .print_clk_levels = renoir_print_clk_levels, }; void renoir_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h index e9b7237c0f7f..2a390ddd37dd 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h @@ -25,4 +25,29 @@ extern void renoir_set_ppt_funcs(struct smu_context *smu); +/* UMD PState Renoir Msg Parameters in MHz */ +#define RENOIR_UMD_PSTATE_GFXCLK 700 +#define RENOIR_UMD_PSTATE_SOCCLK 678 +#define RENOIR_UMD_PSTATE_FCLK 800 + +#define GET_DPM_CUR_FREQ(table, clk_type, dpm_level, freq) \ + do { \ + switch (clk_type) { \ + case SMU_SOCCLK: \ + freq = table->SocClocks[dpm_level].Freq; \ + break; \ + case SMU_MCLK: \ + freq = table->MemClocks[dpm_level].Freq; \ + break; \ + case SMU_DCEFCLK: \ + freq = table->DcfClocks[dpm_level].Freq; \ + break; \ + case SMU_FCLK: \ + freq = table->FClocks[dpm_level].Freq; \ + break; \ + default: \ + break; \ + } \ + } while (0) + 
#endif diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 3d61c4fb4dec..9e13e466e72c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -874,6 +874,9 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge) &adv7511_connector_helper_funcs); drm_connector_attach_encoder(&adv->connector, bridge->encoder); + if (adv->type == ADV7533) + ret = adv7533_attach_dsi(adv); + if (adv->i2c_main->irq) regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(0), ADV7511_INT0_HPD); @@ -1219,17 +1222,8 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); - - if (adv7511->type == ADV7533) { - ret = adv7533_attach_dsi(adv7511); - if (ret) - goto err_remove_bridge; - } - return 0; -err_remove_bridge: - drm_bridge_remove(&adv7511->bridge); err_unregister_cec: i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index aa16ea17ff9b..3ef2ac52ce94 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -26,6 +26,7 @@ */ #include +#include #include #include @@ -1580,9 +1581,23 @@ static void commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; const struct drm_mode_config_helper_funcs *funcs; + ktime_t start; + s64 commit_time_ms; funcs = dev->mode_config.helper_private; + /* + * We're measuring the _entire_ commit, so the time will vary depending + * on how many fences and objects are involved. For the purposes of self + * refresh, this is desirable since it'll give us an idea of how + * congested things are. This will inform our decision on how often we + * should enter self refresh after idle. 
+ * + * These times will be averaged out in the self refresh helpers to avoid + * overreacting over one outlier frame + */ + start = ktime_get(); + drm_atomic_helper_wait_for_fences(dev, old_state, false); drm_atomic_helper_wait_for_dependencies(old_state); @@ -1592,6 +1607,11 @@ static void commit_tail(struct drm_atomic_state *old_state) else drm_atomic_helper_commit_tail(old_state); + commit_time_ms = ktime_ms_delta(ktime_get(), start); + if (commit_time_ms > 0) + drm_self_refresh_helper_update_avg_times(old_state, + (unsigned long)commit_time_ms); + drm_atomic_helper_commit_cleanup_done(old_state); drm_atomic_state_put(old_state); @@ -3275,7 +3295,7 @@ static int page_flip_common(struct drm_atomic_state *state, return PTR_ERR(crtc_state); crtc_state->event = event; - crtc_state->pageflip_flags = flags; + crtc_state->async_flip = flags & DRM_MODE_PAGE_FLIP_ASYNC; plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 46dc264a248b..d0a937fb0c56 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -128,7 +128,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc, state->zpos_changed = false; state->commit = NULL; state->event = NULL; - state->pageflip_flags = 0; + state->async_flip = false; /* Self refresh should be canceled when a new update is available */ state->active = drm_atomic_crtc_effectively_active(state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 5a5b42db6f2a..7a26bfb5329c 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1305,8 +1305,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, if (arg->reserved) return -EINVAL; - if ((arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) && - !dev->mode_config.async_page_flip) + if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) return -EINVAL; 
/* can't test and expect an event at the same time. */ diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c456c3d3def2..769feefeeeef 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -976,14 +976,14 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) if (ret) goto err_minors; + dev->registered = true; + if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) goto err_minors; } - dev->registered = true; - if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_register_all(dev); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f675a3bb2c88..fcd728d7cf72 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -336,7 +336,12 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) case DRM_CLIENT_CAP_ATOMIC: if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) return -EOPNOTSUPP; - if (req->value > 1) + /* The modesetting DDX has a totally broken idea of atomic. */ + if (current->comm[0] == 'X' && req->value == 1) { + pr_info("broken atomic modeset userspace detected, disabling atomic\n"); + return -EOPNOTSUPP; + } + if (req->value > 2) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index c355ba8e6d5d..6a23e36ed4fe 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -42,7 +42,7 @@ int __drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj, { int ret; - WARN_ON(dev->registered && !obj_free_cb); + WARN_ON(!dev->driver->load && dev->registered && !obj_free_cb); mutex_lock(&dev->mode_config.idr_mutex); ret = idr_alloc(&dev->mode_config.object_idr, register_obj ? 
obj : NULL, @@ -104,7 +104,7 @@ void drm_mode_object_register(struct drm_device *dev, void drm_mode_object_unregister(struct drm_device *dev, struct drm_mode_object *object) { - WARN_ON(dev->registered && !object->free_cb); + WARN_ON(!dev->driver->load && dev->registered && !object->free_cb); mutex_lock(&dev->mode_config.idr_mutex); if (object->id) { diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 4b9424a8f1f1..68f4765a5896 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -5,6 +5,7 @@ * Authors: * Sean Paul */ +#include #include #include #include @@ -50,11 +51,17 @@ * atomic_check when &drm_crtc_state.self_refresh_active is true. */ +#define SELF_REFRESH_AVG_SEED_MS 200 + +DECLARE_EWMA(psr_time, 4, 4) + struct drm_self_refresh_data { struct drm_crtc *crtc; struct delayed_work entry_work; - struct drm_atomic_state *save_state; - unsigned int entry_delay_ms; + + struct mutex avg_mutex; + struct ewma_psr_time entry_avg_ms; + struct ewma_psr_time exit_avg_ms; }; static void drm_self_refresh_helper_entry_work(struct work_struct *work) @@ -122,6 +129,44 @@ out_drop_locks: drm_modeset_acquire_fini(&ctx); } +/** + * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages + * @state: the state which has just been applied to hardware + * @commit_time_ms: the amount of time in ms that this commit took to complete + * + * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will + * update the average entry/exit self refresh times on self refresh transitions. + * These averages will be used when calculating how long to delay before + * entering self refresh mode after activity. 
+ */ +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; + struct ewma_psr_time *time; + + if (old_crtc_state->self_refresh_active == + new_crtc_state->self_refresh_active) + continue; + + if (new_crtc_state->self_refresh_active) + time = &sr_data->entry_avg_ms; + else + time = &sr_data->exit_avg_ms; + + mutex_lock(&sr_data->avg_mutex); + ewma_psr_time_add(time, commit_time_ms); + mutex_unlock(&sr_data->avg_mutex); + } +} +EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times); + /** * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit * @state: the state currently being checked @@ -153,6 +198,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state) for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct drm_self_refresh_data *sr_data; + unsigned int delay; /* Don't trigger the entry timer when we're already in SR */ if (crtc_state->self_refresh_active) @@ -162,8 +208,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state) if (!sr_data) continue; + mutex_lock(&sr_data->avg_mutex); + delay = (ewma_psr_time_read(&sr_data->entry_avg_ms) + + ewma_psr_time_read(&sr_data->exit_avg_ms)) * 2; + mutex_unlock(&sr_data->avg_mutex); + mod_delayed_work(system_wq, &sr_data->entry_work, - msecs_to_jiffies(sr_data->entry_delay_ms)); + msecs_to_jiffies(delay)); } } EXPORT_SYMBOL(drm_self_refresh_helper_alter_state); @@ -171,12 +222,10 @@ EXPORT_SYMBOL(drm_self_refresh_helper_alter_state); /** * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc * @crtc: the crtc which supports self refresh supported displays - * @entry_delay_ms: amount of inactivity to wait before entering self 
refresh * * Returns zero if successful or -errno on failure */ -int drm_self_refresh_helper_init(struct drm_crtc *crtc, - unsigned int entry_delay_ms) +int drm_self_refresh_helper_init(struct drm_crtc *crtc) { struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; @@ -190,8 +239,18 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc, INIT_DELAYED_WORK(&sr_data->entry_work, drm_self_refresh_helper_entry_work); - sr_data->entry_delay_ms = entry_delay_ms; sr_data->crtc = crtc; + mutex_init(&sr_data->avg_mutex); + ewma_psr_time_init(&sr_data->entry_avg_ms); + ewma_psr_time_init(&sr_data->exit_avg_ms); + + /* + * Seed the averages so they're non-zero (and sufficiently large + * for even poorly performing panels). As time goes on, this will be + * averaged out and the values will trend to their true value. + */ + ewma_psr_time_add(&sr_data->entry_avg_ms, SELF_REFRESH_AVG_SEED_MS); + ewma_psr_time_add(&sr_data->exit_avg_ms, SELF_REFRESH_AVG_SEED_MS); crtc->self_refresh_data = sr_data; return 0; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 2db029371c91..5193b6257061 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -267,7 +267,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset, asyw->image.pitch[0] = fb->base.pitches[0]; } - if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)) + if (!asyh->state.async_flip) asyw->image.interval = 1; else asyw->image.interval = 0; @@ -383,7 +383,7 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw, } /* Can't do an immediate flip while changing the LUT. 
*/ - asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC; + asyh->state.async_flip = false; } static int diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index a1f5fa6a742a..12ff77dacc95 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -39,7 +39,7 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. */ - if (old_clk_rate < target_rate && pfdev->regulator) { + if (old_clk_rate < target_rate) { err = regulator_set_voltage(pfdev->regulator, target_volt, target_volt); if (err) { @@ -53,14 +53,12 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, if (err) { dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate, err); - if (pfdev->regulator) - regulator_set_voltage(pfdev->regulator, - pfdev->devfreq.cur_volt, - pfdev->devfreq.cur_volt); + regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt, + pfdev->devfreq.cur_volt); return err; } - if (old_clk_rate > target_rate && pfdev->regulator) { + if (old_clk_rate > target_rate) { err = regulator_set_voltage(pfdev->regulator, target_volt, target_volt); if (err) diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 46b0b02e4289..238fb6d54df4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -89,12 +89,9 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev) { int ret; - pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali"); + pfdev->regulator = devm_regulator_get(pfdev->dev, "mali"); if (IS_ERR(pfdev->regulator)) { ret = PTR_ERR(pfdev->regulator); - pfdev->regulator = NULL; - if (ret == -ENODEV) - return 0; dev_err(pfdev->dev, "failed to get regulator: %d\n", ret); return ret; } @@ 
-110,8 +107,7 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev) static void panfrost_regulator_fini(struct panfrost_device *pfdev) { - if (pfdev->regulator) - regulator_disable(pfdev->regulator); + regulator_disable(pfdev->regulator); } int panfrost_device_init(struct panfrost_device *pfdev) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 6010f9ee7c1f..bdd990568476 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -394,28 +394,40 @@ void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv) free_io_pgtable_ops(mmu->pgtbl_ops); } -static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) +static struct panfrost_gem_object * +addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) { - struct drm_mm_node *node = NULL; + struct panfrost_gem_object *bo = NULL; + struct panfrost_file_priv *priv; + struct drm_mm_node *node; u64 offset = addr >> PAGE_SHIFT; struct panfrost_mmu *mmu; spin_lock(&pfdev->as_lock); list_for_each_entry(mmu, &pfdev->as_lru_list, list) { - struct panfrost_file_priv *priv; - if (as != mmu->as) - continue; + if (as == mmu->as) + break; + } + if (as != mmu->as) + goto out; - priv = container_of(mmu, struct panfrost_file_priv, mmu); - drm_mm_for_each_node(node, &priv->mm) { - if (offset >= node->start && offset < (node->start + node->size)) - goto out; + priv = container_of(mmu, struct panfrost_file_priv, mmu); + + spin_lock(&priv->mm_lock); + + drm_mm_for_each_node(node, &priv->mm) { + if (offset >= node->start && + offset < (node->start + node->size)) { + bo = drm_mm_node_to_panfrost_bo(node); + drm_gem_object_get(&bo->base.base); + break; } } + spin_unlock(&priv->mm_lock); out: spin_unlock(&pfdev->as_lock); - return node; + return bo; } #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) @@ -423,29 +435,28 @@ out: int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int 
as, u64 addr) { int ret, i; - struct drm_mm_node *node; struct panfrost_gem_object *bo; struct address_space *mapping; pgoff_t page_offset; struct sg_table *sgt; struct page **pages; - node = addr_to_drm_mm_node(pfdev, as, addr); - if (!node) + bo = addr_to_drm_mm_node(pfdev, as, addr); + if (!bo) return -ENOENT; - bo = drm_mm_node_to_panfrost_bo(node); if (!bo->is_heap) { dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)", - node->start << PAGE_SHIFT); - return -EINVAL; + bo->node.start << PAGE_SHIFT); + ret = -EINVAL; + goto err_bo; } WARN_ON(bo->mmu->as != as); /* Assume 2MB alignment and size multiple */ addr &= ~((u64)SZ_2M - 1); page_offset = addr >> PAGE_SHIFT; - page_offset -= node->start; + page_offset -= bo->node.start; mutex_lock(&bo->base.pages_lock); @@ -454,7 +465,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO); if (!bo->sgts) { mutex_unlock(&bo->base.pages_lock); - return -ENOMEM; + ret = -ENOMEM; + goto err_bo; } pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT, @@ -463,7 +475,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) kfree(bo->sgts); bo->sgts = NULL; mutex_unlock(&bo->base.pages_lock); - return -ENOMEM; + ret = -ENOMEM; + goto err_bo; } bo->base.pages = pages; bo->base.pages_use_count = 1; @@ -501,12 +514,16 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr); + drm_gem_object_put_unlocked(&bo->base.base); + return 0; err_map: sg_free_table(sgt); err_pages: drm_gem_shmem_put_pages(&bo->base); +err_bo: + drm_gem_object_put_unlocked(&bo->base.base); return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 431e6b64b77d..d0bc91ed7c90 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -324,8 +324,39 @@ bool 
radeon_device_is_virtual(void); static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + unsigned long flags = 0; int ret; + if (!ent) + return -ENODEV; /* Avoid NULL-ptr deref in drm_get_pci_dev */ + + flags = ent->driver_data; + + if (!radeon_si_support) { + switch (flags & RADEON_FAMILY_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support disabled by module param\n"); + return -ENODEV; + } + } + if (!radeon_cik_support) { + switch (flags & RADEON_FAMILY_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support disabled by module param\n"); + return -ENODEV; + } + } + if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 07f7ace42c4b..e85c554eeaa9 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -100,31 +100,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) struct radeon_device *rdev; int r, acpi_status; - if (!radeon_si_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support disabled by module param\n"); - return -ENODEV; - } - } - if (!radeon_cik_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support disabled by module param\n"); - return -ENODEV; - } - } - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); if (rdev == NULL) { return -ENOMEM; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 2f821c58007c..613404f86668 100644 --- 
a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -39,8 +39,6 @@ #include "rockchip_drm_vop.h" #include "rockchip_rgb.h" -#define VOP_SELF_REFRESH_ENTRY_DELAY_MS 100 - #define VOP_WIN_SET(vop, win, name, v) \ vop_reg_set(vop, &win->phy->name, win->base, ~0, v, #name) #define VOP_SCL_SET(vop, win, name, v) \ @@ -1563,8 +1561,7 @@ static int vop_create_crtc(struct vop *vop) init_completion(&vop->line_flag_completion); crtc->port = port; - ret = drm_self_refresh_helper_init(crtc, - VOP_SELF_REFRESH_ENTRY_DELAY_MS); + ret = drm_self_refresh_helper_init(crtc); if (ret) DRM_DEV_DEBUG_KMS(vop->dev, "Failed to init %s with SR helpers %d, ignoring\n", diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 2859cc99b73e..156c2a18a239 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -78,7 +78,7 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx) if (idx < 0 || idx > ndev->mw_count) return -EINVAL; - return 1 << idx; + return ndev->dev_data->mw_idx << idx; } static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx) @@ -909,7 +909,7 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev) { void __iomem *mmio = ndev->self_mmio; - ndev->mw_count = AMD_MW_CNT; + ndev->mw_count = ndev->dev_data->mw_count; ndev->spad_count = AMD_SPADS_CNT; ndev->db_count = AMD_DB_CNT; @@ -1069,6 +1069,8 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev, goto err_ndev; } + ndev->dev_data = (struct ntb_dev_data *)id->driver_data; + ndev_init_struct(ndev, pdev); rc = amd_ntb_init_pci(ndev, pdev); @@ -1123,9 +1125,21 @@ static const struct file_operations amd_ntb_debugfs_info = { .read = ndev_debugfs_read, }; +static const struct ntb_dev_data dev_data[] = { + { /* for device 145b */ + .mw_count = 3, + .mw_idx = 1, + }, + { /* for device 148b */ + .mw_count = 2, + .mw_idx = 2, + }, +}; + static const struct pci_device_id amd_ntb_pci_tbl[] = { - {PCI_VDEVICE(AMD, 
PCI_DEVICE_ID_AMD_NTB)}, - {0} + { PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] }, + { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] }, + { 0, } }; MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl); diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h index 8f3617a46292..139a307147bc 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.h +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h @@ -52,7 +52,6 @@ #include #include -#define PCI_DEVICE_ID_AMD_NTB 0x145B #define AMD_LINK_HB_TIMEOUT msecs_to_jiffies(1000) #define AMD_LINK_STATUS_OFFSET 0x68 #define NTB_LIN_STA_ACTIVE_BIT 0x00000002 @@ -93,7 +92,6 @@ static inline void _write64(u64 val, void __iomem *mmio) enum { /* AMD NTB Capability */ - AMD_MW_CNT = 3, AMD_DB_CNT = 16, AMD_MSIX_VECTOR_CNT = 24, AMD_SPADS_CNT = 16, @@ -170,6 +168,11 @@ enum { AMD_PEER_OFFSET = 0x400, }; +struct ntb_dev_data { + const unsigned char mw_count; + const unsigned int mw_idx; +}; + struct amd_ntb_dev; struct amd_ntb_vec { @@ -185,6 +188,7 @@ struct amd_ntb_dev { u32 cntl_sta; u32 peer_sta; + struct ntb_dev_data *dev_data; unsigned char mw_count; unsigned char spad_count; unsigned char db_count; diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig index bfc7cac94102..c79b54c1747d 100644 --- a/drivers/ntb/hw/idt/Kconfig +++ b/drivers/ntb/hw/idt/Kconfig @@ -4,11 +4,11 @@ config NTB_IDT depends on PCI select HWMON help - This driver supports NTB of cappable IDT PCIe-switches. + This driver supports NTB of capable IDT PCIe-switches. Some of the pre-initializations must be made before IDT PCIe-switch - exposes it NT-functions correctly. It should be done by either proper - initialisation of EEPROM connected to master smbus of the switch or + exposes its NT-functions correctly. It should be done by either proper + initialization of EEPROM connected to master SMbus of the switch or by BIOS using slave-SMBus interface changing corresponding registers value. 
Evidently it must be done before PCI bus enumeration is finished in Linux kernel. diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index f4959458d909..86ffa716eaf2 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -306,7 +306,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (rc) return rc; - if (addr == 0 || size == 0) { + if (size == 0) { if (widx < nr_direct_mw) switchtec_ntb_mw_clr_direct(sndev, widx); else diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 40c90ca10729..00a5d5764993 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -292,7 +292,7 @@ static int ntb_transport_bus_match(struct device *dev, static int ntb_transport_bus_probe(struct device *dev) { const struct ntb_transport_client *client; - int rc = -EINVAL; + int rc; get_device(dev); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index d028331558ea..e9b7c2dfc730 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -1378,7 +1378,7 @@ static int perf_setup_peer_mw(struct perf_peer *peer) int ret; /* Get outbound MW parameters and map it */ - ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, + ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr, &peer->outbuf_size); if (ret) return ret; diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index b0e632ba8590..e3a2518503ed 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -44,7 +44,7 @@ config PWM_AB8500 config PWM_ATMEL tristate "Atmel PWM support" - depends on ARCH_AT91 + depends on ARCH_AT91 && OF help Generic PWM framework driver for Atmel SoC. @@ -423,6 +423,17 @@ config PWM_SPEAR To compile this driver as a module, choose M here: the module will be called pwm-spear. 
+config PWM_SPRD + tristate "Spreadtrum PWM support" + depends on ARCH_SPRD || COMPILE_TEST + depends on HAS_IOMEM + help + Generic PWM framework driver for the PWM controller on + Spreadtrum SoCs. + + To compile this driver as a module, choose M here: the module + will be called pwm-sprd. + config PWM_STI tristate "STiH4xx PWM support" depends on ARCH_STI diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index 76b555b51887..26326adf71d7 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o obj-$(CONFIG_PWM_SIFIVE) += pwm-sifive.o obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o +obj-$(CONFIG_PWM_SPRD) += pwm-sprd.o obj-$(CONFIG_PWM_STI) += pwm-sti.o obj-$(CONFIG_PWM_STM32) += pwm-stm32.o obj-$(CONFIG_PWM_STM32_LP) += pwm-stm32-lp.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 8edfac17364e..6ad51aa60c03 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -448,36 +448,44 @@ EXPORT_SYMBOL_GPL(pwm_free); /** * pwm_apply_state() - atomically apply a new state to a PWM device * @pwm: PWM device - * @state: new state to apply. This can be adjusted by the PWM driver - * if the requested config is not achievable, for example, - * ->duty_cycle and ->period might be approximated. 
+ * @state: new state to apply */ -int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) +int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) { + struct pwm_chip *chip; int err; if (!pwm || !state || !state->period || state->duty_cycle > state->period) return -EINVAL; + chip = pwm->chip; + if (state->period == pwm->state.period && state->duty_cycle == pwm->state.duty_cycle && state->polarity == pwm->state.polarity && state->enabled == pwm->state.enabled) return 0; - if (pwm->chip->ops->apply) { - err = pwm->chip->ops->apply(pwm->chip, pwm, state); + if (chip->ops->apply) { + err = chip->ops->apply(chip, pwm, state); if (err) return err; - pwm->state = *state; + /* + * .apply might have to round some values in *state, if possible + * read the actually implemented value back. + */ + if (chip->ops->get_state) + chip->ops->get_state(chip, pwm, &pwm->state); + else + pwm->state = *state; } else { /* * FIXME: restore the initial state in case of error. */ if (state->polarity != pwm->state.polarity) { - if (!pwm->chip->ops->set_polarity) + if (!chip->ops->set_polarity) return -ENOTSUPP; /* @@ -486,12 +494,12 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) * ->apply(). 
*/ if (pwm->state.enabled) { - pwm->chip->ops->disable(pwm->chip, pwm); + chip->ops->disable(chip, pwm); pwm->state.enabled = false; } - err = pwm->chip->ops->set_polarity(pwm->chip, pwm, - state->polarity); + err = chip->ops->set_polarity(chip, pwm, + state->polarity); if (err) return err; @@ -500,9 +508,9 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) if (state->period != pwm->state.period || state->duty_cycle != pwm->state.duty_cycle) { - err = pwm->chip->ops->config(pwm->chip, pwm, - state->duty_cycle, - state->period); + err = chip->ops->config(pwm->chip, pwm, + state->duty_cycle, + state->period); if (err) return err; @@ -512,11 +520,11 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) if (state->enabled != pwm->state.enabled) { if (state->enabled) { - err = pwm->chip->ops->enable(pwm->chip, pwm); + err = chip->ops->enable(chip, pwm); if (err) return err; } else { - pwm->chip->ops->disable(pwm->chip, pwm); + chip->ops->disable(chip, pwm); } pwm->state.enabled = state->enabled; diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index d13a83f430ac..dcbc0489dfd4 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -39,7 +39,7 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) } static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); struct atmel_hlcdc *hlcdc = chip->hlcdc; diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index e5e1eaf372fa..9ba733467e26 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -209,7 +209,7 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm, } static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct atmel_pwm_chip *atmel_pwm = 
to_atmel_pwm_chip(chip); struct pwm_state cstate; @@ -318,19 +318,6 @@ static const struct atmel_pwm_data mchp_sam9x60_pwm_data = { }, }; -static const struct platform_device_id atmel_pwm_devtypes[] = { - { - .name = "at91sam9rl-pwm", - .driver_data = (kernel_ulong_t)&atmel_sam9rl_pwm_data, - }, { - .name = "sama5d3-pwm", - .driver_data = (kernel_ulong_t)&atmel_sama5_pwm_data, - }, { - /* sentinel */ - }, -}; -MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes); - static const struct of_device_id atmel_pwm_dt_ids[] = { { .compatible = "atmel,at91sam9rl-pwm", @@ -350,34 +337,20 @@ static const struct of_device_id atmel_pwm_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); -static inline const struct atmel_pwm_data * -atmel_pwm_get_driver_data(struct platform_device *pdev) -{ - const struct platform_device_id *id; - - if (pdev->dev.of_node) - return of_device_get_match_data(&pdev->dev); - - id = platform_get_device_id(pdev); - - return (struct atmel_pwm_data *)id->driver_data; -} - static int atmel_pwm_probe(struct platform_device *pdev) { - const struct atmel_pwm_data *data; struct atmel_pwm_chip *atmel_pwm; struct resource *res; int ret; - data = atmel_pwm_get_driver_data(pdev); - if (!data) - return -ENODEV; - atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL); if (!atmel_pwm) return -ENOMEM; + mutex_init(&atmel_pwm->isr_lock); + atmel_pwm->data = of_device_get_match_data(&pdev->dev); + atmel_pwm->updated_pwms = 0; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(atmel_pwm->base)) @@ -395,17 +368,10 @@ static int atmel_pwm_probe(struct platform_device *pdev) atmel_pwm->chip.dev = &pdev->dev; atmel_pwm->chip.ops = &atmel_pwm_ops; - - if (pdev->dev.of_node) { - atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; - atmel_pwm->chip.of_pwm_n_cells = 3; - } - + atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; + atmel_pwm->chip.of_pwm_n_cells = 3; atmel_pwm->chip.base 
= -1; atmel_pwm->chip.npwm = 4; - atmel_pwm->data = data; - atmel_pwm->updated_pwms = 0; - mutex_init(&atmel_pwm->isr_lock); ret = pwmchip_add(&atmel_pwm->chip); if (ret < 0) { @@ -437,7 +403,6 @@ static struct platform_driver atmel_pwm_driver = { .name = "atmel-pwm", .of_match_table = of_match_ptr(atmel_pwm_dt_ids), }, - .id_table = atmel_pwm_devtypes, .probe = atmel_pwm_probe, .remove = atmel_pwm_remove, }; diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index d961a8207b1c..56c38cfae92c 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -115,7 +115,7 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, } static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { unsigned long prescale = IPROC_PWM_PRESCALE_MIN; struct iproc_pwmc *ip = to_iproc_pwmc(chip); diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index f6fe0b922e1e..91e24f01b54e 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -21,7 +21,7 @@ #define PERIOD(x) (((x) * 0x10) + 0x10) #define DUTY(x) (((x) * 0x10) + 0x14) -#define MIN_PERIOD 108 /* 9.2 MHz max. 
PWM clock */ +#define PERIOD_MIN 0x2 struct bcm2835_pwm { struct pwm_chip chip; @@ -64,6 +64,7 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); unsigned long rate = clk_get_rate(pc->clk); unsigned long scaler; + u32 period; if (!rate) { dev_err(pc->dev, "failed to get clock rate\n"); @@ -71,17 +72,14 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate); + period = DIV_ROUND_CLOSEST(period_ns, scaler); - if (period_ns <= MIN_PERIOD) { - dev_err(pc->dev, "period %d not supported, minimum %d\n", - period_ns, MIN_PERIOD); + if (period < PERIOD_MIN) return -EINVAL; - } writel(DIV_ROUND_CLOSEST(duty_ns, scaler), pc->base + DUTY(pwm->hwpwm)); - writel(DIV_ROUND_CLOSEST(period_ns, scaler), - pc->base + PERIOD(pwm->hwpwm)); + writel(period, pc->base + PERIOD(pwm->hwpwm)); return 0; } @@ -155,8 +153,11 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pc->clk)) { - dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk)); - return PTR_ERR(pc->clk); + ret = PTR_ERR(pc->clk); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "clock not found: %d\n", ret); + + return ret; } ret = clk_prepare_enable(pc->clk); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 85bea2d40b7d..89497448d217 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -93,7 +93,7 @@ static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index) } static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); int duty_cycle; diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 9d31a217111d..59272a920479 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -227,7 
+227,7 @@ static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc, static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc, struct pwm_device *pwm, - struct pwm_state *newstate) + const struct pwm_state *newstate) { unsigned int duty; u32 reg_polarity; @@ -292,17 +292,13 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc, regmap_update_bits(fpc->regmap, FTM_POL, BIT(pwm->hwpwm), reg_polarity); - newstate->period = fsl_pwm_ticks_to_ns(fpc, - fpc->period.mod_period + 1); - newstate->duty_cycle = fsl_pwm_ticks_to_ns(fpc, duty); - ftm_set_write_protection(fpc); return 0; } static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *newstate) + const struct pwm_state *newstate) { struct fsl_pwm_chip *fpc = to_fsl_chip(chip); struct pwm_state *oldstate = &pwm->state; diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index 753bd58111e4..ad205fdad372 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -149,7 +149,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, } static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct hibvt_pwm_chip *hi_pwm_chip = to_hibvt_pwm_chip(chip); diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index e8385c1cf342..9145f6160649 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -89,7 +89,7 @@ to_imx_tpm_pwm_chip(struct pwm_chip *chip) static int pwm_imx_tpm_round_state(struct pwm_chip *chip, struct imx_tpm_pwm_param *p, struct pwm_state *real_state, - struct pwm_state *state) + const struct pwm_state *state) { struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip); u32 rate, prescale, period_count, clock_unit; @@ -289,7 +289,7 @@ static int pwm_imx_tpm_apply_hw(struct pwm_chip *chip, static int pwm_imx_tpm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct 
pwm_state *state) { struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip); struct imx_tpm_pwm_param param; diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 434a351fb626..ae11d8577f18 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -3,6 +3,10 @@ * simple driver for PWM (Pulse Width Modulator) controller * * Derived from pxa PWM driver by eric miao + * + * Limitations: + * - When disabled the output is driven to 0 independent of the configured + * polarity. */ #include @@ -205,7 +209,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip, } static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { unsigned long period_cycles, duty_cycles, prescale; struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip); diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index f901e8a0d33d..9d78cc21cb12 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -2,6 +2,11 @@ /* * Copyright (C) 2010, Lars-Peter Clausen * JZ4740 platform PWM support + * + * Limitations: + * - The .apply callback doesn't complete the currently running period before + * reconfiguring the hardware. + * - Each period starts with the inactive part. 
*/ #include @@ -83,7 +88,7 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) } static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); unsigned long long tmp; diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4098a4601691..75bbfe5f3bc2 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -122,7 +122,7 @@ static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) } static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct pwm_lpss_chip *lpwm = to_lpwm(chip); int ret; diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index eb6674ce995f..b94e0d09c300 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Mediatek Pulse Width Modulator driver + * MediaTek Pulse Width Modulator driver * * Copyright (C) 2015 John Crispin * Copyright (C) 2017 Zhi Mao * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
*/ #include @@ -35,125 +33,107 @@ #define PWM_CLK_DIV_MAX 7 -enum { - MTK_CLK_MAIN = 0, - MTK_CLK_TOP, - MTK_CLK_PWM1, - MTK_CLK_PWM2, - MTK_CLK_PWM3, - MTK_CLK_PWM4, - MTK_CLK_PWM5, - MTK_CLK_PWM6, - MTK_CLK_PWM7, - MTK_CLK_PWM8, - MTK_CLK_MAX, -}; - -static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = { - "main", "top", "pwm1", "pwm2", "pwm3", "pwm4", "pwm5", "pwm6", "pwm7", - "pwm8" -}; - -struct mtk_pwm_platform_data { +struct pwm_mediatek_of_data { unsigned int num_pwms; bool pwm45_fixup; - bool has_clks; }; /** - * struct mtk_pwm_chip - struct representing PWM chip + * struct pwm_mediatek_chip - struct representing PWM chip * @chip: linux PWM chip representation * @regs: base address of PWM chip - * @clks: list of clocks + * @clk_top: the top clock generator + * @clk_main: the clock used by PWM core + * @clk_pwms: the clock used by each PWM channel + * @clk_freq: the fix clock frequency of legacy MIPS SoC */ -struct mtk_pwm_chip { +struct pwm_mediatek_chip { struct pwm_chip chip; void __iomem *regs; - struct clk *clks[MTK_CLK_MAX]; - const struct mtk_pwm_platform_data *soc; + struct clk *clk_top; + struct clk *clk_main; + struct clk **clk_pwms; + const struct pwm_mediatek_of_data *soc; }; -static const unsigned int mtk_pwm_reg_offset[] = { +static const unsigned int pwm_mediatek_reg_offset[] = { 0x0010, 0x0050, 0x0090, 0x00d0, 0x0110, 0x0150, 0x0190, 0x0220 }; -static inline struct mtk_pwm_chip *to_mtk_pwm_chip(struct pwm_chip *chip) +static inline struct pwm_mediatek_chip * +to_pwm_mediatek_chip(struct pwm_chip *chip) { - return container_of(chip, struct mtk_pwm_chip, chip); + return container_of(chip, struct pwm_mediatek_chip, chip); } -static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int pwm_mediatek_clk_enable(struct pwm_chip *chip, + struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); int ret; - if (!pc->soc->has_clks) - return 
0; - - ret = clk_prepare_enable(pc->clks[MTK_CLK_TOP]); + ret = clk_prepare_enable(pc->clk_top); if (ret < 0) return ret; - ret = clk_prepare_enable(pc->clks[MTK_CLK_MAIN]); + ret = clk_prepare_enable(pc->clk_main); if (ret < 0) goto disable_clk_top; - ret = clk_prepare_enable(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]); + ret = clk_prepare_enable(pc->clk_pwms[pwm->hwpwm]); if (ret < 0) goto disable_clk_main; return 0; disable_clk_main: - clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]); + clk_disable_unprepare(pc->clk_main); disable_clk_top: - clk_disable_unprepare(pc->clks[MTK_CLK_TOP]); + clk_disable_unprepare(pc->clk_top); return ret; } -static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm) +static void pwm_mediatek_clk_disable(struct pwm_chip *chip, + struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); - if (!pc->soc->has_clks) - return; - - clk_disable_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]); - clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]); - clk_disable_unprepare(pc->clks[MTK_CLK_TOP]); + clk_disable_unprepare(pc->clk_pwms[pwm->hwpwm]); + clk_disable_unprepare(pc->clk_main); + clk_disable_unprepare(pc->clk_top); } -static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num, - unsigned int offset) +static inline u32 pwm_mediatek_readl(struct pwm_mediatek_chip *chip, + unsigned int num, unsigned int offset) { - return readl(chip->regs + mtk_pwm_reg_offset[num] + offset); + return readl(chip->regs + pwm_mediatek_reg_offset[num] + offset); } -static inline void mtk_pwm_writel(struct mtk_pwm_chip *chip, - unsigned int num, unsigned int offset, - u32 value) +static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip, + unsigned int num, unsigned int offset, + u32 value) { - writel(value, chip->regs + mtk_pwm_reg_offset[num] + offset); + writel(value, chip->regs + pwm_mediatek_reg_offset[num] + offset); } -static int 
mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) +static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); - struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]; + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH, reg_thres = PWMTHRES; u64 resolution; int ret; - ret = mtk_pwm_clk_enable(chip, pwm); + ret = pwm_mediatek_clk_enable(chip, pwm); + if (ret < 0) return ret; /* Using resolution in picosecond gets accuracy higher */ resolution = (u64)NSEC_PER_SEC * 1000; - do_div(resolution, clk_get_rate(clk)); + do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm])); cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution); while (cnt_period > 8191) { @@ -164,7 +144,7 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); dev_err(chip->dev, "period %d not supported\n", period_ns); return -EINVAL; } @@ -179,22 +159,22 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } cnt_duty = DIV_ROUND_CLOSEST_ULL((u64)duty_ns * 1000, resolution); - mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv); - mtk_pwm_writel(pc, pwm->hwpwm, reg_width, cnt_period); - mtk_pwm_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); + pwm_mediatek_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv); + pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); + pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); return 0; } -static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct 
pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 value; int ret; - ret = mtk_pwm_clk_enable(chip, pwm); + ret = pwm_mediatek_clk_enable(chip, pwm); if (ret < 0) return ret; @@ -205,29 +185,28 @@ static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) return 0; } -static void mtk_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +static void pwm_mediatek_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 value; value = readl(pc->regs); value &= ~BIT(pwm->hwpwm); writel(value, pc->regs); - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); } -static const struct pwm_ops mtk_pwm_ops = { - .config = mtk_pwm_config, - .enable = mtk_pwm_enable, - .disable = mtk_pwm_disable, +static const struct pwm_ops pwm_mediatek_ops = { + .config = pwm_mediatek_config, + .enable = pwm_mediatek_enable, + .disable = pwm_mediatek_disable, .owner = THIS_MODULE, }; -static int mtk_pwm_probe(struct platform_device *pdev) +static int pwm_mediatek_probe(struct platform_device *pdev) { - const struct mtk_pwm_platform_data *data; - struct mtk_pwm_chip *pc; + struct pwm_mediatek_chip *pc; struct resource *res; unsigned int i; int ret; @@ -236,31 +215,51 @@ static int mtk_pwm_probe(struct platform_device *pdev) if (!pc) return -ENOMEM; - data = of_device_get_match_data(&pdev->dev); - if (data == NULL) - return -EINVAL; - pc->soc = data; + pc->soc = of_device_get_match_data(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); pc->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(pc->regs)) return PTR_ERR(pc->regs); - for (i = 0; i < data->num_pwms + 2 && pc->soc->has_clks; i++) { - pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]); - if (IS_ERR(pc->clks[i])) { + pc->clk_pwms = devm_kcalloc(&pdev->dev, pc->soc->num_pwms, + sizeof(*pc->clk_pwms), GFP_KERNEL); + if (!pc->clk_pwms) + return 
-ENOMEM; + + pc->clk_top = devm_clk_get(&pdev->dev, "top"); + if (IS_ERR(pc->clk_top)) { + dev_err(&pdev->dev, "clock: top fail: %ld\n", + PTR_ERR(pc->clk_top)); + return PTR_ERR(pc->clk_top); + } + + pc->clk_main = devm_clk_get(&pdev->dev, "main"); + if (IS_ERR(pc->clk_main)) { + dev_err(&pdev->dev, "clock: main fail: %ld\n", + PTR_ERR(pc->clk_main)); + return PTR_ERR(pc->clk_main); + } + + for (i = 0; i < pc->soc->num_pwms; i++) { + char name[8]; + + snprintf(name, sizeof(name), "pwm%d", i + 1); + + pc->clk_pwms[i] = devm_clk_get(&pdev->dev, name); + if (IS_ERR(pc->clk_pwms[i])) { dev_err(&pdev->dev, "clock: %s fail: %ld\n", - mtk_pwm_clk_name[i], PTR_ERR(pc->clks[i])); - return PTR_ERR(pc->clks[i]); + name, PTR_ERR(pc->clk_pwms[i])); + return PTR_ERR(pc->clk_pwms[i]); } } platform_set_drvdata(pdev, pc); pc->chip.dev = &pdev->dev; - pc->chip.ops = &mtk_pwm_ops; + pc->chip.ops = &pwm_mediatek_ops; pc->chip.base = -1; - pc->chip.npwm = data->num_pwms; + pc->chip.npwm = pc->soc->num_pwms; ret = pwmchip_add(&pc->chip); if (ret < 0) { @@ -271,55 +270,63 @@ static int mtk_pwm_probe(struct platform_device *pdev) return 0; } -static int mtk_pwm_remove(struct platform_device *pdev) +static int pwm_mediatek_remove(struct platform_device *pdev) { - struct mtk_pwm_chip *pc = platform_get_drvdata(pdev); + struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev); return pwmchip_remove(&pc->chip); } -static const struct mtk_pwm_platform_data mt2712_pwm_data = { +static const struct pwm_mediatek_of_data mt2712_pwm_data = { .num_pwms = 8, .pwm45_fixup = false, - .has_clks = true, }; -static const struct mtk_pwm_platform_data mt7622_pwm_data = { +static const struct pwm_mediatek_of_data mt7622_pwm_data = { .num_pwms = 6, .pwm45_fixup = false, - .has_clks = true, }; -static const struct mtk_pwm_platform_data mt7623_pwm_data = { +static const struct pwm_mediatek_of_data mt7623_pwm_data = { .num_pwms = 5, .pwm45_fixup = true, - .has_clks = true, }; -static const struct 
mtk_pwm_platform_data mt7628_pwm_data = { +static const struct pwm_mediatek_of_data mt7628_pwm_data = { .num_pwms = 4, .pwm45_fixup = true, - .has_clks = false, }; -static const struct of_device_id mtk_pwm_of_match[] = { +static const struct pwm_mediatek_of_data mt7629_pwm_data = { + .num_pwms = 1, + .pwm45_fixup = false, +}; + +static const struct pwm_mediatek_of_data mt8516_pwm_data = { + .num_pwms = 5, + .pwm45_fixup = false, +}; + +static const struct of_device_id pwm_mediatek_of_match[] = { { .compatible = "mediatek,mt2712-pwm", .data = &mt2712_pwm_data }, { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data }, { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data }, { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data }, + { .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data }, + { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data }, { }, }; -MODULE_DEVICE_TABLE(of, mtk_pwm_of_match); +MODULE_DEVICE_TABLE(of, pwm_mediatek_of_match); -static struct platform_driver mtk_pwm_driver = { +static struct platform_driver pwm_mediatek_driver = { .driver = { - .name = "mtk-pwm", - .of_match_table = mtk_pwm_of_match, + .name = "pwm-mediatek", + .of_match_table = pwm_mediatek_of_match, }, - .probe = mtk_pwm_probe, - .remove = mtk_pwm_remove, + .probe = pwm_mediatek_probe, + .remove = pwm_mediatek_remove, }; -module_platform_driver(mtk_pwm_driver); +module_platform_driver(pwm_mediatek_driver); MODULE_AUTHOR("John Crispin "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 3cbff5cbb789..6245bbdb6e6c 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -159,7 +159,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) } static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct meson_pwm_channel *channel = 
pwm_get_chip_data(pwm); unsigned int duty, period, pre_div, cnt, duty_cnt; @@ -265,7 +265,7 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm) } static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); struct meson_pwm *meson = to_meson_pwm(chip); diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 04c0f6b95c1a..b14376b47ac8 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -126,15 +126,13 @@ static int mxs_pwm_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct mxs_pwm_chip *mxs; - struct resource *res; int ret; mxs = devm_kzalloc(&pdev->dev, sizeof(*mxs), GFP_KERNEL); if (!mxs) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mxs->base = devm_ioremap_resource(&pdev->dev, res); + mxs->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(mxs->base)) return PTR_ERR(mxs->base); diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 5b2b8ecc354c..852eb2347954 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -158,7 +158,7 @@ static void rcar_pwm_disable(struct rcar_pwm_chip *rp) } static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); struct pwm_state cur_state; @@ -187,7 +187,7 @@ static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, /* The SYNC should be set to 0 even if rcar_pwm_set_counter failed */ rcar_pwm_update(rp, RCAR_PWMCR_SYNC, 0, RCAR_PWMCR); - if (!ret && state->enabled) + if (!ret) ret = rcar_pwm_enable(rp); return ret; diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 51b96cb7dd25..73352e6fbccb 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -90,16 +90,16 @@ 
static void rockchip_pwm_get_state(struct pwm_chip *chip, state->enabled = ((val & enable_conf) == enable_conf) ? true : false; - if (pc->data->supports_polarity) { - if (!(val & PWM_DUTY_POSITIVE)) - state->polarity = PWM_POLARITY_INVERSED; - } + if (pc->data->supports_polarity && !(val & PWM_DUTY_POSITIVE)) + state->polarity = PWM_POLARITY_INVERSED; + else + state->polarity = PWM_POLARITY_NORMAL; clk_disable(pc->pclk); } static void rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); unsigned long period, duty; @@ -183,7 +183,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip, } static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); struct pwm_state curstate; @@ -212,12 +212,6 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, goto out; } - /* - * Update the state with the real hardware, which can differ a bit - * because of period/duty_cycle approximation. 
- */ - rockchip_pwm_get_state(chip, pwm, state); - out: clk_disable(pc->pclk); diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index a7c107f19e66..cc63f9baa481 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -147,7 +147,7 @@ static int pwm_sifive_enable(struct pwm_chip *chip, bool enable) } static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip); struct pwm_state cur_state; @@ -250,10 +250,8 @@ static int pwm_sifive_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ddata->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(ddata->regs)) { - dev_err(dev, "Unable to map IO resources\n"); + if (IS_ERR(ddata->regs)) return PTR_ERR(ddata->regs); - } ddata->clk = devm_clk_get(dev, NULL); if (IS_ERR(ddata->clk)) { diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c new file mode 100644 index 000000000000..be2394227423 --- /dev/null +++ b/drivers/pwm/pwm-sprd.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Spreadtrum Communications Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define SPRD_PWM_PRESCALE 0x0 +#define SPRD_PWM_MOD 0x4 +#define SPRD_PWM_DUTY 0x8 +#define SPRD_PWM_ENABLE 0x18 + +#define SPRD_PWM_MOD_MAX GENMASK(7, 0) +#define SPRD_PWM_DUTY_MSK GENMASK(15, 0) +#define SPRD_PWM_PRESCALE_MSK GENMASK(7, 0) +#define SPRD_PWM_ENABLE_BIT BIT(0) + +#define SPRD_PWM_CHN_NUM 4 +#define SPRD_PWM_REGS_SHIFT 5 +#define SPRD_PWM_CHN_CLKS_NUM 2 +#define SPRD_PWM_CHN_OUTPUT_CLK 1 + +struct sprd_pwm_chn { + struct clk_bulk_data clks[SPRD_PWM_CHN_CLKS_NUM]; + u32 clk_rate; +}; + +struct sprd_pwm_chip { + void __iomem *base; + struct device *dev; + struct pwm_chip chip; + int num_pwms; + struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM]; +}; + +/* + * The list of clocks required by PWM channels, and each channel has 2 clocks: + * enable clock and pwm clock. + */ +static const char * const sprd_pwm_clks[] = { + "enable0", "pwm0", + "enable1", "pwm1", + "enable2", "pwm2", + "enable3", "pwm3", +}; + +static u32 sprd_pwm_read(struct sprd_pwm_chip *spc, u32 hwid, u32 reg) +{ + u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT); + + return readl_relaxed(spc->base + offset); +} + +static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid, + u32 reg, u32 val) +{ + u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT); + + writel_relaxed(val, spc->base + offset); +} + +static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct sprd_pwm_chip *spc = + container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + u32 val, duty, prescale; + u64 tmp; + int ret; + + /* + * The clocks to PWM channel has to be enabled first before + * reading to the registers. 
+ */ + ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, chn->clks); + if (ret) { + dev_err(spc->dev, "failed to enable pwm%u clocks\n", + pwm->hwpwm); + return; + } + + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_ENABLE); + if (val & SPRD_PWM_ENABLE_BIT) + state->enabled = true; + else + state->enabled = false; + + /* + * The hardware provides a counter that is fed by the source clock. + * The period length is (PRESCALE + 1) * MOD counter steps. + * The duty cycle length is (PRESCALE + 1) * DUTY counter steps. + * Thus the period_ns and duty_ns calculation formula should be: + * period_ns = NSEC_PER_SEC * (prescale + 1) * mod / clk_rate + * duty_ns = NSEC_PER_SEC * (prescale + 1) * duty / clk_rate + */ + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_PRESCALE); + prescale = val & SPRD_PWM_PRESCALE_MSK; + tmp = (prescale + 1) * NSEC_PER_SEC * SPRD_PWM_MOD_MAX; + state->period = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_DUTY); + duty = val & SPRD_PWM_DUTY_MSK; + tmp = (prescale + 1) * NSEC_PER_SEC * duty; + state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + + /* Disable PWM clocks if the PWM channel is not in enable state. */ + if (!state->enabled) + clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks); +} + +static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + u32 prescale, duty; + u64 tmp; + + /* + * The hardware provides a counter that is fed by the source clock. + * The period length is (PRESCALE + 1) * MOD counter steps. + * The duty cycle length is (PRESCALE + 1) * DUTY counter steps. + * + * To keep the maths simple we're always using MOD = SPRD_PWM_MOD_MAX. + * The value for PRESCALE is selected such that the resulting period + * gets the maximal length not bigger than the requested one with the + * given settings (MOD = SPRD_PWM_MOD_MAX and input clock). 
+ */ + duty = duty_ns * SPRD_PWM_MOD_MAX / period_ns; + + tmp = (u64)chn->clk_rate * period_ns; + do_div(tmp, NSEC_PER_SEC); + prescale = DIV_ROUND_CLOSEST_ULL(tmp, SPRD_PWM_MOD_MAX) - 1; + if (prescale > SPRD_PWM_PRESCALE_MSK) + prescale = SPRD_PWM_PRESCALE_MSK; + + /* + * Note: Writing DUTY triggers the hardware to actually apply the + * values written to MOD and DUTY to the output, so must keep writing + * DUTY last. + * + * The hardware ensures that the currently running period is completed + * before applying a new configuration, to avoid mixed settings. + */ + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_PRESCALE, prescale); + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_MOD, SPRD_PWM_MOD_MAX); + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_DUTY, duty); + + return 0; +} + +static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + struct sprd_pwm_chip *spc = + container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + struct pwm_state *cstate = &pwm->state; + int ret; + + if (state->enabled) { + if (!cstate->enabled) { + /* + * The clocks to the PWM channel have to be enabled first + * before writing to the registers. + */ + ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, + chn->clks); + if (ret) { + dev_err(spc->dev, + "failed to enable pwm%u clocks\n", + pwm->hwpwm); + return ret; + } + } + + if (state->period != cstate->period || + state->duty_cycle != cstate->duty_cycle) { + ret = sprd_pwm_config(spc, pwm, state->duty_cycle, + state->period); + if (ret) + return ret; + } + + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 1); + } else if (cstate->enabled) { + /* + * Note: After setting SPRD_PWM_ENABLE to zero, the controller + * will not wait for current period to be completed, instead it + * will stop the PWM channel immediately. 
+ */ + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 0); + + clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks); + } + + return 0; +} + +static const struct pwm_ops sprd_pwm_ops = { + .apply = sprd_pwm_apply, + .get_state = sprd_pwm_get_state, + .owner = THIS_MODULE, +}; + +static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) +{ + struct clk *clk_pwm; + int ret, i; + + for (i = 0; i < SPRD_PWM_CHN_NUM; i++) { + struct sprd_pwm_chn *chn = &spc->chn[i]; + int j; + + for (j = 0; j < SPRD_PWM_CHN_CLKS_NUM; ++j) + chn->clks[j].id = + sprd_pwm_clks[i * SPRD_PWM_CHN_CLKS_NUM + j]; + + ret = devm_clk_bulk_get(spc->dev, SPRD_PWM_CHN_CLKS_NUM, + chn->clks); + if (ret) { + if (ret == -ENOENT) + break; + + if (ret != -EPROBE_DEFER) + dev_err(spc->dev, + "failed to get channel clocks\n"); + + return ret; + } + + clk_pwm = chn->clks[SPRD_PWM_CHN_OUTPUT_CLK].clk; + chn->clk_rate = clk_get_rate(clk_pwm); + } + + if (!i) { + dev_err(spc->dev, "no available PWM channels\n"); + return -ENODEV; + } + + spc->num_pwms = i; + + return 0; +} + +static int sprd_pwm_probe(struct platform_device *pdev) +{ + struct sprd_pwm_chip *spc; + int ret; + + spc = devm_kzalloc(&pdev->dev, sizeof(*spc), GFP_KERNEL); + if (!spc) + return -ENOMEM; + + spc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(spc->base)) + return PTR_ERR(spc->base); + + spc->dev = &pdev->dev; + platform_set_drvdata(pdev, spc); + + ret = sprd_pwm_clk_init(spc); + if (ret) + return ret; + + spc->chip.dev = &pdev->dev; + spc->chip.ops = &sprd_pwm_ops; + spc->chip.base = -1; + spc->chip.npwm = spc->num_pwms; + + ret = pwmchip_add(&spc->chip); + if (ret) + dev_err(&pdev->dev, "failed to add PWM chip\n"); + + return ret; +} + +static int sprd_pwm_remove(struct platform_device *pdev) +{ + struct sprd_pwm_chip *spc = platform_get_drvdata(pdev); + + return pwmchip_remove(&spc->chip); +} + +static const struct of_device_id sprd_pwm_of_match[] = { + { .compatible = "sprd,ums512-pwm", }, + { }, +}; 
+MODULE_DEVICE_TABLE(of, sprd_pwm_of_match); + +static struct platform_driver sprd_pwm_driver = { + .driver = { + .name = "sprd-pwm", + .of_match_table = sprd_pwm_of_match, + }, + .probe = sprd_pwm_probe, + .remove = sprd_pwm_remove, +}; + +module_platform_driver(sprd_pwm_driver); + +MODULE_DESCRIPTION("Spreadtrum PWM Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 20450e34ad57..1508616d794c 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -564,10 +564,8 @@ static int sti_pwm_probe(struct platform_device *pdev) return PTR_ERR(pc->regmap); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "Failed to obtain IRQ\n"); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, sti_pwm_interrupt, 0, pdev->name, pc); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 2211a642066d..67fca62524dc 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -32,7 +32,7 @@ static inline struct stm32_pwm_lp *to_stm32_pwm_lp(struct pwm_chip *chip) #define STM32_LPTIM_MAX_PRESCALER 128 static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct stm32_pwm_lp *priv = to_stm32_pwm_lp(chip); unsigned long long prd, div, dty; @@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, /* Calculate the period and prescaler value */ div = (unsigned long long)clk_get_rate(priv->clk) * state->period; do_div(div, NSEC_PER_SEC); + if (!div) { + /* Clock is too slow to achieve requested period. 
*/ + dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period); + return -EINVAL; + } + prd = div; while (div > STM32_LPTIM_MAX_ARR) { presc++; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 740e2dec8313..359b08596d9e 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -440,7 +440,7 @@ static void stm32_pwm_disable(struct stm32_pwm *priv, int ch) } static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { bool enabled; struct stm32_pwm *priv = to_stm32_pwm_dev(chip); @@ -468,7 +468,7 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct stm32_pwm *priv = to_stm32_pwm_dev(chip); int ret; diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index de78c824bbfd..6f5840a1a82d 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -145,7 +145,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip, } static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm, - struct pwm_state *state, + const struct pwm_state *state, u32 *dty, u32 *prd, unsigned int *prsclr) { u64 clk_rate, div = 0; @@ -192,17 +192,11 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm, *dty = div; *prsclr = prescaler; - div = (u64)pval * NSEC_PER_SEC * *prd; - state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate); - - div = (u64)pval * NSEC_PER_SEC * *dty; - state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate); - return 0; } static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip); struct pwm_state cstate; diff --git a/drivers/pwm/pwm-zx.c b/drivers/pwm/pwm-zx.c index e24f4be35316..e2c21cc34a96 100644 --- 
a/drivers/pwm/pwm-zx.c +++ b/drivers/pwm/pwm-zx.c @@ -148,7 +148,7 @@ static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip); struct pwm_state cstate; diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 8c07a393dc2e..709a22f455e9 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -53,7 +53,6 @@ #define CONTROL0_TSEN_MODE_EXTERNAL 0x2 #define CONTROL0_TSEN_MODE_MASK 0x3 -#define CONTROL1_TSEN_AVG_SHIFT 0 #define CONTROL1_TSEN_AVG_MASK 0x7 #define CONTROL1_EXT_TSEN_SW_RESET BIT(7) #define CONTROL1_EXT_TSEN_HW_RESETn BIT(8) @@ -267,8 +266,8 @@ static void armada_cp110_init(struct platform_device *pdev, /* Average the output value over 2^1 = 2 samples */ regmap_read(priv->syscon, data->syscon_control1_off, ®); - reg &= ~CONTROL1_TSEN_AVG_MASK << CONTROL1_TSEN_AVG_SHIFT; - reg |= 1 << CONTROL1_TSEN_AVG_SHIFT; + reg &= ~CONTROL1_TSEN_AVG_MASK; + reg |= 1; regmap_write(priv->syscon, data->syscon_control1_off, reg); } diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index 9716bc3abaf9..7130e90773ed 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -77,9 +77,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, struct acpi_buffer element = { 0, NULL }; struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" }; - if (!acpi_has_method(handle, "_TRT")) - return -ENODEV; - status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer); if (ACPI_FAILURE(status)) return -ENODEV; @@ -158,9 +155,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, struct acpi_buffer art_format = { 
sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" }; - if (!acpi_has_method(handle, "_ART")) - return -ENODEV; - status = acpi_evaluate_object(handle, "_ART", NULL, &buffer); if (ACPI_FAILURE(status)) return -ENODEV; diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c index f5749d4418ae..a7bbd8584ae2 100644 --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c @@ -181,7 +181,7 @@ static int int3403_cdev_add(struct int3403_priv *priv) p = buf.pointer; if (!p || (p->type != ACPI_TYPE_PACKAGE)) { - printk(KERN_WARNING "Invalid PPSS data\n"); + pr_warn("Invalid PPSS data\n"); kfree(buf.pointer); return -EFAULT; } diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index d3446acf9bbd..89a015387283 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -39,6 +39,9 @@ /* GeminiLake thermal reporting device */ #define PCI_DEVICE_ID_PROC_GLK_THERMAL 0x318C +/* IceLake thermal reporting device */ +#define PCI_DEVICE_ID_PROC_ICL_THERMAL 0x8a03 + #define DRV_NAME "proc_thermal" struct power_config { @@ -137,6 +140,72 @@ static const struct attribute_group power_limit_attribute_group = { .name = "power_limits" }; +static ssize_t tcc_offset_degree_celsius_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + int err; + + err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val); + if (err) + return err; + + val = (val >> 24) & 0xff; + return sprintf(buf, "%d\n", (int)val); +} + +static int tcc_offset_update(int tcc) +{ + u64 val; + int err; + + if (!tcc) + return -EINVAL; + + err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val); + if (err) + return err; + + val &= ~GENMASK_ULL(31, 24); + val |= (tcc & 0xff) << 24; + + err = 
wrmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, val); + if (err) + return err; + + return 0; +} + +static int tcc_offset_save; + +static ssize_t tcc_offset_degree_celsius_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + u64 val; + int tcc, err; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, &val); + if (err) + return err; + + if (!(val & BIT(30))) + return -EACCES; + + if (kstrtoint(buf, 0, &tcc)) + return -EINVAL; + + err = tcc_offset_update(tcc); + if (err) + return err; + + tcc_offset_save = tcc; + + return count; +} + +static DEVICE_ATTR_RW(tcc_offset_degree_celsius); + static int stored_tjmax; /* since it is fixed, we can have local storage */ static int get_tjmax(void) @@ -332,6 +401,7 @@ static void proc_thermal_remove(struct proc_thermal_device *proc_priv) acpi_remove_notify_handler(proc_priv->adev->handle, ACPI_DEVICE_NOTIFY, proc_thermal_notify); int340x_thermal_zone_remove(proc_priv->int340x_zone); + sysfs_remove_file(&proc_priv->dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr); sysfs_remove_group(&proc_priv->dev->kobj, &power_limit_attribute_group); } @@ -355,8 +425,15 @@ static int int3401_add(struct platform_device *pdev) dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n"); - return sysfs_create_group(&pdev->dev.kobj, - &power_limit_attribute_group); + ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + if (ret) + return ret; + + ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group); + if (ret) + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + + return ret; } static int int3401_remove(struct platform_device *pdev) @@ -588,8 +665,15 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n"); - return sysfs_create_group(&pdev->dev.kobj, - &power_limit_attribute_group); + ret = sysfs_create_file(&pdev->dev.kobj, 
&dev_attr_tcc_offset_degree_celsius.attr); + if (ret) + return ret; + + ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group); + if (ret) + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + + return ret; } static void proc_thermal_pci_remove(struct pci_dev *pdev) @@ -615,6 +699,8 @@ static int proc_thermal_resume(struct device *dev) proc_dev = dev_get_drvdata(dev); proc_thermal_read_ppcc(proc_dev); + tcc_offset_update(tcc_offset_save); + return 0; } #else @@ -636,6 +722,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CNL_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CFL_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_GLK_THERMAL)}, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_ICL_THERMAL), + .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, }, { 0, }, }; diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 99f8b2540f18..4f0bb8f502e1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -371,16 +371,14 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) static int intel_pch_thermal_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + struct pch_thermal_device *ptd = dev_get_drvdata(device); return ptd->ops->suspend(ptd); } static int intel_pch_thermal_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + struct pch_thermal_device *ptd = dev_get_drvdata(device); return ptd->ops->resume(ptd); } diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c index 8d9b721dadb6..e46a4e3f25c4 100644 --- a/drivers/thermal/qcom/tsens-8960.c +++ b/drivers/thermal/qcom/tsens-8960.c @@ -229,6 +229,8 @@ static int 
calibrate_8960(struct tsens_priv *priv) for (i = 0; i < num_read; i++, s++) s->offset = data[i]; + kfree(data); + return 0; } diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c index 6f26fadf4c27..055647bcee67 100644 --- a/drivers/thermal/qcom/tsens-v0_1.c +++ b/drivers/thermal/qcom/tsens-v0_1.c @@ -145,8 +145,10 @@ static int calibrate_8916(struct tsens_priv *priv) return PTR_ERR(qfprom_cdata); qfprom_csel = (u32 *)qfprom_read(priv->dev, "calib_sel"); - if (IS_ERR(qfprom_csel)) + if (IS_ERR(qfprom_csel)) { + kfree(qfprom_cdata); return PTR_ERR(qfprom_csel); + } mode = (qfprom_csel[0] & MSM8916_CAL_SEL_MASK) >> MSM8916_CAL_SEL_SHIFT; dev_dbg(priv->dev, "calibration mode is %d\n", mode); @@ -181,6 +183,8 @@ static int calibrate_8916(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(qfprom_cdata); + kfree(qfprom_csel); return 0; } @@ -198,8 +202,10 @@ static int calibrate_8974(struct tsens_priv *priv) return PTR_ERR(calib); bkp = (u32 *)qfprom_read(priv->dev, "calib_backup"); - if (IS_ERR(bkp)) + if (IS_ERR(bkp)) { + kfree(calib); return PTR_ERR(bkp); + } calib_redun_sel = bkp[1] & BKP_REDUN_SEL; calib_redun_sel >>= BKP_REDUN_SHIFT; @@ -313,6 +319,8 @@ static int calibrate_8974(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(calib); + kfree(bkp); return 0; } diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c index 10b595d4f619..870f502f2cb6 100644 --- a/drivers/thermal/qcom/tsens-v1.c +++ b/drivers/thermal/qcom/tsens-v1.c @@ -138,6 +138,7 @@ static int calibrate_v1(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(qfprom_cdata); return 0; } diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h index 2fd94997245b..b89083b61c38 100644 --- a/drivers/thermal/qcom/tsens.h +++ b/drivers/thermal/qcom/tsens.h @@ -17,6 +17,7 @@ #include #include +#include struct tsens_priv; diff --git 
a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 7b364933bfb1..39542c670301 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -2,6 +2,7 @@ // // Copyright 2016 Freescale Semiconductor, Inc. +#include #include #include #include @@ -72,6 +73,7 @@ struct qoriq_sensor { struct qoriq_tmu_data { struct qoriq_tmu_regs __iomem *regs; + struct clk *clk; bool little_endian; struct qoriq_sensor *sensor[SITES_MAX]; }; @@ -202,32 +204,39 @@ static int qoriq_tmu_probe(struct platform_device *pdev) data->little_endian = of_property_read_bool(np, "little-endian"); - data->regs = of_iomap(np, 0); - if (!data->regs) { + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) { dev_err(&pdev->dev, "Failed to get memory region\n"); - ret = -ENODEV; - goto err_iomap; + return PTR_ERR(data->regs); + } + + data->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(data->clk)) + return PTR_ERR(data->clk); + + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clock\n"); + return ret; } qoriq_tmu_init_device(data); /* TMU initialization */ ret = qoriq_tmu_calibration(pdev); /* TMU calibration */ if (ret < 0) - goto err_tmu; + goto err; ret = qoriq_tmu_register_tmu_zone(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to register sensors\n"); ret = -ENODEV; - goto err_iomap; + goto err; } return 0; -err_tmu: - iounmap(data->regs); - -err_iomap: +err: + clk_disable_unprepare(data->clk); platform_set_drvdata(pdev, NULL); return ret; @@ -240,14 +249,14 @@ static int qoriq_tmu_remove(struct platform_device *pdev) /* Disable monitoring */ tmu_write(data, TMR_DISABLE, &data->regs->tmr); - iounmap(data->regs); + clk_disable_unprepare(data->clk); + platform_set_drvdata(pdev, NULL); return 0; } -#ifdef CONFIG_PM_SLEEP -static int qoriq_tmu_suspend(struct device *dev) +static int __maybe_unused qoriq_tmu_suspend(struct device *dev) { u32 tmr; struct qoriq_tmu_data *data 
= dev_get_drvdata(dev); @@ -257,14 +266,21 @@ static int qoriq_tmu_suspend(struct device *dev) tmr &= ~TMR_ME; tmu_write(data, tmr, &data->regs->tmr); + clk_disable_unprepare(data->clk); + return 0; } -static int qoriq_tmu_resume(struct device *dev) +static int __maybe_unused qoriq_tmu_resume(struct device *dev) { u32 tmr; + int ret; struct qoriq_tmu_data *data = dev_get_drvdata(dev); + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; + /* Enable monitoring */ tmr = tmu_read(data, &data->regs->tmr); tmr |= TMR_ME; @@ -272,7 +288,6 @@ static int qoriq_tmu_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops, qoriq_tmu_suspend, qoriq_tmu_resume); diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index a56463308694..755d2b5bd2c2 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -443,9 +443,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) if (ret) goto error_unregister; - ret = devm_add_action(dev, rcar_gen3_hwmon_action, zone); + ret = devm_add_action_or_reset(dev, rcar_gen3_hwmon_action, zone); if (ret) { - rcar_gen3_hwmon_action(zone); goto error_unregister; } diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c index 43941eb734eb..5acaad3a594f 100644 --- a/drivers/thermal/tegra/soctherm.c +++ b/drivers/thermal/tegra/soctherm.c @@ -202,7 +202,7 @@ /* get dividend from the depth */ #define THROT_DEPTH_DIVIDEND(depth) ((256 * (100 - (depth)) / 100) - 1) -/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-sochterm.h +/* gk20a nv_therm interface N:3 Mapping. 
Levels defined in tegra124-soctherm.h * level vector * NONE 3'b000 * LOW 3'b001 diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 6bab66e84eb5..d4481cc8958f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -304,7 +304,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, &tz->poll_queue, msecs_to_jiffies(delay)); else - cancel_delayed_work(&tz->poll_queue); + cancel_delayed_work_sync(&tz->poll_queue); } static void monitor_thermal_zone(struct thermal_zone_device *tz) @@ -985,7 +985,7 @@ __thermal_cooling_device_register(struct device_node *np, result = device_register(&cdev->device); if (result) { ida_simple_remove(&thermal_cdev_ida, cdev->id); - kfree(cdev); + put_device(&cdev->device); return ERR_PTR(result); } @@ -1240,21 +1240,31 @@ thermal_zone_device_register(const char *type, int trips, int mask, struct thermal_zone_device *tz; enum thermal_trip_type trip_type; int trip_temp; + int id; int result; int count; struct thermal_governor *governor; - if (!type || strlen(type) == 0) + if (!type || strlen(type) == 0) { + pr_err("Error: No thermal zone type defined\n"); return ERR_PTR(-EINVAL); + } - if (type && strlen(type) >= THERMAL_NAME_LENGTH) + if (type && strlen(type) >= THERMAL_NAME_LENGTH) { + pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n", + type, THERMAL_NAME_LENGTH); return ERR_PTR(-EINVAL); + } - if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) + if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) { + pr_err("Error: Incorrect number of thermal trips\n"); return ERR_PTR(-EINVAL); + } - if (!ops) + if (!ops) { + pr_err("Error: Thermal zone device ops not defined\n"); return ERR_PTR(-EINVAL); + } if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) return ERR_PTR(-EINVAL); @@ -1266,11 +1276,13 @@ thermal_zone_device_register(const char *type, int trips, int mask, INIT_LIST_HEAD(&tz->thermal_instances); 
ida_init(&tz->ida); mutex_init(&tz->lock); - result = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL); - if (result < 0) + id = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + result = id; goto free_tz; + } - tz->id = result; + tz->id = id; strlcpy(tz->type, type, sizeof(tz->type)); tz->ops = ops; tz->tzp = tzp; @@ -1292,7 +1304,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); if (result) - goto remove_device_groups; + goto release_device; for (count = 0; count < trips; count++) { if (tz->ops->get_trip_type(tz, count, &trip_type)) @@ -1343,14 +1355,12 @@ thermal_zone_device_register(const char *type, int trips, int mask, return tz; unregister: - ida_simple_remove(&thermal_tz_ida, tz->id); - device_unregister(&tz->device); - return ERR_PTR(result); - -remove_device_groups: - thermal_zone_destroy_device_groups(tz); + device_del(&tz->device); +release_device: + put_device(&tz->device); + tz = NULL; remove_id: - ida_simple_remove(&thermal_tz_ida, tz->id); + ida_simple_remove(&thermal_tz_ida, id); free_tz: kfree(tz); return ERR_PTR(result); diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 40c69a533b24..dd5d8ee37928 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -87,13 +87,17 @@ static struct thermal_hwmon_device * thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; + char type[THERMAL_NAME_LENGTH]; mutex_lock(&thermal_hwmon_list_lock); - list_for_each_entry(hwmon, &thermal_hwmon_list, node) - if (!strcmp(hwmon->type, tz->type)) { + list_for_each_entry(hwmon, &thermal_hwmon_list, node) { + strcpy(type, tz->type); + strreplace(type, '-', '_'); + if (!strcmp(hwmon->type, type)) { mutex_unlock(&thermal_hwmon_list_lock); return hwmon; } + } mutex_unlock(&thermal_hwmon_list_lock); return NULL; diff --git 
a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a45f9e3e442b..58e7c100b6ad 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -477,13 +477,6 @@ config IXP4XX_WATCHDOG Say N if you are unsure. -config KS8695_WATCHDOG - tristate "KS8695 watchdog" - depends on ARCH_KS8695 - help - Watchdog timer embedded into KS8695 processor. This will reboot your - system when the timeout is reached. - config HAVE_S3C2410_WATCHDOG bool help @@ -662,15 +655,6 @@ config STMP3XXX_RTC_WATCHDOG To compile this driver as a module, choose M here: the module will be called stmp3xxx_rtc_wdt. -config NUC900_WATCHDOG - tristate "Nuvoton NUC900 watchdog" - depends on ARCH_W90X900 || COMPILE_TEST - help - Say Y here if to include support for the watchdog timer - for the Nuvoton NUC900 series SoCs. - To compile this driver as a module, choose M here: the - module will be called nuc900_wdt. - config TS4800_WATCHDOG tristate "TS-4800 Watchdog" depends on HAS_IOMEM && OF @@ -740,6 +724,19 @@ config IMX_SC_WDT To compile this driver as a module, choose M here: the module will be called imx_sc_wdt. +config IMX7ULP_WDT + tristate "IMX7ULP Watchdog" + depends on ARCH_MXC || COMPILE_TEST + select WATCHDOG_CORE + help + This is the driver for the hardware watchdog on the Freescale + IMX7ULP and later processors. If you have one of these + processors and wish to have watchdog support enabled, + say Y, otherwise say N. + + To compile this driver as a module, choose M here: the + module will be called imx7ulp_wdt. + config UX500_WATCHDOG tristate "ST-Ericsson Ux500 watchdog" depends on MFD_DB8500_PRCMU @@ -1046,8 +1043,8 @@ config F71808E_WDT depends on X86 help This is the driver for the hardware watchdog on the Fintek F71808E, - F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866 - Super I/O controllers. + F71862FG, F71868, F71869, F71882FG, F71889FG, F81803, F81865, and + F81866 Super I/O controllers. 
You can compile this driver directly into the kernel, or use it as a module. The module will be called f71808e_wdt. diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 7caa920e7e60..2ee352bf3372 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -49,7 +49,6 @@ obj-$(CONFIG_21285_WATCHDOG) += wdt285.o obj-$(CONFIG_977_WATCHDOG) += wdt977.o obj-$(CONFIG_FTWDT010_WATCHDOG) += ftwdt010_wdt.o obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o -obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o @@ -64,11 +63,11 @@ obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o obj-$(CONFIG_NPCM7XX_WATCHDOG) += npcm_wdt.o obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o -obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o obj-$(CONFIG_IMX_SC_WDT) += imx_sc_wdt.o +obj-$(CONFIG_IMX7ULP_WDT) += imx7ulp_wdt.o obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index cc71861e033a..4ec0906bf12c 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -34,6 +34,7 @@ static const struct aspeed_wdt_config ast2500_config = { static const struct of_device_id aspeed_wdt_of_table[] = { { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config }, { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config }, + { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config }, { }, }; MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); @@ -53,6 +54,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_CTRL_ENABLE BIT(0) #define WDT_TIMEOUT_STATUS 0x10 #define 
WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) +#define WDT_CLEAR_TIMEOUT_STATUS 0x14 +#define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0) /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -165,6 +168,60 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd, return 0; } +/* access_cs0 shows if cs0 is accessible, hence the inverted bit */ +static ssize_t access_cs0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct aspeed_wdt *wdt = dev_get_drvdata(dev); + u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS); + + return sprintf(buf, "%u\n", + !(status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)); +} + +static ssize_t access_cs0_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t size) +{ + struct aspeed_wdt *wdt = dev_get_drvdata(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + if (val) + writel(WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION, + wdt->base + WDT_CLEAR_TIMEOUT_STATUS); + + return size; +} + +/* + * This attribute exists only if the system has booted from the alternate + * flash with 'alt-boot' option. + * + * At alternate flash the 'access_cs0' sysfs node provides: + * ast2400: a way to get access to the primary SPI flash chip at CS0 + * after booting from the alternate chip at CS1. + * ast2500: a way to restore the normal address mapping from + * (CS0->CS1, CS1->CS0) to (CS0->CS0, CS1->CS1). + * + * Clearing the boot code selection and timeout counter also resets to the + * initial state the chip select line mapping. When the SoC is in normal + * mapping state (i.e. booted from CS0), clearing those bits does nothing for + * both versions of the SoC. For alternate boot mode (booted from CS1 due to + * wdt2 expiration) the behavior differs as described above. + * + * This option can be used with wdt2 (watchdog1) only.
+ */ +static DEVICE_ATTR_RW(access_cs0); + +static struct attribute *bswitch_attrs[] = { + &dev_attr_access_cs0.attr, + NULL +}; +ATTRIBUTE_GROUPS(bswitch); + static const struct watchdog_ops aspeed_wdt_ops = { .start = aspeed_wdt_start, .stop = aspeed_wdt_stop, @@ -259,7 +316,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev) set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); } - if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) { + if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) || + (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { u32 reg = readl(wdt->base + WDT_RESET_WIDTH); reg &= config->ext_pulse_width_mask; @@ -306,9 +364,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev) } status = readl(wdt->base + WDT_TIMEOUT_STATUS); - if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) + if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) { wdt->wdd.bootstatus = WDIOF_CARDRESET; + if (of_device_is_compatible(np, "aspeed,ast2400-wdt") || + of_device_is_compatible(np, "aspeed,ast2500-wdt")) + wdt->wdd.groups = bswitch_groups; + } + + dev_set_drvdata(dev, wdt); + return devm_watchdog_register_device(dev, &wdt->wdd); } diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 2e09981fe978..75de664ef4b0 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -302,7 +302,7 @@ static int ath79_wdt_remove(struct platform_device *pdev) return 0; } -static void ath97_wdt_shutdown(struct platform_device *pdev) +static void ath79_wdt_shutdown(struct platform_device *pdev) { ath79_wdt_disable(); } @@ -318,7 +318,7 @@ MODULE_DEVICE_TABLE(of, ath79_wdt_match); static struct platform_driver ath79_wdt_driver = { .probe = ath79_wdt_probe, .remove = ath79_wdt_remove, - .shutdown = ath97_wdt_shutdown, + .shutdown = ath79_wdt_shutdown, .driver = { .name = DRIVER_NAME, .of_match_table = of_match_ptr(ath79_wdt_match), diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index b973b31179df..9393be584e72 100644 
--- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -473,29 +473,6 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } -static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int rval = -ENOIOCTLCMD; - - switch (cmd) { - /* solaris ioctls are specific to this driver */ - case WIOCSTART: - case WIOCSTOP: - case WIOCGSTAT: - mutex_lock(&cpwd_mutex); - rval = cpwd_ioctl(file, cmd, arg); - mutex_unlock(&cpwd_mutex); - break; - - /* everything else is handled by the generic compat layer */ - default: - break; - } - - return rval; -} - static ssize_t cpwd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -520,7 +497,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer, static const struct file_operations cpwd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cpwd_ioctl, - .compat_ioctl = cpwd_compat_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = cpwd_open, .write = cpwd_write, .read = cpwd_read, diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c index 181440b7b4d0..aafc8d98bf9f 100644 --- a/drivers/watchdog/diag288_wdt.c +++ b/drivers/watchdog/diag288_wdt.c @@ -26,13 +26,11 @@ #include #include #include -#include #include #include #include #include #include -#include #define MAX_CMDLEN 240 #define DEFAULT_CMD "SYSTEM RESTART" @@ -70,7 +68,6 @@ MODULE_PARM_DESC(conceal, "Enable the CONCEAL CP option while the watchdog is ac module_param_named(nowayout, nowayout_info, bool, 0444); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default = CONFIG_WATCHDOG_NOWAYOUT)"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); MODULE_ALIAS("vmwatchdog"); static int __diag288(unsigned int func, unsigned int timeout, diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index ff5cf1b48a4d..e46104c2fd94 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ 
-31,8 +31,10 @@ #define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ #define SIO_REG_DEVREV 0x22 /* Device revision */ #define SIO_REG_MANID 0x23 /* Fintek ID (2 bytes) */ +#define SIO_REG_CLOCK_SEL 0x26 /* Clock select */ #define SIO_REG_ROM_ADDR_SEL 0x27 /* ROM address select */ #define SIO_F81866_REG_PORT_SEL 0x27 /* F81866 Multi-Function Register */ +#define SIO_REG_TSI_LEVEL_SEL 0x28 /* TSI Level select */ #define SIO_REG_MFUNCT1 0x29 /* Multi function select 1 */ #define SIO_REG_MFUNCT2 0x2a /* Multi function select 2 */ #define SIO_REG_MFUNCT3 0x2b /* Multi function select 3 */ @@ -49,6 +51,7 @@ #define SIO_F71869A_ID 0x1007 /* Chipset ID */ #define SIO_F71882_ID 0x0541 /* Chipset ID */ #define SIO_F71889_ID 0x0723 /* Chipset ID */ +#define SIO_F81803_ID 0x1210 /* Chipset ID */ #define SIO_F81865_ID 0x0704 /* Chipset ID */ #define SIO_F81866_ID 0x1010 /* Chipset ID */ @@ -108,7 +111,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with" " given initial timeout. Zero (default) disables this feature."); enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg, - f81865, f81866}; + f81803, f81865, f81866}; static const char *f71808e_names[] = { "f71808fg", @@ -118,6 +121,7 @@ static const char *f71808e_names[] = { "f71869", "f71882fg", "f71889fg", + "f81803", "f81865", "f81866", }; @@ -370,6 +374,14 @@ static int watchdog_start(void) superio_inb(watchdog.sioaddr, SIO_REG_MFUNCT3) & 0xcf); break; + case f81803: + /* Enable TSI Level register bank */ + superio_clear_bit(watchdog.sioaddr, SIO_REG_CLOCK_SEL, 3); + /* Set pin 27 to WDTRST# */ + superio_outb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL, 0x5f & + superio_inb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL)); + break; + case f81865: /* Set pin 70 to WDTRST# */ superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 5); @@ -809,6 +821,9 @@ static int __init f71808e_find(int sioaddr) /* Confirmed (by datasheet) not to have a watchdog. 
*/ err = -ENODEV; goto exit; + case SIO_F81803_ID: + watchdog.type = f81803; + break; case SIO_F81865_ID: watchdog.type = f81865; break; diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 32af3974e6bb..8d019a961ccc 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -55,7 +55,7 @@ #define IMX2_WDT_WMCR 0x08 /* Misc Register */ -#define IMX2_WDT_MAX_TIME 128 +#define IMX2_WDT_MAX_TIME 128U #define IMX2_WDT_DEFAULT_TIME 60 /* in seconds */ #define WDOG_SEC_TO_COUNT(s) ((s * 2 - 1) << 8) @@ -180,7 +180,7 @@ static int imx2_wdt_set_timeout(struct watchdog_device *wdog, { unsigned int actual; - actual = min(new_timeout, wdog->max_hw_heartbeat_ms * 1000); + actual = min(new_timeout, IMX2_WDT_MAX_TIME); __imx2_wdt_set_timeout(wdog, actual); wdog->timeout = new_timeout; return 0; diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c new file mode 100644 index 000000000000..5ce51026989a --- /dev/null +++ b/drivers/watchdog/imx7ulp_wdt.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 NXP. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDOG_CS 0x0 +#define WDOG_CS_CMD32EN BIT(13) +#define WDOG_CS_ULK BIT(11) +#define WDOG_CS_RCS BIT(10) +#define WDOG_CS_EN BIT(7) +#define WDOG_CS_UPDATE BIT(5) + +#define WDOG_CNT 0x4 +#define WDOG_TOVAL 0x8 + +#define REFRESH_SEQ0 0xA602 +#define REFRESH_SEQ1 0xB480 +#define REFRESH ((REFRESH_SEQ1 << 16) | REFRESH_SEQ0) + +#define UNLOCK_SEQ0 0xC520 +#define UNLOCK_SEQ1 0xD928 +#define UNLOCK ((UNLOCK_SEQ1 << 16) | UNLOCK_SEQ0) + +#define DEFAULT_TIMEOUT 60 +#define MAX_TIMEOUT 128 +#define WDOG_CLOCK_RATE 1000 + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0000); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +struct imx7ulp_wdt_device { + struct notifier_block restart_handler; + struct watchdog_device wdd; + void __iomem *base; + struct clk *clk; +}; + +static inline void imx7ulp_wdt_enable(void __iomem *base, bool enable) +{ + u32 val = readl(base + WDOG_CS); + + writel(UNLOCK, base + WDOG_CNT); + if (enable) + writel(val | WDOG_CS_EN, base + WDOG_CS); + else + writel(val & ~WDOG_CS_EN, base + WDOG_CS); +} + +static inline bool imx7ulp_wdt_is_enabled(void __iomem *base) +{ + u32 val = readl(base + WDOG_CS); + + return val & WDOG_CS_EN; +} + +static int imx7ulp_wdt_ping(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + writel(REFRESH, wdt->base + WDOG_CNT); + + return 0; +} + +static int imx7ulp_wdt_start(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + imx7ulp_wdt_enable(wdt->base, true); + + return 0; +} + +static int imx7ulp_wdt_stop(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + imx7ulp_wdt_enable(wdt->base, false); + + return 0; +} + +static int imx7ulp_wdt_set_timeout(struct watchdog_device 
*wdog, + unsigned int timeout) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + u32 val = WDOG_CLOCK_RATE * timeout; + + writel(UNLOCK, wdt->base + WDOG_CNT); + writel(val, wdt->base + WDOG_TOVAL); + + wdog->timeout = timeout; + + return 0; +} + +static const struct watchdog_ops imx7ulp_wdt_ops = { + .owner = THIS_MODULE, + .start = imx7ulp_wdt_start, + .stop = imx7ulp_wdt_stop, + .ping = imx7ulp_wdt_ping, + .set_timeout = imx7ulp_wdt_set_timeout, +}; + +static const struct watchdog_info imx7ulp_wdt_info = { + .identity = "i.MX7ULP watchdog timer", + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, +}; + +static inline void imx7ulp_wdt_init(void __iomem *base, unsigned int timeout) +{ + u32 val; + + /* unlock the wdog for reconfiguration */ + writel_relaxed(UNLOCK_SEQ0, base + WDOG_CNT); + writel_relaxed(UNLOCK_SEQ1, base + WDOG_CNT); + + /* set an initial timeout value in TOVAL */ + writel(timeout, base + WDOG_TOVAL); + /* enable 32bit command sequence and reconfigure */ + val = BIT(13) | BIT(8) | BIT(5); + writel(val, base + WDOG_CS); +} + +static void imx7ulp_wdt_action(void *data) +{ + clk_disable_unprepare(data); +} + +static int imx7ulp_wdt_probe(struct platform_device *pdev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt; + struct device *dev = &pdev->dev; + struct watchdog_device *wdog; + int ret; + + imx7ulp_wdt = devm_kzalloc(dev, sizeof(*imx7ulp_wdt), GFP_KERNEL); + if (!imx7ulp_wdt) + return -ENOMEM; + + platform_set_drvdata(pdev, imx7ulp_wdt); + + imx7ulp_wdt->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(imx7ulp_wdt->base)) + return PTR_ERR(imx7ulp_wdt->base); + + imx7ulp_wdt->clk = devm_clk_get(dev, NULL); + if (IS_ERR(imx7ulp_wdt->clk)) { + dev_err(dev, "Failed to get watchdog clock\n"); + return PTR_ERR(imx7ulp_wdt->clk); + } + + ret = clk_prepare_enable(imx7ulp_wdt->clk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, imx7ulp_wdt_action, imx7ulp_wdt->clk); + if (ret) + return 
ret; + + wdog = &imx7ulp_wdt->wdd; + wdog->info = &imx7ulp_wdt_info; + wdog->ops = &imx7ulp_wdt_ops; + wdog->min_timeout = 1; + wdog->max_timeout = MAX_TIMEOUT; + wdog->parent = dev; + wdog->timeout = DEFAULT_TIMEOUT; + + watchdog_init_timeout(wdog, 0, dev); + watchdog_stop_on_reboot(wdog); + watchdog_stop_on_unregister(wdog); + watchdog_set_drvdata(wdog, imx7ulp_wdt); + imx7ulp_wdt_init(imx7ulp_wdt->base, wdog->timeout * WDOG_CLOCK_RATE); + + return devm_watchdog_register_device(dev, wdog); +} + +static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&imx7ulp_wdt->wdd)) + imx7ulp_wdt_stop(&imx7ulp_wdt->wdd); + + clk_disable_unprepare(imx7ulp_wdt->clk); + + return 0; +} + +static int __maybe_unused imx7ulp_wdt_resume(struct device *dev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev); + u32 timeout = imx7ulp_wdt->wdd.timeout * WDOG_CLOCK_RATE; + int ret; + + ret = clk_prepare_enable(imx7ulp_wdt->clk); + if (ret) + return ret; + + if (imx7ulp_wdt_is_enabled(imx7ulp_wdt->base)) + imx7ulp_wdt_init(imx7ulp_wdt->base, timeout); + + if (watchdog_active(&imx7ulp_wdt->wdd)) + imx7ulp_wdt_start(&imx7ulp_wdt->wdd); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(imx7ulp_wdt_pm_ops, imx7ulp_wdt_suspend, + imx7ulp_wdt_resume); + +static const struct of_device_id imx7ulp_wdt_dt_ids[] = { + { .compatible = "fsl,imx7ulp-wdt", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx7ulp_wdt_dt_ids); + +static struct platform_driver imx7ulp_wdt_driver = { + .probe = imx7ulp_wdt_probe, + .driver = { + .name = "imx7ulp-wdt", + .pm = &imx7ulp_wdt_pm_ops, + .of_match_table = imx7ulp_wdt_dt_ids, + }, +}; +module_platform_driver(imx7ulp_wdt_driver); + +MODULE_AUTHOR("Anson Huang "); +MODULE_DESCRIPTION("Freescale i.MX7ULP watchdog driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 
78eaaf75a263..7ea5cf54e94a 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -175,12 +175,9 @@ static int imx_sc_wdt_probe(struct platform_device *pdev) watchdog_stop_on_unregister(wdog); ret = devm_watchdog_register_device(dev, wdog); - - if (ret) { - dev_err(dev, "Failed to register watchdog device\n"); - return ret; - } - + if (ret) + return ret; + ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG, SC_IRQ_WDOG, true); diff --git a/drivers/watchdog/jz4740_wdt.c b/drivers/watchdog/jz4740_wdt.c index d4a90916dd38..c6052ae54f32 100644 --- a/drivers/watchdog/jz4740_wdt.c +++ b/drivers/watchdog/jz4740_wdt.c @@ -162,7 +162,6 @@ static int jz4740_wdt_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct jz4740_wdt_drvdata *drvdata; struct watchdog_device *jz4740_wdt; - int ret; drvdata = devm_kzalloc(dev, sizeof(struct jz4740_wdt_drvdata), GFP_KERNEL); diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c deleted file mode 100644 index 1550ce3c5702..000000000000 --- a/drivers/watchdog/ks8695_wdt.c +++ /dev/null @@ -1,319 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Watchdog driver for Kendin/Micrel KS8695. 
- * - * (C) 2007 Andrew Victor - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define KS8695_TMR_OFFSET (0xF0000 + 0xE400) -#define KS8695_TMR_VA (KS8695_IO_VA + KS8695_TMR_OFFSET) - -/* - * Timer registers - */ -#define KS8695_TMCON (0x00) /* Timer Control Register */ -#define KS8695_T0TC (0x08) /* Timer 0 Timeout Count Register */ -#define TMCON_T0EN (1 << 0) /* Timer 0 Enable */ - -/* Timer0 Timeout Counter Register */ -#define T0TC_WATCHDOG (0xff) /* Enable watchdog mode */ - -#define WDT_DEFAULT_TIME 5 /* seconds */ -#define WDT_MAX_TIME 171 /* seconds */ - -static int wdt_time = WDT_DEFAULT_TIME; -static bool nowayout = WATCHDOG_NOWAYOUT; - -module_param(wdt_time, int, 0); -MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default=" - __MODULE_STRING(WDT_DEFAULT_TIME) ")"); - -#ifdef CONFIG_WATCHDOG_NOWAYOUT -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" - __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -#endif - - -static unsigned long ks8695wdt_busy; -static DEFINE_SPINLOCK(ks8695_lock); - -/* ......................................................................... */ - -/* - * Disable the watchdog. - */ -static inline void ks8695_wdt_stop(void) -{ - unsigned long tmcon; - - spin_lock(&ks8695_lock); - /* disable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Enable and reset the watchdog. 
- */ -static inline void ks8695_wdt_start(void) -{ - unsigned long tmcon; - unsigned long tval = wdt_time * KS8695_CLOCK_RATE; - - spin_lock(&ks8695_lock); - /* disable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - - /* program timer0 */ - __raw_writel(tval | T0TC_WATCHDOG, KS8695_TMR_VA + KS8695_T0TC); - - /* re-enable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Reload the watchdog timer. (ie, pat the watchdog) - */ -static inline void ks8695_wdt_reload(void) -{ - unsigned long tmcon; - - spin_lock(&ks8695_lock); - /* disable, then re-enable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Change the watchdog time interval. - */ -static int ks8695_wdt_settimeout(int new_time) -{ - /* - * All counting occurs at KS8695_CLOCK_RATE / 128 = 0.256 Hz - * - * Since WDV is a 16-bit counter, the maximum period is - * 65536 / 0.256 = 256 seconds. - */ - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) - return -EINVAL; - - /* Set new watchdog time. It will be used when - ks8695_wdt_start() is called. */ - wdt_time = new_time; - return 0; -} - -/* ......................................................................... */ - -/* - * Watchdog device is opened, and watchdog starts running. - */ -static int ks8695_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &ks8695wdt_busy)) - return -EBUSY; - - ks8695_wdt_start(); - return stream_open(inode, file); -} - -/* - * Close the watchdog device. - * If CONFIG_WATCHDOG_NOWAYOUT is NOT defined then the watchdog is also - * disabled. 
- */ -static int ks8695_wdt_close(struct inode *inode, struct file *file) -{ - /* Disable the watchdog when file is closed */ - if (!nowayout) - ks8695_wdt_stop(); - clear_bit(0, &ks8695wdt_busy); - return 0; -} - -static const struct watchdog_info ks8695_wdt_info = { - .identity = "ks8695 watchdog", - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, -}; - -/* - * Handle commands from user-space. - */ -static long ks8695_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &ks8695_wdt_info, - sizeof(ks8695_wdt_info)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - case WDIOC_SETOPTIONS: - if (get_user(new_value, p)) - return -EFAULT; - if (new_value & WDIOS_DISABLECARD) - ks8695_wdt_stop(); - if (new_value & WDIOS_ENABLECARD) - ks8695_wdt_start(); - return 0; - case WDIOC_KEEPALIVE: - ks8695_wdt_reload(); /* pat the watchdog */ - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - if (ks8695_wdt_settimeout(new_value)) - return -EINVAL; - /* Enable new time value */ - ks8695_wdt_start(); - /* Return current value */ - return put_user(wdt_time, p); - case WDIOC_GETTIMEOUT: - return put_user(wdt_time, p); - default: - return -ENOTTY; - } -} - -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t ks8695_wdt_write(struct file *file, const char *data, - size_t len, loff_t *ppos) -{ - ks8695_wdt_reload(); /* pat the watchdog */ - return len; -} - -/* ......................................................................... 
*/ - -static const struct file_operations ks8695wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = ks8695_wdt_ioctl, - .open = ks8695_wdt_open, - .release = ks8695_wdt_close, - .write = ks8695_wdt_write, -}; - -static struct miscdevice ks8695wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ks8695wdt_fops, -}; - -static int ks8695wdt_probe(struct platform_device *pdev) -{ - int res; - - if (ks8695wdt_miscdev.parent) - return -EBUSY; - ks8695wdt_miscdev.parent = &pdev->dev; - - res = misc_register(&ks8695wdt_miscdev); - if (res) - return res; - - pr_info("KS8695 Watchdog Timer enabled (%d seconds%s)\n", - wdt_time, nowayout ? ", nowayout" : ""); - return 0; -} - -static int ks8695wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&ks8695wdt_miscdev); - ks8695wdt_miscdev.parent = NULL; - - return 0; -} - -static void ks8695wdt_shutdown(struct platform_device *pdev) -{ - ks8695_wdt_stop(); -} - -#ifdef CONFIG_PM - -static int ks8695wdt_suspend(struct platform_device *pdev, pm_message_t message) -{ - ks8695_wdt_stop(); - return 0; -} - -static int ks8695wdt_resume(struct platform_device *pdev) -{ - if (ks8695wdt_busy) - ks8695_wdt_start(); - return 0; -} - -#else -#define ks8695wdt_suspend NULL -#define ks8695wdt_resume NULL -#endif - -static struct platform_driver ks8695wdt_driver = { - .probe = ks8695wdt_probe, - .remove = ks8695wdt_remove, - .shutdown = ks8695wdt_shutdown, - .suspend = ks8695wdt_suspend, - .resume = ks8695wdt_resume, - .driver = { - .name = "ks8695_wdt", - }, -}; - -static int __init ks8695_wdt_init(void) -{ - /* Check that the heartbeat value is within range; - if not reset to the default */ - if (ks8695_wdt_settimeout(wdt_time)) { - ks8695_wdt_settimeout(WDT_DEFAULT_TIME); - pr_info("ks8695_wdt: wdt_time value must be 1 <= wdt_time <= %i" - ", using %d\n", wdt_time, WDT_MAX_TIME); - } - return platform_driver_register(&ks8695wdt_driver); -} - -static void __exit 
ks8695_wdt_exit(void) -{ - platform_driver_unregister(&ks8695wdt_driver); -} - -module_init(ks8695_wdt_init); -module_exit(ks8695_wdt_exit); - -MODULE_AUTHOR("Andrew Victor"); -MODULE_DESCRIPTION("Watchdog driver for KS8695"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:ks8695_wdt"); diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c deleted file mode 100644 index db124cebe838..000000000000 --- a/drivers/watchdog/nuc900_wdt.c +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2009 Nuvoton technology corporation. - * - * Wan ZongShun - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define REG_WTCR 0x1c -#define WTCLK (0x01 << 10) -#define WTE (0x01 << 7) /*wdt enable*/ -#define WTIS (0x03 << 4) -#define WTIF (0x01 << 3) -#define WTRF (0x01 << 2) -#define WTRE (0x01 << 1) -#define WTR (0x01 << 0) -/* - * The watchdog time interval can be calculated via following formula: - * WTIS real time interval (formula) - * 0x00 ((2^ 14 ) * ((external crystal freq) / 256))seconds - * 0x01 ((2^ 16 ) * ((external crystal freq) / 256))seconds - * 0x02 ((2^ 18 ) * ((external crystal freq) / 256))seconds - * 0x03 ((2^ 20 ) * ((external crystal freq) / 256))seconds - * - * The external crystal freq is 15Mhz in the nuc900 evaluation board. - * So 0x00 = +-0.28 seconds, 0x01 = +-1.12 seconds, 0x02 = +-4.48 seconds, - * 0x03 = +- 16.92 seconds.. - */ -#define WDT_HW_TIMEOUT 0x02 -#define WDT_TIMEOUT (HZ/2) -#define WDT_HEARTBEAT 15 - -static int heartbeat = WDT_HEARTBEAT; -module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. 
" - "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")"); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " - "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); - -struct nuc900_wdt { - struct clk *wdt_clock; - struct platform_device *pdev; - void __iomem *wdt_base; - char expect_close; - struct timer_list timer; - spinlock_t wdt_lock; - unsigned long next_heartbeat; -}; - -static unsigned long nuc900wdt_busy; -static struct nuc900_wdt *nuc900_wdt; - -static inline void nuc900_wdt_keepalive(void) -{ - unsigned int val; - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val |= (WTR | WTIF); - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); -} - -static inline void nuc900_wdt_start(void) -{ - unsigned int val; - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val |= (WTRE | WTE | WTR | WTCLK | WTIF); - val &= ~WTIS; - val |= (WDT_HW_TIMEOUT << 0x04); - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); - - nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ; - mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT); -} - -static inline void nuc900_wdt_stop(void) -{ - unsigned int val; - - del_timer(&nuc900_wdt->timer); - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val &= ~WTE; - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); -} - -static inline void nuc900_wdt_ping(void) -{ - nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ; -} - -static int nuc900_wdt_open(struct inode *inode, struct file *file) -{ - - if (test_and_set_bit(0, &nuc900wdt_busy)) - return -EBUSY; - - nuc900_wdt_start(); - - return stream_open(inode, file); -} - -static int nuc900_wdt_close(struct inode *inode, struct file *file) -{ - 
if (nuc900_wdt->expect_close == 42) - nuc900_wdt_stop(); - else { - dev_crit(&nuc900_wdt->pdev->dev, - "Unexpected close, not stopping watchdog!\n"); - nuc900_wdt_ping(); - } - - nuc900_wdt->expect_close = 0; - clear_bit(0, &nuc900wdt_busy); - return 0; -} - -static const struct watchdog_info nuc900_wdt_info = { - .identity = "nuc900 watchdog", - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE, -}; - -static long nuc900_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &nuc900_wdt_info, - sizeof(nuc900_wdt_info)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_KEEPALIVE: - nuc900_wdt_ping(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - heartbeat = new_value; - nuc900_wdt_ping(); - - return put_user(new_value, p); - case WDIOC_GETTIMEOUT: - return put_user(heartbeat, p); - default: - return -ENOTTY; - } -} - -static ssize_t nuc900_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (!len) - return 0; - - /* Scan for magic character */ - if (!nowayout) { - size_t i; - - nuc900_wdt->expect_close = 0; - - for (i = 0; i < len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') { - nuc900_wdt->expect_close = 42; - break; - } - } - } - - nuc900_wdt_ping(); - return len; -} - -static void nuc900_wdt_timer_ping(struct timer_list *unused) -{ - if (time_before(jiffies, nuc900_wdt->next_heartbeat)) { - nuc900_wdt_keepalive(); - mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT); - } else - dev_warn(&nuc900_wdt->pdev->dev, "Will reset the machine !\n"); -} - -static const struct file_operations nuc900wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = nuc900_wdt_ioctl, - .open = 
nuc900_wdt_open, - .release = nuc900_wdt_close, - .write = nuc900_wdt_write, -}; - -static struct miscdevice nuc900wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &nuc900wdt_fops, -}; - -static int nuc900wdt_probe(struct platform_device *pdev) -{ - int ret = 0; - - nuc900_wdt = devm_kzalloc(&pdev->dev, sizeof(*nuc900_wdt), - GFP_KERNEL); - if (!nuc900_wdt) - return -ENOMEM; - - nuc900_wdt->pdev = pdev; - - spin_lock_init(&nuc900_wdt->wdt_lock); - - nuc900_wdt->wdt_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(nuc900_wdt->wdt_base)) - return PTR_ERR(nuc900_wdt->wdt_base); - - nuc900_wdt->wdt_clock = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(nuc900_wdt->wdt_clock)) { - dev_err(&pdev->dev, "failed to find watchdog clock source\n"); - return PTR_ERR(nuc900_wdt->wdt_clock); - } - - clk_enable(nuc900_wdt->wdt_clock); - - timer_setup(&nuc900_wdt->timer, nuc900_wdt_timer_ping, 0); - - ret = misc_register(&nuc900wdt_miscdev); - if (ret) { - dev_err(&pdev->dev, "err register miscdev on minor=%d (%d)\n", - WATCHDOG_MINOR, ret); - goto err_clk; - } - - return 0; - -err_clk: - clk_disable(nuc900_wdt->wdt_clock); - return ret; -} - -static int nuc900wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&nuc900wdt_miscdev); - - clk_disable(nuc900_wdt->wdt_clock); - - return 0; -} - -static struct platform_driver nuc900wdt_driver = { - .probe = nuc900wdt_probe, - .remove = nuc900wdt_remove, - .driver = { - .name = "nuc900-wdt", - }, -}; - -module_platform_driver(nuc900wdt_driver); - -MODULE_AUTHOR("Wan ZongShun "); -MODULE_DESCRIPTION("Watchdog driver for NUC900"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:nuc900-wdt"); diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index cdb0d174c5e2..1cccf8eb1c5d 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -35,7 +35,15 @@ * Watchdog timer block registers. 
*/ #define TIMER_CTRL 0x0000 -#define TIMER_A370_STATUS 0x04 +#define TIMER1_FIXED_ENABLE_BIT BIT(12) +#define WDT_AXP_FIXED_ENABLE_BIT BIT(10) +#define TIMER1_ENABLE_BIT BIT(2) + +#define TIMER_A370_STATUS 0x0004 +#define WDT_A370_EXPIRED BIT(31) +#define TIMER1_STATUS_BIT BIT(8) + +#define TIMER1_VAL_OFF 0x001c #define WDT_MAX_CYCLE_COUNT 0xffffffff @@ -43,9 +51,6 @@ #define WDT_A370_RATIO_SHIFT 5 #define WDT_A370_RATIO (1 << WDT_A370_RATIO_SHIFT) -#define WDT_AXP_FIXED_ENABLE_BIT BIT(10) -#define WDT_A370_EXPIRED BIT(31) - static bool nowayout = WATCHDOG_NOWAYOUT; static int heartbeat = -1; /* module parameter (seconds) */ @@ -158,6 +163,7 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, struct orion_watchdog *dev) { int ret; + u32 val; dev->clk = of_clk_get_by_name(pdev->dev.of_node, "fixed"); if (IS_ERR(dev->clk)) @@ -168,10 +174,9 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, return ret; } - /* Enable the fixed watchdog clock input */ - atomic_io_modify(dev->reg + TIMER_CTRL, - WDT_AXP_FIXED_ENABLE_BIT, - WDT_AXP_FIXED_ENABLE_BIT); + /* Fix the wdt and timer1 clock frequency to 25MHz */ + val = WDT_AXP_FIXED_ENABLE_BIT | TIMER1_FIXED_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, val, val); dev->clk_rate = clk_get_rate(dev->clk); return 0; @@ -183,6 +188,10 @@ static int orion_wdt_ping(struct watchdog_device *wdt_dev) /* Reload watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + TIMER1_VAL_OFF); + return 0; } @@ -194,13 +203,18 @@ static int armada375_start(struct watchdog_device *wdt_dev) /* Set watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + 
TIMER1_VAL_OFF); /* Clear the watchdog expiration bit */ atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0); /* Enable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, - dev->data->wdt_enable_bit); + reg = dev->data->wdt_enable_bit; + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + reg |= TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, reg, reg); /* Enable reset on watchdog */ reg = readl(dev->rstout); @@ -277,7 +291,7 @@ static int orion_stop(struct watchdog_device *wdt_dev) static int armada375_stop(struct watchdog_device *wdt_dev) { struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev); - u32 reg; + u32 reg, mask; /* Disable reset on watchdog */ atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, @@ -287,7 +301,10 @@ static int armada375_stop(struct watchdog_device *wdt_dev) writel(reg, dev->rstout); /* Disable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0); + mask = dev->data->wdt_enable_bit; + if (wdt_dev->info->options & WDIOF_PRETIMEOUT) + mask |= TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, mask, 0); return 0; } @@ -349,7 +366,7 @@ static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev) return readl(dev->reg + dev->data->wdt_counter_offset) / dev->clk_rate; } -static const struct watchdog_info orion_wdt_info = { +static struct watchdog_info orion_wdt_info = { .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, .identity = "Orion Watchdog", }; @@ -368,6 +385,16 @@ static irqreturn_t orion_wdt_irq(int irq, void *devid) return IRQ_HANDLED; } +static irqreturn_t orion_wdt_pre_irq(int irq, void *devid) +{ + struct orion_watchdog *dev = devid; + + atomic_io_modify(dev->reg + TIMER_A370_STATUS, + TIMER1_STATUS_BIT, 0); + watchdog_notify_pretimeout(&dev->wdt); + return IRQ_HANDLED; +} + /* * The original devicetree binding for this driver specified only * one memory resource, so in 
order to keep DT backwards compatibility @@ -589,6 +616,19 @@ static int orion_wdt_probe(struct platform_device *pdev) } } + /* Optional 2nd interrupt for pretimeout */ + irq = platform_get_irq(pdev, 1); + if (irq > 0) { + orion_wdt_info.options |= WDIOF_PRETIMEOUT; + ret = devm_request_irq(&pdev->dev, irq, orion_wdt_pre_irq, + 0, pdev->name, dev); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + goto disable_clk; + } + } + + watchdog_set_nowayout(&dev->wdt, nowayout); ret = watchdog_register_device(&dev->wdt); if (ret) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 7be7f87be28f..a494543d3ae1 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2014, The Linux Foundation. All rights reserved. */ +#include #include #include +#include #include #include #include @@ -19,6 +21,9 @@ enum wdt_reg { WDT_BITE_TIME, }; +#define QCOM_WDT_ENABLE BIT(0) +#define QCOM_WDT_ENABLE_IRQ BIT(1) + static const u32 reg_offset_data_apcs_tmr[] = { [WDT_RST] = 0x38, [WDT_EN] = 0x40, @@ -37,7 +42,6 @@ static const u32 reg_offset_data_kpss[] = { struct qcom_wdt { struct watchdog_device wdd; - struct clk *clk; unsigned long rate; void __iomem *base; const u32 *layout; @@ -54,15 +58,35 @@ struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd) return container_of(wdd, struct qcom_wdt, wdd); } +static inline int qcom_get_enable(struct watchdog_device *wdd) +{ + int enable = QCOM_WDT_ENABLE; + + if (wdd->pretimeout) + enable |= QCOM_WDT_ENABLE_IRQ; + + return enable; +} + +static irqreturn_t qcom_wdt_isr(int irq, void *arg) +{ + struct watchdog_device *wdd = arg; + + watchdog_notify_pretimeout(wdd); + + return IRQ_HANDLED; +} + static int qcom_wdt_start(struct watchdog_device *wdd) { struct qcom_wdt *wdt = to_qcom_wdt(wdd); + unsigned int bark = wdd->timeout - wdd->pretimeout; writel(0, wdt_addr(wdt, WDT_EN)); writel(1, wdt_addr(wdt, WDT_RST)); - 
writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME)); + writel(bark * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME)); writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BITE_TIME)); - writel(1, wdt_addr(wdt, WDT_EN)); + writel(qcom_get_enable(wdd), wdt_addr(wdt, WDT_EN)); return 0; } @@ -89,6 +113,13 @@ static int qcom_wdt_set_timeout(struct watchdog_device *wdd, return qcom_wdt_start(wdd); } +static int qcom_wdt_set_pretimeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->pretimeout = timeout; + return qcom_wdt_start(wdd); +} + static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, void *data) { @@ -105,7 +136,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, writel(1, wdt_addr(wdt, WDT_RST)); writel(timeout, wdt_addr(wdt, WDT_BARK_TIME)); writel(timeout, wdt_addr(wdt, WDT_BITE_TIME)); - writel(1, wdt_addr(wdt, WDT_EN)); + writel(QCOM_WDT_ENABLE, wdt_addr(wdt, WDT_EN)); /* * Actually make sure the above sequence hits hardware before sleeping. 
@@ -121,6 +152,7 @@ static const struct watchdog_ops qcom_wdt_ops = { .stop = qcom_wdt_stop, .ping = qcom_wdt_ping, .set_timeout = qcom_wdt_set_timeout, + .set_pretimeout = qcom_wdt_set_pretimeout, .restart = qcom_wdt_restart, .owner = THIS_MODULE, }; @@ -133,6 +165,15 @@ static const struct watchdog_info qcom_wdt_info = { .identity = KBUILD_MODNAME, }; +static const struct watchdog_info qcom_wdt_pt_info = { + .options = WDIOF_KEEPALIVEPING + | WDIOF_MAGICCLOSE + | WDIOF_SETTIMEOUT + | WDIOF_PRETIMEOUT + | WDIOF_CARDRESET, + .identity = KBUILD_MODNAME, +}; + static void qcom_clk_disable_unprepare(void *data) { clk_disable_unprepare(data); @@ -146,7 +187,8 @@ static int qcom_wdt_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; const u32 *regs; u32 percpu_offset; - int ret; + int irq, ret; + struct clk *clk; regs = of_device_get_match_data(dev); if (!regs) { @@ -173,19 +215,18 @@ static int qcom_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - wdt->clk = devm_clk_get(dev, NULL); - if (IS_ERR(wdt->clk)) { + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) { dev_err(dev, "failed to get input clock\n"); - return PTR_ERR(wdt->clk); + return PTR_ERR(clk); } - ret = clk_prepare_enable(wdt->clk); + ret = clk_prepare_enable(clk); if (ret) { dev_err(dev, "failed to setup clock\n"); return ret; } - ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, - wdt->clk); + ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, clk); if (ret) return ret; @@ -197,14 +238,31 @@ static int qcom_wdt_probe(struct platform_device *pdev) * that it would bite before a second elapses it's usefulness is * limited. Bail if this is the case. 
*/ - wdt->rate = clk_get_rate(wdt->clk); + wdt->rate = clk_get_rate(clk); if (wdt->rate == 0 || wdt->rate > 0x10000000U) { dev_err(dev, "invalid clock rate\n"); return -EINVAL; } - wdt->wdd.info = &qcom_wdt_info; + /* check if there is pretimeout support */ + irq = platform_get_irq(pdev, 0); + if (irq > 0) { + ret = devm_request_irq(dev, irq, qcom_wdt_isr, + IRQF_TRIGGER_RISING, + "wdt_bark", &wdt->wdd); + if (ret) + return ret; + + wdt->wdd.info = &qcom_wdt_pt_info; + wdt->wdd.pretimeout = 1; + } else { + if (irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + + wdt->wdd.info = &qcom_wdt_info; + } + wdt->wdd.ops = &qcom_wdt_ops; wdt->wdd.min_timeout = 1; wdt->wdd.max_timeout = 0x10000000U / wdt->rate; diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index edba4e278685..0bb17b046140 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -284,10 +284,8 @@ static int sprd_wdt_probe(struct platform_device *pdev) } wdt->irq = platform_get_irq(pdev, 0); - if (wdt->irq < 0) { - dev_err(dev, "failed to get IRQ resource\n"); + if (wdt->irq < 0) return wdt->irq; - } ret = devm_request_irq(dev, wdt->irq, sprd_wdt_isr, IRQF_NO_SUSPEND, "sprd-wdt", (void *)wdt); diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index dec660c509b3..4a363a8b2d20 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -21,8 +21,11 @@ #include #include +#include + #define ZIIRAVE_TIMEOUT_MIN 3 #define ZIIRAVE_TIMEOUT_MAX 255 +#define ZIIRAVE_TIMEOUT_DEFAULT 30 #define ZIIRAVE_PING_VALUE 0x0 @@ -48,16 +51,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_FIRM_PKT_TOTAL_SIZE 20 #define ZIIRAVE_FIRM_PKT_DATA_SIZE 16 -#define ZIIRAVE_FIRM_FLASH_MEMORY_START 0x1600 -#define ZIIRAVE_FIRM_FLASH_MEMORY_END 0x2bbf +#define ZIIRAVE_FIRM_FLASH_MEMORY_START (2 * 0x1600) +#define ZIIRAVE_FIRM_FLASH_MEMORY_END (2 * 0x2bbf) +#define ZIIRAVE_FIRM_PAGE_SIZE 128 /* Received 
and ready for next Download packet. */ #define ZIIRAVE_FIRM_DOWNLOAD_ACK 1 -/* Currently writing to flash. Retry Download status in a moment! */ -#define ZIIRAVE_FIRM_DOWNLOAD_BUSY 2 - -/* Wait for ACK timeout in ms */ -#define ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT 50 /* Firmware commands */ #define ZIIRAVE_CMD_DOWNLOAD_START 0x10 @@ -68,6 +67,12 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER 0x0c #define ZIIRAVE_CMD_DOWNLOAD_PACKET 0x0e +#define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC 1 +#define ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC 1 + +#define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u" +#define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u" + struct ziirave_wdt_rev { unsigned char major; unsigned char minor; @@ -165,67 +170,37 @@ static unsigned int ziirave_wdt_get_timeleft(struct watchdog_device *wdd) return ret; } -static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd) +static int ziirave_firm_read_ack(struct watchdog_device *wdd) { struct i2c_client *client = to_i2c_client(wdd->parent); int ret; - unsigned long timeout; - timeout = jiffies + msecs_to_jiffies(ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT); - do { - if (time_after(jiffies, timeout)) - return -ETIMEDOUT; - - usleep_range(5000, 10000); - - ret = i2c_smbus_read_byte(client); - if (ret < 0) { - dev_err(&client->dev, "Failed to read byte\n"); - return ret; - } - } while (ret == ZIIRAVE_FIRM_DOWNLOAD_BUSY); + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, "Failed to read status byte\n"); + return ret; + } return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 
0 : -EIO; } -static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr) +static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u32 addr) { struct i2c_client *client = to_i2c_client(wdd->parent); + const u16 addr16 = (u16)addr / 2; u8 address[2]; - address[0] = addr & 0xff; - address[1] = (addr >> 8) & 0xff; + put_unaligned_le16(addr16, address); return i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR, - ARRAY_SIZE(address), address); + sizeof(address), address); } -static int ziirave_firm_write_block_data(struct watchdog_device *wdd, - u8 command, u8 length, const u8 *data, - bool wait_for_ack) +static bool ziirave_firm_addr_readonly(u32 addr) { - struct i2c_client *client = to_i2c_client(wdd->parent); - int ret; - - ret = i2c_smbus_write_block_data(client, command, length, data); - if (ret) { - dev_err(&client->dev, - "Failed to send command 0x%02x: %d\n", command, ret); - return ret; - } - - if (wait_for_ack) - ret = ziirave_firm_wait_for_ack(wdd); - - return ret; -} - -static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, - u8 byte, bool wait_for_ack) -{ - return ziirave_firm_write_block_data(wdd, command, 1, &byte, - wait_for_ack); + return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || + addr > ZIIRAVE_FIRM_FLASH_MEMORY_END; } /* @@ -240,35 +215,53 @@ static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, * Data0 .. Data15: Array of 16 bytes of data. * Checksum: Checksum byte to verify data integrity. 
*/ -static int ziirave_firm_write_pkt(struct watchdog_device *wdd, - const struct ihex_binrec *rec) +static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, + u32 addr, const u8 *data, u8 len) { + const u16 addr16 = (u16)addr / 2; struct i2c_client *client = to_i2c_client(wdd->parent); u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE]; int ret; - u16 addr; - memset(packet, 0, ARRAY_SIZE(packet)); + /* Check max data size */ + if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) { + dev_err(&client->dev, "Firmware packet too long (%d)\n", + len); + return -EMSGSIZE; + } + + /* + * Ignore packets that are targeting program memory outside of + * app partition, since they will be ignored by the + * bootloader. At the same time, we need to make sure we'll + * allow zero length packet that will be sent as the last step + * of firmware update + */ + if (len && ziirave_firm_addr_readonly(addr)) + return 0; /* Packet length */ - packet[0] = (u8)be16_to_cpu(rec->len); + packet[0] = len; /* Packet address */ - addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; - packet[1] = addr & 0xff; - packet[2] = (addr & 0xff00) >> 8; + put_unaligned_le16(addr16, packet + 1); - /* Packet data */ - if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) - return -EMSGSIZE; - memcpy(packet + 3, rec->data, be16_to_cpu(rec->len)); + memcpy(packet + 3, data, len); + memset(packet + 3 + len, 0, ZIIRAVE_FIRM_PKT_DATA_SIZE - len); /* Packet checksum */ - for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++) + for (i = 0; i < len + 3; i++) checksum += packet[i]; packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum; - ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET, - ARRAY_SIZE(packet), packet, true); + ret = i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_PACKET, + sizeof(packet), packet); + if (ret) { + dev_err(&client->dev, + "Failed to send DOWNLOAD_PACKET: %d\n", ret); + return ret; + } + + ret = ziirave_firm_read_ack(wdd); if (ret) dev_err(&client->dev, "Failed to 
write firmware packet at address 0x%04x: %d\n", @@ -277,6 +270,30 @@ static int ziirave_firm_write_pkt(struct watchdog_device *wdd, return ret; } +static int ziirave_firm_write_pkt(struct watchdog_device *wdd, + u32 addr, const u8 *data, u8 len) +{ + const u8 max_write_len = ZIIRAVE_FIRM_PAGE_SIZE - + (addr - ALIGN_DOWN(addr, ZIIRAVE_FIRM_PAGE_SIZE)); + int ret; + + if (len > max_write_len) { + /* + * If data crossed page boundary we need to split this + * write in two + */ + ret = __ziirave_firm_write_pkt(wdd, addr, data, max_write_len); + if (ret) + return ret; + + addr += max_write_len; + data += max_write_len; + len -= max_write_len; + } + + return __ziirave_firm_write_pkt(wdd, addr, data, len); +} + static int ziirave_firm_verify(struct watchdog_device *wdd, const struct firmware *fw) { @@ -284,16 +301,12 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, const struct ihex_binrec *rec; int i, ret; u8 data[ZIIRAVE_FIRM_PKT_DATA_SIZE]; - u16 addr; for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { - /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) - break; + const u16 len = be16_to_cpu(rec->len); + const u32 addr = be32_to_cpu(rec->addr); - addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; - if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || - addr > ZIIRAVE_FIRM_FLASH_MEMORY_END) + if (ziirave_firm_addr_readonly(addr)) continue; ret = ziirave_firm_set_read_addr(wdd, addr); @@ -304,7 +317,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, return ret; } - for (i = 0; i < ARRAY_SIZE(data); i++) { + for (i = 0; i < len; i++) { ret = i2c_smbus_read_byte_data(client, ZIIRAVE_CMD_DOWNLOAD_READ_BYTE); if (ret < 0) { @@ -315,7 +328,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, data[i] = ret; } - if (memcmp(data, rec->data, be16_to_cpu(rec->len))) { + if (memcmp(data, rec->data, len)) { dev_err(&client->dev, "Firmware mismatch at address 0x%04x\n", addr); return -EINVAL; @@ -329,97 +342,45 @@ static 
int ziirave_firm_upload(struct watchdog_device *wdd, const struct firmware *fw) { struct i2c_client *client = to_i2c_client(wdd->parent); - int ret, words_till_page_break; const struct ihex_binrec *rec; - struct ihex_binrec *rec_new; + int ret; - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1, - false); - if (ret) + ret = i2c_smbus_write_byte_data(client, + ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, + ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC); + if (ret) { + dev_err(&client->dev, "Failed to jump to bootloader\n"); return ret; + } msleep(500); - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true); - if (ret) + ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_START); + if (ret) { + dev_err(&client->dev, "Failed to start download\n"); return ret; + } + + ret = ziirave_firm_read_ack(wdd); + if (ret) { + dev_err(&client->dev, "No ACK for start download\n"); + return ret; + } msleep(500); for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { - /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) - break; - - /* Check max data size */ - if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) { - dev_err(&client->dev, "Firmware packet too long (%d)\n", - be16_to_cpu(rec->len)); - return -EMSGSIZE; - } - - /* Calculate words till page break */ - words_till_page_break = (64 - ((be32_to_cpu(rec->addr) >> 1) & - 0x3f)); - if ((be16_to_cpu(rec->len) >> 1) > words_till_page_break) { - /* - * Data in passes page boundary, so we need to split in - * two blocks of data. Create a packet with the first - * block of data. 
- */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + - (words_till_page_break << 1), - GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - rec_new->len = cpu_to_be16(words_till_page_break << 1); - rec_new->addr = rec->addr; - memcpy(rec_new->data, rec->data, - be16_to_cpu(rec_new->len)); - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); - if (ret) - return ret; - - /* Create a packet with the second block of data */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + - be16_to_cpu(rec->len) - - (words_till_page_break << 1), - GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - /* Remaining bytes */ - rec_new->len = rec->len - - cpu_to_be16(words_till_page_break << 1); - - rec_new->addr = cpu_to_be32(be32_to_cpu(rec->addr) + - (words_till_page_break << 1)); - - memcpy(rec_new->data, - rec->data + (words_till_page_break << 1), - be16_to_cpu(rec_new->len)); - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); - if (ret) - return ret; - } else { - ret = ziirave_firm_write_pkt(wdd, rec); - if (ret) - return ret; - } + ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr), + rec->data, be16_to_cpu(rec->len)); + if (ret) + return ret; } - /* For end of download, the length field will be set to 0 */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + 1, GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); + /* + * Finish firmware download process by sending a zero length + * payload + */ + ret = ziirave_firm_write_pkt(wdd, 0, NULL, 0); if (ret) { dev_err(&client->dev, "Failed to send EMPTY packet: %d\n", ret); return ret; @@ -437,15 +398,22 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, } /* End download operation */ - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false); - if (ret) + ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_END); + if (ret) { + dev_err(&client->dev, + "Failed to end firmware download: %d\n", ret); return ret; + } /* Reset 
the processor */ - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1, - false); - if (ret) + ret = i2c_smbus_write_byte_data(client, + ZIIRAVE_CMD_RESET_PROCESSOR, + ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC); + if (ret) { + dev_err(&client->dev, + "Failed to reset the watchdog: %d\n", ret); return ret; + } msleep(500); @@ -478,7 +446,7 @@ static ssize_t ziirave_wdt_sysfs_show_firm(struct device *dev, if (ret) return ret; - ret = sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major, + ret = sprintf(buf, ZIIRAVE_FW_VERSION_FMT, w_priv->firmware_rev.major, w_priv->firmware_rev.minor); mutex_unlock(&w_priv->sysfs_mutex); @@ -501,7 +469,7 @@ static ssize_t ziirave_wdt_sysfs_show_boot(struct device *dev, if (ret) return ret; - ret = sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major, + ret = sprintf(buf, ZIIRAVE_BL_VERSION_FMT, w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor); mutex_unlock(&w_priv->sysfs_mutex); @@ -568,7 +536,8 @@ static ssize_t ziirave_wdt_sysfs_store_firm(struct device *dev, goto unlock_mutex; } - dev_info(&client->dev, "Firmware updated to version 02.%02u.%02u\n", + dev_info(&client->dev, + "Firmware updated to version " ZIIRAVE_FW_VERSION_FMT "\n", w_priv->firmware_rev.major, w_priv->firmware_rev.minor); /* Restore the watchdog timeout */ @@ -611,7 +580,7 @@ static int ziirave_wdt_init_duration(struct i2c_client *client) &reset_duration); if (ret) { dev_info(&client->dev, - "Unable to set reset pulse duration, using default\n"); + "No reset pulse duration specified, using default\n"); return 0; } } @@ -633,7 +602,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, struct ziirave_wdt_data *w_priv; int val; - if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) return -ENODEV; w_priv = devm_kzalloc(&client->dev, sizeof(*w_priv), GFP_KERNEL); @@ -658,57 
+630,80 @@ static int ziirave_wdt_probe(struct i2c_client *client, */ if (w_priv->wdd.timeout == 0) { val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_TIMEOUT); - if (val < 0) + if (val < 0) { + dev_err(&client->dev, "Failed to read timeout\n"); return val; + } - if (val < ZIIRAVE_TIMEOUT_MIN) - return -ENODEV; + if (val > ZIIRAVE_TIMEOUT_MAX || + val < ZIIRAVE_TIMEOUT_MIN) + val = ZIIRAVE_TIMEOUT_DEFAULT; w_priv->wdd.timeout = val; - } else { - ret = ziirave_wdt_set_timeout(&w_priv->wdd, - w_priv->wdd.timeout); - if (ret) - return ret; - - dev_info(&client->dev, "Timeout set to %ds.", - w_priv->wdd.timeout); } + ret = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout); + if (ret) { + dev_err(&client->dev, "Failed to set timeout\n"); + return ret; + } + + dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout); + watchdog_set_nowayout(&w_priv->wdd, nowayout); i2c_set_clientdata(client, w_priv); /* If in unconfigured state, set to stopped */ val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_STATE); - if (val < 0) + if (val < 0) { + dev_err(&client->dev, "Failed to read state\n"); return val; + } if (val == ZIIRAVE_STATE_INITIAL) ziirave_wdt_stop(&w_priv->wdd); ret = ziirave_wdt_init_duration(client); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to init duration\n"); return ret; + } ret = ziirave_wdt_revision(client, &w_priv->firmware_rev, ZIIRAVE_WDT_FIRM_VER_MAJOR); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to read firmware version\n"); return ret; + } + + dev_info(&client->dev, + "Firmware version: " ZIIRAVE_FW_VERSION_FMT "\n", + w_priv->firmware_rev.major, w_priv->firmware_rev.minor); ret = ziirave_wdt_revision(client, &w_priv->bootloader_rev, ZIIRAVE_WDT_BOOT_VER_MAJOR); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to read bootloader version\n"); return ret; + } + + dev_info(&client->dev, + "Bootloader version: " ZIIRAVE_BL_VERSION_FMT "\n", + w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor); 
w_priv->reset_reason = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_RESET_REASON); - if (w_priv->reset_reason < 0) + if (w_priv->reset_reason < 0) { + dev_err(&client->dev, "Failed to read reset reason\n"); return w_priv->reset_reason; + } if (w_priv->reset_reason >= ARRAY_SIZE(ziirave_reasons) || - !ziirave_reasons[w_priv->reset_reason]) + !ziirave_reasons[w_priv->reset_reason]) { + dev_err(&client->dev, "Invalid reset reason\n"); return -ENODEV; + } ret = watchdog_register_device(&w_priv->wdd); diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 995e332eee5c..eb2151fb6049 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -51,6 +51,8 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) if (!v9ses->cachetag) { if (v9fs_random_cachetag(v9ses) < 0) { v9ses->fscache = NULL; + kfree(v9ses->cachetag); + v9ses->cachetag = NULL; return; } } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 4cc966a31cb3..fe7f0bd2048e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -513,6 +513,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); if (!v9inode->writeback_fid && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) { /* * clone a fid and add it to writeback_fid @@ -614,6 +615,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start - 1), }; + if (!(vma->vm_flags & VM_SHARED)) + return; p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ca243e658d71..74df32be4c6a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -58,7 +58,7 @@ static int v9fs_set_super(struct super_block *s, void *data) static int v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, - int flags, void *data) + int flags) { int ret; @@ -132,7 +132,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(sb); goto clunk_fid; } - retval = 
v9fs_fill_super(sb, v9ses, flags, data); + retval = v9fs_fill_super(sb, v9ses, flags); if (retval) goto release_sb; diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 24fc5a5c1b97..0635cba19971 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -27,3 +27,14 @@ config CUSE If you want to develop or use a userspace character device based on CUSE, answer Y or M. + +config VIRTIO_FS + tristate "Virtio Filesystem" + depends on FUSE_FS + select VIRTIO + help + The Virtio Filesystem allows guests to mount file systems from the + host. + + If you want to share files between guests or with the host, answer Y + or M. diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 9485019c2a14..6419a2b3510d 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o +obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc89cb40e874..956aeaf961ae 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -353,6 +353,10 @@ struct fuse_req { /** Used to wake up the task waiting for completion of request*/ wait_queue_head_t waitq; +#if IS_ENABLED(CONFIG_VIRTIO_FS) + /** virtio-fs's physically contiguous buffer for in and out args */ + void *argbuf; +#endif }; struct fuse_iqueue; @@ -383,6 +387,11 @@ struct fuse_iqueue_ops { */ void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) __releases(fiq->lock); + + /** + * Clean up when fuse_iqueue is destroyed + */ + void (*release)(struct fuse_iqueue *fiq); }; /** /dev/fuse input queue operations */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 51cb471f4dc3..e040e2a2b621 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -630,6 +630,10 @@ EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { + struct fuse_iqueue *fiq = &fc->iq; + + if (fiq->ops->release) + fiq->ops->release(fiq); 
put_pid_ns(fc->pid_ns); put_user_ns(fc->user_ns); fc->release(fc); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c new file mode 100644 index 000000000000..6af3f131e468 --- /dev/null +++ b/fs/fuse/virtio_fs.c @@ -0,0 +1,1195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio-fs: Virtio Filesystem + * Copyright (C) 2018 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "fuse_i.h" + +/* List of virtio-fs device instances and a lock for the list. Also provides + * mutual exclusion in device removal and mounting path + */ +static DEFINE_MUTEX(virtio_fs_mutex); +static LIST_HEAD(virtio_fs_instances); + +enum { + VQ_HIPRIO, + VQ_REQUEST +}; + +/* Per-virtqueue state */ +struct virtio_fs_vq { + spinlock_t lock; + struct virtqueue *vq; /* protected by ->lock */ + struct work_struct done_work; + struct list_head queued_reqs; + struct delayed_work dispatch_work; + struct fuse_dev *fud; + bool connected; + long in_flight; + char name[24]; +} ____cacheline_aligned_in_smp; + +/* A virtio-fs device instance */ +struct virtio_fs { + struct kref refcount; + struct list_head list; /* on virtio_fs_instances */ + char *tag; + struct virtio_fs_vq *vqs; + unsigned int nvqs; /* number of virtqueues */ + unsigned int num_request_queues; /* number of request queues */ +}; + +struct virtio_fs_forget { + struct fuse_in_header ih; + struct fuse_forget_in arg; + /* This request can be temporarily queued on virt queue */ + struct list_head list; +}; + +static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) +{ + struct virtio_fs *fs = vq->vdev->priv; + + return &fs->vqs[vq->index]; +} + +static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) +{ + return &vq_to_fsvq(vq)->fud->pq; +} + +static void release_virtio_fs_obj(struct kref *ref) +{ + struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); + + kfree(vfs->vqs); + kfree(vfs); +} + +/* Make sure virtio_fs_mutex is held */ +static void 
virtio_fs_put(struct virtio_fs *fs) +{ + kref_put(&fs->refcount, release_virtio_fs_obj); +} + +static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) +{ + struct virtio_fs *vfs = fiq->priv; + + mutex_lock(&virtio_fs_mutex); + virtio_fs_put(vfs); + mutex_unlock(&virtio_fs_mutex); +} + +static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight < 0); + + /* Wait for in flight requests to finish.*/ + while (1) { + spin_lock(&fsvq->lock); + if (!fsvq->in_flight) { + spin_unlock(&fsvq->lock); + break; + } + spin_unlock(&fsvq->lock); + /* TODO use completion instead of timeout */ + usleep_range(1000, 2000); + } + + flush_work(&fsvq->done_work); + flush_delayed_work(&fsvq->dispatch_work); +} + +static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) +{ + struct virtio_fs_forget *forget; + + spin_lock(&fsvq->lock); + while (1) { + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) + break; + list_del(&forget->list); + kfree(forget); + } + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_drain_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + if (i == VQ_HIPRIO) + drain_hiprio_queued_reqs(fsvq); + + virtio_fs_drain_queue(fsvq); + } +} + +static void virtio_fs_start_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = true; + spin_unlock(&fsvq->lock); + } +} + +/* Add a new instance to the list or return -EEXIST if tag name exists*/ +static int virtio_fs_add_instance(struct virtio_fs *fs) +{ + struct virtio_fs *fs2; + bool duplicate = false; + + mutex_lock(&virtio_fs_mutex); + + list_for_each_entry(fs2, &virtio_fs_instances, list) { + if (strcmp(fs->tag, fs2->tag) == 0) + duplicate = true; + } + + if (!duplicate) + list_add_tail(&fs->list, &virtio_fs_instances); 
+ + mutex_unlock(&virtio_fs_mutex); + + if (duplicate) + return -EEXIST; + return 0; +} + +/* Return the virtio_fs with a given tag, with an extra reference taken via kref_get(), or NULL if no instance matches */ +static struct virtio_fs *virtio_fs_find_instance(const char *tag) +{ + struct virtio_fs *fs; + + mutex_lock(&virtio_fs_mutex); + + list_for_each_entry(fs, &virtio_fs_instances, list) { + if (strcmp(fs->tag, tag) == 0) { + kref_get(&fs->refcount); + goto found; + } + } + + fs = NULL; /* not found */ + +found: + mutex_unlock(&virtio_fs_mutex); + + return fs; +} + /* Free the fuse_dev of every virtqueue that has one and reset the pointer to NULL */ +static void virtio_fs_free_devs(struct virtio_fs *fs) +{ + unsigned int i; + + for (i = 0; i < fs->nvqs; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + if (!fsvq->fud) + continue; + + fuse_dev_free(fsvq->fud); + fsvq->fud = NULL; + } +} + +/* Read filesystem name from virtio config into fs->tag as a NUL-terminated string. Returns 0, -EINVAL for an empty tag, or -ENOMEM. The buffer is allocated with devm_kmalloc() against &vdev->dev. NOTE(review): this comment used to say the tag must be kfree()d, which does not match a devm allocation -- confirm who owns the buffer and how long fs->tag may be used. */ +static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) +{ + char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; + char *end; + size_t len; + + virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), + &tag_buf, sizeof(tag_buf)); + end = memchr(tag_buf, '\0', sizeof(tag_buf)); + if (end == tag_buf) + return -EINVAL; /* empty tag */ + if (!end) + end = &tag_buf[sizeof(tag_buf)]; + + len = end - tag_buf; + fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); + if (!fs->tag) + return -ENOMEM; + memcpy(fs->tag, tag_buf, len); + fs->tag[len] = '\0'; + return 0; +} + +/* Work function for hiprio completion */ +static void virtio_fs_hiprio_done_work(struct work_struct *work) +{ + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + done_work); + struct virtqueue *vq = fsvq->vq; + + /* Free completed FUSE_FORGET requests and account for them in in_flight */ + spin_lock(&fsvq->lock); + do { + unsigned int len; + void *req; + + virtqueue_disable_cb(vq); + + while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + kfree(req); + fsvq->in_flight--; + } + } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); + 
spin_unlock(&fsvq->lock); +} + /* Dispatch work function that does nothing */ +static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +{ +} + /* Delayed-work function for the hiprio queue: submit the FORGET requests parked on queued_reqs one at a time. Entries are freed without submission while the queue is not connected. On -ENOMEM/-ENOSPC the entry is put back and the work is rescheduled after 1 ms; any other error drops the request. */ +static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) +{ + struct virtio_fs_forget *forget; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + struct virtqueue *vq = fsvq->vq; + struct scatterlist sg; + struct scatterlist *sgs[] = {&sg}; + bool notify; + int ret; + + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) { + spin_unlock(&fsvq->lock); + return; + } + + list_del(&forget->list); + if (!fsvq->connected) { + spin_unlock(&fsvq->lock); + kfree(forget); + continue; + } + + sg_init_one(&sg, forget, sizeof(*forget)); + + /* Enqueue the request as a single device-readable buffer */ + dev_dbg(&vq->vdev->dev, "%s\n", __func__); + ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", + ret); + list_add_tail(&forget->list, + &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + } else { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", + ret); + kfree(forget); + } + spin_unlock(&fsvq->lock); + return; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); + pr_debug("virtio-fs: worker %s dispatched one forget request.\n", + __func__); + } +} + +/* Allocate req->argbuf (GFP_ATOMIC), sized for the non-page in args plus the non-page out args, and copy the in args to its start. Returns 0 or -ENOMEM. */ +static int copy_args_to_argbuf(struct fuse_req *req) +{ + struct fuse_args *args = req->args; + unsigned int offset = 0; + unsigned int num_in; + unsigned int num_out; + unsigned int len; + unsigned int i; + + num_in = args->in_numargs - args->in_pages; + num_out = args->out_numargs - args->out_pages; + len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + + fuse_len_args(num_out, args->out_args); + + req->argbuf = kmalloc(len, GFP_ATOMIC); + if (!req->argbuf) + return -ENOMEM; + + for (i = 0; i < num_in; i++) { + memcpy(req->argbuf + offset, + args->in_args[i].value, + args->in_args[i].size); + offset += args->in_args[i].size; + } + + return 0; +} + +/* Copy the non-page out args out of req->argbuf (they start right after the in-args area), then free req->argbuf. With out_argvar set, the last out arg is clamped to the bytes remaining in the reply. */ +static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) +{ + unsigned int remaining; + unsigned int offset; + unsigned int num_in; + unsigned int num_out; + unsigned int i; + + remaining = req->out.h.len - sizeof(req->out.h); + num_in = args->in_numargs - args->in_pages; + num_out = args->out_numargs - args->out_pages; + offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); + + for (i = 0; i < num_out; i++) { + unsigned int argsize = args->out_args[i].size; + + if (args->out_argvar && + i == args->out_numargs - 1 && + argsize > remaining) { + argsize = remaining; + } + + memcpy(args->out_args[i].value, req->argbuf + offset, argsize); + offset += argsize; + + if (i != args->out_numargs - 1) + remaining -= argsize; + } + + /* Store the actual size of the variable-length arg */ + if (args->out_argvar) + args->out_args[args->out_numargs - 1].size = remaining; + + kfree(req->argbuf); 
+ req->argbuf = NULL; +} + +/* Work function for request completion: move finished requests from the virtqueue onto a local list, then for each one copy out the reply args, optionally zero the unfilled part of the reply pages, unlink it, end it and decrement in_flight */ +static void virtio_fs_requests_done_work(struct work_struct *work) +{ + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + done_work); + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct fuse_conn *fc = fsvq->fud->fc; + struct virtqueue *vq = fsvq->vq; + struct fuse_req *req; + struct fuse_args_pages *ap; + struct fuse_req *next; + struct fuse_args *args; + unsigned int len, i, thislen; + struct page *page; + LIST_HEAD(reqs); + + /* Collect completed requests off the virtqueue */ + spin_lock(&fsvq->lock); + do { + virtqueue_disable_cb(vq); + + while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + spin_lock(&fpq->lock); + list_move_tail(&req->list, &reqs); + spin_unlock(&fpq->lock); + } + } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); + spin_unlock(&fsvq->lock); + + /* End requests */ + list_for_each_entry_safe(req, next, &reqs, list) { + /* + * TODO verify that server properly follows FUSE protocol + * (oh.uniq, oh.len) + */ + args = req->args; + copy_args_from_argbuf(args, req); + /* len starts as the size of the last out arg; walk the pages and zero from that point to the end of the page where it runs out */ + if (args->out_pages && args->page_zeroing) { + len = args->out_args[args->out_numargs - 1].size; + ap = container_of(args, typeof(*ap), args); + for (i = 0; i < ap->num_pages; i++) { + thislen = ap->descs[i].length; + if (len < thislen) { + WARN_ON(ap->descs[i].offset); + page = ap->pages[i]; + zero_user_segment(page, len, thislen); + len = 0; + } else { + len -= thislen; + } + } + } + + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); + spin_unlock(&fpq->lock); + + fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + fsvq->in_flight--; + spin_unlock(&fsvq->lock); + } +} + +/* Virtqueue callback (installed by virtio_fs_setup_vqs()): schedule the queue's done_work */ +static void virtio_fs_vq_done(struct virtqueue *vq) +{ + struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); + + dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); + + schedule_work(&fsvq->done_work); +} + +/* Initialize 
virtqueues */ +static int virtio_fs_setup_vqs(struct virtio_device *vdev, + struct virtio_fs *fs) +{ + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + unsigned int i; + int ret = 0; + + virtio_cread(vdev, struct virtio_fs_config, num_request_queues, + &fs->num_request_queues); + if (fs->num_request_queues == 0) + return -EINVAL; + /* one hiprio queue plus num_request_queues request queues */ + fs->nvqs = 1 + fs->num_request_queues; + fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); + if (!fs->vqs) + return -ENOMEM; + /* temporary arrays handed to virtio_find_vqs(); always freed at the out label */ + vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); + callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), + GFP_KERNEL); + names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); + if (!vqs || !callbacks || !names) { + ret = -ENOMEM; + goto out; + } + /* Initialize the hiprio virtqueue */ + callbacks[VQ_HIPRIO] = virtio_fs_vq_done; + snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), + "hiprio"); + names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; + INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, + virtio_fs_hiprio_dispatch_work); + spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); + + /* Initialize the request virtqueues (indices VQ_REQUEST up to nvqs - 1); their dispatch work is the dummy handler */ + for (i = VQ_REQUEST; i < fs->nvqs; i++) { + spin_lock_init(&fs->vqs[i].lock); + INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); + INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, + virtio_fs_dummy_dispatch_work); + INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), + "requests.%u", i - VQ_REQUEST); + callbacks[i] = virtio_fs_vq_done; + names[i] = fs->vqs[i].name; + } + + ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); + if (ret < 0) + goto out; + + for (i = 0; i < fs->nvqs; i++) + fs->vqs[i].vq = vqs[i]; + + virtio_fs_start_all_queues(fs); +out: /* reached on success too: fs->vqs is kept when ret == 0 and freed only on error */ + kfree(names); + kfree(callbacks); + kfree(vqs); + if (ret) + kfree(fs->vqs); + return 
ret; +} + +/* Free virtqueues (device must already be reset) */ +static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, + struct virtio_fs *fs) +{ + vdev->config->del_vqs(vdev); +} + +/* + * Probe a virtio-fs device: allocate the virtio_fs object, read its tag, + * set up the virtqueues, mark the device ready and add the instance to the + * global list. Returns 0 on success or a negative errno; on failure the + * object and everything allocated for it here are released again. + */ +static int virtio_fs_probe(struct virtio_device *vdev) +{ + struct virtio_fs *fs; + int ret; + + fs = kzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + return -ENOMEM; + kref_init(&fs->refcount); + vdev->priv = fs; + + ret = virtio_fs_read_tag(vdev, fs); + if (ret < 0) + goto out; + + ret = virtio_fs_setup_vqs(vdev, fs); + if (ret < 0) + goto out; + + /* TODO vq affinity */ + + /* Bring the device online in case the filesystem is mounted and + * requests need to be sent before we return. + */ + virtio_device_ready(vdev); + + ret = virtio_fs_add_instance(fs); + if (ret < 0) + goto out_vqs; + + return 0; + +out_vqs: + vdev->config->reset(vdev); + virtio_fs_cleanup_vqs(vdev, fs); + /* virtio_fs_setup_vqs() succeeded, so fs->vqs is still allocated; + * del_vqs does not free it. Only this label may free it: on the + * setup failure path it has already been freed. + */ + kfree(fs->vqs); + +out: + vdev->priv = NULL; + kfree(fs); + return ret; +} + +static void virtio_fs_stop_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = false; + spin_unlock(&fsvq->lock); + } +} + +static void virtio_fs_remove(struct virtio_device *vdev) +{ + struct virtio_fs *fs = vdev->priv; + + mutex_lock(&virtio_fs_mutex); + /* This device is going away. 
No one should get new reference */ + list_del_init(&fs->list); + virtio_fs_stop_all_queues(fs); + virtio_fs_drain_all_queues(fs); + vdev->config->reset(vdev); + virtio_fs_cleanup_vqs(vdev, fs); + + vdev->priv = NULL; + /* Put device reference on virtio_fs object */ + virtio_fs_put(fs); + mutex_unlock(&virtio_fs_mutex); +} + +#ifdef CONFIG_PM_SLEEP +/* Suspend is not implemented: always refuses with -EOPNOTSUPP */ +static int virtio_fs_freeze(struct virtio_device *vdev) +{ + /* TODO need to save state here */ + pr_warn("virtio-fs: suspend/resume not yet supported\n"); + return -EOPNOTSUPP; +} + +static int virtio_fs_restore(struct virtio_device *vdev) +{ + /* TODO need to restore state here */ + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +/* Devices handled by this driver: VIRTIO_ID_FS, any vendor */ +static const struct virtio_device_id id_table[] = { + { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, + {}, +}; + +/* No feature bits are listed */ +static const unsigned int feature_table[] = {}; + +static struct virtio_driver virtio_fs_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .feature_table = feature_table, + .feature_table_size = ARRAY_SIZE(feature_table), + .probe = virtio_fs_probe, + .remove = virtio_fs_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_fs_freeze, + .restore = virtio_fs_restore, +#endif +}; + +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + struct fuse_forget_link *link; + struct virtio_fs_forget *forget; + struct scatterlist sg; + struct scatterlist *sgs[] = {&sg}; + struct virtio_fs *fs; + struct virtqueue *vq; + struct virtio_fs_vq *fsvq; + bool notify; + u64 unique; + int ret; + + link = fuse_dequeue_forget(fiq, 1, NULL); + unique = fuse_get_unique(fiq); + + fs = fiq->priv; + fsvq = &fs->vqs[VQ_HIPRIO]; + spin_unlock(&fiq->lock); + + /* Allocate a buffer for the request */ + forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); + + forget->ih = (struct fuse_in_header){ + .opcode = FUSE_FORGET, + .nodeid = link->forget_one.nodeid, + .unique = unique, + .len = sizeof(*forget), + }; + forget->arg = 
(struct fuse_forget_in){ + .nlookup = link->forget_one.nlookup, + }; + + sg_init_one(&sg, forget, sizeof(*forget)); + + /* Enqueue the request under fsvq->lock; it is dropped if the queue is not connected */ + spin_lock(&fsvq->lock); + + if (!fsvq->connected) { + kfree(forget); + spin_unlock(&fsvq->lock); + goto out; + } + + vq = fsvq->vq; + dev_dbg(&vq->vdev->dev, "%s\n", __func__); + + ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n", + ret); + list_add_tail(&forget->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + } else { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", + ret); + kfree(forget); + } + spin_unlock(&fsvq->lock); + goto out; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + + spin_unlock(&fsvq->lock); + /* the notification itself is issued after dropping fsvq->lock */ + if (notify) + virtqueue_notify(vq); +out: + kfree(link); +} + /* Interrupt requests are not implemented yet; this hook only releases fiq->lock */ +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + /* + * TODO interrupts. + * + * Normal fs operations on local filesystems aren't interruptible. + * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) + * with shared lock between host and guest. 
+ */ + spin_unlock(&fiq->lock); +} + +/* Return the number of scatter-gather list elements required: one for the in header, one if there are non-page in args, ap->num_pages if the in args use pages, and the same three items again for the reply when FR_ISREPLY is set */ +static unsigned int sg_count_fuse_req(struct fuse_req *req) +{ + struct fuse_args *args = req->args; + struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); + unsigned int total_sgs = 1 /* fuse_in_header */; + + if (args->in_numargs - args->in_pages) + total_sgs += 1; + + if (args->in_pages) + total_sgs += ap->num_pages; + + if (!test_bit(FR_ISREPLY, &req->flags)) + return total_sgs; + + total_sgs += 1 /* fuse_out_header */; + + if (args->out_numargs - args->out_pages) + total_sgs += 1; + + if (args->out_pages) + total_sgs += ap->num_pages; + + return total_sgs; +} + +/* Add pages to scatter-gather list and return number of elements used. Each entry covers at most its page descriptor's length; the loop stops once total_len bytes are covered or num_pages is reached. */ +static unsigned int sg_init_fuse_pages(struct scatterlist *sg, + struct page **pages, + struct fuse_page_desc *page_descs, + unsigned int num_pages, + unsigned int total_len) +{ + unsigned int i; + unsigned int this_len; + + for (i = 0; i < num_pages && total_len; i++) { + sg_init_table(&sg[i], 1); + this_len = min(page_descs[i].length, total_len); + sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); + total_len -= this_len; + } + + return i; +} + +/* Add args to scatter-gather list and return number of elements used. The non-page args are described by one entry over argbuf (skipped when their total length is zero); with argpages set, the pages follow, limited to the size of the last arg. The non-page length is stored in *len_used when len_used is not NULL. */ +static unsigned int sg_init_fuse_args(struct scatterlist *sg, + struct fuse_req *req, + struct fuse_arg *args, + unsigned int numargs, + bool argpages, + void *argbuf, + unsigned int *len_used) +{ + struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); + unsigned int total_sgs = 0; + unsigned int len; + + len = fuse_len_args(numargs - argpages, args); + if (len) + sg_init_one(&sg[total_sgs++], argbuf, len); + + if (argpages) + total_sgs += sg_init_fuse_pages(&sg[total_sgs], + ap->pages, ap->descs, + ap->num_pages, + args[numargs - 1].size); + + if (len_used) + *len_used = len; + + return total_sgs; +} + +/* Add a request to a virtqueue and kick the device */ 
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req) +{ + /* on-stack arrays used when the request fits; requests need at least 4 elements */ + struct scatterlist *stack_sgs[6]; + struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; + struct scatterlist **sgs = stack_sgs; + struct scatterlist *sg = stack_sg; + struct virtqueue *vq; + struct fuse_args *args = req->args; + unsigned int argbuf_used = 0; + unsigned int out_sgs = 0; + unsigned int in_sgs = 0; + unsigned int total_sgs; + unsigned int i; + int ret; + bool notify; + + /* Does the sglist fit on the stack? If not, allocate both arrays with GFP_ATOMIC */ + total_sgs = sg_count_fuse_req(req); + if (total_sgs > ARRAY_SIZE(stack_sgs)) { + sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); + sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); + if (!sgs || !sg) { + ret = -ENOMEM; + goto out; + } + } + + /* Use a bounce buffer since stack args cannot be mapped */ + ret = copy_args_to_argbuf(req); + if (ret < 0) + goto out; + + /* Request elements: the in header, then the in args; argbuf_used receives the length of the non-page in args */ + sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); + out_sgs += sg_init_fuse_args(&sg[out_sgs], req, + (struct fuse_arg *)args->in_args, + args->in_numargs, args->in_pages, + req->argbuf, &argbuf_used); + + /* Reply elements: the out header, then the out args, whose non-page part lives in argbuf right after the in args */ + if (test_bit(FR_ISREPLY, &req->flags)) { + sg_init_one(&sg[out_sgs + in_sgs++], + &req->out.h, sizeof(req->out.h)); + in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, + args->out_args, args->out_numargs, + args->out_pages, + req->argbuf + argbuf_used, NULL); + } + + WARN_ON(out_sgs + in_sgs != total_sgs); + + for (i = 0; i < total_sgs; i++) + sgs[i] = &sg[i]; + + spin_lock(&fsvq->lock); + /* refuse new requests with -ENOTCONN once the queue has been stopped */ + if (!fsvq->connected) { + spin_unlock(&fsvq->lock); + ret = -ENOTCONN; + goto out; + } + + vq = fsvq->vq; + ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); + if (ret < 0) { + spin_unlock(&fsvq->lock); + goto out; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); + +out: + if 
(ret < 0 && req->argbuf) { + kfree(req->argbuf); + req->argbuf = NULL; + } + if (sgs != stack_sgs) { + kfree(sgs); + kfree(sg); + } + + return ret; +} + /* Take the request off fiq->pending, release fiq->lock, put the request on the processing list of the VQ_REQUEST queue's fuse_dev and submit it. A full virtqueue (-ENOMEM/-ENOSPC) is retried after a short sleep; any other error ends the request with that error. */ +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ + struct virtio_fs *fs; + struct fuse_conn *fc; + struct fuse_req *req; + struct fuse_pqueue *fpq; + int ret; + /* the WARN_ONs check that exactly one request was pending */ + WARN_ON(list_empty(&fiq->pending)); + req = list_last_entry(&fiq->pending, struct fuse_req, list); + clear_bit(FR_PENDING, &req->flags); + list_del_init(&req->list); + WARN_ON(!list_empty(&fiq->pending)); + spin_unlock(&fiq->lock); + + fs = fiq->priv; + fc = fs->vqs[queue_id].fud->fc; + + pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", + __func__, req->in.h.opcode, req->in.h.unique, + req->in.h.nodeid, req->in.h.len, + fuse_len_args(req->args->out_numargs, req->args->out_args)); + + fpq = &fs->vqs[queue_id].fud->pq; + spin_lock(&fpq->lock); + if (!fpq->connected) { + spin_unlock(&fpq->lock); + req->out.h.error = -ENODEV; + pr_err("virtio-fs: %s disconnected\n", __func__); + fuse_request_end(fc, req); + return; + } + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + +retry: + ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + /* Virtqueue full. 
Retry submission */ + /* TODO use completion instead of timeout */ + usleep_range(20, 30); + goto retry; + } + req->out.h.error = ret; + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); + spin_unlock(&fpq->lock); + fuse_request_end(fc, req); + return; + } +} + +const static struct fuse_iqueue_ops virtio_fs_fiq_ops = { + .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, + .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, + .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, + .release = virtio_fs_fiq_release, +}; + +static int virtio_fs_fill_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct virtio_fs *fs = fc->iq.priv; + unsigned int i; + int err; + struct fuse_fs_context ctx = { + .rootmode = S_IFDIR, + .default_permissions = 1, + .allow_other = 1, + .max_read = UINT_MAX, + .blksize = 512, + .destroy = true, + .no_control = true, + .no_force_umount = true, + }; + + mutex_lock(&virtio_fs_mutex); + + /* After holding mutex, make sure virtiofs device is still there. + * Though we are holding a reference to it, drive ->remove might + * still have cleaned up virtual queues. In that case bail out. 
+ */ + err = -EINVAL; + if (list_empty(&fs->list)) { + pr_info("virtio-fs: tag <%s> not found\n", fs->tag); + goto err; + } + + err = -ENOMEM; + /* Allocate a fuse_dev for every queue except VQ_REQUEST. The install + * loop below passes fsvq->fud of each of these queues to + * fuse_dev_install(), so request queues beyond the first need one + * as well, not only the queues below VQ_REQUEST. The VQ_REQUEST + * fuse_dev is left to fuse_fill_super_common() via ctx.fudptr. + */ + for (i = 0; i < fs->nvqs; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + if (i == VQ_REQUEST) + continue; + + fsvq->fud = fuse_dev_alloc(); + if (!fsvq->fud) + goto err_free_fuse_devs; + } + + ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; + err = fuse_fill_super_common(sb, &ctx); + if (err < 0) + goto err_free_fuse_devs; + + fc = fs->vqs[VQ_REQUEST].fud->fc; + + for (i = 0; i < fs->nvqs; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + if (i == VQ_REQUEST) + continue; /* already initialized */ + fuse_dev_install(fsvq->fud, fc); + } + + /* Previous unmount will stop all queues. Start these again */ + virtio_fs_start_all_queues(fs); + fuse_send_init(fc); + mutex_unlock(&virtio_fs_mutex); + return 0; + +err_free_fuse_devs: + virtio_fs_free_devs(fs); +err: + mutex_unlock(&virtio_fs_mutex); + return err; +} + +/* + * ->kill_sb: stop and drain the forget queue, tear the superblock down with + * fuse_kill_sb_anon(), then stop and drain all queues and free the fuse + * devices. + */ +static void virtio_kill_sb(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct virtio_fs *vfs; + struct virtio_fs_vq *fsvq; + + /* If mount failed, we can still be called without any fc */ + if (!fc) + return fuse_kill_sb_anon(sb); + + vfs = fc->iq.priv; + fsvq = &vfs->vqs[VQ_HIPRIO]; + + /* Stop forget queue. Soon destroy will be sent */ + spin_lock(&fsvq->lock); + fsvq->connected = false; + spin_unlock(&fsvq->lock); + virtio_fs_drain_all_queues(vfs); + + fuse_kill_sb_anon(sb); + + /* fuse_kill_sb_anon() must have sent destroy. Stop all queues + * and drain one more time and free fuse devices. Freeing fuse + * devices will drop their reference on fuse_conn and that in + * turn will drop its reference on virtio_fs object. 
+ */ + virtio_fs_stop_all_queues(vfs); + virtio_fs_drain_all_queues(vfs); + virtio_fs_free_devs(vfs); +} + /* sget_fc() test callback: a superblock matches when its fuse_conn refers to the same virtio_fs object (iq.priv) as the one in the fs_context */ +static int virtio_fs_test_super(struct super_block *sb, + struct fs_context *fsc) +{ + struct fuse_conn *fc = fsc->s_fs_info; + + return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; +} + /* sget_fc() set callback: assign an anonymous device number and, on success, take a reference on the fuse_conn in fsc->s_fs_info */ +static int virtio_fs_set_super(struct super_block *sb, + struct fs_context *fsc) +{ + int err; + + err = get_anon_bdev(&sb->s_dev); + if (!err) + fuse_conn_get(fsc->s_fs_info); + + return err; +} + /* ->get_tree: look up the virtio_fs instance whose tag equals fsc->source, create a fuse_conn for it, find or create the superblock and set fsc->root. Returns 0 or a negative errno. */ +static int virtio_fs_get_tree(struct fs_context *fsc) +{ + struct virtio_fs *fs; + struct super_block *sb; + struct fuse_conn *fc; + int err; + + /* This gets a reference on virtio_fs object. This ptr gets installed + * in fc->iq.priv. Once fuse_conn is going away, the ->release() op of + * virtio_fs_fiq_ops (virtio_fs_fiq_release) drops the reference to + * this object. + */ + fs = virtio_fs_find_instance(fsc->source); + if (!fs) { + pr_info("virtio-fs: tag <%s> not found\n", fsc->source); + return -EINVAL; + } + + fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); + if (!fc) { + mutex_lock(&virtio_fs_mutex); + virtio_fs_put(fs); + mutex_unlock(&virtio_fs_mutex); + return -ENOMEM; + } + /* NOTE(review): get_user_ns() takes a reference here; confirm fuse_conn_init() does not take its own, otherwise one reference is never dropped */ + fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, + fs); + fc->release = fuse_free_conn; + fc->delete_stale = true; + + fsc->s_fs_info = fc; + sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); + fuse_conn_put(fc); + if (IS_ERR(sb)) + return PTR_ERR(sb); + /* no root yet: fill the superblock before using it */ + if (!sb->s_root) { + err = virtio_fs_fill_super(sb); + if (err) { + deactivate_locked_super(sb); + return err; + } + + sb->s_flags |= SB_ACTIVE; + } + + WARN_ON(fsc->root); + fsc->root = dget(sb->s_root); + return 0; +} + +static const struct fs_context_operations virtio_fs_context_ops = { + .get_tree = virtio_fs_get_tree, +}; + +static int virtio_fs_init_fs_context(struct fs_context *fsc) +{ + fsc->ops = &virtio_fs_context_ops; + return 0; +} + +static struct file_system_type virtio_fs_type = { + .owner = THIS_MODULE, + .name = "virtiofs", + 
.init_fs_context = virtio_fs_init_fs_context, + .kill_sb = virtio_kill_sb, +}; + /* Module init: register the virtio driver, then the filesystem type; if the latter fails the driver is unregistered again */ +static int __init virtio_fs_init(void) +{ + int ret; + + ret = register_virtio_driver(&virtio_fs_driver); + if (ret < 0) + return ret; + + ret = register_filesystem(&virtio_fs_type); + if (ret < 0) { + unregister_virtio_driver(&virtio_fs_driver); + return ret; + } + + return 0; +} +module_init(virtio_fs_init); + /* Module exit: unregister in the reverse order of virtio_fs_init() */ +static void __exit virtio_fs_exit(void) +{ + unregister_filesystem(&virtio_fs_type); + unregister_virtio_driver(&virtio_fs_driver); +} +module_exit(virtio_fs_exit); + +MODULE_AUTHOR("Stefan Hajnoczi "); +MODULE_DESCRIPTION("Virtio Filesystem"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS(KBUILD_MODNAME); +MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/fs/io_uring.c b/fs/io_uring.c index dd094b387cab..aa8ac557493c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2768,6 +2768,38 @@ out: return submit; } +struct io_wait_queue { + struct wait_queue_entry wq; + struct io_ring_ctx *ctx; + unsigned to_wait; + unsigned nr_timeouts; +}; + +static inline bool io_should_wake(struct io_wait_queue *iowq) +{ + struct io_ring_ctx *ctx = iowq->ctx; + + /* + * Wake up if we have enough events, or if a timeout occurred since we + * started waiting. For timeouts, we always want to return to userspace, + * regardless of event count. + */ + return io_cqring_events(ctx->rings) >= iowq->to_wait || + atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; +} + /* Wait-queue callback: returns -1 when io_should_wake() says the waiter's condition is not met, otherwise hands over to autoremove_wake_function() */ +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, + wq); + + if (!io_should_wake(iowq)) + return -1; + + return autoremove_wake_function(curr, mode, wake_flags, key); +} + /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. 
@@ -2775,8 +2807,16 @@ out: static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { + struct io_wait_queue iowq = { + .wq = { + .private = current, + .func = io_wake_function, + .entry = LIST_HEAD_INIT(iowq.wq.entry), + }, + .ctx = ctx, + .to_wait = min_events, + }; struct io_rings *rings = ctx->rings; - unsigned nr_timeouts; int ret; if (io_cqring_events(rings) >= min_events) @@ -2795,15 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - nr_timeouts = atomic_read(&ctx->cq_timeouts); - /* - * Return if we have enough events, or if a timeout occured since - * we started waiting. For timeouts, we always want to return to - * userspace. - */ - ret = wait_event_interruptible(ctx->wait, - io_cqring_events(rings) >= min_events || - atomic_read(&ctx->cq_timeouts) != nr_timeouts); + ret = 0; + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + do { + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + TASK_INTERRUPTIBLE); + if (io_should_wake(&iowq)) + break; + schedule(); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } while (1); + finish_wait(&ctx->wait, &iowq.wq); + restore_saved_sigmask_unless(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR; @@ -3455,7 +3501,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head != ctx->rings->sq_ring_entries) mask |= EPOLLOUT | EPOLLWRNORM; - if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail) + if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail) mask |= EPOLLIN | EPOLLRDNORM; return mask; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 7d14c11bdc0a..408b6f4e63c0 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -285,12 +285,12 @@ struct drm_crtc_state { u32 target_vblank; /** - * @pageflip_flags: + * @async_flip: * - * DRM_MODE_PAGE_FLIP_* flags, as passed to the page flip 
ioctl. - * Zero in any other case. + * This is set when DRM_MODE_PAGE_FLIP_ASYNC is set in the legacy + * PAGE_FLIP IOCTL. It's not wired up for the atomic IOCTL itself yet. */ - u32 pageflip_flags; + bool async_flip; /** * @vrr_enabled: @@ -1108,7 +1108,7 @@ struct drm_crtc { /** * @self_refresh_data: Holds the state for the self refresh helpers * - * Initialized via drm_self_refresh_helper_register(). + * Initialized via drm_self_refresh_helper_init(). */ struct drm_self_refresh_data *self_refresh_data; }; diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h index 397a583ccca7..5b79d253fb46 100644 --- a/include/drm/drm_self_refresh_helper.h +++ b/include/drm/drm_self_refresh_helper.h @@ -12,9 +12,9 @@ struct drm_atomic_state; struct drm_crtc; void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state); +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms); -int drm_self_refresh_helper_init(struct drm_crtc *crtc, - unsigned int entry_delay_ms); - +int drm_self_refresh_helper_init(struct drm_crtc *crtc); void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc); #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 88dc0c653925..d0633ebdaa9c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -201,12 +201,14 @@ enum cpuhp_smt_control { extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); +extern bool cpu_smt_possible(void); extern int cpuhp_smt_enable(void); extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); #else # define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } +static inline bool cpu_smt_possible(void) { return false; } static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { 
return 0; } #endif diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 24632a7a7d11..b2c9c460947d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -262,7 +262,7 @@ struct pwm_ops { int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state); + const struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; @@ -316,7 +316,7 @@ struct pwm_capture { /* PWM user APIs */ struct pwm_device *pwm_request(int pwm_id, const char *label); void pwm_free(struct pwm_device *pwm); -int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); +int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index df2e12fb3381..802b0377a49e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -133,6 +133,8 @@ * * 7.31 * - add FUSE_WRITE_KILL_PRIV flag + * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING + * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag */ #ifndef _LINUX_FUSE_H @@ -274,6 +276,7 @@ struct fuse_file_lock { * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request + * FUSE_MAP_ALIGNMENT: map_alignment field is valid */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -301,6 +304,7 @@ struct fuse_file_lock { #define FUSE_CACHE_SYMLINKS (1 << 23) #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) +#define FUSE_MAP_ALIGNMENT (1 << 26) /** * CUSE INIT request/reply flags @@ -422,6 +426,8 @@ enum fuse_opcode { FUSE_RENAME2 = 45, FUSE_LSEEK = 46, FUSE_COPY_FILE_RANGE = 47, + 
FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -656,7 +662,7 @@ struct fuse_init_out { uint32_t max_write; uint32_t time_gran; uint16_t max_pages; - uint16_t padding; + uint16_t map_alignment; uint32_t unused[8]; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 233efbb1c81c..52641d8ca9e8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #define KVM_CAP_PMU_EVENT_FILTER 173 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 +#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #ifdef KVM_CAP_IRQ_ROUTING @@ -1145,6 +1146,7 @@ struct kvm_dirty_tlb { #define KVM_REG_S390 0x5000000000000000ULL #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL +#define KVM_REG_RISCV 0x8000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h new file mode 100644 index 000000000000..b02eb2ac3d99 --- /dev/null +++ b/include/uapi/linux/virtio_fs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + +#ifndef _UAPI_LINUX_VIRTIO_FS_H +#define _UAPI_LINUX_VIRTIO_FS_H + +#include +#include +#include +#include + +struct virtio_fs_config { + /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */ + __u8 tag[36]; + + /* Number of request queues */ + __u32 num_request_queues; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_VIRTIO_FS_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 348fd0176f75..585e07b27333 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,6 +44,7 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_FS 
26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index d392e9df3d00..4bc4f6cd5634 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; void __init cpu_smt_disable(bool force) { - if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || - cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + if (!cpu_smt_possible()) return; if (force) { @@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu) */ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); } + +/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */ +bool cpu_smt_possible(void) +{ + return cpu_smt_control != CPU_SMT_FORCE_DISABLED && + cpu_smt_control != CPU_SMT_NOT_SUPPORTED; +} +EXPORT_SYMBOL_GPL(cpu_smt_possible); #else static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index 39937da6ae6a..4aeb65c98f45 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2244,7 +2244,7 @@ static void __perf_event_disable(struct perf_event *event, * * If event->ctx is a cloned context, callers must make sure that * every task struct that event->ctx->task could possibly point to - * remains valid. This condition is satisifed when called through + * remains valid. This condition is satisfied when called through * perf_event_for_each_child or perf_event_for_each because they * hold the top-level event's child_mutex, so any descendant that * goes to exit will block in perf_event_exit_event(). @@ -6059,7 +6059,7 @@ static void perf_sample_regs_intr(struct perf_regs *regs_intr, * Get remaining task size from user stack pointer.
* * It'd be better to take stack vma map and limit this more - * precisly, but there's no way to get it safely under interrupt, + * precisely, but there's no way to get it safely under interrupt, * so using TASK_SIZE as limit. */ static u64 perf_ustack_task_size(struct pt_regs *regs) @@ -6621,7 +6621,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_STACK_USER) { /* - * Either we need PERF_SAMPLE_STACK_USER bit to be allways + * Either we need PERF_SAMPLE_STACK_USER bit to be always * processed as the last one or have additional check added * in case new sample type is added, because we could eat * up the rest of the sample size. diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 89bab079e7a4..e84d21aa0722 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu); + return READ_ONCE(prev->state) != vcpu_running; } /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0e315a2e77ae..4820823515e9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1678,24 +1678,26 @@ void timer_clear_idle(void) static int collect_expired_timers(struct timer_base *base, struct hlist_head *heads) { + unsigned long now = READ_ONCE(jiffies); + /* * NOHZ optimization. After a long idle sleep we need to forward the * base to current jiffies. Avoid a loop by searching the bitfield for * the next expiring timer. 
*/ - if ((long)(jiffies - base->clk) > 2) { + if ((long)(now - base->clk) > 2) { unsigned long next = __next_timer_interrupt(base); /* * If the next timer is ahead of time forward to current * jiffies, otherwise forward to the next expiry time: */ - if (time_after(next, jiffies)) { + if (time_after(next, now)) { /* * The call site will increment base->clk and then * terminate the expiry loop immediately. */ - base->clk = jiffies; + base->clk = now; return 0; } base->clk = next; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a6697e28ddda..402dc3ce88d3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -549,10 +549,11 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, for (i = 0; i < orig->tp.nr_args; i++) { if (strcmp(orig->tp.args[i].comm, comp->tp.args[i].comm)) - continue; + break; } - return true; + if (i == orig->tp.nr_args) + return true; } return false; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 34dd6d0016a3..dd884341f5c5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -431,10 +431,11 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, for (i = 0; i < orig->tp.nr_args; i++) { if (strcmp(orig->tp.args[i].comm, comp->tp.args[i].comm)) - continue; + break; } - return true; + if (i == orig->tp.nr_args) + return true; } return false; diff --git a/mm/usercopy.c b/mm/usercopy.c index 98e924864554..660717a1ea5c 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -227,7 +228,12 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - page = virt_to_head_page(ptr); + /* + * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the + * highmem page or fallback to virt_to_page(). The following + * is effectively a highmem-aware virt_to_head_page(). 
+ */ + page = compound_head(kmap_to_page((void *)ptr)); if (PageSlab(page)) { /* Check slab allocator for flags and size. */ diff --git a/net/9p/client.c b/net/9p/client.c index 9622f3e469f6..1d48afc7033c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -281,6 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) p9pdu_reset(&req->tc); p9pdu_reset(&req->rc); + req->t_err = 0; req->status = REQ_STATUS_ALLOC; init_waitqueue_head(&req->wq); INIT_LIST_HEAD(&req->req_list); diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 5171b9c7ca3e..0652d3eed9bd 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -231,6 +231,8 @@ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -354,6 +356,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ diff --git a/tools/arch/x86/include/uapi/asm/unistd.h 
b/tools/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..196fdd02b8b1 100644 --- a/tools/arch/x86/include/uapi/asm/unistd.h +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -3,7 +3,7 @@ #define _UAPI_ASM_X86_UNISTD_H /* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000 +#define __X32_SYSCALL_BIT 0x40000000UL #ifndef __KERNEL__ # ifdef __i386__ diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h index bbd75ac8b202..550223f0a6e6 100644 --- a/tools/include/asm/bug.h +++ b/tools/include/asm/bug.h @@ -3,6 +3,7 @@ #define _TOOLS_ASM_BUG_H #include +#include #define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 1be0e798e362..1fc8faa6e973 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget) __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) +__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop) #endif #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 094bb03b9cc2..7da1b37b27aa 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -181,7 +181,7 @@ struct prctl_mm_map { #define PR_GET_THP_DISABLE 42 /* - * Tell the kernel to start/stop helping userspace manage bounds tables. 
+ * No longer implemented, but left here to ensure the numbers stay reserved: */ #define PR_MPX_ENABLE_MANAGEMENT 43 #define PR_MPX_DISABLE_MANAGEMENT 44 @@ -229,4 +229,9 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build index ba54bfce0b0b..f9a5d79578f5 100644 --- a/tools/lib/traceevent/Build +++ b/tools/lib/traceevent/Build @@ -6,14 +6,3 @@ libtraceevent-y += parse-utils.o libtraceevent-y += kbuffer-parse.o libtraceevent-y += tep_strerror.o libtraceevent-y += event-parse-api.o - -plugin_jbd2-y += plugin_jbd2.o -plugin_hrtimer-y += plugin_hrtimer.o -plugin_kmem-y += plugin_kmem.o -plugin_kvm-y += plugin_kvm.o -plugin_mac80211-y += plugin_mac80211.o -plugin_sched_switch-y += plugin_sched_switch.o -plugin_function-y += plugin_function.o -plugin_xen-y += plugin_xen.o -plugin_scsi-y += plugin_scsi.o -plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt new file mode 100644 index 000000000000..2c6a61811118 --- /dev/null +++ b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt @@ -0,0 +1,130 @@ +libtraceevent(3) +================ + +NAME +---- +tep_print_event - Writes event information into a trace sequence. + +SYNOPSIS +-------- +[verse] +-- +*#include * +*#include * + +void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._) +-- + +DESCRIPTION +----------- + +The _tep_print_event()_ function parses the event information of the given +_record_ and writes it into the trace sequence _s_, according to the format +string _fmt_. 
The desired information is specified after the format string. +The _fmt_ is printf-like format string, following arguments are supported: +[verse] +-- + TEP_PRINT_PID, "%d" - PID of the event. + TEP_PRINT_CPU, "%d" - Event CPU. + TEP_PRINT_COMM, "%s" - Event command string. + TEP_PRINT_NAME, "%s" - Event name. + TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more + fields - interrupt state, scheduling state, + current context, and preemption count. + Field 1 is the interrupt enabled state: + d : Interrupts are disabled + . : Interrupts are enabled + X : The architecture does not support this + information + Field 2 is the "need resched" state. + N : The task is set to call the scheduler when + possible, as another higher priority task + may need to be scheduled in. + . : The task is not set to call the scheduler. + Field 3 is the context state. + . : Normal context + s : Soft interrupt context + h : Hard interrupt context + H : Hard interrupt context which triggered + during soft interrupt context. + z : NMI context + Z : NMI context which triggered during hard + interrupt context + Field 4 is the preemption count. + . : The preempt count is zero. + On preemptible kernels (where the task can be scheduled + out in arbitrary locations while in kernel context), the + preempt count, when non zero, will prevent the kernel + from scheduling out the current task. The preempt count + number is displayed when it is not zero. + Depending on the kernel, it may show other fields + (lock depth, or migration disabled, which are unique to + specialized kernels). + TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be + specified as part of this format string: + "%precision.divisord". Example: + "%3.1000d" - divide the time by 1000 and print the first + 3 digits before the dot. Thus, the time stamp + "123456000" will be printed as "123.456" + TEP_PRINT_INFO, "%s" - event information. + TEP_PRINT_INFO_RAW, "%s" - event information, in raw format. 
+ +-- +EXAMPLE +------- +[source,c] +-- +#include +#include +... +struct trace_seq seq; +trace_seq_init(&seq); +struct tep_handle *tep = tep_alloc(); +... +void print_my_event(struct tep_record *record) +{ + trace_seq_reset(&seq); + tep_print_event(tep, &seq, record, "%16s-%-5d [%03d] %s %6.1000d %s %s", + TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU, + TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME, + TEP_PRINT_INFO); +} +... +-- + +FILES +----- +[verse] +-- +*event-parse.h* + Header file to include in order to have access to the library APIs. +*trace-seq.h* + Header file to include in order to have access to trace sequences related APIs. + Trace sequences are used to allow a function to call several other functions + to create a string of data to use. +*-ltraceevent* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtraceevent(3)_, _trace-cmd(1)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* , author of *libtraceevent*. +*Tzvetomir Stoyanov* , author of this man page. +-- +REPORTING BUGS +-------------- +Report bugs to + +LICENSE +------- +libtraceevent is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt index 38bfea30a5f6..f6aca0df2151 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt @@ -59,12 +59,12 @@ parser context. The _tep_register_function()_ function registers a function name mapped to an address and (optional) module. This mapping is used in case the function tracer -or events have "%pF" or "%pS" parameter in its format string. It is common to -pass in the kallsyms function names with their corresponding addresses with this +or events have "%pS" parameter in its format string.
It is common to pass in +the kallsyms function names with their corresponding addresses with this function. The _tep_ argument is the trace event parser context. The _name_ is -the name of the function, the string is copied internally. The _addr_ is -the start address of the function. The _mod_ is the kernel module -the function may be in (NULL for none). +the name of the function, the string is copied internally. The _addr_ is the +start address of the function. The _mod_ is the kernel module the function may +be in (NULL for none). The _tep_register_print_string()_ function registers a string by the address it was stored in the kernel. Some strings internal to the kernel with static diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt index 8d568316847d..45b20172e262 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt @@ -3,7 +3,7 @@ libtraceevent(3) NAME ---- -tep_alloc, tep_free,tep_ref, tep_unref,tep_ref_get - Create, destroy, manage +tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage references of trace event parser context. SYNOPSIS @@ -16,7 +16,7 @@ struct tep_handle pass:[*]*tep_alloc*(void); void *tep_free*(struct tep_handle pass:[*]_tep_); void *tep_ref*(struct tep_handle pass:[*]_tep_); void *tep_unref*(struct tep_handle pass:[*]_tep_); -int *tep_ref_get*(struct tep_handle pass:[*]_tep_); +int *tep_get_ref*(struct tep_handle pass:[*]_tep_); -- DESCRIPTION @@ -57,9 +57,9 @@ EXAMPLE ... struct tep_handle *tep = tep_alloc(); ... 
-int ref = tep_ref_get(tep); +int ref = tep_get_ref(tep); tep_ref(tep); -if ( (ref+1) != tep_ref_get(tep)) { +if ( (ref+1) != tep_get_ref(tep)) { /* Something wrong happened, the counter is not incremented by 1 */ } tep_unref(tep); diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt new file mode 100644 index 000000000000..596032ade31f --- /dev/null +++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt @@ -0,0 +1,99 @@ +libtraceevent(3) +================ + +NAME +---- +tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins. + +SYNOPSIS +-------- +[verse] +-- +*#include * + +struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); +void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); +-- + +DESCRIPTION +----------- +The _tep_load_plugins()_ function loads all plugins, located in the plugin +directories. The _tep_ argument is trace event parser context. +The plugin directories are : +[verse] +-- + - System's plugin directory, defined at the library compile time. It + depends on the library installation prefix and usually is + _(install_prefix)/lib/traceevent/plugins_ + - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ + - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ +-- +Loading of plugins can be controlled by the _tep_flags_, using the +_tep_set_flag()_ API: +[verse] +-- + _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in + the system's plugin directory. + _TEP_DISABLE_PLUGINS_ - do not load any plugins. +-- +The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if +loading of all plugins is not the desired case. + +The _tep_unload_plugins()_ function unloads the plugins, previously loaded by +_tep_load_plugins()_. The _tep_ argument is trace event parser context.
The +_plugin_list_ is the list of loaded plugins, returned by +the _tep_load_plugins()_ function. + +RETURN VALUE +------------ +The _tep_load_plugins()_ function returns a list of successfully loaded plugins, +or NULL in case no plugins are loaded. + +EXAMPLE +------- +[source,c] +-- +#include +... +struct tep_handle *tep = tep_alloc(); +... +struct tep_plugin_list *plugins = tep_load_plugins(tep); +if (plugins == NULL) { + /* no plugins are loaded */ +} +... +tep_unload_plugins(plugins, tep); +-- + +FILES +----- +[verse] +-- +*event-parse.h* + Header file to include in order to have access to the library APIs. +*-ltraceevent* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* , author of *libtraceevent*. +*Tzvetomir Stoyanov* , author of this man page. +-- +REPORTING BUGS +-------------- +Report bugs to + +LICENSE +------- +libtraceevent is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt index fbd977b47de1..d530a7ce8fb2 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent.txt @@ -16,7 +16,7 @@ Management of tep handler data structure and access of its members: void *tep_free*(struct tep_handle pass:[*]_tep_); void *tep_ref*(struct tep_handle pass:[*]_tep_); void *tep_unref*(struct tep_handle pass:[*]_tep_); - int *tep_ref_get*(struct tep_handle pass:[*]_tep_); + int *tep_get_ref*(struct tep_handle pass:[*]_tep_); void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_); @@ 
-26,15 +26,12 @@ Management of tep handler data structure and access of its members: void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); - bool *tep_is_latency_format*(struct tep_handle pass:[*]_tep_); - void *tep_set_latency_format*(struct tep_handle pass:[*]_tep_, int _lat_); int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); Register / unregister APIs: - int *tep_register_trace_clock*(struct tep_handle pass:[*]_tep_, const char pass:[*]_trace_clock_); int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); @@ -57,14 +54,7 @@ Event related APIs: int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - -Event printing: - void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, bool _use_trace_clock_); - void *tep_print_event_data*(struct tep_handle pass:[*]_tep_, struct trace_seq 
pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_event_info*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_print_event_task*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_print_event_time*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]record, bool _use_trace_clock_); - void *tep_set_print_raw*(struct tep_handle pass:[*]_tep_, int _print_raw_); + void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._); Event finding: struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); @@ -116,7 +106,6 @@ Filter management: int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); Parsing various data from the records: - void *tep_data_latency_format*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_); int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index a39cdd0d890d..5315f3787f8d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -58,30 +58,6 @@ export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ export EVENT_PARSE_VERSION -set_plugin_dir := 1 - -# Set plugin_dir to preffered global plugin location -# If we install under $HOME directory we go under -# $(HOME)/.local/lib/traceevent/plugins -# -# We dont set PLUGIN_DIR in case we install 
under $HOME -# directory, because by default the code looks under: -# $(HOME)/.local/lib/traceevent/plugins by default. -# -ifeq ($(plugin_dir),) -ifeq ($(prefix),$(HOME)) -override plugin_dir = $(HOME)/.local/lib/traceevent/plugins -set_plugin_dir := 0 -else -override plugin_dir = $(libdir)/traceevent/plugins -endif -endif - -ifeq ($(set_plugin_dir),1) -PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" -PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' -endif - include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -105,7 +81,6 @@ export prefix libdir src obj # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) CONFIG_INCLUDES = CONFIG_LIBS = @@ -151,29 +126,14 @@ MAKEOVERRIDES= export srctree OUTPUT CC LD CFLAGS V build := -f $(srctree)/tools/build/Makefile.build dir=. obj -PLUGINS = plugin_jbd2.so -PLUGINS += plugin_hrtimer.so -PLUGINS += plugin_kmem.so -PLUGINS += plugin_kvm.so -PLUGINS += plugin_mac80211.so -PLUGINS += plugin_sched_switch.so -PLUGINS += plugin_function.so -PLUGINS += plugin_xen.so -PLUGINS += plugin_scsi.so -PLUGINS += plugin_cfg80211.so - -PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) -PLUGINS_IN := $(PLUGINS:.so=-in.o) - TE_IN := $(OUTPUT)libtraceevent-in.o LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) -DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list -CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE) +CMD_TARGETS = $(LIB_TARGET) TARGETS = $(CMD_TARGETS) -all: all_cmd +all: all_cmd plugins all_cmd: $(CMD_TARGETS) @@ -188,17 +148,6 @@ $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) $(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) - $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) - -plugins: $(PLUGINS) - -__plugin_obj = $(notdir $@) - plugin_obj = $(__plugin_obj:-in.o=) - -$(PLUGINS_IN): force - $(Q)$(MAKE) 
$(build)=$(plugin_obj) - $(OUTPUT)%.so: $(OUTPUT)%-in.o $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ @@ -258,25 +207,6 @@ define do_install $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef -define do_install_plugins - for plugin in $1; do \ - $(call do_install,$$plugin,$(plugin_dir_SQ)); \ - done -endef - -define do_generate_dynamic_list_file - symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ - xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ - if [ "$$symbol_type" = "U W" ];then \ - (echo '{'; \ - $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ - echo '};'; \ - ) > $2; \ - else \ - (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ - fi -endef - PKG_CONFIG_FILE = libtraceevent.pc define do_install_pkgconfig_file if [ -n "${pkgconfig_dir}" ]; then \ @@ -296,10 +226,6 @@ install_lib: all_cmd install_plugins install_headers install_pkgconfig $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) -install_plugins: $(PLUGINS) - $(call QUIET_INSTALL, trace_plugins) \ - $(call do_install_plugins, $(PLUGINS)) - install_pkgconfig: $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ $(call do_install_pkgconfig_file,$(prefix)) @@ -313,7 +239,7 @@ install_headers: install: install_lib -clean: +clean: clean_plugins $(call QUIET_CLEAN, libtraceevent) \ $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ $(RM) TRACEEVENT-CFLAGS tags TAGS; \ @@ -351,7 +277,19 @@ help: @echo ' doc-install - install the man pages' @echo ' doc-uninstall - uninstall the man pages' @echo'' -PHONY += force plugins + +PHONY += plugins +plugins: + $(call descend,plugins) + +PHONY += install_plugins +install_plugins: + $(call descend,plugins,install) + +PHONY += clean_plugins +clean_plugins: + $(call descend,plugins,clean) + force: # Declare the contents of the .PHONY variable as phony. 
We keep that diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index bb22238debfe..d948475585ce 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4367,10 +4367,20 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s switch (*ptr) { case 's': case 'S': - case 'f': - case 'F': case 'x': break; + case 'f': + case 'F': + /* + * Pre-5.5 kernels use %pf and + * %pF for printing symbols + * while kernels since 5.5 use + * %pfw for fwnodes. So check + * %p[fF] isn't followed by 'w'. + */ + if (ptr[1] != 'w') + break; + /* fall through */ default: /* * Older kernels do not process @@ -4487,12 +4497,12 @@ get_bprint_format(void *data, int size __maybe_unused, printk = find_printk(tep, addr); if (!printk) { - if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) + if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0) return NULL; return format; } - if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0) + if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0) return NULL; return format; @@ -5517,8 +5527,10 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s, if (divstr && isdigit(*(divstr + 1))) div = atoi(divstr + 1); time = record->ts; - if (div) + if (div) { + time += div / 2; time /= div; + } pr = prec; while (pr--) p10 *= 10; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index d438ee44289f..b77837f75a0d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -441,6 +441,8 @@ int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr); bool tep_is_pid_registered(struct tep_handle *tep, int pid); +struct tep_event *tep_get_event(struct tep_handle *tep, int index); + #define TEP_PRINT_INFO "INFO" #define TEP_PRINT_INFO_RAW "INFO_RAW" #define TEP_PRINT_COMM "COMM" diff --git 
a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build new file mode 100644 index 000000000000..210d26910613 --- /dev/null +++ b/tools/lib/traceevent/plugins/Build @@ -0,0 +1,10 @@ +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile new file mode 100644 index 000000000000..f440989fa55e --- /dev/null +++ b/tools/lib/traceevent/plugins/Makefile @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: GPL-2.0 + +#MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,NM,$(CROSS_COMPILE)nm) +$(call allow-override,PKG_CONFIG,pkg-config) + +EXT = -std=gnu99 +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. 
+DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= /usr/local +libdir = $(prefix)/$(libdir_relative) + +set_plugin_dir := 1 + +# Set plugin_dir to preferred global plugin location +# If we install under $HOME directory we go under +# $(HOME)/.local/lib/traceevent/plugins +# +# We don't set PLUGIN_DIR in case we install under $HOME +# directory, because by default the code looks under: +# $(HOME)/.local/lib/traceevent/plugins. +# +ifeq ($(plugin_dir),) +ifeq ($(prefix),$(HOME)) +override plugin_dir = $(HOME)/.local/lib/traceevent/plugins +set_plugin_dir := 0 +else +override plugin_dir = $(libdir)/traceevent/plugins +endif +endif + +ifeq ($(set_plugin_dir),1) +PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" +PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' +endif + +include ../../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +export prefix libdir src obj + +# Shell quotes +plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +OBJ = $@ +N = + +INCLUDES = -I. -I.. 
-I $(srctree)/tools/include $(CONFIG_INCLUDES) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +# Append required CFLAGS +override CFLAGS += -fPIC +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) +override CFLAGS += $(udis86-flags) -D_GNU_SOURCE + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Disable command line variables (CFLAGS) override from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= + +export srctree OUTPUT CC LD CFLAGS V + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list + +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so + +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) + +plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE) + +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) + +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) + +$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) + $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) + +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' + +define do_install_mkdir + if [ ! 
-d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + $(call do_install_mkdir,$2); \ + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' +endef + +define do_install_plugins + for plugin in $1; do \ + $(call do_install,$$plugin,$(plugin_dir_SQ)); \ + done +endef + +define do_generate_dynamic_list_file + symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ + xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ + if [ "$$symbol_type" = "U W" ];then \ + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ + echo '};'; \ + ) > $2; \ + else \ + (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ + fi +endef + +install: $(PLUGINS) + $(call QUIET_INSTALL, trace_plugins) \ + $(call do_install_plugins, $(PLUGINS)) + +clean: + $(call QUIET_CLEAN, trace_plugins) \ + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ + $(RM) $(OUTPUT)libtraceevent-dynamic-list \ + $(RM) TRACEEVENT-CFLAGS tags TAGS; + +PHONY += force plugins +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. 
+.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c similarity index 100% rename from tools/lib/traceevent/plugin_cfg80211.c rename to tools/lib/traceevent/plugins/plugin_cfg80211.c diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c similarity index 100% rename from tools/lib/traceevent/plugin_function.c rename to tools/lib/traceevent/plugins/plugin_function.c diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c similarity index 100% rename from tools/lib/traceevent/plugin_hrtimer.c rename to tools/lib/traceevent/plugins/plugin_hrtimer.c diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c similarity index 100% rename from tools/lib/traceevent/plugin_jbd2.c rename to tools/lib/traceevent/plugins/plugin_jbd2.c diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c similarity index 100% rename from tools/lib/traceevent/plugin_kmem.c rename to tools/lib/traceevent/plugins/plugin_kmem.c diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c similarity index 100% rename from tools/lib/traceevent/plugin_kvm.c rename to tools/lib/traceevent/plugins/plugin_kvm.c diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c similarity index 100% rename from tools/lib/traceevent/plugin_mac80211.c rename to tools/lib/traceevent/plugins/plugin_mac80211.c diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c similarity index 100% rename from tools/lib/traceevent/plugin_sched_switch.c rename to tools/lib/traceevent/plugins/plugin_sched_switch.c diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c similarity index 100% rename from tools/lib/traceevent/plugin_scsi.c rename to 
tools/lib/traceevent/plugins/plugin_scsi.c diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c similarity index 100% rename from tools/lib/traceevent/plugin_xen.c rename to tools/lib/traceevent/plugins/plugin_xen.c diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 176f2f084060..044c9a3cb247 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "do_task_dead", "__module_put_and_exit", "complete_and_exit", - "kvm_spurious_fault", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a269d78456b6..46f7fba2306c 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -924,7 +924,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g') endif endif ifndef JDIR diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f9807d8c005b..902c792f326a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -292,7 +292,7 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list # # The static build has no dynsym table, so this does not work for @@ -567,7 +567,7 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) # Create python binding output directory if not already present _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python') -$(OUTPUT)python/perf.so: 
$(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF) $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \ CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ $(PYTHON_WORD) util/setup.py \ @@ -737,7 +737,7 @@ libtraceevent_plugins: FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c32db09baf0d..ede040cf82ad 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -23,9 +23,10 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/evsel_config.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" -#include "../../util/util.h" +#include // page_size #include "../../util/session.h" #include @@ -416,7 +417,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, if (err) goto out; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; @@ -648,7 +649,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, if (priv_size != cs_etm_info_priv_size(itr, session->evlist)) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; /* If the cpu_map is empty all online CPUs are involved */ diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 
4b364692da67..eba6541ec0f1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -16,7 +16,7 @@ #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/session.h" -#include "../../util/util.h" +#include // page_size #include "../../util/pmu.h" #include "../../util/debug.h" #include "../../util/auxtrace.h" @@ -51,7 +51,7 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; @@ -129,7 +129,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (err) return err; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index b047b882c5b1..917b97d7c5d3 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -11,7 +11,6 @@ #include #include /* for struct user_pt_regs */ #include -#include "util.h" struct pt_regs_dwarfnum { const char *name; diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index e41defaaa2e6..a32e4b72a98f 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include "debug.h" #include "header.h" @@ -29,7 +31,7 @@ char *get_cpuid_str(struct perf_pmu *pmu) /* read midr from list of cpus mapped to this pmu */ cpus = perf_cpu_map__get(pmu->cpus); - for (cpu = 0; cpu < cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, sysfs, cpus->map[cpu]); diff --git 
a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 002520d4036b..1495a9523a23 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -5,8 +5,8 @@ #include #include "perf_regs.h" #include "../../util/unwind.h" -#include "../../util/debug.h" #endif +#include "../../util/debug.h" int LIBUNWIND__ARCH_REG_ID(int regnum) { diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 4952890b9428..0c4f4caf53ac 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -12,7 +12,6 @@ #include #include #include -#include "util.h" struct pt_regs_dwarfnum { const char *name; diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 0b242664f5ea..b6b7bc7e31a1 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -6,7 +6,6 @@ #include #include #include "header.h" -#include "util.h" #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index f0dbf7b075c8..9cc1c4a9dec4 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -5,9 +5,11 @@ #include "util/debug.h" #include "util/evsel.h" #include "util/evlist.h" +#include "util/pmu.h" #include "book3s_hv_exits.h" #include "book3s_hcalls.h" +#include #define NR_TPS 4 @@ -172,3 +174,46 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) return ret; } + +/* + * In case of powerpc architecture, pmu registers are programmable + * by guest kernel. So monitoring guest via host may not provide + * valid samples with default 'cycles' event. 
It is better to use + * 'trace_imc/trace_cycles' event for guest profiling, since it + * can track the guest instruction pointer in the trace-record. + * + * Function to parse the arguments and return appropriate values. + */ +int kvm_add_default_arch_event(int *argc, const char **argv) +{ + const char **tmp; + bool event = false; + int i, j = *argc; + + const struct option event_options[] = { + OPT_BOOLEAN('e', "event", &event, NULL), + OPT_END() + }; + + tmp = calloc(j + 1, sizeof(char *)); + if (!tmp) + return -EINVAL; + + for (i = 0; i < j; i++) + tmp[i] = argv[i]; + + parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN); + if (!event) { + if (pmu_have_event("trace_imc", "trace_cycles")) { + argv[j++] = strdup("-e"); + argv[j++] = strdup("trace_imc/trace_cycles/"); + *argc += 2; + } else { + free(tmp); + return -EINVAL; + } + } + + free(tmp); + return 0; +} diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index fc9c2f5fcd52..3018a054526a 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -13,6 +13,7 @@ #include "util/callchain.h" #include "util/debug.h" #include "util/dso.h" +#include "util/event.h" // struct ip_callchain #include "util/map.h" #include "util/symbol.h" diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 8a4b717e0a53..abb7a12d8f93 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -4,7 +4,6 @@ * Copyright (C) 2015 Naveen N. 
Rao, IBM Corporation */ -#include "debug.h" #include "dso.h" #include "symbol.h" #include "map.h" diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index cb198787570a..6ac8887be7c9 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index b0fb70e38960..0db5c58c98e8 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index c8c86a0c9b79..724efb2d842d 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -2,7 +2,7 @@ #include #include #include -#include "util.h" +#include // page_size #include "machine.h" #include "api/fs/fs.h" #include "debug.h" diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 3b5cc3373821..3ec562a2aaba 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -5,7 +5,7 @@ #include "evlist.h" #include "evsel.h" #include "arch-tests.h" -#include "util.h" +#include // page_size #include #include @@ -63,9 +63,9 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt goto out; } - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); if (!evsel) { - pr_debug("perf_evlist__first failed\n"); + pr_debug("evlist__first failed\n"); goto out; } diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index eb3635941c2b..fa947952c16a 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c 
@@ -15,9 +15,9 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "record.h" #include "tsc.h" +#include "util/mmap.h" #include "tests/tests.h" #include "arch-tests.h" @@ -66,7 +66,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe union perf_event *event; u64 test_tsc, comm1_tsc, comm2_tsc; u64 test_time, comm1_time = 0, comm2_time = 0; - struct perf_mmap *md; + struct mmap *md; threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -83,7 +83,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; @@ -91,9 +91,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(evlist__open(evlist)); - CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); + CHECK__(evlist__mmap(evlist, UINT_MAX)); - pc = evlist->mmap[0].base; + pc = evlist->mmap[0].core.base; ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { @@ -115,7 +115,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe evlist__disable(evlist); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 6e67cee792b1..1ea916656a2d 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -13,7 +13,7 @@ #include "tests/tests.h" #include "cloexec.h" #include "event.h" -#include "util.h" +#include // page_size #include "arch-tests.h" static u64 rdpmc(unsigned int counter) diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 9876c7a7ed7c..3e6791531ca5 100644 --- 
a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../../../arch/x86/include/asm/insn.h" #include "archinsn.h" +#include "event.h" #include "machine.h" #include "thread.h" #include "symbol.h" diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a3a0b6884779..d357c625c09f 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,6 +3,8 @@ #include #include +#include "../../util/event.h" +#include "../../util/synthetic-events.h" #include "../../util/machine.h" #include "../../util/tool.h" #include "../../util/map.h" diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d263430c045f..f7f68a50a5cd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -15,6 +15,7 @@ #include "../../util/event.h" #include "../../util/evsel.h" #include "../../util/evlist.h" +#include "../../util/mmap.h" #include "../../util/session.h" #include "../../util/pmu.h" #include "../../util/debug.h" @@ -22,7 +23,7 @@ #include "../../util/tsc.h" #include "../../util/auxtrace.h" #include "../../util/intel-bts.h" -#include "../../util/util.h" +#include // page_size #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -74,10 +75,10 @@ static int intel_bts_info_fill(struct auxtrace_record *itr, if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; - pc = session->evlist->mmap[0].base; + pc = session->evlist->mmap[0].core.base; if (pc) { err = perf_read_tsc_conversion(pc, &tc); if (err) { @@ -230,7 +231,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, if (err) return err; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); diff --git 
a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index cb7cf16af79c..d6d26256915f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -18,6 +18,7 @@ #include "../../util/evlist.h" #include "../../util/evsel.h" #include "../../util/cpumap.h" +#include "../../util/mmap.h" #include #include "../../util/parse-events.h" #include "../../util/pmu.h" @@ -26,7 +27,7 @@ #include "../../util/record.h" #include "../../util/target.h" #include "../../util/tsc.h" -#include "../../util/util.h" +#include // page_size #include "../../util/intel-pt.h" #define KiB(x) ((x) * 1024) @@ -351,10 +352,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); filter_str_len = filter ? strlen(filter) : 0; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; - pc = session->evlist->mmap[0].base; + pc = session->evlist->mmap[0].core.base; if (pc) { err = perf_read_tsc_conversion(pc, &tc); if (err) { @@ -416,12 +417,12 @@ static int intel_pt_track_switches(struct evlist *evlist) return err; } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TIME); - evsel->system_wide = true; + evsel->core.system_wide = true; evsel->no_aux_samples = true; evsel->immediate = true; @@ -716,13 +717,13 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (err) return err; - switch_evsel = perf_evlist__last(evlist); + switch_evsel = evlist__last(evlist); switch_evsel->core.attr.freq = 0; switch_evsel->core.attr.sample_period = 1; switch_evsel->core.attr.context_switch = 1; - switch_evsel->system_wide = true; + switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; @@ -774,7 +775,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (err) return err; - 
tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index 1e9ec783b9a1..e17e080e76f4 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include -#include "../../util/util.h" +#include // page_size #include "../../util/machine.h" #include "../../util/map.h" #include "../../util/symbol.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index c5197a15119b..2f55afb14e1f 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -8,6 +8,8 @@ #include #include #include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" #include "../../../util/tsc.h" int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 05920e3edf7a..47357973b55b 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include "../../util/debug.h" #ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" -#include "../../util/debug.h" #endif #ifdef HAVE_ARCH_X86_64_SUPPORT diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index d1caa4a0a12a..bb617e568841 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -21,12 +21,12 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index f6b4472847d2..7af694437f4e 100644 --- 
a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -76,12 +76,12 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 80e138904c66..8ba0c3330a9a 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -20,13 +20,13 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index c5d6d0abbaa9..d0cae8125423 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -14,10 +14,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include #include diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 75d3418c1a88..a00a6891447a 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include #include diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 163fe16c275a..a053cf2b7039 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -29,7 +29,8 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe #include #include #include "futex.h" -#include "cpumap.h" +#include +#include #include #include diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 77dcdc13618a..df810096abfe 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include 
#include diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 62b8ef4bcb1f..5797253b9700 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -9,7 +9,6 @@ /* For the CLR_() macros */ #include -#include "../builtin.h" #include #include "../util/cloexec.h" diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index c63eb9a46346..97e4a4fb3362 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -10,9 +10,7 @@ * */ -#include "../util/util.h" #include -#include "../builtin.h" #include "bench.h" /* Test groups of 20 processes spraying to 20 receivers */ diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 35b07f197d48..3c88d1f201f1 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -9,9 +9,7 @@ * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c * Ported to perf by Hitoshi Mitake */ -#include "../util/util.h" #include -#include "../builtin.h" #include "bench.h" #include diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4e4d2e76232e..8db8fc9bddef 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -27,6 +27,7 @@ #include "util/sort.h" #include "util/hist.h" #include "util/dso.h" +#include "util/machine.h" #include "util/map.h" #include "util/session.h" #include "util/tool.h" @@ -39,6 +40,7 @@ #include #include #include +#include struct perf_annotate { struct perf_tool tool; @@ -583,8 +585,8 @@ int cmd_annotate(int argc, const char **argv) data.path = input_name; annotate.session = perf_session__new(&data, false, &annotate.tool); - if (annotate.session == NULL) - return -1; + if (IS_ERR(annotate.session)) + return PTR_ERR(annotate.session); annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, HEADER_BRANCH_STACK); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 
1a69eb565dc0..39efa51d7fb3 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -28,6 +28,7 @@ #include "util/util.h" #include "util/probe-file.h" #include +#include static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { @@ -422,8 +423,8 @@ int cmd_buildid_cache(int argc, const char **argv) data.force = force; session = perf_session__new(&data, false, NULL); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); } if (symbol__init(session ? &session->header.env : NULL) < 0) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 5a0d8b378cb5..e3ef75583514 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -18,6 +18,7 @@ #include "util/symbol.h" #include "util/data.h" #include +#include static int sysfs__fprintf_build_id(FILE *fp) { @@ -65,8 +66,8 @@ static int perf_session__list_build_ids(bool force, bool with_hits) goto out; session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); /* * We take all buildids when the file contains AUX area tracing data diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b09b12e0976b..3542b6ab9813 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include "debug.h" #include "builtin.h" +#include #include #include #include "map_symbol.h" @@ -2780,8 +2782,9 @@ static int perf_c2c__report(int argc, const char **argv) } session = perf_session__new(&data, 0, &c2c.tool); - if (session == NULL) { - pr_debug("No memory for session\n"); + if (IS_ERR(session)) { + err = PTR_ERR(session); + pr_debug("Error creating perf session\n"); goto out; } diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 
42d8157e047a..2603015f98be 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -9,7 +9,6 @@ #include "util/cache.h" #include -#include "util/util.h" #include "util/debug.h" #include "util/config.h" #include diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 827e4800d862..c37a78677955 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,6 +23,7 @@ #include "util/time-utils.h" #include "util/annotate.h" #include "util/map.h" +#include #include #include #include @@ -1153,9 +1154,9 @@ static int check_file_brstack(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &pdiff.tool); - if (!d->session) { + if (IS_ERR(d->session)) { pr_err("Failed to open %s\n", d->data.path); - return -1; + return PTR_ERR(d->session); } has_br_stack = perf_header__has_feat(&d->session->header, @@ -1185,9 +1186,9 @@ static int __cmd_diff(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &pdiff.tool); - if (!d->session) { + if (IS_ERR(d->session)) { + ret = PTR_ERR(d->session); pr_err("Failed to open %s\n", d->data.path); - ret = -1; goto out_delete; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 238fa3876805..440501994931 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -5,18 +5,18 @@ */ #include "builtin.h" -#include "util/util.h" - #include #include "perf.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/parse-events.h" #include #include "util/session.h" #include "util/data.h" #include "util/debug.h" +#include static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { @@ -30,8 +30,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details bool has_tracepoint = false; session = perf_session__new(&data, 0, NULL); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); 
evlist__for_each_entry(session->evlist, pos) { perf_evsel__fprintf(pos, details, stdout); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c14f40b858bc..372ecb3e2c06 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -21,7 +21,9 @@ #include "util/auxtrace.h" #include "util/jit.h" #include "util/symbol.h" +#include "util/synthetic-events.h" #include "util/thread.h" +#include #include @@ -834,8 +836,8 @@ int cmd_inject(int argc, const char **argv) data.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); - if (inject.session == NULL) - return -1; + if (IS_ERR(inject.session)) + return PTR_ERR(inject.session); if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b5682beaad72..1e61e353f579 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -14,6 +14,7 @@ #include "util/tool.h" #include "util/callchain.h" #include "util/time-utils.h" +#include #include #include @@ -1956,8 +1957,8 @@ int cmd_kmem(int argc, const char **argv) data.path = input_name; kmem_session = session = perf_session__new(&data, false, &perf_kmem); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); ret = -1; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0a4fcbe32bf6..2227e2f42c09 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -5,6 +5,7 @@ #include "util/build-id.h" #include "util/evsel.h" #include "util/evlist.h" +#include "util/mmap.h" #include "util/term.h" #include "util/symbol.h" #include "util/thread.h" @@ -17,9 +18,11 @@ #include "util/debug.h" #include "util/tool.h" #include "util/stat.h" +#include "util/synthetic-events.h" #include "util/top.h" #include "util/data.h" #include "util/ordered-events.h" +#include "util/kvm-stat.h" #include "ui/ui.h" #include @@ -31,6 +34,7 @@ 
#include #include +#include #include #include #include @@ -58,7 +62,6 @@ static const char *get_filename_for_perf_kvm(void) } #ifdef HAVE_KVM_STAT_SUPPORT -#include "util/kvm-stat.h" void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, @@ -748,7 +751,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, { struct evlist *evlist = kvm->evlist; union perf_event *event; - struct perf_mmap *md; + struct mmap *md; u64 timestamp; s64 n = 0; int err; @@ -799,7 +802,7 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) s64 n, ntotal = 0; u64 flush_time = ULLONG_MAX, mmap_time; - for (i = 0; i < kvm->evlist->nr_mmaps; i++) { + for (i = 0; i < kvm->evlist->core.nr_mmaps; i++) { n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time); if (n < 0) return -1; @@ -964,10 +967,10 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) + if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) goto out; - nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin)); + nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin)); if (nr_stdin < 0) goto out; @@ -978,7 +981,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) evlist__enable(kvm->evlist); while (!done) { - struct fdarray *fda = &kvm->evlist->pollfd; + struct fdarray *fda = &kvm->evlist->core.pollfd; int rc; rc = perf_kvm__mmap_read(kvm); @@ -1058,7 +1061,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { + if (evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { ui__error("Failed to mmap the events: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); evlist__close(evlist); @@ -1090,9 +1093,9 @@ static int read_events(struct perf_kvm_stat *kvm) kvm->tool = eops; kvm->session = perf_session__new(&file, false, &kvm->tool); - if (!kvm->session) { + if (IS_ERR(kvm->session)) { pr_err("Initializing perf session 
failed\n"); - return -1; + return PTR_ERR(kvm->session); } symbol__init(&kvm->session->header.env); @@ -1445,8 +1448,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, * perf session */ kvm->session = perf_session__new(&data, false, &kvm->tool); - if (kvm->session == NULL) { - err = -1; + if (IS_ERR(kvm->session)) { + err = PTR_ERR(kvm->session); goto out; } kvm->session->evlist = kvm->evlist; @@ -1513,11 +1516,21 @@ perf_stat: } #endif /* HAVE_KVM_STAT_SUPPORT */ +int __weak kvm_add_default_arch_event(int *argc __maybe_unused, + const char **argv __maybe_unused) +{ + return 0; +} + static int __cmd_record(const char *file_name, int argc, const char **argv) { - int rec_argc, i = 0, j; + int rec_argc, i = 0, j, ret; const char **rec_argv; + ret = kvm_add_default_arch_event(&argc, argv); + if (ret) + return -EINVAL; + rec_argc = argc + 2; rec_argv = calloc(rec_argc + 1, sizeof(char *)); rec_argv[i++] = strdup("record"); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index e290f6b348d8..08e62ae9d37e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -81,9 +81,9 @@ int cmd_list(int argc, const char **argv) long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); - else if (strcmp(argv[i], "metric") == 0) + else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) metricgroup__print(true, false, NULL, raw_dump, details_flag); - else if (strcmp(argv[i], "metricgroup") == 0) + else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 4c2b7f437cdf..474dfd59d7eb 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -30,6 +30,7 @@ #include #include #include +#include static struct perf_session *session; @@ -872,9 
+873,9 @@ static int __cmd_report(bool display_info) }; session = perf_session__new(&data, false, &eops); - if (!session) { + if (IS_ERR(session)) { pr_err("Initializing perf session failed\n"); - return -1; + return PTR_ERR(session); } symbol__init(&session->header.env); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 27d2bde943a8..a13f5817d6fc 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -17,6 +17,7 @@ #include "util/dso.h" #include "util/map.h" #include "util/symbol.h" +#include #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -249,8 +250,8 @@ static int report_raw_events(struct perf_mem *mem) struct perf_session *session = perf_session__new(&data, false, &mem->tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1447004eee8a..23332861de6e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -20,6 +20,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" +#include "util/mmap.h" #include "util/target.h" #include "util/session.h" #include "util/tool.h" @@ -38,6 +39,7 @@ #include "util/trigger.h" #include "util/perf-hooks.h" #include "util/cpu-set-sched.h" +#include "util/synthetic-events.h" #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" @@ -53,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -117,7 +120,7 @@ static bool switch_output_time(struct record *rec) trigger_is_ready(&switch_output_trigger); } -static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, +static int record__write(struct record *rec, struct mmap *map __maybe_unused, void *bf, size_t size) { struct perf_data_file *file = &rec->session->data->file; @@ -166,7 +169,7 @@ static int record__aio_write(struct 
aiocb *cblock, int trace_fd, return rc; } -static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) +static int record__aio_complete(struct mmap *md, struct aiocb *cblock) { void *rem_buf; off_t rem_off; @@ -212,7 +215,7 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) return rc; } -static int record__aio_sync(struct perf_mmap *md, bool sync_all) +static int record__aio_sync(struct mmap *md, bool sync_all) { struct aiocb **aiocb = md->aio.aiocb; struct aiocb *cblocks = md->aio.cblocks; @@ -253,12 +256,12 @@ struct record_aio { size_t size; }; -static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) +static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) { struct record_aio *aio = to; /* - * map->base data pointed by buf is copied into free map->aio.data[] buffer + * map->core.base data pointed by buf is copied into free map->aio.data[] buffer * to release space in the kernel buffer as fast as possible, calling * perf_mmap__consume() from perf_mmap__push() function. 
* @@ -298,7 +301,7 @@ static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t return size; } -static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) +static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) { int ret, idx; int trace_fd = rec->session->data->file.fd; @@ -349,15 +352,15 @@ static void record__aio_mmap_read_sync(struct record *rec) { int i; struct evlist *evlist = rec->evlist; - struct perf_mmap *maps = evlist->mmap; + struct mmap *maps = evlist->mmap; if (!record__aio_enabled(rec)) return; - for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &maps[i]; + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &maps[i]; - if (map->base) + if (map->core.base) record__aio_sync(map, true); } } @@ -385,7 +388,7 @@ static int record__aio_parse(const struct option *opt, #else /* HAVE_AIO_SUPPORT */ static int nr_cblocks_max = 0; -static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, +static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, off_t *off __maybe_unused) { return -1; @@ -437,7 +440,7 @@ static int record__mmap_flush_parse(const struct option *opt, if (!opts->mmap_flush) opts->mmap_flush = MMAP_FLUSH_DEFAULT; - flush_max = perf_evlist__mmap_size(opts->mmap_pages); + flush_max = evlist__mmap_size(opts->mmap_pages); flush_max /= 4; if (opts->mmap_flush > flush_max) opts->mmap_flush = flush_max; @@ -480,7 +483,7 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, NULL, event, event->header.size); } -static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) +static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; @@ -525,7 +528,7 @@ static void record__sig_exit(void) #ifdef HAVE_AUXTRACE_SUPPORT static int record__process_auxtrace(struct perf_tool *tool, - 
struct perf_mmap *map, + struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2) { @@ -563,7 +566,7 @@ static int record__process_auxtrace(struct perf_tool *tool, } static int record__auxtrace_mmap_read(struct record *rec, - struct perf_mmap *map) + struct mmap *map) { int ret; @@ -579,7 +582,7 @@ static int record__auxtrace_mmap_read(struct record *rec, } static int record__auxtrace_mmap_read_snapshot(struct record *rec, - struct perf_mmap *map) + struct mmap *map) { int ret; @@ -600,8 +603,8 @@ static int record__auxtrace_read_snapshot_all(struct record *rec) int i; int rc = 0; - for (i = 0; i < rec->evlist->nr_mmaps; i++) { - struct perf_mmap *map = &rec->evlist->mmap[i]; + for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { + struct mmap *map = &rec->evlist->mmap[i]; if (!map->auxtrace_mmap.base) continue; @@ -666,7 +669,7 @@ static int record__auxtrace_init(struct record *rec) static inline int record__auxtrace_mmap_read(struct record *rec __maybe_unused, - struct perf_mmap *map __maybe_unused) + struct mmap *map __maybe_unused) { return 0; } @@ -705,7 +708,7 @@ static int record__mmap_evlist(struct record *rec, if (opts->affinity != PERF_AFFINITY_SYS) cpu__setup_cpunode_map(); - if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, + if (evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, opts->nr_cblocks, opts->affinity, @@ -753,9 +756,9 @@ static int record__open(struct record *rec) if (perf_evlist__add_dummy(evlist)) return -ENOMEM; - pos = perf_evlist__first(evlist); + pos = evlist__first(evlist); pos->tracking = 0; - pos = perf_evlist__last(evlist); + pos = evlist__last(evlist); pos->tracking = 1; pos->core.attr.enable_on_exec = 1; } @@ -786,6 +789,17 @@ try_again: pos->supported = true; } + if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) { + pr_warning( +"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" +"check 
/proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" +"Samples in kernel functions may not be resolved if a suitable vmlinux\n" +"file is not found in the buildid cache or in the vmlinux path.\n\n" +"Samples in kernel modules won't be resolved at all.\n\n" +"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" +"even with a suitable vmlinux or kallsyms file.\n\n"); + } + if (perf_evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, perf_evsel__name(pos), errno, @@ -888,7 +902,7 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) +static void record__adjust_affinity(struct record *rec, struct mmap *map) { if (rec->opts.affinity != PERF_AFFINITY_SYS && !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { @@ -935,7 +949,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, u64 bytes_written = rec->bytes_written; int i; int rc = 0; - struct perf_mmap *maps; + struct mmap *maps; int trace_fd = rec->data.file.fd; off_t off = 0; @@ -952,20 +966,20 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, if (record__aio_enabled(rec)) off = record__aio_get_pos(trace_fd); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { u64 flush = 0; - struct perf_mmap *map = &maps[i]; + struct mmap *map = &maps[i]; - if (map->base) { + if (map->core.base) { record__adjust_affinity(rec, map); if (synch) { - flush = map->flush; - map->flush = 1; + flush = map->core.flush; + map->core.flush = 1; } if (!record__aio_enabled(rec)) { if (perf_mmap__push(map, rec, record__pushfn) < 0) { if (synch) - map->flush = flush; + map->core.flush = flush; rc = -1; goto out; } @@ -973,13 +987,13 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 
if (record__aio_push(rec, map, &off) < 0) { record__aio_set_pos(trace_fd, off); if (synch) - map->flush = flush; + map->core.flush = flush; rc = -1; goto out; } } if (synch) - map->flush = flush; + map->core.flush = flush; } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && @@ -1180,23 +1194,14 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); static void alarm_sig_handler(int sig); -int __weak -perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, - struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - static const struct perf_event_mmap_page * perf_evlist__pick_pc(struct evlist *evlist) { if (evlist) { - if (evlist->mmap && evlist->mmap[0].base) - return evlist->mmap[0].base; - if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) - return evlist->overwrite_mmap[0].base; + if (evlist->mmap && evlist->mmap[0].core.base) + return evlist->mmap[0].core.base; + if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) + return evlist->overwrite_mmap[0].core.base; } return NULL; } @@ -1362,9 +1367,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } session = perf_session__new(data, false, tool); - if (session == NULL) { + if (IS_ERR(session)) { pr_err("Perf session creation failed.\n"); - return -1; + return PTR_ERR(session); } fd = perf_data__fd(data); @@ -1407,7 +1412,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = -1; goto out_child; } - session->header.env.comp_mmap_len = session->evlist->mmap_len; + session->header.env.comp_mmap_len = session->evlist->core.mmap_len; err = bpf__apply_obj_config(); if (err) { @@ -1610,7 +1615,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (hits == rec->samples) { if (done || draining) break; - err = 
perf_evlist__poll(rec->evlist, -1); + err = evlist__poll(rec->evlist, -1); /* * Propagate error, only if there's any. Ignore positive * number of returned events and interrupt error. @@ -1619,7 +1624,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = 0; waking++; - if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) + if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) draining = true; } @@ -1976,7 +1981,7 @@ out_free: static void switch_output_size_warn(struct record *rec) { - u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); + u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); struct switch_output *s = &rec->switch_output; wakeup_size /= 2; @@ -2371,16 +2376,6 @@ int cmd_record(int argc, const char **argv) err = -ENOMEM; - if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) - pr_warning( -"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" -"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" -"Samples in kernel functions may not be resolved if a suitable vmlinux\n" -"file is not found in the buildid cache or in the vmlinux path.\n\n" -"Samples in kernel modules won't be resolved at all.\n\n" -"If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" -"even with a suitable vmlinux or kallsyms file.\n\n"); - if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); } else if (rec->switch_output.enabled) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b18fab94d38d..aae0e57c60fb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -48,7 +48,7 @@ #include "util/auxtrace.h" #include "util/units.h" #include "util/branch.h" -#include "util/util.h" +#include "util/util.h" // perf_tip() #include "ui/ui.h" #include "ui/progress.h" @@ -1269,8 +1269,8 @@ int cmd_report(int argc, const char **argv) repeat: session = perf_session__new(&data, false, &report.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); ret = evswitch__init(&report.evswitch, session->evlist, stderr); if (ret) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ec96d64aec69..5cacc4f84c8d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3,8 +3,10 @@ #include "perf.h" #include "perf-sys.h" +#include "util/cpumap.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" @@ -23,6 +25,7 @@ #include "util/trace-event.h" #include "util/debug.h" +#include "util/event.h" #include #include @@ -36,7 +39,9 @@ #include #include #include +#include #include +#include #include @@ -1794,9 +1799,9 @@ static int perf_sched__read_events(struct perf_sched *sched) int rc = -1; session = perf_session__new(&data, false, &sched->tool); - if (session == NULL) { - pr_debug("No Memory for session\n"); - return -1; + if (IS_ERR(session)) { + pr_debug("Error creating perf session"); + return PTR_ERR(session); } symbol__init(&session->header.env); @@ -2051,7 +2056,7 @@ static void timehist_print_sample(struct perf_sched *sched, EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 
EVSEL__PRINT_CALLCHAIN_ARROW | EVSEL__PRINT_SKIP_IGNORED, - &callchain_cursor, stdout); + &callchain_cursor, symbol_conf.bt_stop_list, stdout); out: printf("\n"); @@ -2986,8 +2991,8 @@ static int perf_sched__timehist(struct perf_sched *sched) symbol_conf.use_callchain = sched->show_callchain; session = perf_session__new(&data, false, &sched->tool); - if (session == NULL) - return -ENOMEM; + if (IS_ERR(session)) + return PTR_ERR(session); evlist = session->evlist; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e079b34201f2..286fc70d7402 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -17,6 +17,7 @@ #include "util/trace-event.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/evswitch.h" #include "util/sort.h" #include "util/data.h" @@ -52,6 +53,7 @@ #include #include #include +#include #include "util/record.h" #include "util/util.h" #include "perf.h" @@ -1324,7 +1326,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, } else printed += fprintf(fp, "\n"); - printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp); + printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, + symbol_conf.bt_stop_list, fp); } /* print branch_to information */ @@ -1866,7 +1869,8 @@ static void process_event(struct perf_script *script, cursor = &callchain_cursor; fputc(cursor ? 
'\n' : ' ', fp); - sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp); + sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, + symbol_conf.bt_stop_list, fp); } if (PRINT_FIELD(IREGS)) @@ -1915,7 +1919,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) int cpu, thread; static int header_printed; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; if (!header_printed) { @@ -2042,7 +2046,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, return err; evlist = *pevlist; - evsel = perf_evlist__last(*pevlist); + evsel = evlist__last(*pevlist); if (!evsel->priv) { if (scr->per_event_dump) { @@ -3083,8 +3087,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num, int i = 0; session = perf_session__new(&data, false, NULL); - if (!session) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); @@ -3754,8 +3758,8 @@ int cmd_script(int argc, const char **argv) } session = perf_session__new(&data, false, &script.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (header || header_only) { script.tool.show_feat_hdr = SHOW_FEAT_HEADER; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e17bf9f700a..468fc49420ce 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -61,6 +61,7 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" #include "util/top.h" @@ -82,6 +83,7 @@ #include #include #include +#include #include #include @@ -233,7 +235,7 @@ static int write_stat_round_event(u64 tm, u64 type) #define WRITE_STAT_ROUND_EVENT(time, interval) \ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) -#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +#define 
SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) static int perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, @@ -276,7 +278,7 @@ static int read_counter(struct evsel *counter, struct timespec *rs) if (!counter->supported) return -ENOENT; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { @@ -540,8 +542,8 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, - process_synthesized_event, is_pipe); + err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, + process_synthesized_event, is_pipe); if (err < 0) return err; } @@ -822,18 +824,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, return cpu_map__get_core(map, cpu, NULL); } -static int cpu_map__get_max(struct perf_cpu_map *map) -{ - int i, max = -1; - - for (i = 0; i < map->nr; i++) { - if (map->map[i] > max) - max = map->map[i]; - } - - return max; -} - static int perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { @@ -928,7 +918,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = cpu_map__get_max(evsel_list->core.cpus); + nr = perf_cpu_map__max(evsel_list->core.cpus); stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 
0 : -ENOMEM; } @@ -1447,9 +1437,9 @@ static int __cmd_record(int argc, const char **argv) } session = perf_session__new(data, false, NULL); - if (session == NULL) { - pr_err("Perf session creation failed.\n"); - return -1; + if (IS_ERR(session)) { + pr_err("Perf session creation failed\n"); + return PTR_ERR(session); } init_features(session); @@ -1646,8 +1636,8 @@ static int __cmd_report(int argc, const char **argv) perf_stat.data.mode = PERF_DATA_MODE_READ; session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); perf_stat.session = session; stat_config.output = stderr; @@ -1681,7 +1671,7 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->system_wide) + if (!counter->core.system_wide) return; } @@ -1963,8 +1953,11 @@ int cmd_stat(int argc, const char **argv) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); + if (run_idx != 0) + perf_evlist__reset_prev_raw_counts(evsel_list); + status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1) { + if (forever && status != -1 && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e0e822695a29..9e84fae9b096 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -35,6 +35,7 @@ #include "util/tool.h" #include "util/data.h" #include "util/debug.h" +#include #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -1601,8 +1602,8 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) &tchart->tool); int ret = -EINVAL; - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); symbol__init(&session->header.env); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 
726e3f2dd8c7..1f60124eb19b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -27,11 +27,14 @@ #include "util/dso.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_config.h" #include "util/event.h" #include "util/machine.h" #include "util/map.h" +#include "util/mmap.h" #include "util/session.h" #include "util/symbol.h" +#include "util/synthetic-events.h" #include "util/top.h" #include "util/util.h" #include @@ -76,6 +79,7 @@ #include #include #include +#include #include @@ -528,7 +532,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->core.nr_entries) { - top->sym_evsel = perf_evlist__first(top->evlist); + top->sym_evsel = evlist__first(top->evlist); fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; @@ -537,7 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) if (top->sym_evsel->idx == counter) break; } else - top->sym_evsel = perf_evlist__first(top->evlist); + top->sym_evsel = evlist__first(top->evlist); break; case 'f': prompt_integer(&top->count_filter, "Enter display event count filter"); @@ -861,7 +865,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) { struct record_opts *opts = &top->record_opts; struct evlist *evlist = top->evlist; - struct perf_mmap *md; + struct mmap *md; union perf_event *event; md = opts->overwrite ? 
&evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; @@ -901,7 +905,7 @@ static void perf_top__mmap_read(struct perf_top *top) if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); - for (i = 0; i < top->evlist->nr_mmaps; i++) + for (i = 0; i < top->evlist->core.nr_mmaps; i++) perf_top__mmap_read_idx(top, i); if (overwrite) { @@ -959,7 +963,7 @@ static int perf_top__overwrite_check(struct perf_top *top) /* has term for current event */ if ((overwrite < 0) && (set >= 0)) { /* if it's first event, set overwrite */ - if (evsel == perf_evlist__first(evlist)) + if (evsel == evlist__first(evlist)) overwrite = set; else return -1; @@ -983,7 +987,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top, return 0; /* only fall back when first event fails */ - if (evsel != perf_evlist__first(evlist)) + if (evsel != evlist__first(evlist)) return 0; evlist__for_each_entry(evlist, counter) @@ -1040,7 +1044,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) { + if (evlist__mmap(evlist, opts->mmap_pages) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, str_error_r(errno, msg, sizeof(msg))); goto out_err; @@ -1304,7 +1308,7 @@ static int __cmd_top(struct perf_top *top) } /* Wait for a minimal set of events before starting the snapshot */ - perf_evlist__poll(top->evlist, 100); + evlist__poll(top->evlist, 100); perf_top__mmap_read(top); @@ -1314,7 +1318,7 @@ static int __cmd_top(struct perf_top *top) perf_top__mmap_read(top); if (opts->overwrite || (hits == top->samples)) - ret = perf_evlist__poll(top->evlist, 100); + ret = evlist__poll(top->evlist, 100); if (resize) { perf_top__resize(top); @@ -1641,7 +1645,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - top.sym_evsel = perf_evlist__first(top.evlist); + top.sym_evsel = evlist__first(top.evlist); if (!callchain_param.enabled) { symbol_conf.cumulate_callchain = false; @@ -1671,8 +1675,8 @@ int cmd_top(int argc, const char **argv) } top.session = 
perf_session__new(NULL, false, NULL); - if (top.session == NULL) { - status = -1; + if (IS_ERR(top.session)) { + status = PTR_ERR(top.session); goto out_delete_evlist; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0f633f0d6be8..bb5130d02155 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -28,8 +28,12 @@ #include "util/dso.h" #include "util/env.h" #include "util/event.h" +#include "util/evsel.h" +#include "util/evsel_fprintf.h" +#include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" +#include "util/mmap.h" #include #include #include "util/machine.h" @@ -2074,7 +2078,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam EVSEL__PRINT_DSO | EVSEL__PRINT_UNKNOWN_AS_ADDR; - return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); + return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output); } static const char *errno_to_name(struct evsel *evsel, int err) @@ -3408,7 +3412,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->dump.map) bpf_map__fprintf(trace->dump.map, trace->output); - err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); + err = evlist__mmap(evlist, trace->opts.mmap_pages); if (err < 0) goto out_error_mmap; @@ -3425,7 +3429,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || evlist->core.threads->nr > 1 || - perf_evlist__first(evlist)->core.attr.inherit; + evlist__first(evlist)->core.attr.inherit; /* * Now that we already used evsel->core.attr to ask the kernel to setup the @@ -3441,9 +3445,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) again: before = trace->nr_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union 
perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) @@ -3472,8 +3476,8 @@ again: if (trace->nr_events == before) { int timeout = done ? 100 : -1; - if (!draining && perf_evlist__poll(evlist, timeout) > 0) { - if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) + if (!draining && evlist__poll(evlist, timeout) > 0) { + if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; goto again; @@ -3584,8 +3588,8 @@ static int trace__replay(struct trace *trace) trace->multiple_threads = true; session = perf_session__new(&data, false, &trace->tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (trace->opts.target.pid) symbol_conf.pid_list_str = strdup(trace->opts.target.pid); diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build index eaeb8cb5379b..1e148bbdf820 100644 --- a/tools/perf/jvmti/Build +++ b/tools/perf/jvmti/Build @@ -1,8 +1,17 @@ jvmti-y += libjvmti.o jvmti-y += jvmti_agent.o +# For strlcpy +jvmti-y += libstring.o + CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux CFLAGS_REMOVE_jvmti = -Wmissing-declarations CFLAGS_REMOVE_jvmti += -Wstrict-prototypes CFLAGS_REMOVE_jvmti += -Wextra CFLAGS_REMOVE_jvmti += -Wwrite-strings + +CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" + +$(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index a67efb8d9d39..85ccb8c439a4 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -59,7 +59,13 @@ else CFLAGS := -g -Wall endif -INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = \ +-I$(srctree)/tools/perf/lib/include \ 
+-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) @@ -88,13 +94,34 @@ LIBPERF_PC := $(OUTPUT)libperf.pc LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so* +LIB_DIR := $(srctree)/tools/lib/api/ + +ifneq ($(OUTPUT),) +ifneq ($(subdir),) + API_PATH=$(OUTPUT)/../lib/api/ +else + API_PATH=$(OUTPUT) +endif +else + API_PATH=$(LIB_DIR) +endif + +LIBAPI = $(API_PATH)libapi.a + +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a + +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(LIBPERF_IN): FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) -$(LIBPERF_SO): $(LIBPERF_IN) +$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ @ln -sf $(@F) $(OUTPUT)libperf.so @@ -106,12 +133,12 @@ libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC) all: fixdep $(Q)$(MAKE) libs -clean: +clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC) $(Q)$(MAKE) -C tests clean -tests: +tests: libs $(Q)$(MAKE) -C tests $(Q)$(MAKE) -C tests run @@ -146,6 +173,7 @@ install_headers: $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c index 29d5e3348718..d0b9ae422b9f 100644 --- a/tools/perf/lib/core.c +++ 
b/tools/perf/lib/core.c @@ -4,7 +4,9 @@ #include #include +#include #include +#include #include "internal.h" static int __base_pr(enum libperf_print_level level, const char *format, @@ -15,11 +17,6 @@ static int __base_pr(enum libperf_print_level level, const char *format, static libperf_print_fn_t __libperf_pr = __base_pr; -void libperf_set_print(libperf_print_fn_t fn) -{ - __libperf_pr = fn; -} - __printf(2, 3) void libperf_print(enum libperf_print_level level, const char *format, ...) { @@ -32,3 +29,9 @@ void libperf_print(enum libperf_print_level level, const char *format, ...) __libperf_pr(level, format, args); va_end(args); } + +void libperf_init(libperf_print_fn_t fn) +{ + page_size = sysconf(_SC_PAGE_SIZE); + __libperf_pr = fn; +} diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index 1f0e6f334237..2ca1fafa620d 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } + +int perf_cpu_map__max(struct perf_cpu_map *map) +{ + int i, max = -1; + + for (i = 0; i < map->nr; i++) { + if (map->map[i] > max) + max = map->map[i]; + } + + return max; +} diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index f4dc9a208332..d1496fee810c 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -1,16 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include +#include +#include #include #include +#include #include #include +#include +#include +#include +#include +#include #include #include +#include void perf_evlist__init(struct perf_evlist *evlist) { + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; } @@ -157,3 +171,113 @@ void perf_evlist__disable(struct perf_evlist *evlist) perf_evlist__for_each_entry(evlist, evsel) perf_evsel__disable(evsel); } + +u64 perf_evlist__read_format(struct perf_evlist *evlist) 
+{ + struct perf_evsel *first = perf_evlist__first(evlist); + + return first->attr.read_format; +} + +#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) + +static void perf_evlist__id_hash(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + int hash; + struct perf_sample_id *sid = SID(evsel, cpu, thread); + + sid->id = id; + sid->evsel = evsel; + hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); + hlist_add_head(&sid->node, &evlist->heads[hash]); +} + +void perf_evlist__id_add(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + evsel->id[evsel->ids++] = id; +} + +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd) +{ + u64 read_data[4] = { 0, }; + int id_idx = 1; /* The first entry is the counter value */ + u64 id; + int ret; + + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); + if (!ret) + goto add; + + if (errno != ENOTTY) + return -1; + + /* Legacy way to get event id.. All hail to old kernels! */ + + /* + * This way does not work with group format read, so bail + * out in that case. 
+ */ + if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) + return -1; + + if (!(evsel->attr.read_format & PERF_FORMAT_ID) || + read(fd, &read_data, sizeof(read_data)) == -1) + return -1; + + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + ++id_idx; + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + ++id_idx; + + id = read_data[id_idx]; + +add: + perf_evlist__id_add(evlist, evsel, cpu, thread, id); + return 0; +} + +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) +{ + int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_threads = perf_thread_map__nr(evlist->threads); + int nfds = 0; + struct perf_evsel *evsel; + + perf_evlist__for_each_entry(evlist, evsel) { + if (evsel->system_wide) + nfds += nr_cpus; + else + nfds += nr_cpus * nr_threads; + } + + if (fdarray__available_entries(&evlist->pollfd) < nfds && + fdarray__grow(&evlist->pollfd, nfds) < 0) + return -ENOMEM; + + return 0; +} + +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + void *ptr, short revent) +{ + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + + if (pos >= 0) { + evlist->pollfd.priv[pos].ptr = ptr; + fcntl(fd, F_SETFL, O_NONBLOCK); + } + + return pos; +} + +int perf_evlist__poll(struct perf_evlist *evlist, int timeout) +{ + return fdarray__poll(&evlist->pollfd, timeout); +} diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c index 24abc80dd767..a8cb582e2721 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/perf/lib/evsel.c @@ -230,3 +230,33 @@ struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel) { return &evsel->attr; } + +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + if (ncpus == 0 || nthreads == 0) + return 0; + + if (evsel->system_wide) + nthreads = 1; + + evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); + if (evsel->sample_id == NULL) + return -ENOMEM; + + evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); + if 
(evsel->id == NULL) { + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + return -ENOMEM; + } + + return 0; +} + +void perf_evsel__free_id(struct perf_evsel *evsel) +{ + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + zfree(&evsel->id); + evsel->ids = 0; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 448891f06e3e..9f440ab12b76 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -3,6 +3,11 @@ #define __LIBPERF_INTERNAL_EVLIST_H #include +#include +#include + +#define PERF_EVLIST__HLIST_BITS 8 +#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) struct perf_cpu_map; struct perf_thread_map; @@ -13,8 +18,16 @@ struct perf_evlist { bool has_user_cpus; struct perf_cpu_map *cpus; struct perf_thread_map *threads; + int nr_mmaps; + size_t mmap_len; + struct fdarray pollfd; + struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; }; +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + void *ptr, short revent); + /** * __perf_evlist__for_each_entry - iterate thru all the evsels * @list: list_head instance to iterate @@ -47,4 +60,24 @@ struct perf_evlist { #define perf_evlist__for_each_entry_reverse(evlist, evsel) \ __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel) +static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.next, struct perf_evsel, node); +} + +static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.prev, struct perf_evsel, node); +} + +u64 perf_evlist__read_format(struct perf_evlist *evlist); + +void perf_evlist__id_add(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id); + +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int 
thread, int fd); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 8b854d1c9b45..a69b8299c36f 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -4,9 +4,35 @@ #include #include +#include +#include struct perf_cpu_map; struct perf_thread_map; +struct xyarray; + +/* + * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are + * more than one entry in the evlist. + */ +struct perf_sample_id { + struct hlist_node node; + u64 id; + struct perf_evsel *evsel; + /* + * 'idx' will be used for AUX area sampling. A sample will have AUX area + * data that will be queued for decoding, where there are separate + * queues for each CPU (per-cpu tracing) or task (per-thread tracing). + * The sample ID can be used to lookup 'idx' which is effectively the + * queue number. + */ + int idx; + int cpu; + pid_t tid; + + /* Holds total ID period value for PERF_SAMPLE_READ processing. 
*/ + u64 period; +}; struct perf_evsel { struct list_head node; @@ -15,9 +41,13 @@ struct perf_evsel { struct perf_cpu_map *own_cpus; struct perf_thread_map *threads; struct xyarray *fd; + struct xyarray *sample_id; + u64 *id; + u32 ids; /* parse modifier helper */ int nr_members; + bool system_wide; }; int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); @@ -26,4 +56,7 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); int perf_evsel__read_size(struct perf_evsel *evsel); int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__free_id(struct perf_evsel *evsel); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h index 0b56f1201dc9..5175d491b2d4 100644 --- a/tools/perf/lib/include/internal/lib.h +++ b/tools/perf/lib/include/internal/lib.h @@ -2,7 +2,9 @@ #ifndef __LIBPERF_INTERNAL_LIB_H #define __LIBPERF_INTERNAL_LIB_H -#include +#include + +extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h new file mode 100644 index 000000000000..ba1e519c15b9 --- /dev/null +++ b/tools/perf/lib/include/internal/mmap.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LIBPERF_INTERNAL_MMAP_H +#define __LIBPERF_INTERNAL_MMAP_H + +#include +#include +#include +#include + +/* perf sample has 16 bits size limit */ +#define PERF_SAMPLE_MAX_SIZE (1 << 16) + +/** + * struct perf_mmap - perf's ring buffer mmap details + * + * @refcnt - e.g. 
code using PERF_EVENT_IOC_SET_OUTPUT to share this + */ +struct perf_mmap { + void *base; + int mask; + int fd; + int cpu; + refcount_t refcnt; + u64 prev; + u64 start; + u64 end; + bool overwrite; + u64 flush; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +}; + +#endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index c341a7b2c874..cfd70e720c1c 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -17,6 +17,6 @@ enum libperf_print_level { typedef int (*libperf_print_fn_t)(enum libperf_print_level level, const char *, va_list ap); -LIBPERF_API void libperf_set_print(libperf_print_fn_t fn); +LIBPERF_API void libperf_init(libperf_print_fn_t fn); #endif /* __LIBPERF_CORE_H */ diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h index 8aa995c59498..ac9aa497f84a 100644 --- a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/perf/lib/include/perf/cpumap.h @@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h index 38365f8f3fba..8a2ce0757ab2 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/perf/lib/include/perf/evlist.h @@ -31,5 +31,6 @@ LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist); LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); +LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); #endif /* 
__LIBPERF_EVLIST_H */ diff --git a/tools/perf/lib/lib.c b/tools/perf/lib/lib.c index 2a81819c3b8c..18658931fc71 100644 --- a/tools/perf/lib/lib.c +++ b/tools/perf/lib/lib.c @@ -5,6 +5,8 @@ #include #include +unsigned int page_size; + static ssize_t ion(bool is_read, int fd, void *buf, size_t n) { void *buf_start = buf; diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index dc4d66363bc4..ab8dbde1136c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -1,6 +1,6 @@ LIBPERF_0.0.1 { global: - libperf_set_print; + libperf_init; perf_cpu_map__dummy_new; perf_cpu_map__get; perf_cpu_map__put; @@ -9,6 +9,7 @@ LIBPERF_0.0.1 { perf_cpu_map__nr; perf_cpu_map__cpu; perf_cpu_map__empty; + perf_cpu_map__max; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; @@ -38,6 +39,7 @@ LIBPERF_0.0.1 { perf_evlist__remove; perf_evlist__next; perf_evlist__set_maps; + perf_evlist__poll; local: *; }; diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/perf/lib/tests/test-cpumap.c index 76a43cfb83a1..aa34c20df07e 100644 --- a/tools/perf/lib/tests/test-cpumap.c +++ b/tools/perf/lib/tests/test-cpumap.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + int main(int argc, char **argv) { struct perf_cpu_map *cpus; __T_START; + libperf_init(libperf_print); + cpus = perf_cpu_map__dummy_new(); if (!cpus) return -1; diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/perf/lib/tests/test-evlist.c index 4e1407f20ffd..e6b2ab2e2bde 100644 --- a/tools/perf/lib/tests/test-evlist.c +++ b/tools/perf/lib/tests/test-evlist.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include @@ -6,6 +8,12 @@ #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return 
vfprintf(stderr, fmt, ap); +} + static int test_stat_cpu(void) { struct perf_cpu_map *cpus; @@ -177,6 +185,8 @@ int main(int argc, char **argv) { __T_START; + libperf_init(libperf_print); + test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/perf/lib/tests/test-evsel.c index 2c648fe5617e..1b6c4285ac2b 100644 --- a/tools/perf/lib/tests/test-evsel.c +++ b/tools/perf/lib/tests/test-evsel.c @@ -1,10 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + static int test_stat_cpu(void) { struct perf_cpu_map *cpus; @@ -116,6 +124,8 @@ int main(int argc, char **argv) { __T_START; + libperf_init(libperf_print); + test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/perf/lib/tests/test-threadmap.c index 10a4f4cbbdd5..8c5f47247d9e 100644 --- a/tools/perf/lib/tests/test-threadmap.c +++ b/tools/perf/lib/tests/test-threadmap.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + int main(int argc, char **argv) { struct perf_thread_map *threads; __T_START; + libperf_init(libperf_print); + threads = perf_thread_map__new_dummy(); if (!threads) return -1; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1193b923e801..27f94b0bb874 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -12,6 +12,7 @@ #include "util/build-id.h" #include "util/cache.h" #include "util/env.h" +#include // page_size #include #include "util/config.h" #include @@ -20,11 +21,12 @@ #include "util/bpf-loader.h" #include "util/debug.h" #include "util/event.h" -#include "util/util.h" +#include "util/util.h" // 
usage() #include "ui/ui.h" #include "perf-sys.h" #include #include +#include #include #include #include @@ -428,6 +430,12 @@ void pthread__unblock_sigwinch(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return eprintf(level, verbose, fmt, ap); +} + int main(int argc, const char **argv) { int err; @@ -438,8 +446,7 @@ int main(int argc, const char **argv) exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); - /* The page_size is placed in util object. */ - page_size = sysconf(_SC_PAGE_SIZE); + libperf_init(libperf_print); cmd = extract_argv0_path(argv[0]); if (!cmd) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index e62b09b6a844..de7efa2cebd1 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -30,9 +30,9 @@ the topic. Eg: "Floating-point.json". All the topic JSON files for a CPU model/family should be in a separate sub directory. Thus for the Silvermont X86 CPU: - $ ls tools/perf/pmu-events/arch/x86/Silvermont_core - Cache.json Memory.json Virtual-Memory.json - Frontend.json Pipeline.json + $ ls tools/perf/pmu-events/arch/x86/silvermont + cache.json memory.json virtual-memory.json + frontend.json pipeline.json The JSONs folder for a CPU model/family may be placed in the root arch folder, or may be placed in a vendor sub-folder under the arch folder @@ -94,7 +94,7 @@ users to specify events by their name: where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. -However some errors in processing may cause the perf build to fail. +However some errors in processing may cause the alias build to fail. Mapfile format =============== @@ -119,7 +119,7 @@ where: Header line The header line is the first line in the file, which is - always _IGNORED_. It can empty. + always _IGNORED_. It can be empty. 
CPUID: CPUID is an arch-specific char string, that can be used @@ -138,15 +138,15 @@ where: files, relative to the directory containing the mapfile.csv Type: - indicates whether the events or "core" or "uncore" events. + indicates whether the events are "core" or "uncore" events. Eg: - $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv - GenuineIntel-6-37,V13,Silvermont_core,core - GenuineIntel-6-4D,V13,Silvermont_core,core - GenuineIntel-6-4C,V13,Silvermont_core,core + $ grep silvermont tools/perf/pmu-events/arch/x86/mapfile.csv + GenuineIntel-6-37,v13,silvermont,core + GenuineIntel-6-4D,v13,silvermont,core + GenuineIntel-6-4C,v13,silvermont,core i.e the three CPU models use the JSON files (i.e PMU events) listed - in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'. + in the directory 'tools/perf/pmu-events/arch/x86/silvermont'. diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json new file mode 100644 index 000000000000..b5e5d055c70d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -0,0 +1,14 @@ +[ + { + "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.", + "EventCode": "0x10", + "EventName": "BR_MIS_PRED", + "BriefDescription": "Mispredicted or not predicted branch speculatively executed." + }, + { + "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.", + "EventCode": "0x12", + "EventName": "BR_PRED", + "BriefDescription": "Predictable branch speculatively executed." 
+ } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json new file mode 100644 index 000000000000..fce7309ae624 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -0,0 +1,24 @@ +[ + { + "EventCode": "0x11", + "EventName": "CPU_CYCLES", + "BriefDescription": "The number of core clock cycles." + }, + { + "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", + "EventCode": "0x19", + "EventName": "BUS_ACCESS", + "BriefDescription": "Bus access." + }, + { + "EventCode": "0x1D", + "EventName": "BUS_CYCLES", + "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES." + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json new file mode 100644 index 000000000000..24594081c199 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -0,0 +1,207 @@ +[ + { + "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.", + "EventCode": "0x01", + "EventName": "L1I_CACHE_REFILL", + "BriefDescription": "L1 instruction cache refill" + }, + { + "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "EventCode": "0x02", + "EventName": "L1I_TLB_REFILL", + "BriefDescription": "L1 instruction TLB refill" + }, + { + "PublicDescription": "L1 data cache refill. 
This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", + "EventCode": "0x03", + "EventName": "L1D_CACHE_REFILL", + "BriefDescription": "L1 data cache refill" + }, + { + "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", + "EventCode": "0x04", + "EventName": "L1D_CACHE", + "BriefDescription": "L1 data cache access" + }, + { + "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "EventCode": "0x05", + "EventName": "L1D_TLB_REFILL", + "BriefDescription": "L1 data TLB refill" + }, + { + "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", + "EventCode": "0x14", + "EventName": "L1I_CACHE", + "BriefDescription": "L1 instruction cache access" + }, + { + "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", + "EventCode": "0x15", + "EventName": "L1D_CACHE_WB", + "BriefDescription": "L1 data cache Write-Back" + }, + { + "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", + "EventCode": "0x16", + "EventName": "L2D_CACHE", + "BriefDescription": "L2 data cache access" + }, + { + "PublicDescription": "L2 data cache refill. 
This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted", + "EventCode": "0x17", + "EventName": "L2D_CACHE_REFILL", + "BriefDescription": "L2 data cache refill" + }, + { + "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", + "EventCode": "0x18", + "EventName": "L2D_CACHE_WB", + "BriefDescription": "L2 data cache write-back" + }, + { + "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", + "EventCode": "0x20", + "EventName": "L2D_CACHE_ALLOCATE", + "BriefDescription": "L2 data cache allocation without refill" + }, + { + "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", + "EventCode": "0x25", + "EventName": "L1D_TLB", + "BriefDescription": "Level 1 data TLB access." + }, + { + "PublicDescription": "Level 1 instruction TLB access. 
This event counts any instruction fetch which accesses the instruction L1 TLB. This event counts regardless of whether the MMU is enabled.", + "EventCode": "0x26", + "EventName": "L1I_TLB", + "BriefDescription": "Level 1 instruction TLB access" + }, + { + "PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2", + "EventCode": "0x29", + "EventName": "L3D_CACHE_ALLOCATE", + "BriefDescription": "Allocation without refill" + }, + { + "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning data from the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.", + "EventCode": "0x2A", + "EventName": "L3D_CACHE_REFILL", + "BriefDescription": "Attributable Level 3 unified cache refill." + }, + { + "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning data from the SCU, or for any cacheable write to the SCU.", + "EventCode": "0x2B", + "EventName": "L3D_CACHE", + "BriefDescription": "Attributable Level 3 unified cache access." + }, + { + "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on any refill of the L2 TLB, caused by either an instruction or data access. This event does not count if the MMU is disabled.", + "EventCode": "0x2D", + "EventName": "L2D_TLB_REFILL", + "BriefDescription": "Attributable L2 data or unified TLB refill" + }, + { + "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs).
This event does not count if the MMU is disabled.", + "EventCode": "0x2F", + "EventName": "L2D_TLB", + "BriefDescription": "Attributable L2 data or unified TLB access" + }, + { + "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.", + "EventCode": "0x34", + "EventName": "DTLB_WALK", + "BriefDescription": "Access to data TLB that caused a page table walk." + }, + { + "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.", + "EventCode": "0x35", + "EventName": "ITLB_WALK", + "BriefDescription": "Access to instruction TLB that caused a page table walk." + }, + { + "EventCode": "0x36", + "EventName": "LL_CACHE_RD", + "BriefDescription": "Last level cache access, read" + }, + { + "EventCode": "0x37", + "EventName": "LL_CACHE_MISS_RD", + "BriefDescription": "Last level cache miss, read" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": 
"L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json new file mode 100644 index 000000000000..98d29c862320 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -0,0 +1,52 @@ +[ + { + "EventCode": "0x09", + "EventName": "EXC_TAKEN", + "BriefDescription": "Exception taken." + }, + { + "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", + "EventCode": "0x1A", + "EventName": "MEMORY_ERROR", + "BriefDescription": "Local memory error." + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_UNDEF" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json new file mode 100644 index 000000000000..c153ac706d8d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -0,0 +1,108 @@ +[ + { + "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).", + "EventCode": "0x00", + "EventName": "SW_INCR", + "BriefDescription": "Software increment." + }, + { + "PublicDescription": "Instruction architecturally executed. 
This event counts all retired instructions, including those that fail their condition check.", + "EventCode": "0x08", + "EventName": "INST_RETIRED", + "BriefDescription": "Instruction architecturally executed." + }, + { + "EventCode": "0x0A", + "EventName": "EXC_RETURN", + "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return." + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", + "EventCode": "0x0B", + "EventName": "CID_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR." + }, + { + "EventCode": "0x1B", + "EventName": "INST_SPEC", + "BriefDescription": "Operation speculatively executed" + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", + "EventCode": "0x1C", + "EventName": "TTBR_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR" + }, + { + "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", + "EventCode": "0x21", + "EventName": "BR_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch." + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", + "EventCode": "0x22", + "EventName": "BR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch." 
+ }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json new file mode 100644 index 000000000000..b86643253f19 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -0,0 +1,23 @@ +[ + { + "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. 
This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", + "EventCode": "0x13", + "EventName": "MEM_ACCESS", + "BriefDescription": "Data memory access" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json new file mode 100644 index 000000000000..8bde029a62d5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -0,0 +1,7 @@ +[ + { + "EventCode": "0x31", + "EventName": "REMOTE_ACCESS", + "BriefDescription": "Access to another socket in a multi-socket system" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json new file mode 100644 index 000000000000..010a647f9d02 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -0,0 +1,14 @@ +[ + { + "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.", + "EventCode": "0x23", + "EventName": "STALL_FRONTEND", + "BriefDescription": "No operation issued because of the frontend." + }, + { + "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", + "EventCode": "0x24", + "EventName": "STALL_BACKEND", + "BriefDescription": "No operation issued because of the backend." 
+ } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 927fcddcb4aa..0d609149b82a 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -16,6 +16,8 @@ 0x00000000420f1000,v1,arm/cortex-a53,core 0x00000000410fd070,v1,arm/cortex-a57-a72,core 0x00000000410fd080,v1,arm/cortex-a57-a72,core +0x00000000410fd0b0,v1,arm/cortex-a76-n1,core +0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000480fd010,v1,hisilicon/hip08,core diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index 9dc2f6b70354..b2a3df07fbc4 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -1775,30 +1775,6 @@ "BriefDescription": "L3 Load Prefetches", "PublicDescription": "" }, - {, - "EventCode": "0xa29084", - "EventName": "PM_L3_P0_GRP_PUMP", - "BriefDescription": "L3 pf sent with grp scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x528084", - "EventName": "PM_L3_P0_LCO_DATA", - "BriefDescription": "lco sent with data port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x518080", - "EventName": "PM_L3_P0_LCO_NO_DATA", - "BriefDescription": "dataless l3 lco sent port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0xa4908c", - "EventName": "PM_L3_P0_LCO_RTY", - "BriefDescription": "L3 LCO received retry port 0", - "PublicDescription": "" - }, {, "EventCode": "0x84908d", "EventName": "PM_L3_PF0_ALLOC", diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json index fad4af9142cb..6221a840fcea 100644 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json @@ -283,5 +283,47 @@ "BriefDescription": "Total 
cycles spent with one or more fill requests in flight from L2.", "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9a", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json index 7b285b0a7f35..1079544eeed5 100644 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json @@ -13,7 +13,7 @@ { "EventName": "ex_ret_brn", "EventCode": "0xc2", - "BriefDescription": "[Retired Branch Instructions.", + "BriefDescription": "Retired Branch Instructions.", "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." 
}, { diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d413761621b0..9e37287da924 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -239,6 +239,7 @@ static struct map { { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, + { "L3PMC", "amd_l3" }, {} }; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index a637a4a90760..338cd9faa835 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -10,6 +10,7 @@ #include "tests.h" #include "debug.h" #include "parse-events.h" +#include "util/mmap.h" #include #include @@ -32,8 +33,8 @@ static int count_samples(struct evlist *evlist, int *sample_count, { int i; - for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &evlist->overwrite_mmap[i]; + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; perf_mmap__read_init(map); @@ -63,9 +64,9 @@ static int do_test(struct evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages); + err = evlist__mmap(evlist, mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); return TEST_FAIL; } @@ -75,7 +76,7 @@ static int do_test(struct evlist *evlist, int mmap_pages, evlist__disable(evlist); err = count_samples(evlist, sample_count, comm_count); - perf_evlist__munmap(evlist); + evlist__munmap(evlist); return err; } diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index db2aadff3708..96c137360918 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -2,8 +2,8 @@ #include #include #include +#include #include "tests.h" -#include "cpumap.h" #include "debug.h" #define NBITS 100 diff --git a/tools/perf/tests/bpf.c 
b/tools/perf/tests/bpf.c index fc102e4f403e..1eb0bffaed6c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -19,6 +19,7 @@ #include "llvm.h" #include "debug.h" #include "parse-events.h" +#include "util/mmap.h" #define NR_ITERS 111 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" @@ -167,9 +168,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, opts.mmap_pages); + err = evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -178,9 +179,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void), (*func)(); evlist__disable(evlist); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index f45fe11dcf50..2577d3ed1531 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "tests.h" -#include "debug.h" -#include "util.h" #include "c++/clang-c.h" #include diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index c1c29e08e7fb..f5764a3890b9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -19,12 +19,13 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "machine.h" #include "map.h" #include "symbol.h" #include "event.h" #include "record.h" +#include "util/mmap.h" +#include "util/synthetic-events.h" #include "thread.h" #include "tests.h" @@ -419,10 +420,10 @@ static int process_events(struct machine *machine, struct evlist *evlist, struct state *state) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; int i, ret; 
- for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; @@ -651,7 +652,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; @@ -685,9 +686,9 @@ static int do_test_code_reading(bool try_kcore) break; } - ret = perf_evlist__mmap(evlist, UINT_MAX); + ret = evlist__mmap(evlist, UINT_MAX); if (ret < 0) { - pr_debug("perf_evlist__mmap failed\n"); + pr_debug("evlist__mmap failed\n"); goto out_put; } diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 39493de50117..8a0d236202b0 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -3,6 +3,7 @@ #include #include "cpumap.h" #include "event.h" +#include "util/synthetic-events.h" #include #include #include diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index a4874d4ce7ef..627c1aaf1c9e 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -10,7 +10,6 @@ #include #include #include "dso.h" -#include "util.h" #include "machine.h" #include "symbol.h" #include "tests.h" diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 4125255ff637..4f4ecbcbe87e 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -15,6 +15,7 @@ #include "symbol.h" #include "thread.h" #include "callchain.h" +#include "util/synthetic-events.h" #if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__) #include "arch-tests.h" diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index d824a726906c..1ee8704e2284 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -9,7 +9,6 @@ #include "tests.h" #include "evlist.h" #include "evsel.h" -#include "util.h" #include "debug.h" #include 
"parse-events.h" #include "thread_map.h" @@ -17,7 +16,7 @@ static int attach__enable_on_exec(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct target target = { .uid = UINT_MAX, }; @@ -59,7 +58,7 @@ static int detach__enable_on_exec(struct evlist *evlist) static int attach__current_disabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_thread_map *threads; int err; @@ -85,7 +84,7 @@ static int attach__current_disabled(struct evlist *evlist) static int attach__current_enabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_thread_map *threads; int err; @@ -105,14 +104,14 @@ static int attach__current_enabled(struct evlist *evlist) static int detach__disable(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); return evsel__enable(evsel); } static int attach__cpu_disabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_cpu_map *cpus; int err; @@ -141,7 +140,7 @@ static int attach__cpu_disabled(struct evlist *evlist) static int attach__cpu_enabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_cpu_map *cpus; int err; @@ -181,7 +180,7 @@ static int test_times(int (attach)(struct evlist *), goto out_err; } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); evsel->core.attr.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index cac4290e233a..c727379cf20e 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -2,10 +2,12 @@ 
#include #include #include +#include "cpumap.h" #include "evlist.h" #include "evsel.h" #include "header.h" #include "machine.h" +#include "util/synthetic-events.h" #include "tool.h" #include "tests.h" #include "debug.h" @@ -90,12 +92,12 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); - TEST_ASSERT_VAL("failed to allos ids", - !perf_evsel__alloc_id(evsel, 1, 1)); + TEST_ASSERT_VAL("failed to allocate ids", + !perf_evsel__alloc_id(&evsel->core, 1, 1)); - perf_evlist__id_add(evlist, evsel, 0, 0, 123); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); evsel->unit = strdup("KRAVA"); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 5330f106a6ee..956205bf9326 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -34,7 +34,7 @@ static int perf_evsel__roundtrip_cache_name_test(void) } idx = 0; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index de110d8f169b..6f34d08b84e5 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -2,6 +2,7 @@ #include #include "util/debug.h" #include "util/dso.h" +#include "util/event.h" // struct perf_sample #include "util/map.h" #include "util/symbol.h" #include "util/sort.h" @@ -10,6 +11,7 @@ #include "util/thread.h" #include "tests/hists_common.h" #include +#include static struct { u32 pid; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index fa55b7bad3af..6367c8f6ca22 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -721,7 
+721,7 @@ int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_u if (verbose > 1) machine__fprintf(machine, stderr); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (i = 0; i < ARRAY_SIZE(testcases); i++) { err = testcases[i](evsel, machine); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 8be4d0b61e3a..a024d3f3a412 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -8,6 +8,7 @@ #include "machine.h" #include "parse-events.h" #include "hists_common.h" +#include "util/mmap.h" #include #include @@ -310,8 +311,8 @@ int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unuse print_hists_in(hists); } - first = perf_evlist__first(evlist); - evsel = perf_evlist__last(evlist); + first = evlist__first(evlist); + evsel = evlist__last(evlist); first_hists = evsel__hists(first); hists = evsel__hists(evsel); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 3f6dfa212260..38f804ff6452 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -608,7 +608,7 @@ int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unu if (verbose > 1) machine__fprintf(machine, stderr); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (i = 0; i < ARRAY_SIZE(testcases); i++) { err = testcases[i](evsel, machine); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 9f0762d987fa..92c7d591bcac 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -12,8 +12,8 @@ #include "evsel.h" #include "record.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" +#include "util/mmap.h" #define CHECK__(x) { \ while ((x) < 0) { \ @@ -32,11 +32,11 @@ static int find_comm(struct evlist *evlist, const char *comm) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; int i, found; 
found = 0; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; @@ -93,7 +93,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; @@ -105,7 +105,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un goto out_err; } - CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); + CHECK__(evlist__mmap(evlist, UINT_MAX)); /* * First, test that a 'comm' event can be found when the event is @@ -132,7 +132,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un evlist__enable(evlist); - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); CHECK__(evsel__disable(evsel)); @@ -143,7 +143,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un found = find_comm(evlist, comm); if (found != 1) { - pr_debug("Seconf time, failed to find tracking event.\n"); + pr_debug("Second time, failed to find tracking event.\n"); goto out_err; } diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 022e4c9cf092..ae6cda81c209 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -7,7 +7,6 @@ #include "llvm.h" #include "tests.h" #include "debug.h" -#include "util.h" #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 70c48475896d..c850d1664c56 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -100,7 +100,7 @@ make_install_info := install-info make_install_pdf := install-pdf make_install_prefix := install prefix=/tmp/krava make_install_prefix_slash := install prefix=/tmp/krava/ -make_static := LDFLAGS=-static +make_static := LDFLAGS=-static 
NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 @@ -327,6 +327,10 @@ make_kernelsrc_tools: (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) +make_libperf: + @echo "- make -C lib"; + make -C lib clean >$@ 2>&1; make -C lib >>$@ 2>&1 && rm $@ + FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC @@ -365,5 +369,5 @@ $(foreach t,$(run),$(if $(findstring make_static,$(t)),\ $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) endif -.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools +.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools make_libperf endif # ifndef MK diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 7672ade70f20..a258bd51f1a4 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -4,7 +4,7 @@ #include #include #include -#include "cpumap.h" +#include #include "debug.h" #include "env.h" #include "mem2node.h" diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 85e1d7337dc0..3a22dce991ba 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -10,8 +10,8 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" +#include "util/mmap.h" #include #include #include @@ -43,7 +43,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[nsyscalls], i, j; struct evsel *evsels[nsyscalls], *evsel; char sbuf[STRERR_BUFSIZE]; - struct perf_mmap *md; + struct mmap *md; threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { @@ -53,7 +53,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse cpus = 
perf_cpu_map__new(NULL); if (cpus == NULL) { - pr_debug("cpu_map__new\n"); + pr_debug("perf_cpu_map__new\n"); goto out_free_threads; } @@ -100,7 +100,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128) < 0) { + if (evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 360d70deb855..8d9d4cbff76d 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -8,13 +8,15 @@ #include #include #include "debug.h" +#include "event.h" #include "tests.h" #include "machine.h" #include "thread_map.h" #include "map.h" #include "symbol.h" +#include "util/synthetic-events.h" #include "thread.h" -#include "util.h" +#include // page_size #define THREADS 4 diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 9171f77cd9cd..93c176523e38 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -14,7 +14,8 @@ #include "evsel.h" #include "tests.h" #include "thread_map.h" -#include "cpumap.h" +#include +#include #include "debug.h" #include "stat.h" #include "util/counts.h" @@ -37,7 +38,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int cpus = perf_cpu_map__new(NULL); if (cpus == NULL) { - pr_debug("cpu_map__new\n"); + pr_debug("perf_cpu_map__new\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index b71167b43dda..2b5c46813053 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -11,6 +11,7 @@ #include "record.h" #include "tests.h" #include "debug.h" +#include 
"util/mmap.h" #include #ifndef O_DIRECTORY @@ -69,9 +70,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, UINT_MAX); + err = evlist__mmap(evlist, UINT_MAX); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -86,9 +87,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest while (1) { int before = nr_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) @@ -126,7 +127,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest } if (nr_events == before) - perf_evlist__poll(evlist, 10); + evlist__poll(evlist, 10); if (++nr_polls > 5) { pr_debug("%s: no events!\n", __func__); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 02ba696fb87f..25e0ed2eedfc 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -6,7 +6,6 @@ #include "tests.h" #include "debug.h" #include "pmu.h" -#include "util.h" #include #include #include @@ -47,7 +46,7 @@ static bool kvm_s390_create_vm_valid(void) static int test__checkevent_tracepoint(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); @@ -78,7 +77,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) static int test__checkevent_raw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == 
evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -88,7 +87,7 @@ static int test__checkevent_raw(struct evlist *evlist) static int test__checkevent_numeric(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); @@ -98,7 +97,7 @@ static int test__checkevent_numeric(struct evlist *evlist) static int test__checkevent_symbolic_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -109,7 +108,7 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) static int test__checkevent_symbolic_name_config(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -130,7 +129,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) static int test__checkevent_symbolic_alias(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); @@ -141,7 +140,7 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist) static int test__checkevent_genhw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); 
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); @@ -151,7 +150,7 @@ static int test__checkevent_genhw(struct evlist *evlist) static int test__checkevent_breakpoint(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -165,7 +164,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist) static int test__checkevent_breakpoint_x(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -178,7 +177,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist) static int test__checkevent_breakpoint_r(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -193,7 +192,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist) static int test__checkevent_breakpoint_w(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -208,7 +207,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist) static int test__checkevent_breakpoint_rw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -223,7 +222,7 @@ static int test__checkevent_breakpoint_rw(struct 
evlist *evlist) static int test__checkevent_tracepoint_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -254,7 +253,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) static int test__checkevent_raw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -266,7 +265,7 @@ static int test__checkevent_raw_modifier(struct evlist *evlist) static int test__checkevent_numeric_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -278,7 +277,7 @@ static int test__checkevent_numeric_modifier(struct evlist *evlist) static int test__checkevent_symbolic_name_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -290,7 +289,7 @@ static int test__checkevent_symbolic_name_modifier(struct evlist *evlist) static int test__checkevent_exclude_host_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); @@ -300,7 +299,7 @@ static int 
test__checkevent_exclude_host_modifier(struct evlist *evlist) static int test__checkevent_exclude_guest_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); @@ -310,7 +309,7 @@ static int test__checkevent_exclude_guest_modifier(struct evlist *evlist) static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -322,7 +321,7 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist) static int test__checkevent_genhw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -334,7 +333,7 @@ static int test__checkevent_genhw_modifier(struct evlist *evlist) static int test__checkevent_exclude_idle_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle); TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); @@ -349,7 +348,7 @@ static int test__checkevent_exclude_idle_modifier(struct evlist *evlist) static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle); TEST_ASSERT_VAL("wrong exclude guest", 
!evsel->core.attr.exclude_guest); @@ -364,7 +363,7 @@ static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist) static int test__checkevent_breakpoint_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); @@ -379,7 +378,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -393,7 +392,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -407,7 +406,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -421,7 +420,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", 
!evsel->core.attr.exclude_kernel); @@ -436,7 +435,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) static int test__checkevent_pmu(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -454,7 +453,7 @@ static int test__checkevent_pmu(struct evlist *evlist) static int test__checkevent_list(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); @@ -493,7 +492,7 @@ static int test__checkevent_list(struct evlist *evlist) static int test__checkevent_pmu_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* cpu/config=1,name=krava/u */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -514,7 +513,7 @@ static int test__checkevent_pmu_name(struct evlist *evlist) static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* cpu/config=1,call-graph=fp,time,period=100000/ */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -547,7 +546,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) static int test__checkevent_pmu_events(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -565,7 +564,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) static int 
test__checkevent_pmu_events_mix(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* pmu-event:u */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -643,7 +642,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* instructions:k */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -685,7 +684,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* faults + :ku modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config); @@ -740,7 +739,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* group1 syscalls:sys_enter_openat:H */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -832,7 +831,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:u + p */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -876,7 +875,7 @@ static int test__group5(struct evlist 
*evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* cycles + G */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -962,7 +961,7 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles + :H group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1002,7 +1001,7 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles + :G group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1042,7 +1041,7 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:G + :u group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1082,7 +1081,7 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:G + :uG group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1121,7 +1120,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); /* cycles - sampling group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1174,7 +1173,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); /* instructions - sampling group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -1208,7 +1207,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) static int test__checkevent_pinned_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -1226,7 +1225,7 @@ static int test__pinned_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); /* cycles - group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1252,7 +1251,7 @@ static int test__pinned_group(struct evlist *evlist) static int test__checkevent_breakpoint_len(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel 
*evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -1267,7 +1266,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist) static int test__checkevent_breakpoint_len_w(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -1283,7 +1282,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist) static int test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -1295,7 +1294,7 @@ test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist) static int test__checkevent_precise_max_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); @@ -1306,7 +1305,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist) static int test__checkevent_config_symbol(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0); return 0; @@ -1314,7 +1313,7 @@ static int test__checkevent_config_symbol(struct evlist *evlist) static int test__checkevent_config_raw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = 
evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0); return 0; @@ -1322,7 +1321,7 @@ static int test__checkevent_config_raw(struct evlist *evlist) static int test__checkevent_config_num(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0); return 0; @@ -1330,7 +1329,7 @@ static int test__checkevent_config_num(struct evlist *evlist) static int test__checkevent_config_cache(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0); return 0; @@ -1343,7 +1342,7 @@ static bool test__intel_pt_valid(void) static int test__intel_pt(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0); return 0; @@ -1351,7 +1350,7 @@ static int test__intel_pt(struct evlist *evlist) static int test__checkevent_complex_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0); return 0; @@ -1359,7 +1358,7 @@ static int test__checkevent_complex_name(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); @@ -1369,7 +1368,7 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { - struct 
evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 8284752a60c8..adf3c9c4a416 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0 #include #include #include @@ -8,7 +7,6 @@ #include "event.h" #include "evlist.h" #include "header.h" -#include "util.h" #include "debug.h" static int process_event(struct evlist **pevlist, union perf_event *event) diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index a693bcf017ea..dbc27199c65e 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -4,7 +4,6 @@ #include "tests.h" #include "debug.h" -#include "util.h" #include "perf-hooks.h" static void sigsegv_handler(int sig __maybe_unused) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index e1b42292cf7f..437426be29e9 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -11,6 +11,7 @@ #include "debug.h" #include "record.h" #include "tests.h" +#include "util/mmap.h" static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) { @@ -103,7 +104,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Config the evsels, setting attr->comm on the first one, etc. 
*/ - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TID); perf_evsel__set_sample_bit(evsel, TIME); @@ -143,9 +144,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * fds in the same CPU to be injected in the same mmap ring buffer * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). */ - err = perf_evlist__mmap(evlist, opts.mmap_pages); + err = evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -164,9 +165,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus while (1) { int before = total_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) @@ -286,7 +287,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. 
*/ if (total_events == before && false) - perf_evlist__poll(evlist, -1); + evlist__poll(evlist, -1); sleep(1); if (++wakeups > 5) { diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 14a78898d79e..74379ff1f7fa 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "parse-events.h" #include "pmu.h" -#include "util.h" #include "tests.h" #include #include diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 5fcc06817076..3a02426db9a6 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -9,10 +9,10 @@ #include "map_symbol.h" #include "branch.h" -#include "util.h" #include "event.h" #include "evsel.h" #include "debug.h" +#include "util/synthetic-events.h" #include "tests.h" diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index cf1bd57d3023..60f0e9ee04fb 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index cc10b4116c9f..c1911501c39c 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -5,6 +5,7 @@ #include "stat.h" #include "counts.h" #include "debug.h" +#include "util/synthetic-events.h" static bool has_term(struct perf_record_stat_config *config, u64 tag, u64 val) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 97694a040986..84519df87f30 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -12,6 +12,7 @@ #include "util/evsel.h" #include "util/evlist.h" #include "util/cpumap.h" +#include "util/mmap.h" #include "util/thread_map.h" #include @@ -42,7 +43,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) }; struct perf_cpu_map *cpus; struct perf_thread_map *threads; - struct perf_mmap *md; + struct mmap *md; attr.sample_freq = 500; @@ -82,7 +83,7 @@ static int 
__test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128); + err = evlist__mmap(evlist, 128); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 1a60fa1219f5..ffa592e0020e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -14,9 +14,9 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "record.h" #include "tests.h" +#include "util/mmap.h" static int spin_sleep(void) { @@ -144,7 +144,7 @@ static int process_sample_event(struct evlist *evlist, return err; /* * Check for no missing sched_switch events i.e. that the - * evsel->system_wide flag has worked. + * evsel->core.system_wide flag has worked. */ if (switch_tracking->tids[cpu] != -1 && switch_tracking->tids[cpu] != prev_tid) { @@ -264,10 +264,10 @@ static int process_events(struct evlist *evlist, unsigned pos, cnt = 0; LIST_HEAD(events); struct event_node *events_array, *node; - struct perf_mmap *md; + struct mmap *md; int i, ret; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; @@ -316,7 +316,7 @@ out_free_nodes: * * This function implements a test that checks that sched_switch events and * tracking events can be recorded for a workload (current process) using the - * evsel->system_wide and evsel->tracking flags (respectively) with other events + * evsel->core.system_wide and evsel->tracking flags (respectively) with other events * sometimes enabled or disabled. 
*/ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) @@ -367,7 +367,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - cpu_clocks_evsel = perf_evlist__last(evlist); + cpu_clocks_evsel = evlist__last(evlist); /* Second event */ err = parse_events(evlist, "cycles:u", NULL); @@ -376,7 +376,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - cycles_evsel = perf_evlist__last(evlist); + cycles_evsel = evlist__last(evlist); /* Third event */ if (!perf_evlist__can_select_event(evlist, sched_switch)) { @@ -391,22 +391,22 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - switch_evsel = perf_evlist__last(evlist); + switch_evsel = evlist__last(evlist); perf_evsel__set_sample_bit(switch_evsel, CPU); perf_evsel__set_sample_bit(switch_evsel, TIME); - switch_evsel->system_wide = true; + switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; /* Test moving an event to the front */ - if (cycles_evsel == perf_evlist__first(evlist)) { + if (cycles_evsel == evlist__first(evlist)) { pr_debug("cycles event already at front"); goto out_err; } perf_evlist__to_front(evlist, cycles_evsel); - if (cycles_evsel != perf_evlist__first(evlist)) { + if (cycles_evsel != evlist__first(evlist)) { pr_debug("Failed to move cycles event to front"); goto out_err; } @@ -421,7 +421,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); @@ -434,7 +434,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ perf_evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ - if (cycles_evsel != perf_evlist__first(evlist)) { + if 
(cycles_evsel != evlist__first(evlist)) { pr_debug("Front event no longer at front"); goto out_err; } @@ -461,9 +461,9 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out; } - err = perf_evlist__mmap(evlist, UINT_MAX); + err = evlist__mmap(evlist, UINT_MAX); if (err) { - pr_debug("perf_evlist__mmap failed!\n"); + pr_debug("evlist__mmap failed!\n"); goto out_err; } diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index f610e8c0a083..bce3a4cb4c89 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -4,12 +4,13 @@ #include "evsel.h" #include "target.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" +#include "util/mmap.h" #include #include #include +#include #include static int exited; @@ -51,7 +52,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused char sbuf[STRERR_BUFSIZE]; struct perf_cpu_map *cpus; struct perf_thread_map *threads; - struct perf_mmap *md; + struct mmap *md; signal(SIGCHLD, sig_handler); @@ -87,7 +88,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.task = 1; #ifdef __s390x__ evsel->core.attr.sample_freq = 1000000; @@ -106,7 +107,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128) < 0) { + if (evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; @@ -129,7 +130,7 @@ retry: out_init: if (!exited || !nr_exit) { - perf_evlist__poll(evlist, -1); + evlist__poll(evlist, -1); goto retry; } diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 39168c57943b..28f51c4bd373 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ 
-8,6 +8,7 @@ #include "thread_map.h" #include "debug.h" #include "event.h" +#include "util/synthetic-events.h" #include #include diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a4f9f5182b47..4a800499d7c3 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -3,11 +3,12 @@ #include #include #include +#include "cpumap.h" #include "tests.h" -#include "util.h" #include "session.h" #include "evlist.h" #include "debug.h" +#include #define TEMPL "/tmp/perf-test-XXXXXX" #define DATA_SIZE 10 @@ -39,7 +40,7 @@ static int session_write_header(char *path) }; session = perf_session__new(&data, false, NULL); - TEST_ASSERT_VAL("can't get session", session); + TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); session->evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("can't get evlist", session->evlist); @@ -70,7 +71,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) int i; session = perf_session__new(&data, false, NULL); - TEST_ASSERT_VAL("can't get session", session); + TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); /* On platforms with large numbers of CPUs process_cpu_topology() * might issue an error while reading the perf.data file section diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 01f434c067c6..aa296ffea6d1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -7,7 +7,7 @@ #include "dso.h" #include "map.h" #include "symbol.h" -#include "util.h" +#include // page_size #include "tests.h" #include "debug.h" #include "machine.h" diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index f93d40b1c203..781afe42e90e 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../util/util.h" #include "../util/string2.h" #include "../util/config.h" #include "libslang.h" diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c index ac74ed2c23a0..82207db8f97c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -2,7 +2,6 @@ #include "../browser.h" #include "../helpline.h" #include "../ui.h" -#include "../util.h" #include "../../util/annotate.h" #include "../../util/debug.h" #include "../../util/dso.h" diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index 0f59a7001479..57e6e4332f74 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "util/debug.h" #include "ui/browser.h" #include "ui/keysyms.h" #include "ui/ui.h" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 589168ca9f62..7a7187e069b4 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3319,13 +3319,13 @@ browse_hists: switch (key) { case K_TAB: if (pos->core.node.next == &evlist->core.entries) - pos = perf_evlist__first(evlist); + pos = evlist__first(evlist); else pos = perf_evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->core.node.prev == &evlist->core.entries) - pos = perf_evlist__last(evlist); + pos = evlist__last(evlist); else pos = perf_evsel__prev(pos); goto browse_hists; @@ -3417,7 +3417,7 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, single_entry: if (nr_entries == 1) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 893b065971f6..3d49b916c9e4 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -5,7 +5,6 @@ #include #include #include -#include "../../util/util.h" #include "../../util/debug.h" #include "../../util/map.h" #include "../../util/dso.h" diff --git 
a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c index f16a38fea45e..76d356a18790 100644 --- a/tools/perf/ui/browsers/res_sample.c +++ b/tools/perf/ui/browsers/res_sample.c @@ -7,7 +7,7 @@ #include "config.h" #include "time-utils.h" #include "../util.h" -#include "../../util/util.h" +#include "../../util/util.h" // perf_exe() #include "../../perf.h" #include #include diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 586a21acc13d..fc733a6354d4 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../builtin.h" #include "../../perf.h" -#include "../../util/util.h" +#include "../../util/util.h" // perf_exe() +#include "../util.h" #include "../../util/hist.h" #include "../../util/debug.h" #include "../../util/symbol.h" diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c index e166da9ec767..e40a006aead8 100644 --- a/tools/perf/ui/gtk/helpline.c +++ b/tools/perf/ui/gtk/helpline.c @@ -6,7 +6,6 @@ #include "gtk.h" #include "../ui.h" #include "../helpline.h" -#include "../../util/debug.h" static void gtk_helpline_pop(void) { diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 6c2efc10bf5c..ed1a97b2c4b0 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -8,6 +8,7 @@ #include "../string2.h" #include "gtk.h" #include +#include #include #define MAX_COLUMNS 32 diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c index b6ad8857da78..eea6fcde518a 100644 --- a/tools/perf/ui/gtk/progress.c +++ b/tools/perf/ui/gtk/progress.c @@ -3,7 +3,6 @@ #include "gtk.h" #include "../progress.h" -#include "util.h" static GtkWidget *dialog; static GtkWidget *progress; diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 1a2616b97b5c..f5eee4d66873 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -1,6 +1,7 @@ // 
SPDX-License-Identifier: GPL-2.0 #include "gtk.h" -#include "../../util/debug.h" +#include +#include "../util.h" extern struct perf_error_ops perf_gtk_eops; diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index c2c558958b9c..c47f5c387838 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "../util.h" -#include "../../util/debug.h" #include "gtk.h" #include diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 54bcd08df87e..911182b3f5e6 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -3,10 +3,8 @@ #include #include -#include "../util/debug.h" #include "helpline.h" #include "ui.h" -#include "../util/util.h" char ui_helpline__current[512]; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3e533de7d852..f73675500061 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -8,7 +8,6 @@ #include "../util/callchain.h" #include "../util/debug.h" #include "../util/hist.h" -#include "../util/util.h" #include "../util/sort.h" #include "../util/evsel.h" #include "../util/evlist.h" diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index c7a86b4be9f5..700335cde618 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include "../util/debug.h" #include "../util/hist.h" -#include "../util/util.h" #include "ui.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 832ca6cfbe30..5365606e9dad 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -5,6 +5,7 @@ #include "../../util/callchain.h" #include "../../util/debug.h" +#include "../../util/event.h" #include "../../util/hist.h" #include "../../util/map.h" #include "../../util/map_groups.h" diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 
5f188f678c55..298d6af82fdd 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -6,7 +6,6 @@ #include #include -#include "../../util/debug.h" #include "../helpline.h" #include "../ui.h" #include "../libslang.h" diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 56651a4f5aa0..e9bfe856a5de 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -2,13 +2,13 @@ #include #include #include +#include #include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif #include "../../util/debug.h" -#include "../../util/util.h" #include "../../perf.h" #include "../browser.h" #include "../helpline.h" diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index 087d9ab054c8..b98dd0e31dc1 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -5,7 +5,6 @@ #include #include -#include "../../util/debug.h" #include "../browser.h" #include "../keysyms.h" #include "../helpline.h" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 0b4d8e0d474c..8dcfca1a882f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -3,6 +3,7 @@ perf-y += block-range.o perf-y += build-id.o perf-y += cacheline.o perf-y += config.o +perf-y += copyfile.o perf-y += ctype.o perf-y += db-export.o perf-y += env.o @@ -10,6 +11,7 @@ perf-y += event.o perf-y += evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o +perf-y += perf_event_attr_fprintf.o perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o @@ -86,6 +88,7 @@ perf-y += stat-display.o perf-y += record.o perf-y += srcline.o perf-y += srccode.o +perf-y += synthetic-events.o perf-y += data.o perf-y += tsc.o perf-y += cloexec.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1748f528b6e9..e830eadfca2a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -14,7 +14,7 @@ #include #include #include -#include "util.h" +#include "util.h" // hex_width() #include "ui/ui.h" #include "sort.h" 
#include "build-id.h" @@ -34,6 +34,7 @@ #include "bpf-event.h" #include "block-range.h" #include "string2.h" +#include "util/event.h" #include "arch/common.h" #include #include diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 8a7340f6a2a2..53be12b23ff4 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -16,7 +16,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "machine.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 6f25224a3def..8470dfe9fe97 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,8 +31,8 @@ #include "map.h" #include "pmu.h" #include "evsel.h" -#include "cpumap.h" #include "symbol.h" +#include "util/synthetic-events.h" #include "thread_map.h" #include "asm/bug.h" #include "auxtrace.h" @@ -50,10 +50,12 @@ #include "intel-bts.h" #include "arm-spe.h" #include "s390-cpumsf.h" -#include "util.h" +#include "util/mmap.h" #include +#include #include "symbol/kallsyms.h" +#include static bool auxtrace__dont_decode(struct perf_session *session) { @@ -1226,7 +1228,7 @@ int perf_event__process_auxtrace_error(struct perf_session *session, return 0; } -static int __auxtrace_mmap__read(struct perf_mmap *map, +static int __auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) @@ -1337,13 +1339,13 @@ static int __auxtrace_mmap__read(struct perf_mmap *map, return 1; } -int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn) { return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); } -int auxtrace_mmap__read_snapshot(struct perf_mmap *map, +int auxtrace_mmap__read_snapshot(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) diff 
--git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 37e70dc01436..f201f36bc35f 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -11,21 +11,22 @@ #include #include #include +#include // FILE #include #include #include #include #include -#include "event.h" - union perf_event; struct perf_session; struct evlist; struct perf_tool; -struct perf_mmap; +struct mmap; +struct perf_sample; struct option; struct record_opts; +struct perf_record_auxtrace_error; struct perf_record_auxtrace_info; struct events_stats; @@ -444,14 +445,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, bool per_cpu); typedef int (*process_auxtrace_t)(struct perf_tool *tool, - struct perf_mmap *map, + struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); -int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn); -int auxtrace_mmap__read_snapshot(struct perf_mmap *map, +int auxtrace_mmap__read_snapshot(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); @@ -524,10 +525,6 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int int code, int cpu, pid_t pid, pid_t tid, u64 ip, const char *msg, u64 timestamp); -int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, - struct perf_tool *tool, - struct perf_session *session, - perf_event__handler_t process); int perf_event__process_auxtrace_info(struct perf_session *session, union perf_event *event); s64 perf_event__process_auxtrace(struct perf_session *session, @@ -604,15 +601,6 @@ void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) { } -static inline int -perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, - struct perf_tool *tool __maybe_unused, - struct 
perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused) -{ - return -EINVAL; -} - static inline int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused, diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 7a3d4b125323..f7ed5d122e22 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -16,6 +16,7 @@ #include "map.h" #include "evlist.h" #include "record.h" +#include "util/synthetic-events.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index a01c2fd68c03..81fdc88e6c1a 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -6,9 +6,9 @@ #include #include #include -#include "event.h" #include +struct bpf_prog_info; struct machine; union perf_event; struct perf_env; @@ -33,11 +33,6 @@ struct btf_node { #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); - -int perf_event__synthesize_bpf_events(struct perf_session *session, - perf_event__handler_t process, - struct machine *machine, - struct record_opts *opts); int bpf_event__add_sb_event(struct evlist **evlist, struct perf_env *env); void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, @@ -51,14 +46,6 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused, return 0; } -static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused, - struct record_opts *opts __maybe_unused) -{ - return 0; -} - static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused, struct perf_env *env __maybe_unused) { diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 37283e865352..10c187b8b8ea 100644 --- 
a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1568,7 +1568,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name) return ERR_PTR(-err); } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); } bpf__for_each_map_named(map, obj, tmp, name) { diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 9d1e090084a2..2285b1eb3128 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -1,5 +1,3 @@ -#include "util/util.h" -#include "util/debug.h" #include "util/map_symbol.h" #include "util/branch.h" #include diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 06f66dad0b79..88e00d268f6f 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -1,8 +1,15 @@ #ifndef _PERF_BRANCH_H #define _PERF_BRANCH_H 1 - +/* + * The linux/stddef.h isn't needed here, but is needed for __always_inline used + * in files included from uapi/linux/perf_event.h such as + * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h, + * detected in at least musl libc, used in Alpine Linux. -acme + */ #include #include +#include +#include #include #include diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e5fb77755d9e..c076fc7fe025 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,12 +7,13 @@ * Copyright (C) 2009, 2010 Red Hat Inc.
* Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ -#include "util.h" +#include "util.h" // lsdir(), mkdir_p(), rm_rf() #include #include #include #include #include +#include "util/copyfile.h" #include "dso.h" #include "build-id.h" #include "event.h" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c14646c1f2eb..9a9b56ed3f0a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -23,6 +23,7 @@ #include "debug.h" #include "dso.h" +#include "event.h" #include "hist.h" #include "sort.h" #include "machine.h" diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b042ceef4114..83398e5bbe4b 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,12 +4,15 @@ #include #include -#include "event.h" #include "map_symbol.h" #include "branch.h" +struct addr_location; struct evsel; +struct ip_callchain; struct map; +struct perf_sample; +struct thread; #define HELP_PAD "\t\t\t\t" diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 4e904fcb2783..a12872f2856a 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "util.h" +#include "util.h" // for sched_getcpu() #include "../perf-sys.h" #include "cloexec.h" #include "event.h" diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c new file mode 100644 index 000000000000..3fa0db136667 --- /dev/null +++ b/tools/perf/util/copyfile.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/copyfile.h" +#include "util/namespaces.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi) +{ + int err = -1; + char *line = NULL; + size_t n; + FILE *from_fp, *to_fp; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + from_fp = fopen(from, "r"); + nsinfo__mountns_exit(&nsc); 
+ if (from_fp == NULL) + goto out; + + to_fp = fopen(to, "w"); + if (to_fp == NULL) + goto out_fclose_from; + + while (getline(&line, &n, from_fp) > 0) + if (fputs(line, to_fp) == EOF) + goto out_fclose_to; + err = 0; +out_fclose_to: + fclose(to_fp); + free(line); +out_fclose_from: + fclose(from_fp); +out: + return err; +} + +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +{ + void *ptr; + loff_t pgoff; + + pgoff = off_in & ~(page_size - 1); + off_in -= pgoff; + + ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff); + if (ptr == MAP_FAILED) + return -1; + + while (size) { + ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out); + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + break; + + size -= ret; + off_in += ret; + off_out += ret; + } + munmap(ptr, off_in + size); + + return size ? -1 : 0; +} + +static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, + struct nsinfo *nsi) +{ + int fromfd, tofd; + struct stat st; + int err; + char *tmp = NULL, *ptr = NULL; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + err = stat(from, &st); + nsinfo__mountns_exit(&nsc); + if (err) + goto out; + err = -1; + + /* extra 'x' at the end is to reserve space for '.' */ + if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { + tmp = NULL; + goto out; + } + ptr = strrchr(tmp, '/'); + if (!ptr) + goto out; + ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1); + *ptr = '.'; + + tofd = mkstemp(tmp); + if (tofd < 0) + goto out; + + if (fchmod(tofd, mode)) + goto out_close_to; + + if (st.st_size == 0) { /* /proc? do it slowly... 
*/ + err = slow_copyfile(from, tmp, nsi); + goto out_close_to; + } + + nsinfo__mountns_enter(nsi, &nsc); + fromfd = open(from, O_RDONLY); + nsinfo__mountns_exit(&nsc); + if (fromfd < 0) + goto out_close_to; + + err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size); + + close(fromfd); +out_close_to: + close(tofd); + if (!err) + err = link(tmp, to); + unlink(tmp); +out: + free(tmp); + return err; +} + +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi) +{ + return copyfile_mode_ns(from, to, 0755, nsi); +} + +int copyfile_mode(const char *from, const char *to, mode_t mode) +{ + return copyfile_mode_ns(from, to, mode, NULL); +} + +int copyfile(const char *from, const char *to) +{ + return copyfile_mode(from, to, 0755); +} diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h new file mode 100644 index 000000000000..e85d2f22f3cc --- /dev/null +++ b/tools/perf/util/copyfile.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef PERF_COPYFILE_H_ +#define PERF_COPYFILE_H_ + +#include +#include +#include + +struct nsinfo; + +int copyfile(const char *from, const char *to); +int copyfile_mode(const char *from, const char *to, mode_t mode); +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); + +#endif // PERF_COPYFILE_H_ diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 37d7c492b155..cd92a99eb89d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -17,7 +17,6 @@ #include "cs-etm.h" #include "cs-etm-decoder.h" #include "intlist.h" -#include "util.h" /* use raw logging */ #ifdef CS_DEBUG_RAW diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 707afdbd9529..4ba0f871f086 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -35,7 +35,7 @@ #include "thread.h" #include 
"thread-stack.h" #include -#include "util.h" +#include "util/synthetic-events.h" #define MAX_TIMESTAMP (~0ULL) @@ -1298,7 +1298,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.read_format = evsel->core.attr.read_format; /* create new id val to be a fixed offset from evsel id */ - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 0c268449959c..dbc772bfb04e 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -30,6 +30,7 @@ #include "machine.h" #include "config.h" #include +#include #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1619,8 +1620,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, err = -1; /* perf.data session */ session = perf_session__new(&data, 0, &c.tool); - if (!session) + if (IS_ERR(session)) { + err = PTR_ERR(session); goto free_writer; + } if (c.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e75c3a279fe8..88fba2ba549f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -13,9 +13,10 @@ #include #include "data.h" -#include "util.h" +#include "util.h" // rm_rf_perf_data() #include "debug.h" #include "header.h" +#include static void close_dir(struct perf_data_file *files, int nr) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index a1b59bd35519..e55114f0336f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -17,7 +17,6 @@ #include "event.h" #include "debug.h" #include "print_binary.h" -#include "util.h" #include "target.h" #include "ui/helpline.h" #include "ui/ui.h" diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index b2deee987ffa..d25ae1c4cee9 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -3,9 +3,9 @@ #ifndef __PERF_DEBUG_H #define 
__PERF_DEBUG_H +#include #include #include -#include "../ui/util.h" extern int verbose; extern bool quiet, dump_trace; diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 763328c151e9..6fb7f34c0814 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -3,7 +3,6 @@ #include #include #include -#include "debug.h" #include "symbol.h" #include "demangle-java.h" diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c index 423afbbd386b..a659fc69f73a 100644 --- a/tools/perf/util/demangle-rust.c +++ b/tools/perf/util/demangle-rust.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "util.h" #include "debug.h" #include "demangle-rust.h" diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index db55eddce8cd..1b49ecee5aff 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,7 +5,6 @@ * Written by: Masami Hiramatsu */ -#include #include #include #include diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d8e083d42610..db40906e2937 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,9 +4,10 @@ #include #include -#include "cpumap.h" #include "rwsem.h" +struct perf_cpu_map; + struct cpu_topology_map { int socket_id; int die_id; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f4afbb858ebb..fc1e5a991008 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,16 +1,16 @@ -#include #include #include #include #include #include +#include #include #include #include #include /* To get things like MAP_HUGETLB even on older libc headers */ -#include #include #include +#include "cpumap.h" #include "dso.h" #include "event.h" #include "debug.h" @@ -24,6 +24,7 @@ #include "time-utils.h" #include #include "map.h" +#include "util/namespaces.h" #include "symbol.h" #include "symbol/kallsyms.h" #include "asm/bug.h" @@ -33,8 +34,6 @@ #include "tool.h" #include "../perf.h" 
-#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 - static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", @@ -75,18 +74,6 @@ static const char *perf_event__names[] = { [PERF_RECORD_COMPRESSED] = "COMPRESSED", }; -static const char *perf_ns__names[] = { - [NET_NS_INDEX] = "net", - [UTS_NS_INDEX] = "uts", - [IPC_NS_INDEX] = "ipc", - [PID_NS_INDEX] = "pid", - [USER_NS_INDEX] = "user", - [MNT_NS_INDEX] = "mnt", - [CGROUP_NS_INDEX] = "cgroup", -}; - -unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; - const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) @@ -96,775 +83,6 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } -static const char *perf_ns__name(unsigned int id) -{ - if (id >= ARRAY_SIZE(perf_ns__names)) - return "UNKNOWN"; - return perf_ns__names[id]; -} - -int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - perf_event__handler_t process) -{ - struct perf_sample synth_sample = { - .pid = -1, - .tid = -1, - .time = -1, - .stream_id = -1, - .cpu = -1, - .period = 1, - .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, - }; - - return process(tool, event, &synth_sample, machine); -}; - -/* - * Assumes that the first 4095 bytes of /proc/pid/stat contains - * the comm, tgid and ppid. 
- */ -static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, - pid_t *tgid, pid_t *ppid) -{ - char filename[PATH_MAX]; - char bf[4096]; - int fd; - size_t size = 0; - ssize_t n; - char *name, *tgids, *ppids; - - *tgid = -1; - *ppid = -1; - - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - pr_debug("couldn't open %s\n", filename); - return -1; - } - - n = read(fd, bf, sizeof(bf) - 1); - close(fd); - if (n <= 0) { - pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", - pid); - return -1; - } - bf[n] = '\0'; - - name = strstr(bf, "Name:"); - tgids = strstr(bf, "Tgid:"); - ppids = strstr(bf, "PPid:"); - - if (name) { - char *nl; - - name = skip_spaces(name + 5); /* strlen("Name:") */ - nl = strchr(name, '\n'); - if (nl) - *nl = '\0'; - - size = strlen(name); - if (size >= len) - size = len - 1; - memcpy(comm, name, size); - comm[size] = '\0'; - } else { - pr_debug("Name: string not found for pid %d\n", pid); - } - - if (tgids) { - tgids += 5; /* strlen("Tgid:") */ - *tgid = atoi(tgids); - } else { - pr_debug("Tgid: string not found for pid %d\n", pid); - } - - if (ppids) { - ppids += 5; /* strlen("PPid:") */ - *ppid = atoi(ppids); - } else { - pr_debug("PPid: string not found for pid %d\n", pid); - } - - return 0; -} - -static int perf_event__prepare_comm(union perf_event *event, pid_t pid, - struct machine *machine, - pid_t *tgid, pid_t *ppid) -{ - size_t size; - - *ppid = -1; - - memset(&event->comm, 0, sizeof(event->comm)); - - if (machine__is_host(machine)) { - if (perf_event__get_comm_ids(pid, event->comm.comm, - sizeof(event->comm.comm), - tgid, ppid) != 0) { - return -1; - } - } else { - *tgid = machine->pid; - } - - if (*tgid < 0) - return -1; - - event->comm.pid = *tgid; - event->comm.header.type = PERF_RECORD_COMM; - - size = strlen(event->comm.comm) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - memset(event->comm.comm + size, 0, machine->id_hdr_size); - 
event->comm.header.size = (sizeof(event->comm) - - (sizeof(event->comm.comm) - size) + - machine->id_hdr_size); - event->comm.tid = pid; - - return 0; -} - -pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine) -{ - pid_t tgid, ppid; - - if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) - return -1; - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return tgid; -} - -static void perf_event__get_ns_link_info(pid_t pid, const char *ns, - struct perf_ns_link_info *ns_link_info) -{ - struct stat64 st; - char proc_ns[128]; - - sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); - if (stat64(proc_ns, &st) == 0) { - ns_link_info->dev = st.st_dev; - ns_link_info->ino = st.st_ino; - } -} - -int perf_event__synthesize_namespaces(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine) -{ - u32 idx; - struct perf_ns_link_info *ns_link_info; - - if (!tool || !tool->namespace_events) - return 0; - - memset(&event->namespaces, 0, (sizeof(event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size)); - - event->namespaces.pid = tgid; - event->namespaces.tid = pid; - - event->namespaces.nr_namespaces = NR_NAMESPACES; - - ns_link_info = event->namespaces.link_info; - - for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) - perf_event__get_ns_link_info(pid, perf_ns__name(idx), - &ns_link_info[idx]); - - event->namespaces.header.type = PERF_RECORD_NAMESPACES; - - event->namespaces.header.size = (sizeof(event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return 0; -} - -static int perf_event__synthesize_fork(struct perf_tool *tool, - union perf_event *event, - pid_t 
pid, pid_t tgid, pid_t ppid, - perf_event__handler_t process, - struct machine *machine) -{ - memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - - /* - * for main thread set parent to ppid from status file. For other - * threads set parent pid to main thread. ie., assume main thread - * spawns all threads in a process - */ - if (tgid == pid) { - event->fork.ppid = ppid; - event->fork.ptid = ppid; - } else { - event->fork.ppid = tgid; - event->fork.ptid = tgid; - } - event->fork.pid = tgid; - event->fork.tid = pid; - event->fork.header.type = PERF_RECORD_FORK; - event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; - - event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return 0; -} - -int perf_event__synthesize_mmap_events(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data) -{ - char filename[PATH_MAX]; - FILE *fp; - unsigned long long t; - bool truncation = false; - unsigned long long timeout = proc_map_timeout * 1000000ULL; - int rc = 0; - const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); - int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0; - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", - machine->root_dir, pid, pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - /* - * We raced with a task exiting - just return: - */ - pr_debug("couldn't open %s\n", filename); - return -1; - } - - event->header.type = PERF_RECORD_MMAP2; - t = rdclock(); - - while (1) { - char bf[BUFSIZ]; - char prot[5]; - char execname[PATH_MAX]; - char anonstr[] = "//anon"; - unsigned int ino; - size_t size; - ssize_t n; - - if (fgets(bf, sizeof(bf), fp) == NULL) - break; - - if ((rdclock() - t) > timeout) { - pr_warning("Reading %s time out. 
" - "You may want to increase " - "the time limit by --proc-map-timeout\n", - filename); - truncation = true; - goto out; - } - - /* ensure null termination since stack will be reused. */ - strcpy(execname, ""); - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - /* - * Anon maps don't have the execname. - */ - if (n < 7) - continue; - - event->mmap2.ino = (u64)ino; - - /* - * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_USER; - else - event->header.misc = PERF_RECORD_MISC_GUEST_USER; - - /* map protection and flags bits */ - event->mmap2.prot = 0; - event->mmap2.flags = 0; - if (prot[0] == 'r') - event->mmap2.prot |= PROT_READ; - if (prot[1] == 'w') - event->mmap2.prot |= PROT_WRITE; - if (prot[2] == 'x') - event->mmap2.prot |= PROT_EXEC; - - if (prot[3] == 's') - event->mmap2.flags |= MAP_SHARED; - else - event->mmap2.flags |= MAP_PRIVATE; - - if (prot[2] != 'x') { - if (!mmap_data || prot[0] != 'r') - continue; - - event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; - } - -out: - if (truncation) - event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; - - if (!strcmp(execname, "")) - strcpy(execname, anonstr); - - if (hugetlbfs_mnt_len && - !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { - strcpy(execname, anonstr); - event->mmap2.flags |= MAP_HUGETLB; - } - - size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap2.len -= event->mmap.start; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - 
event->mmap2.pid = tgid; - event->mmap2.tid = pid; - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - - if (truncation) - break; - } - - fclose(fp); - return rc; -} - -int perf_event__synthesize_modules(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - int rc = 0; - struct map *pos; - struct maps *maps = machine__kernel_maps(machine); - union perf_event *event = zalloc((sizeof(event->mmap) + - machine->id_hdr_size)); - if (event == NULL) { - pr_debug("Not enough memory synthesizing mmap event " - "for kernel modules\n"); - return -1; - } - - event->header.type = PERF_RECORD_MMAP; - - /* - * kernel uses 0 for user space maps, see kernel/perf_event.c - * __perf_event_mmap - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; - else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - for (pos = maps__first(maps); pos; pos = map__next(pos)) { - size_t size; - - if (!__map__is_kmodule(pos)) - continue; - - size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = pos->start; - event->mmap.len = pos->end - pos->start; - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, pos->dso->long_name, - pos->dso->long_name_len + 1); - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); - return rc; -} - -static int __event__synthesize_thread(union perf_event *comm_event, - union perf_event *mmap_event, - union perf_event *fork_event, - union perf_event *namespaces_event, - pid_t pid, int full, - perf_event__handler_t process, - struct perf_tool *tool, - struct machine *machine, - bool mmap_data) -{ - 
char filename[PATH_MAX]; - DIR *tasks; - struct dirent *dirent; - pid_t tgid, ppid; - int rc = 0; - - /* special case: only send one comm event using passed in pid */ - if (!full) { - tgid = perf_event__synthesize_comm(tool, comm_event, pid, - process, machine); - - if (tgid == -1) - return -1; - - if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, - tgid, process, machine) < 0) - return -1; - - /* - * send mmap only for thread group leader - * see thread__init_map_groups - */ - if (pid == tgid && - perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data)) - return -1; - - return 0; - } - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(filename, sizeof(filename), "%s/proc/%d/task", - machine->root_dir, pid); - - tasks = opendir(filename); - if (tasks == NULL) { - pr_debug("couldn't open %s\n", filename); - return 0; - } - - while ((dirent = readdir(tasks)) != NULL) { - char *end; - pid_t _pid; - - _pid = strtol(dirent->d_name, &end, 10); - if (*end) - continue; - - rc = -1; - if (perf_event__prepare_comm(comm_event, _pid, machine, - &tgid, &ppid) != 0) - break; - - if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - ppid, process, machine) < 0) - break; - - if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, - tgid, process, machine) < 0) - break; - - /* - * Send the prepared comm event - */ - if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) - break; - - rc = 0; - if (_pid == pid) { - /* process the parent's maps too */ - rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data); - if (rc) - break; - } - } - - closedir(tasks); - return rc; -} - -int perf_event__synthesize_thread_map(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data) -{ - union perf_event *comm_event, *mmap_event, *fork_event; - union perf_event 
*namespaces_event; - int err = -1, thread, j; - - comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); - if (comm_event == NULL) - goto out; - - mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); - if (mmap_event == NULL) - goto out_free_comm; - - fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); - if (fork_event == NULL) - goto out_free_mmap; - - namespaces_event = malloc(sizeof(namespaces_event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - if (namespaces_event == NULL) - goto out_free_fork; - - err = 0; - for (thread = 0; thread < threads->nr; ++thread) { - if (__event__synthesize_thread(comm_event, mmap_event, - fork_event, namespaces_event, - perf_thread_map__pid(threads, thread), 0, - process, tool, machine, - mmap_data)) { - err = -1; - break; - } - - /* - * comm.pid is set to thread group id by - * perf_event__synthesize_comm - */ - if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) { - bool need_leader = true; - - /* is thread group leader in thread_map? 
*/ - for (j = 0; j < threads->nr; ++j) { - if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) { - need_leader = false; - break; - } - } - - /* if not, generate events for it */ - if (need_leader && - __event__synthesize_thread(comm_event, mmap_event, - fork_event, namespaces_event, - comm_event->comm.pid, 0, - process, tool, machine, - mmap_data)) { - err = -1; - break; - } - } - } - free(namespaces_event); -out_free_fork: - free(fork_event); -out_free_mmap: - free(mmap_event); -out_free_comm: - free(comm_event); -out: - return err; -} - -static int __perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - struct dirent **dirent, - int start, - int num) -{ - union perf_event *comm_event, *mmap_event, *fork_event; - union perf_event *namespaces_event; - int err = -1; - char *end; - pid_t pid; - int i; - - comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); - if (comm_event == NULL) - goto out; - - mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); - if (mmap_event == NULL) - goto out_free_comm; - - fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); - if (fork_event == NULL) - goto out_free_mmap; - - namespaces_event = malloc(sizeof(namespaces_event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - if (namespaces_event == NULL) - goto out_free_fork; - - for (i = start; i < start + num; i++) { - if (!isdigit(dirent[i]->d_name[0])) - continue; - - pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); - /* only interested in proper numerical dirents */ - if (*end) - continue; - /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. 
- */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, - namespaces_event, pid, 1, process, - tool, machine, mmap_data); - } - err = 0; - - free(namespaces_event); -out_free_fork: - free(fork_event); -out_free_mmap: - free(mmap_event); -out_free_comm: - free(comm_event); -out: - return err; -} - -struct synthesize_threads_arg { - struct perf_tool *tool; - perf_event__handler_t process; - struct machine *machine; - bool mmap_data; - struct dirent **dirent; - int num; - int start; -}; - -static void *synthesize_threads_worker(void *arg) -{ - struct synthesize_threads_arg *args = arg; - - __perf_event__synthesize_threads(args->tool, args->process, - args->machine, args->mmap_data, - args->dirent, - args->start, args->num); - return NULL; -} - -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - unsigned int nr_threads_synthesize) -{ - struct synthesize_threads_arg *args = NULL; - pthread_t *synthesize_threads = NULL; - char proc_path[PATH_MAX]; - struct dirent **dirent; - int num_per_thread; - int m, n, i, j; - int thread_nr; - int base = 0; - int err = -1; - - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, 0, alphasort); - if (n < 0) - return err; - - if (nr_threads_synthesize == UINT_MAX) - thread_nr = sysconf(_SC_NPROCESSORS_ONLN); - else - thread_nr = nr_threads_synthesize; - - if (thread_nr <= 1) { - err = __perf_event__synthesize_threads(tool, process, - machine, mmap_data, - dirent, base, n); - goto free_dirent; - } - if (thread_nr > n) - thread_nr = n; - - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); - if (synthesize_threads == NULL) - goto free_dirent; - - args = calloc(sizeof(*args), thread_nr); - if (args == NULL) - goto free_threads; - - num_per_thread = n / thread_nr; - m = n % thread_nr; - for (i = 0; i < thread_nr; i++) { - 
args[i].tool = tool; - args[i].process = process; - args[i].machine = machine; - args[i].mmap_data = mmap_data; - args[i].dirent = dirent; - } - for (i = 0; i < m; i++) { - args[i].num = num_per_thread + 1; - args[i].start = i * args[i].num; - } - if (i != 0) - base = args[i-1].start + args[i-1].num; - for (j = i; j < thread_nr; j++) { - args[j].num = num_per_thread; - args[j].start = base + (j - i) * args[i].num; - } - - for (i = 0; i < thread_nr; i++) { - if (pthread_create(&synthesize_threads[i], NULL, - synthesize_threads_worker, &args[i])) - goto out_join; - } - err = 0; -out_join: - for (i = 0; i < thread_nr; i++) - pthread_join(synthesize_threads[i], NULL); - free(args); -free_threads: - free(synthesize_threads); -free_dirent: - for (i = 0; i < n; i++) - zfree(&dirent[i]); - free(dirent); - - return err; -} - struct process_symbol_args { const char *name; u64 start; @@ -899,327 +117,6 @@ int kallsyms__get_function_start(const char *kallsyms_filename, return 0; } -int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - -static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - size_t size; - struct map *map = machine__kernel_map(machine); - struct kmap *kmap; - int err; - union perf_event *event; - - if (map == NULL) - return -1; - - kmap = map__kmap(map); - if (!kmap->ref_reloc_sym) - return -1; - - /* - * We should get this from /sys/kernel/sections/.text, but till that is - * available use this, and after it is use this as a fallback for older - * kernels. 
- */ - event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); - if (event == NULL) { - pr_debug("Not enough memory synthesizing mmap event " - "for kernel modules\n"); - return -1; - } - - if (machine__is_host(machine)) { - /* - * kernel uses PERF_RECORD_MISC_USER for user space maps, - * see kernel/perf_event.c __perf_event_mmap - */ - event->header.misc = PERF_RECORD_MISC_KERNEL; - } else { - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - } - - size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = kmap->ref_reloc_sym->addr; - event->mmap.start = map->start; - event->mmap.len = map->end - event->mmap.start; - event->mmap.pid = machine->pid; - - err = perf_tool__process_synth_event(tool, event, machine, process); - free(event); - - return err; -} - -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - int err; - - err = __perf_event__synthesize_kernel_mmap(tool, process, machine); - if (err < 0) - return err; - - return perf_event__synthesize_extra_kmaps(tool, process, machine); -} - -int perf_event__synthesize_thread_map2(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event *event; - int i, err, size; - - size = sizeof(event->thread_map); - size += threads->nr * sizeof(event->thread_map.entries[0]); - - event = zalloc(size); - if (!event) - return -ENOMEM; - - event->header.type = PERF_RECORD_THREAD_MAP; - event->header.size = size; - event->thread_map.nr = threads->nr; - - for (i = 0; i < threads->nr; i++) { - struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i]; - char 
*comm = perf_thread_map__comm(threads, i); - - if (!comm) - comm = (char *) ""; - - entry->pid = perf_thread_map__pid(threads, i); - strncpy((char *) &entry->comm, comm, sizeof(entry->comm)); - } - - err = process(tool, event, NULL, machine); - - free(event); - return err; -} - -static void synthesize_cpus(struct cpu_map_entries *cpus, - struct perf_cpu_map *map) -{ - int i; - - cpus->nr = map->nr; - - for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; -} - -static void synthesize_mask(struct perf_record_record_cpu_map *mask, - struct perf_cpu_map *map, int max) -{ - int i; - - mask->nr = BITS_TO_LONGS(max); - mask->long_size = sizeof(long); - - for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); -} - -static size_t cpus_size(struct perf_cpu_map *map) -{ - return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); -} - -static size_t mask_size(struct perf_cpu_map *map, int *max) -{ - int i; - - *max = 0; - - for (i = 0; i < map->nr; i++) { - /* bit possition of the cpu is + 1 */ - int bit = map->map[i] + 1; - - if (bit > *max) - *max = bit; - } - - return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long); -} - -void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max) -{ - size_t size_cpus, size_mask; - bool is_dummy = perf_cpu_map__empty(map); - - /* - * Both array and mask data have variable size based - * on the number of cpus and their actual values. - * The size of the 'struct perf_record_cpu_map_data' is: - * - * array = size of 'struct cpu_map_entries' + - * number of cpus * sizeof(u64) - * - * mask = size of 'struct perf_record_record_cpu_map' + - * maximum cpu bit converted to size of longs - * - * and finaly + the size of 'struct perf_record_cpu_map_data'. 
- */ - size_cpus = cpus_size(map); - size_mask = mask_size(map, max); - - if (is_dummy || (size_cpus < size_mask)) { - *size += size_cpus; - *type = PERF_CPU_MAP__CPUS; - } else { - *size += size_mask; - *type = PERF_CPU_MAP__MASK; - } - - *size += sizeof(struct perf_record_cpu_map_data); - *size = PERF_ALIGN(*size, sizeof(u64)); - return zalloc(*size); -} - -void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, - u16 type, int max) -{ - data->type = type; - - switch (type) { - case PERF_CPU_MAP__CPUS: - synthesize_cpus((struct cpu_map_entries *) data->data, map); - break; - case PERF_CPU_MAP__MASK: - synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); - default: - break; - }; -} - -static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) -{ - size_t size = sizeof(struct perf_record_cpu_map); - struct perf_record_cpu_map *event; - int max; - u16 type; - - event = cpu_map_data__alloc(map, &size, &type, &max); - if (!event) - return NULL; - - event->header.type = PERF_RECORD_CPU_MAP; - event->header.size = size; - event->data.type = type; - - cpu_map_data__synthesize(&event->data, map, type, max); - return event; -} - -int perf_event__synthesize_cpu_map(struct perf_tool *tool, - struct perf_cpu_map *map, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_cpu_map *event; - int err; - - event = cpu_map_event__new(map); - if (!event) - return -ENOMEM; - - err = process(tool, (union perf_event *) event, NULL, machine); - - free(event); - return err; -} - -int perf_event__synthesize_stat_config(struct perf_tool *tool, - struct perf_stat_config *config, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat_config *event; - int size, i = 0, err; - - size = sizeof(*event); - size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0])); - - event = zalloc(size); - if (!event) - return -ENOMEM; - - event->header.type = 
PERF_RECORD_STAT_CONFIG; - event->header.size = size; - event->nr = PERF_STAT_CONFIG_TERM__MAX; - -#define ADD(__term, __val) \ - event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \ - event->data[i].val = __val; \ - i++; - - ADD(AGGR_MODE, config->aggr_mode) - ADD(INTERVAL, config->interval) - ADD(SCALE, config->scale) - - WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, - "stat config terms unbalanced\n"); -#undef ADD - - err = process(tool, (union perf_event *) event, NULL, machine); - - free(event); - return err; -} - -int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, - struct perf_counts_values *count, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat event; - - event.header.type = PERF_RECORD_STAT; - event.header.size = sizeof(event); - event.header.misc = 0; - - event.id = id; - event.cpu = cpu; - event.thread = thread; - event.val = count->val; - event.ena = count->ena; - event.run = count->run; - - return process(tool, (union perf_event *) &event, NULL, machine); -} - -int perf_event__synthesize_stat_round(struct perf_tool *tool, - u64 evtime, u64 type, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat_round event; - - event.header.type = PERF_RECORD_STAT_ROUND; - event.header.size = sizeof(event); - event.header.misc = 0; - - event.time = evtime; - event.type = type; - - return process(tool, (union perf_event *) &event, NULL, machine); -} - void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event) { diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 47ad81d47b1a..a0a0c91cde4a 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,54 +279,13 @@ enum { void perf_event__print_totals(void); -struct perf_tool; -struct perf_thread_map; struct perf_cpu_map; +struct perf_record_stat_config; struct perf_stat_config; -struct perf_counts_values; +struct perf_tool; 
-typedef int (*perf_event__handler_t)(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct machine *machine); - -int perf_event__synthesize_thread_map(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine, bool mmap_data); -int perf_event__synthesize_thread_map2(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_cpu_map(struct perf_tool *tool, - struct perf_cpu_map *cpus, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, bool mmap_data, - unsigned int nr_threads_synthesize); -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_stat_config(struct perf_tool *tool, - struct perf_stat_config *config, - perf_event__handler_t process, - struct machine *machine); void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event); -int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, - struct perf_counts_values *count, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_stat_round(struct perf_tool *tool, - u64 time, u64 type, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_modules(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); int perf_event__process_comm(struct perf_tool *tool, union perf_event *event, @@ -380,10 +339,6 @@ int perf_event__process_bpf(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - 
perf_event__handler_t process); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -405,34 +360,6 @@ void thread__resolve(struct thread *thread, struct addr_location *al, const char *perf_event__name(unsigned int id); -size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 read_format); -int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 read_format, - const struct perf_sample *sample); - -pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine); - -int perf_event__synthesize_namespaces(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine); - -int perf_event__synthesize_mmap_events(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data); - -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); - size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 095924aa186b..d277a98e62df 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -10,13 +10,14 @@ #include #include #include "cpumap.h" +#include "util/mmap.h" #include "thread_map.h" #include "target.h" #include "evlist.h" #include "evsel.h" #include "debug.h" #include "units.h" -#include "util.h" +#include // page_size #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" @@ -49,18 +50,14 @@ int sigqueue(pid_t pid, int sig, const union sigval value); #endif #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) 
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); perf_evlist__init(&evlist->core); perf_evlist__set_maps(&evlist->core, cpus, threads); - fdarray__init(&evlist->pollfd, 64); + fdarray__init(&evlist->core.pollfd, 64); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } @@ -108,7 +105,7 @@ struct evlist *perf_evlist__new_dummy(void) */ void perf_evlist__set_id_pos(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); evlist->id_pos = first->id_pos; evlist->is_pos = first->is_pos; @@ -124,7 +121,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist) perf_evlist__set_id_pos(evlist); } -static void perf_evlist__purge(struct evlist *evlist) +static void evlist__purge(struct evlist *evlist) { struct evsel *pos, *n; @@ -137,11 +134,11 @@ static void perf_evlist__purge(struct evlist *evlist) evlist->core.nr_entries = 0; } -void perf_evlist__exit(struct evlist *evlist) +void evlist__exit(struct evlist *evlist) { zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); - fdarray__exit(&evlist->pollfd); + fdarray__exit(&evlist->core.pollfd); } void evlist__delete(struct evlist *evlist) @@ -149,14 +146,14 @@ void evlist__delete(struct evlist *evlist) if (evlist == NULL) return; - perf_evlist__munmap(evlist); + evlist__munmap(evlist); evlist__close(evlist); perf_cpu_map__put(evlist->core.cpus); perf_thread_map__put(evlist->core.threads); evlist->core.cpus = NULL; evlist->core.threads = NULL; - perf_evlist__purge(evlist); - perf_evlist__exit(evlist); + evlist__purge(evlist); + evlist__exit(evlist); free(evlist); } @@ -318,7 +315,7 @@ int perf_evlist__add_newtp(struct evlist *evlist, static int 
perf_evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) { - if (evsel->system_wide) + if (evsel->core.system_wide) return 1; else return perf_thread_map__nr(evlist->core.threads); @@ -401,128 +398,29 @@ int perf_evlist__enable_event_idx(struct evlist *evlist, return perf_evlist__enable_event_thread(evlist, evsel, idx); } -int perf_evlist__alloc_pollfd(struct evlist *evlist) +int evlist__add_pollfd(struct evlist *evlist, int fd) { - int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); - int nr_threads = perf_thread_map__nr(evlist->core.threads); - int nfds = 0; - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->system_wide) - nfds += nr_cpus; - else - nfds += nr_cpus * nr_threads; - } - - if (fdarray__available_entries(&evlist->pollfd) < nfds && - fdarray__grow(&evlist->pollfd, nfds) < 0) - return -ENOMEM; - - return 0; -} - -static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd, - struct perf_mmap *map, short revent) -{ - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); - /* - * Save the idx so that when we filter out fds POLLHUP'ed we can - * close the associated evlist->mmap[] entry. 
- */ - if (pos >= 0) { - evlist->pollfd.priv[pos].ptr = map; - - fcntl(fd, F_SETFL, O_NONBLOCK); - } - - return pos; -} - -int perf_evlist__add_pollfd(struct evlist *evlist, int fd) -{ - return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); } static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, void *arg __maybe_unused) { - struct perf_mmap *map = fda->priv[fd].ptr; + struct mmap *map = fda->priv[fd].ptr; if (map) perf_mmap__put(map); } -int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) +int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->pollfd, revents_and_mask, + return fdarray__filter(&evlist->core.pollfd, revents_and_mask, perf_evlist__munmap_filtered, NULL); } -int perf_evlist__poll(struct evlist *evlist, int timeout) +int evlist__poll(struct evlist *evlist, int timeout) { - return fdarray__poll(&evlist->pollfd, timeout); -} - -static void perf_evlist__id_hash(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, u64 id) -{ - int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); - - sid->id = id; - sid->evsel = evsel; - hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->heads[hash]); -} - -void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, - int cpu, int thread, u64 id) -{ - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); - evsel->id[evsel->ids++] = id; -} - -int perf_evlist__id_add_fd(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, int fd) -{ - u64 read_data[4] = { 0, }; - int id_idx = 1; /* The first entry is the counter value */ - u64 id; - int ret; - - ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); - if (!ret) - goto add; - - if (errno != ENOTTY) - return -1; - - /* Legacy way to get event id.. All hail to old kernels! 
*/ - - /* - * This way does not work with group format read, so bail - * out in that case. - */ - if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) - return -1; - - if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) || - read(fd, &read_data, sizeof(read_data)) == -1) - return -1; - - if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - ++id_idx; - if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - ++id_idx; - - id = read_data[id_idx]; - - add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); - return 0; + return perf_evlist__poll(&evlist->core, timeout); } static void perf_evlist__set_sid_idx(struct evlist *evlist, @@ -535,7 +433,7 @@ static void perf_evlist__set_sid_idx(struct evlist *evlist, sid->cpu = evlist->core.cpus->map[cpu]; else sid->cpu = -1; - if (!evsel->system_wide && evlist->core.threads && thread >= 0) + if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) sid->tid = perf_thread_map__pid(evlist->core.threads, thread); else sid->tid = -1; @@ -548,7 +446,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) int hash; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); - head = &evlist->heads[hash]; + head = &evlist->core.heads[hash]; hlist_for_each_entry(sid, head, node) if (sid->id == id) @@ -562,14 +460,14 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) struct perf_sample_id *sid; if (evlist->core.nr_entries == 1 || !id) - return perf_evlist__first(evlist); + return evlist__first(evlist); sid = perf_evlist__id2sid(evlist, id); if (sid) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); if (!perf_evlist__sample_id_all(evlist)) - return perf_evlist__first(evlist); + return evlist__first(evlist); return NULL; } @@ -584,7 +482,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, sid = perf_evlist__id2sid(evlist, id); if (sid) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); return 
NULL; } @@ -613,7 +511,7 @@ static int perf_evlist__event2id(struct evlist *evlist, struct evsel *perf_evlist__event2evsel(struct evlist *evlist, union perf_event *event) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); struct hlist_head *head; struct perf_sample_id *sid; int hash; @@ -634,11 +532,11 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist, return first; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); - head = &evlist->heads[hash]; + head = &evlist->core.heads[hash]; hlist_for_each_entry(sid, head, node) { if (sid->id == id) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); } return NULL; } @@ -650,8 +548,8 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value) if (!evlist->overwrite_mmap) return 0; - for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->overwrite_mmap[i].fd; + for (i = 0; i < evlist->core.nr_mmaps; i++) { + int fd = evlist->overwrite_mmap[i].core.fd; int err; if (fd < 0) @@ -673,42 +571,42 @@ static int perf_evlist__resume(struct evlist *evlist) return perf_evlist__set_paused(evlist, false); } -static void perf_evlist__munmap_nofree(struct evlist *evlist) +static void evlist__munmap_nofree(struct evlist *evlist) { int i; if (evlist->mmap) - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->core.nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); if (evlist->overwrite_mmap) - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->core.nr_mmaps; i++) perf_mmap__munmap(&evlist->overwrite_mmap[i]); } -void perf_evlist__munmap(struct evlist *evlist) +void evlist__munmap(struct evlist *evlist) { - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); } -static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist, - bool overwrite) +static struct mmap *evlist__alloc_mmap(struct evlist *evlist, + bool overwrite) { int i; - struct 
perf_mmap *map; + struct mmap *map; - evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); + evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); if (perf_cpu_map__empty(evlist->core.cpus)) - evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads); - map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads); + map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap)); if (!map) return NULL; - for (i = 0; i < evlist->nr_mmaps; i++) { - map[i].fd = -1; - map[i].overwrite = overwrite; + for (i = 0; i < evlist->core.nr_mmaps; i++) { + map[i].core.fd = -1; + map[i].core.overwrite = overwrite; /* * When the perf_mmap() call is made we grab one refcount, plus * one extra to let perf_mmap__consume() get the last @@ -718,7 +616,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - refcount_set(&map[i].refcnt, 0); + refcount_set(&map[i].core.refcnt, 0); } return map; } @@ -732,7 +630,7 @@ perf_evlist__should_poll(struct evlist *evlist __maybe_unused, return true; } -static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, +static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { @@ -741,7 +639,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx); evlist__for_each_entry(evlist, evsel) { - struct perf_mmap *maps = evlist->mmap; + struct mmap *maps = evlist->mmap; int *output = _output; int fd; int cpu; @@ -752,7 +650,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, maps = evlist->overwrite_mmap; if (!maps) { - maps = perf_evlist__alloc_mmap(evlist, true); + maps = evlist__alloc_mmap(evlist, true); if (!maps) return -1; evlist->overwrite_mmap = maps; @@ -762,7 
+660,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, mp->prot &= ~PROT_WRITE; } - if (evsel->system_wide && thread) + if (evsel->core.system_wide && thread) continue; cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu); @@ -792,14 +690,14 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, * other events, so it should not need to be polled anyway. * Therefore don't add it for polling. */ - if (!evsel->system_wide && - __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { + if (!evsel->core.system_wide && + perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { perf_mmap__put(&maps[idx]); return -1; } if (evsel->core.attr.read_format & PERF_FORMAT_ID) { - if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, fd) < 0) return -1; perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, @@ -810,7 +708,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, return 0; } -static int perf_evlist__mmap_per_cpu(struct evlist *evlist, +static int evlist__mmap_per_cpu(struct evlist *evlist, struct mmap_params *mp) { int cpu, thread; @@ -826,7 +724,7 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist, true); for (thread = 0; thread < nr_threads; thread++) { - if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, + if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output, &output_overwrite)) goto out_unmap; } @@ -835,11 +733,11 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist, return 0; out_unmap: - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); return -1; } -static int perf_evlist__mmap_per_thread(struct evlist *evlist, +static int evlist__mmap_per_thread(struct evlist *evlist, struct mmap_params *mp) { int thread; @@ -853,7 +751,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist, auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 
false); - if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, + if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, &output, &output_overwrite)) goto out_unmap; } @@ -861,7 +759,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist, return 0; out_unmap: - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); return -1; } @@ -888,7 +786,7 @@ unsigned long perf_event_mlock_kb_in_pages(void) return pages; } -size_t perf_evlist__mmap_size(unsigned long pages) +size_t evlist__mmap_size(unsigned long pages) { if (pages == UINT_MAX) pages = perf_event_mlock_kb_in_pages(); @@ -971,7 +869,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, } /** - * perf_evlist__mmap_ex - Create mmaps to receive events. + * evlist__mmap_ex - Create mmaps to receive events. * @evlist: list of events * @pages: map length in pages * @overwrite: overwrite older events? @@ -979,7 +877,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * @auxtrace_overwrite - overwrite older auxtrace data? * * If @overwrite is %false the user needs to signal event consumption using - * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this + * perf_mmap__write_tail(). Using evlist__mmap_read() does this * automatically. * * Similarly, if @auxtrace_overwrite is %false the user needs to signal data @@ -987,7 +885,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * * Return: %0 on success, negative error code otherwise. 
*/ -int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, +int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level) @@ -1004,36 +902,36 @@ int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, .comp_level = comp_level }; if (!evlist->mmap) - evlist->mmap = perf_evlist__alloc_mmap(evlist, false); + evlist->mmap = evlist__alloc_mmap(evlist, false); if (!evlist->mmap) return -ENOMEM; - if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0) return -ENOMEM; - evlist->mmap_len = perf_evlist__mmap_size(pages); - pr_debug("mmap size %zuB\n", evlist->mmap_len); - mp.mask = evlist->mmap_len - page_size - 1; + evlist->core.mmap_len = evlist__mmap_size(pages); + pr_debug("mmap size %zuB\n", evlist->core.mmap_len); + mp.mask = evlist->core.mmap_len - page_size - 1; - auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, + auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); evlist__for_each_entry(evlist, evsel) { if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && - evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + evsel->core.sample_id == NULL && + perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } if (perf_cpu_map__empty(cpus)) - return perf_evlist__mmap_per_thread(evlist, &mp); + return evlist__mmap_per_thread(evlist, &mp); - return perf_evlist__mmap_per_cpu(evlist, &mp); + return evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct evlist *evlist, unsigned int pages) +int evlist__mmap(struct evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); + return evlist__mmap_ex(evlist, pages, 
0, false, 0, PERF_AFFINITY_SYS, 1, 0); } int perf_evlist__create_maps(struct evlist *evlist, struct target *target) @@ -1225,7 +1123,7 @@ u64 perf_evlist__combined_branch_type(struct evlist *evlist) bool perf_evlist__valid_read_format(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist), *pos = first; + struct evsel *first = evlist__first(evlist), *pos = first; u64 read_format = first->core.attr.read_format; u64 sample_type = first->core.attr.sample_type; @@ -1243,15 +1141,9 @@ bool perf_evlist__valid_read_format(struct evlist *evlist) return true; } -u64 perf_evlist__read_format(struct evlist *evlist) -{ - struct evsel *first = perf_evlist__first(evlist); - return first->core.attr.read_format; -} - u16 perf_evlist__id_hdr_size(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); struct perf_sample *data; u64 sample_type; u16 size = 0; @@ -1284,7 +1176,7 @@ out: bool perf_evlist__valid_sample_id_all(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist), *pos = first; + struct evsel *first = evlist__first(evlist), *pos = first; evlist__for_each_entry_continue(evlist, pos) { if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all) @@ -1296,7 +1188,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist) bool perf_evlist__sample_id_all(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); return first->core.attr.sample_id_all; } @@ -1529,19 +1421,6 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); } -size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp) -{ - struct evsel *evsel; - size_t printed = 0; - - evlist__for_each_entry(evlist, evsel) { - printed += fprintf(fp, "%s%s", evsel->idx ? 
", " : "", - perf_evsel__name(evsel)); - } - - return printed + fprintf(fp, "\n"); -} - int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size) { @@ -1571,7 +1450,7 @@ int perf_evlist__strerror_open(struct evlist *evlist, "Hint:\tThe current value is %d.", value); break; case EINVAL: { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); int max_freq; if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) @@ -1599,7 +1478,7 @@ out_default: int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) { char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); - int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; + int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0; switch (err) { case EPERM: @@ -1633,7 +1512,7 @@ void perf_evlist__to_front(struct evlist *evlist, struct evsel *evsel, *n; LIST_HEAD(move); - if (move_evsel == perf_evlist__first(evlist)) + if (move_evsel == evlist__first(evlist)) return; evlist__for_each_entry_safe(evlist, n, evsel) { @@ -1754,7 +1633,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist) void perf_evlist__force_leader(struct evlist *evlist) { if (!evlist->nr_groups) { - struct evsel *leader = perf_evlist__first(evlist); + struct evsel *leader = evlist__first(evlist); perf_evlist__set_leader(evlist); leader->forced_leader = true; @@ -1780,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, is_open = false; if (c2->leader == leader) { if (is_open) - evsel__close(c2); + perf_evsel__close(&evsel->core); c2->leader = c2; c2->core.nr_members = 0; } @@ -1844,10 +1723,10 @@ static void *perf_evlist__poll_thread(void *arg) draining = true; if (!draining) - perf_evlist__poll(evlist, 1000); + evlist__poll(evlist, 1000); - for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &evlist->mmap[i]; + for (i 
= 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->mmap[i]; union perf_event *event; if (perf_mmap__read_init(map)) @@ -1889,7 +1768,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, UINT_MAX)) + if (evlist__mmap(evlist, UINT_MAX)) goto out_delete_evlist; evlist__for_each_entry(evlist, counter) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a55f0f2546e5..7cfe75522ba5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -7,11 +7,11 @@ #include #include #include -#include #include +#include #include "events_stats.h" #include "evsel.h" -#include "mmap.h" +#include #include #include @@ -20,16 +20,38 @@ struct thread_map; struct perf_cpu_map; struct record_opts; -#define PERF_EVLIST__HLIST_BITS 8 -#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. + * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. 
+ * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; struct evlist { struct perf_evlist core; - struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_groups; - int nr_mmaps; bool enabled; - size_t mmap_len; int id_pos; int is_pos; u64 combined_sample_type; @@ -38,9 +60,8 @@ struct evlist { int cork_fd; pid_t pid; } workload; - struct fdarray pollfd; - struct perf_mmap *mmap; - struct perf_mmap *overwrite_mmap; + struct mmap *mmap; + struct mmap *overwrite_mmap; struct evsel *selected; struct events_stats stats; struct perf_env *env; @@ -65,7 +86,7 @@ struct evlist *perf_evlist__new_default(void); struct evlist *perf_evlist__new_dummy(void); void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -void perf_evlist__exit(struct evlist *evlist); +void evlist__exit(struct evlist *evlist); void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); @@ -119,17 +140,10 @@ struct evsel * perf_evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); -void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, - int cpu, int thread, u64 id); -int perf_evlist__id_add_fd(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, int fd); +int evlist__add_pollfd(struct evlist *evlist, int fd); +int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); -int perf_evlist__add_pollfd(struct evlist *evlist, int fd); -int perf_evlist__alloc_pollfd(struct evlist *evlist); -int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); - -int perf_evlist__poll(struct evlist *evlist, int timeout); +int evlist__poll(struct evlist *evlist, int timeout); struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 
id); struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, @@ -139,7 +153,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); -void perf_evlist__mmap_consume(struct evlist *evlist, int idx); +void evlist__mmap_consume(struct evlist *evlist, int idx); int evlist__open(struct evlist *evlist); void evlist__close(struct evlist *evlist); @@ -170,14 +184,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); -int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, +int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level); -int perf_evlist__mmap(struct evlist *evlist, unsigned int pages); -void perf_evlist__munmap(struct evlist *evlist); +int evlist__mmap(struct evlist *evlist, unsigned int pages); +void evlist__munmap(struct evlist *evlist); -size_t perf_evlist__mmap_size(unsigned long pages); +size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); @@ -195,7 +209,6 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct evlist *evlist); -u64 perf_evlist__read_format(struct evlist *evlist); u64 __perf_evlist__combined_sample_type(struct evlist *evlist); u64 perf_evlist__combined_sample_type(struct evlist *evlist); u64 perf_evlist__combined_branch_type(struct evlist *evlist); @@ -221,17 +234,19 @@ static inline bool perf_evlist__empty(struct evlist *evlist) return list_empty(&evlist->core.entries); } -static inline struct evsel *perf_evlist__first(struct evlist *evlist) +static inline struct evsel *evlist__first(struct evlist *evlist) { - return 
list_entry(evlist->core.entries.next, struct evsel, core.node); + struct perf_evsel *evsel = perf_evlist__first(&evlist->core); + + return container_of(evsel, struct evsel, core); } -static inline struct evsel *perf_evlist__last(struct evlist *evlist) +static inline struct evsel *evlist__last(struct evlist *evlist) { - return list_entry(evlist->core.entries.prev, struct evsel, core.node); -} + struct perf_evsel *evsel = perf_evlist__last(&evlist->core); -size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp); + return container_of(evsel, struct evsel, core); +} int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 85825384f9e8..5591af81a070 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -30,8 +30,10 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "util/evsel_config.h" +#include "util/evsel_fprintf.h" #include "evlist.h" -#include "cpumap.h" +#include #include "thread_map.h" #include "target.h" #include "perf_regs.h" @@ -45,6 +47,7 @@ #include "../perf-sys.h" #include "util/parse-branch-options.h" #include +#include #include @@ -1226,36 +1229,6 @@ int evsel__disable(struct evsel *evsel) return err; } -int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) -{ - if (ncpus == 0 || nthreads == 0) - return 0; - - if (evsel->system_wide) - nthreads = 1; - - evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); - if (evsel->sample_id == NULL) - return -ENOMEM; - - evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); - if (evsel->id == NULL) { - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; - return -ENOMEM; - } - - return 0; -} - -static void perf_evsel__free_id(struct evsel *evsel) -{ - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; - zfree(&evsel->id); - 
evsel->ids = 0; -} - static void perf_evsel__free_config_terms(struct evsel *evsel) { struct perf_evsel_config_term *term, *h; @@ -1272,7 +1245,7 @@ void perf_evsel__exit(struct evsel *evsel) assert(evsel->evlist == NULL); perf_evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); - perf_evsel__free_id(evsel); + perf_evsel__free_id(&evsel->core); perf_evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); perf_cpu_map__put(evsel->core.cpus); @@ -1472,152 +1445,6 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) return fd; } -struct bit_names { - int bit; - const char *name; -}; - -static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) -{ - bool first_bit = true; - int i = 0; - - do { - if (value & bits[i].bit) { - buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); - first_bit = false; - } - } while (bits[++i].name != NULL); -} - -static void __p_sample_type(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_##n, #n } - struct bit_names bits[] = { - bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), - bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), - bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), - bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), - bit_name(WEIGHT), bit_name(PHYS_ADDR), - { .name = NULL, } - }; -#undef bit_name - __p_bits(buf, size, value, bits); -} - -static void __p_branch_sample_type(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } - struct bit_names bits[] = { - bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), - bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), - bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), - bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), - bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - { .name = NULL, } - }; -#undef 
bit_name - __p_bits(buf, size, value, bits); -} - -static void __p_read_format(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_FORMAT_##n, #n } - struct bit_names bits[] = { - bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), - { .name = NULL, } - }; -#undef bit_name - __p_bits(buf, size, value, bits); -} - -#define BUF_SIZE 1024 - -#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) -#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) -#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) -#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) -#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) -#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) - -#define PRINT_ATTRn(_n, _f, _p) \ -do { \ - if (attr->_f) { \ - _p(attr->_f); \ - ret += attr__fprintf(fp, _n, buf, priv);\ - } \ -} while (0) - -#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) - -int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, - attr__fprintf_f attr__fprintf, void *priv) -{ - char buf[BUF_SIZE]; - int ret = 0; - - PRINT_ATTRf(type, p_unsigned); - PRINT_ATTRf(size, p_unsigned); - PRINT_ATTRf(config, p_hex); - PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); - PRINT_ATTRf(sample_type, p_sample_type); - PRINT_ATTRf(read_format, p_read_format); - - PRINT_ATTRf(disabled, p_unsigned); - PRINT_ATTRf(inherit, p_unsigned); - PRINT_ATTRf(pinned, p_unsigned); - PRINT_ATTRf(exclusive, p_unsigned); - PRINT_ATTRf(exclude_user, p_unsigned); - PRINT_ATTRf(exclude_kernel, p_unsigned); - PRINT_ATTRf(exclude_hv, p_unsigned); - PRINT_ATTRf(exclude_idle, p_unsigned); - PRINT_ATTRf(mmap, p_unsigned); - PRINT_ATTRf(comm, p_unsigned); - PRINT_ATTRf(freq, p_unsigned); - PRINT_ATTRf(inherit_stat, p_unsigned); - PRINT_ATTRf(enable_on_exec, p_unsigned); - PRINT_ATTRf(task, p_unsigned); - PRINT_ATTRf(watermark, 
p_unsigned); - PRINT_ATTRf(precise_ip, p_unsigned); - PRINT_ATTRf(mmap_data, p_unsigned); - PRINT_ATTRf(sample_id_all, p_unsigned); - PRINT_ATTRf(exclude_host, p_unsigned); - PRINT_ATTRf(exclude_guest, p_unsigned); - PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); - PRINT_ATTRf(exclude_callchain_user, p_unsigned); - PRINT_ATTRf(mmap2, p_unsigned); - PRINT_ATTRf(comm_exec, p_unsigned); - PRINT_ATTRf(use_clockid, p_unsigned); - PRINT_ATTRf(context_switch, p_unsigned); - PRINT_ATTRf(write_backward, p_unsigned); - PRINT_ATTRf(namespaces, p_unsigned); - PRINT_ATTRf(ksymbol, p_unsigned); - PRINT_ATTRf(bpf_event, p_unsigned); - PRINT_ATTRf(aux_output, p_unsigned); - - PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); - PRINT_ATTRf(bp_type, p_unsigned); - PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); - PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); - PRINT_ATTRf(branch_sample_type, p_branch_sample_type); - PRINT_ATTRf(sample_regs_user, p_hex); - PRINT_ATTRf(sample_stack_user, p_unsigned); - PRINT_ATTRf(clockid, p_signed); - PRINT_ATTRf(sample_regs_intr, p_hex); - PRINT_ATTRf(aux_watermark, p_unsigned); - PRINT_ATTRf(sample_max_stack, p_unsigned); - - return ret; -} - -static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __maybe_unused) -{ - return fprintf(fp, " %-32s %s\n", name, val); -} - static void perf_evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx) @@ -1662,7 +1489,7 @@ static bool ignore_missing_thread(struct evsel *evsel, return false; /* The system wide setup does not work with threads. */ - if (evsel->system_wide) + if (evsel->core.system_wide) return false; /* The -ESRCH is perf event syscall errno for pid's not found. 
*/ @@ -1688,6 +1515,12 @@ static bool ignore_missing_thread(struct evsel *evsel, return true; } +static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __maybe_unused) +{ + return fprintf(fp, " %-32s %s\n", name, val); +} + static void display_attr(struct perf_event_attr *attr) { if (verbose >= 2) { @@ -1771,7 +1604,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, threads = empty_thread_map; } - if (evsel->system_wide) + if (evsel->core.system_wide) nthreads = 1; else nthreads = threads->nr; @@ -1818,7 +1651,7 @@ retry_sample_id: for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; - if (!evsel->cgrp && !evsel->system_wide) + if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); group_fd = get_group_fd(evsel, cpu, thread); @@ -1991,7 +1824,7 @@ out_close: void evsel__close(struct evsel *evsel) { perf_evsel__close(&evsel->core); - perf_evsel__free_id(evsel); + perf_evsel__free_id(&evsel->core); } int perf_evsel__open_per_cpu(struct evsel *evsel, @@ -2419,283 +2252,6 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel, return 0; } -size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 read_format) -{ - size_t sz, result = sizeof(struct perf_record_sample); - - if (type & PERF_SAMPLE_IDENTIFIER) - result += sizeof(u64); - - if (type & PERF_SAMPLE_IP) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TIME) - result += sizeof(u64); - - if (type & PERF_SAMPLE_ADDR) - result += sizeof(u64); - - if (type & PERF_SAMPLE_ID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_STREAM_ID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_CPU) - result += sizeof(u64); - - if (type & PERF_SAMPLE_PERIOD) - result += sizeof(u64); - - if (type & PERF_SAMPLE_READ) { - result += sizeof(u64); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - result += sizeof(u64); - if 
(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - result += sizeof(u64); - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_CALLCHAIN) { - sz = (sample->callchain->nr + 1) * sizeof(u64); - result += sz; - } - - if (type & PERF_SAMPLE_RAW) { - result += sizeof(u32); - result += sample->raw_size; - } - - if (type & PERF_SAMPLE_BRANCH_STACK) { - sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); - result += sz; - } - - if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - result += sizeof(u64); - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_STACK_USER) { - sz = sample->user_stack.size; - result += sizeof(u64); - if (sz) { - result += sz; - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_WEIGHT) - result += sizeof(u64); - - if (type & PERF_SAMPLE_DATA_SRC) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TRANSACTION) - result += sizeof(u64); - - if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - result += sizeof(u64); - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_PHYS_ADDR) - result += sizeof(u64); - - return result; -} - -int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 read_format, - const struct perf_sample *sample) -{ - __u64 *array; - size_t sz; - /* - * used for cross-endian analysis. See git commit 65014ab3 - * for why this goofiness is needed. 
- */ - union u64_swap u; - - array = event->sample.array; - - if (type & PERF_SAMPLE_IDENTIFIER) { - *array = sample->id; - array++; - } - - if (type & PERF_SAMPLE_IP) { - *array = sample->ip; - array++; - } - - if (type & PERF_SAMPLE_TID) { - u.val32[0] = sample->pid; - u.val32[1] = sample->tid; - *array = u.val64; - array++; - } - - if (type & PERF_SAMPLE_TIME) { - *array = sample->time; - array++; - } - - if (type & PERF_SAMPLE_ADDR) { - *array = sample->addr; - array++; - } - - if (type & PERF_SAMPLE_ID) { - *array = sample->id; - array++; - } - - if (type & PERF_SAMPLE_STREAM_ID) { - *array = sample->stream_id; - array++; - } - - if (type & PERF_SAMPLE_CPU) { - u.val32[0] = sample->cpu; - u.val32[1] = 0; - *array = u.val64; - array++; - } - - if (type & PERF_SAMPLE_PERIOD) { - *array = sample->period; - array++; - } - - if (type & PERF_SAMPLE_READ) { - if (read_format & PERF_FORMAT_GROUP) - *array = sample->read.group.nr; - else - *array = sample->read.one.value; - array++; - - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - *array = sample->read.time_enabled; - array++; - } - - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - *array = sample->read.time_running; - array++; - } - - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - memcpy(array, sample->read.group.values, sz); - array = (void *)array + sz; - } else { - *array = sample->read.one.id; - array++; - } - } - - if (type & PERF_SAMPLE_CALLCHAIN) { - sz = (sample->callchain->nr + 1) * sizeof(u64); - memcpy(array, sample->callchain, sz); - array = (void *)array + sz; - } - - if (type & PERF_SAMPLE_RAW) { - u.val32[0] = sample->raw_size; - *array = u.val64; - array = (void *)array + sizeof(u32); - - memcpy(array, sample->raw_data, sample->raw_size); - array = (void *)array + sample->raw_size; - } - - if (type & PERF_SAMPLE_BRANCH_STACK) { - sz = sample->branch_stack->nr * sizeof(struct 
branch_entry); - sz += sizeof(u64); - memcpy(array, sample->branch_stack, sz); - array = (void *)array + sz; - } - - if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - *array++ = sample->user_regs.abi; - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - memcpy(array, sample->user_regs.regs, sz); - array = (void *)array + sz; - } else { - *array++ = 0; - } - } - - if (type & PERF_SAMPLE_STACK_USER) { - sz = sample->user_stack.size; - *array++ = sz; - if (sz) { - memcpy(array, sample->user_stack.data, sz); - array = (void *)array + sz; - *array++ = sz; - } - } - - if (type & PERF_SAMPLE_WEIGHT) { - *array = sample->weight; - array++; - } - - if (type & PERF_SAMPLE_DATA_SRC) { - *array = sample->data_src; - array++; - } - - if (type & PERF_SAMPLE_TRANSACTION) { - *array = sample->transaction; - array++; - } - - if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - *array++ = sample->intr_regs.abi; - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - memcpy(array, sample->intr_regs.regs, sz); - array = (void *)array + sz; - } else { - *array++ = 0; - } - } - - if (type & PERF_SAMPLE_PHYS_ADDR) { - *array = sample->phys_addr; - array++; - } - - return 0; -} - struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); @@ -2811,9 +2367,11 @@ bool perf_evsel__fallback(struct evsel *evsel, int err, if (evsel->name) free(evsel->name); evsel->name = new_name; - scnprintf(msg, msgsize, -"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid); + scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying " + "to fall back to excluding kernel and hypervisor " + " samples", paranoid); evsel->core.attr.exclude_kernel = 1; + evsel->core.attr.exclude_hv = 1; return true; } @@ -2966,7 +2524,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) thread++) { int fd = FD(evsel, cpu, thread); - if 
(perf_evlist__id_add_fd(evlist, evsel, + if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, fd) < 0) return -1; } @@ -2980,7 +2538,7 @@ int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist) struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; - if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr)) + if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr)) return -ENOMEM; return store_evsel_ids(evsel, evlist); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 68321d10eb2d..ddc5ee6f6592 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -13,79 +12,11 @@ #include "symbol_conf.h" #include -struct addr_location; -struct evsel; -union perf_event; - -/* - * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are - * more than one entry in the evlist. - */ -struct perf_sample_id { - struct hlist_node node; - u64 id; - struct evsel *evsel; - /* - * 'idx' will be used for AUX area sampling. A sample will have AUX area - * data that will be queued for decoding, where there are separate - * queues for each CPU (per-cpu tracing) or task (per-thread tracing). - * The sample ID can be used to lookup 'idx' which is effectively the - * queue number. - */ - int idx; - int cpu; - pid_t tid; - - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; -}; - +struct bpf_object; struct cgroup; - -/* - * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. - * It is allocated within event parsing and attached to - * perf_evsel::config_terms list head. 
-*/ -enum term_type { - PERF_EVSEL__CONFIG_TERM_PERIOD, - PERF_EVSEL__CONFIG_TERM_FREQ, - PERF_EVSEL__CONFIG_TERM_TIME, - PERF_EVSEL__CONFIG_TERM_CALLGRAPH, - PERF_EVSEL__CONFIG_TERM_STACK_USER, - PERF_EVSEL__CONFIG_TERM_INHERIT, - PERF_EVSEL__CONFIG_TERM_MAX_STACK, - PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, - PERF_EVSEL__CONFIG_TERM_OVERWRITE, - PERF_EVSEL__CONFIG_TERM_DRV_CFG, - PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_PERCORE, - PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, -}; - -struct perf_evsel_config_term { - struct list_head list; - enum term_type type; - union { - u64 period; - u64 freq; - bool time; - char *callgraph; - char *drv_cfg; - u64 stack_user; - int max_stack; - bool inherit; - bool overwrite; - char *branch; - unsigned long max_events; - bool percore; - bool aux_output; - } val; - bool weak; -}; - +struct perf_counts; struct perf_stat_evsel; +union perf_event; typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); @@ -94,10 +25,6 @@ enum perf_tool_event { PERF_TOOL_DURATION_TIME = 1, }; -struct bpf_object; -struct perf_counts; -struct xyarray; - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. 
@@ -117,12 +44,9 @@ struct evsel { struct perf_evsel core; struct evlist *evlist; char *filter; - struct xyarray *sample_id; - u64 *id; struct perf_counts *counts; struct perf_counts *prev_raw_counts; int idx; - u32 ids; unsigned long max_events; unsigned long nr_events_printed; char *name; @@ -146,7 +70,6 @@ struct evsel { bool disabled; bool no_aux_samples; bool immediate; - bool system_wide; bool tracking; bool per_pkg; bool precise_max; @@ -179,11 +102,6 @@ struct evsel { } side_band; }; -union u64_swap { - u64 val64; - u32 val32[2]; -}; - struct perf_missing_features { bool sample_id_all; bool exclude_guest; @@ -282,8 +200,6 @@ const char *perf_evsel__name(struct evsel *evsel); const char *perf_evsel__group_name(struct evsel *evsel); int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); -int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads); - void __perf_evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); void __perf_evsel__reset_sample_bit(struct evsel *evsel, @@ -439,37 +355,6 @@ static inline bool perf_evsel__is_clock(struct evsel *evsel) perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -struct perf_attr_details { - bool freq; - bool verbose; - bool event_group; - bool force; - bool trace_fields; -}; - -int perf_evsel__fprintf(struct evsel *evsel, - struct perf_attr_details *details, FILE *fp); - -#define EVSEL__PRINT_IP (1<<0) -#define EVSEL__PRINT_SYM (1<<1) -#define EVSEL__PRINT_DSO (1<<2) -#define EVSEL__PRINT_SYMOFFSET (1<<3) -#define EVSEL__PRINT_ONELINE (1<<4) -#define EVSEL__PRINT_SRCLINE (1<<5) -#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) -#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) -#define EVSEL__PRINT_SKIP_IGNORED (1<<8) - -struct callchain_cursor; - -int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, - unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp); - -int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, 
- int left_alignment, unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp); - bool perf_evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, @@ -502,11 +387,6 @@ static inline bool evsel__has_callchain(const struct evsel *evsel) return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; } -typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); - -int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, - attr__fprintf_f attr__fprintf, void *priv); - struct perf_env *perf_evsel__env(struct evsel *evsel); int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist); diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h new file mode 100644 index 000000000000..8a7648037c18 --- /dev/null +++ b/tools/perf/util/evsel_config.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_EVSEL_CONFIG_H +#define __PERF_EVSEL_CONFIG_H 1 + +#include +#include + +/* + * The 'struct perf_evsel_config_term' is used to pass event + * specific configuration data to perf_evsel__config routine. + * It is allocated within event parsing and attached to + * perf_evsel::config_terms list head. 
+*/ +enum evsel_term_type { + PERF_EVSEL__CONFIG_TERM_PERIOD, + PERF_EVSEL__CONFIG_TERM_FREQ, + PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, + PERF_EVSEL__CONFIG_TERM_OVERWRITE, + PERF_EVSEL__CONFIG_TERM_DRV_CFG, + PERF_EVSEL__CONFIG_TERM_BRANCH, + PERF_EVSEL__CONFIG_TERM_PERCORE, + PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, +}; + +struct perf_evsel_config_term { + struct list_head list; + enum evsel_term_type type; + union { + u64 period; + u64 freq; + bool time; + char *callgraph; + char *drv_cfg; + u64 stack_user; + int max_stack; + bool inherit; + bool overwrite; + char *branch; + unsigned long max_events; + bool percore; + bool aux_output; + } val; + bool weak; +}; +#endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 496fec01f5d1..028df7afb0dc 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -4,6 +4,8 @@ #include #include #include "evsel.h" +#include "util/evsel_fprintf.h" +#include "util/event.h" #include "callchain.h" #include "map.h" #include "strlist.h" @@ -101,7 +103,7 @@ out: int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, - FILE *fp) + struct strlist *bt_stop_list, FILE *fp) { int printed = 0; struct callchain_cursor_node *node; @@ -174,10 +176,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += fprintf(fp, "\n"); /* Add srccode here too? 
*/ - if (symbol_conf.bt_stop_list && - node->sym && - strlist__has_entry(symbol_conf.bt_stop_list, - node->sym->name)) { + if (bt_stop_list && node->sym && + strlist__has_entry(bt_stop_list, node->sym->name)) { break; } @@ -192,7 +192,7 @@ next: int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int left_alignment, unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp) + struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) { int printed = 0; int print_ip = print_opts & EVSEL__PRINT_IP; @@ -203,8 +203,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; if (cursor != NULL) { - printed += sample__fprintf_callchain(sample, left_alignment, - print_opts, cursor, fp); + printed += sample__fprintf_callchain(sample, left_alignment, print_opts, + cursor, bt_stop_list, fp); } else { printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h new file mode 100644 index 000000000000..47e6c8456bb1 --- /dev/null +++ b/tools/perf/util/evsel_fprintf.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_EVSEL_FPRINTF_H +#define __PERF_EVSEL_FPRINTF_H 1 + +#include +#include + +struct evsel; + +struct perf_attr_details { + bool freq; + bool verbose; + bool event_group; + bool force; + bool trace_fields; +}; + +int perf_evsel__fprintf(struct evsel *evsel, + struct perf_attr_details *details, FILE *fp); + +#define EVSEL__PRINT_IP (1<<0) +#define EVSEL__PRINT_SYM (1<<1) +#define EVSEL__PRINT_DSO (1<<2) +#define EVSEL__PRINT_SYMOFFSET (1<<3) +#define EVSEL__PRINT_ONELINE (1<<4) +#define EVSEL__PRINT_SRCLINE (1<<5) +#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) +#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) +#define EVSEL__PRINT_SKIP_IGNORED (1<<8) + +struct addr_location; +struct perf_event_attr; +struct perf_sample; +struct 
callchain_cursor; +struct strlist; + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); + +typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv); +#endif // __PERF_EVSEL_FPRINTF_H diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index b72440bf9a79..d4137559be05 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -35,6 +35,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__sparc__) #define GEN_ELF_ARCH EM_SPARC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__s390x__) +#define GEN_ELF_ARCH EM_S390 +#define GEN_ELF_CLASS ELFCLASS64 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b0c34dda30a0..86d9396cb131 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -25,6 +25,7 @@ #include "dso.h" #include "evlist.h" #include "evsel.h" +#include "util/evsel_fprintf.h" #include "header.h" #include "memswap.h" #include "trace-event.h" @@ -42,11 +43,12 @@ #include "tool.h" #include "time-utils.h" #include "units.h" -#include "util.h" +#include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" #include +#include /* * magic2 = "PERFILE2" @@ -70,15 +72,6 @@ struct perf_file_attr { struct perf_file_section ids; }; -struct feat_fd { - struct perf_header *ph; - int fd; - void *buf; /* Either buf != NULL or fd >= 0 */ - ssize_t offset; - size_t size; - struct evsel *events; -}; - void perf_header__set_feat(struct perf_header 
*header, int feat) { set_bit(feat, header->adds_features); @@ -524,7 +517,7 @@ static int write_event_desc(struct feat_fd *ff, * copy into an nri to be independent of the * type of ids, */ - nri = evsel->ids; + nri = evsel->core.ids; ret = do_write(ff, &nri, sizeof(nri)); if (ret < 0) return ret; @@ -538,7 +531,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write unique ids for this event */ - ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64)); + ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64)); if (ret < 0) return ret; } @@ -1081,7 +1074,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); if (sysfs__read_str(file, &cache->map, &len)) { - zfree(&cache->map); + zfree(&cache->size); zfree(&cache->type); return -1; } @@ -1598,7 +1591,7 @@ static void free_event_desc(struct evsel *events) for (evsel = events; evsel->core.attr.size; evsel++) { zfree(&evsel->name); - zfree(&evsel->id); + zfree(&evsel->core.id); } free(events); @@ -1664,8 +1657,8 @@ static struct evsel *read_event_desc(struct feat_fd *ff) id = calloc(nr, sizeof(*id)); if (!id) goto error; - evsel->ids = nr; - evsel->id = id; + evsel->core.ids = nr; + evsel->core.id = id; for (j = 0 ; j < nr; j++) { if (do_read_u64(ff, id)) @@ -1707,9 +1700,9 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp) for (evsel = events; evsel->core.attr.size; evsel++) { fprintf(fp, "# event : name = %s, ", evsel->name); - if (evsel->ids) { + if (evsel->core.ids) { fprintf(fp, ", id = {"); - for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { + for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) { if (j) fputc(',', fp); fprintf(fp, " %"PRIu64, *id); @@ -2823,15 +2816,6 @@ static int process_compressed(struct feat_fd *ff, return 0; } -struct feature_ops { - int (*write)(struct feat_fd *ff, struct evlist *evlist); - void (*print)(struct feat_fd *ff, FILE *fp); - int (*process)(struct 
feat_fd *ff, void *data); - const char *name; - bool full_only; - bool synthesize; -}; - #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2858,8 +2842,10 @@ struct feature_ops { #define process_branch_stack NULL #define process_stat NULL +// Only used in util/synthetic-events.c +const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; -static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { +const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(TRACING_DATA, tracing_data, false), FEAT_OPN(BUILD_ID, build_id, false), FEAT_OPR(HOSTNAME, hostname, false), @@ -3083,7 +3069,7 @@ int perf_session__write_header(struct perf_session *session, evlist__for_each_entry(session->evlist, evsel) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64)); + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); if (err < 0) { pr_debug("failed to write perf header\n"); return err; @@ -3097,7 +3083,7 @@ int perf_session__write_header(struct perf_session *session, .attr = evsel->core.attr, .ids = { .offset = evsel->id_offset, - .size = evsel->ids * sizeof(u64), + .size = evsel->core.ids * sizeof(u64), } }; err = do_write(&ff, &f_attr, sizeof(f_attr)); @@ -3624,7 +3610,7 @@ int perf_session__read_header(struct perf_session *session) * for allocating the perf_sample_id table we fake 1 cpu and * hattr->ids threads. 
*/ - if (perf_evsel__alloc_id(evsel, 1, nr_ids)) + if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids)) goto out_delete_evlist; lseek(fd, f_attr.ids.offset, SEEK_SET); @@ -3633,7 +3619,7 @@ int perf_session__read_header(struct perf_session *session) if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id))) goto out_errno; - perf_evlist__id_add(session->evlist, evsel, 0, j, f_id); + perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id); } lseek(fd, tmp, SEEK_SET); @@ -3656,105 +3642,6 @@ out_delete_evlist: return -ENOMEM; } -int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u32 ids, u64 *id, - perf_event__handler_t process) -{ - union perf_event *ev; - size_t size; - int err; - - size = sizeof(struct perf_event_attr); - size = PERF_ALIGN(size, sizeof(u64)); - size += sizeof(struct perf_event_header); - size += ids * sizeof(u64); - - ev = zalloc(size); - - if (ev == NULL) - return -ENOMEM; - - ev->attr.attr = *attr; - memcpy(ev->attr.id, id, ids * sizeof(u64)); - - ev->attr.header.type = PERF_RECORD_HEADER_ATTR; - ev->attr.header.size = (u16)size; - - if (ev->attr.header.size == size) - err = process(tool, ev, NULL, NULL); - else - err = -E2BIG; - - free(ev); - - return err; -} - -int perf_event__synthesize_features(struct perf_tool *tool, - struct perf_session *session, - struct evlist *evlist, - perf_event__handler_t process) -{ - struct perf_header *header = &session->header; - struct feat_fd ff; - struct perf_record_header_feature *fe; - size_t sz, sz_hdr; - int feat, ret; - - sz_hdr = sizeof(fe->header); - sz = sizeof(union perf_event); - /* get a nice alignment */ - sz = PERF_ALIGN(sz, page_size); - - memset(&ff, 0, sizeof(ff)); - - ff.buf = malloc(sz); - if (!ff.buf) - return -ENOMEM; - - ff.size = sz - sz_hdr; - ff.ph = &session->header; - - for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { - if (!feat_ops[feat].synthesize) { - pr_debug("No record header feature for header :%d\n", feat); 
- continue; - } - - ff.offset = sizeof(*fe); - - ret = feat_ops[feat].write(&ff, evlist); - if (ret || ff.offset <= (ssize_t)sizeof(*fe)) { - pr_debug("Error writing feature\n"); - continue; - } - /* ff.buf may have changed due to realloc in do_write() */ - fe = ff.buf; - memset(fe, 0, sizeof(*fe)); - - fe->feat_id = feat; - fe->header.type = PERF_RECORD_HEADER_FEATURE; - fe->header.size = ff.offset; - - ret = process(tool, ff.buf, NULL, NULL); - if (ret) { - free(ff.buf); - return ret; - } - } - - /* Send HEADER_LAST_FEATURE mark. */ - fe = ff.buf; - fe->feat_id = HEADER_LAST_FEATURE; - fe->header.type = PERF_RECORD_HEADER_FEATURE; - fe->header.size = sizeof(*fe); - - ret = process(tool, ff.buf, NULL, NULL); - - free(ff.buf); - return ret; -} - int perf_event__process_feature(struct perf_session *session, union perf_event *event) { @@ -3797,113 +3684,6 @@ int perf_event__process_feature(struct perf_session *session, return 0; } -static struct perf_record_event_update * -event_update_event__new(size_t size, u64 type, u64 id) -{ - struct perf_record_event_update *ev; - - size += sizeof(*ev); - size = PERF_ALIGN(size, sizeof(u64)); - - ev = zalloc(size); - if (ev) { - ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; - ev->type = type; - ev->id = id; - } - return ev; -} - -int -perf_event__synthesize_event_update_unit(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - size_t size = strlen(evsel->unit); - int err; - - ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - strlcpy(ev->data, evsel->unit, size + 1); - err = process(tool, (union perf_event *)ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_scale(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - struct perf_record_event_update_scale 
*ev_data; - int err; - - ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - ev_data = (struct perf_record_event_update_scale *)ev->data; - ev_data->scale = evsel->scale; - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_name(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - size_t len = strlen(evsel->name); - int err; - - ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - strlcpy(ev->data, evsel->name, len + 1); - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_cpus(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - size_t size = sizeof(struct perf_record_event_update); - struct perf_record_event_update *ev; - int max, err; - u16 type; - - if (!evsel->core.own_cpus) - return 0; - - ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); - if (!ev) - return -ENOMEM; - - ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; - ev->type = PERF_EVENT_UPDATE__CPUS; - ev->id = evsel->id[0]; - - cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, - evsel->core.own_cpus, - type, max); - - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) { struct perf_record_event_update *ev = &event->event_update; @@ -3943,93 +3723,6 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) return ret; } -int perf_event__synthesize_attrs(struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process) -{ - struct evsel *evsel; - int err = 0; - - evlist__for_each_entry(evlist, evsel) { 
- err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids, - evsel->id, process); - if (err) { - pr_debug("failed to create perf header attribute\n"); - return err; - } - } - - return err; -} - -static bool has_unit(struct evsel *counter) -{ - return counter->unit && *counter->unit; -} - -static bool has_scale(struct evsel *counter) -{ - return counter->scale != 1; -} - -int perf_event__synthesize_extra_attr(struct perf_tool *tool, - struct evlist *evsel_list, - perf_event__handler_t process, - bool is_pipe) -{ - struct evsel *counter; - int err; - - /* - * Synthesize other events stuff not carried within - * attr event - unit, scale, name - */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->supported) - continue; - - /* - * Synthesize unit and scale only if it's defined. - */ - if (has_unit(counter)) { - err = perf_event__synthesize_event_update_unit(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel unit.\n"); - return err; - } - } - - if (has_scale(counter)) { - err = perf_event__synthesize_event_update_scale(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel counter.\n"); - return err; - } - } - - if (counter->core.own_cpus) { - err = perf_event__synthesize_event_update_cpus(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel cpus.\n"); - return err; - } - } - - /* - * Name is needed only for pipe output, - * perf.data carries event names. - */ - if (is_pipe) { - err = perf_event__synthesize_event_update_name(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel name.\n"); - return err; - } - } - } - return 0; -} - int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) @@ -4058,11 +3751,11 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, * for allocating the perf_sample_id table we fake 1 cpu and * hattr->ids threads. 
*/ - if (perf_evsel__alloc_id(evsel, 1, n_ids)) + if (perf_evsel__alloc_id(&evsel->core, 1, n_ids)) return -ENOMEM; for (i = 0; i < n_ids; i++) { - perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]); } return 0; @@ -4114,55 +3807,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, return 0; } -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, - struct evlist *evlist, - perf_event__handler_t process) -{ - union perf_event ev; - struct tracing_data *tdata; - ssize_t size = 0, aligned_size = 0, padding; - struct feat_fd ff; - int err __maybe_unused = 0; - - /* - * We are going to store the size of the data followed - * by the data contents. Since the fd descriptor is a pipe, - * we cannot seek back to store the size of the data once - * we know it. Instead we: - * - * - write the tracing data to the temp file - * - get/write the data size to pipe - * - write the tracing data from the temp file - * to the pipe - */ - tdata = tracing_data_get(&evlist->core.entries, fd, true); - if (!tdata) - return -1; - - memset(&ev, 0, sizeof(ev)); - - ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; - size = tdata->size; - aligned_size = PERF_ALIGN(size, sizeof(u64)); - padding = aligned_size - size; - ev.tracing_data.header.size = sizeof(ev.tracing_data); - ev.tracing_data.size = aligned_size; - - process(tool, &ev, NULL, NULL); - - /* - * The put function will copy all the tracing data - * stored in temp file to the pipe. 
- */ - tracing_data_put(tdata); - - ff = (struct feat_fd){ .fd = fd }; - if (write_padded(&ff, NULL, 0, padding)) - return -1; - - return aligned_size; -} - int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event) { @@ -4202,34 +3846,6 @@ int perf_event__process_tracing_data(struct perf_session *session, return size_read + padding; } -int perf_event__synthesize_build_id(struct perf_tool *tool, - struct dso *pos, u16 misc, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event ev; - size_t len; - int err = 0; - - if (!pos->hit) - return err; - - memset(&ev, 0, sizeof(ev)); - - len = pos->long_name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); - ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; - ev.build_id.header.misc = misc; - ev.build_id.pid = machine->pid; - ev.build_id.header.size = sizeof(ev.build_id) + len; - memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); - - err = process(tool, &ev, NULL, machine); - - return err; -} - int perf_event__process_build_id(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 3e48ae3c49b1..ca53a929e9fd 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -5,10 +5,10 @@ #include #include #include +#include // FILE #include #include #include -#include "event.h" #include "env.h" #include "pmu.h" @@ -92,8 +92,28 @@ struct perf_header { struct perf_env env; }; +struct feat_fd { + struct perf_header *ph; + int fd; + void *buf; /* Either buf != NULL or fd >= 0 */ + ssize_t offset; + size_t size; + struct evsel *events; +}; + +struct perf_header_feature_ops { + int (*write)(struct feat_fd *ff, struct evlist *evlist); + void (*print)(struct feat_fd *ff, FILE *fp); + int (*process)(struct feat_fd *ff, void *data); + const char *name; + bool full_only; + bool synthesize; +}; + struct evlist; 
struct perf_session; +struct perf_tool; +union perf_event; int perf_session__read_header(struct perf_session *session); int perf_session__write_header(struct perf_session *session, @@ -115,54 +135,16 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); -int perf_event__synthesize_features(struct perf_tool *tool, - struct perf_session *session, - struct evlist *evlist, - perf_event__handler_t process); - -int perf_event__synthesize_extra_attr(struct perf_tool *tool, - struct evlist *evsel_list, - perf_event__handler_t process, - bool is_pipe); - int perf_event__process_feature(struct perf_session *session, union perf_event *event); - -int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u32 ids, u64 *id, - perf_event__handler_t process); -int perf_event__synthesize_attrs(struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process); -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_name(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); - -int perf_event__synthesize_tracing_data(struct perf_tool *tool, - int fd, struct evlist *evlist, - perf_event__handler_t process); int perf_event__process_tracing_data(struct perf_session *session, 
union perf_event *event); - -int perf_event__synthesize_build_id(struct perf_tool *tool, - struct dso *pos, u16 misc, - perf_event__handler_t process, - struct machine *machine); int perf_event__process_build_id(struct perf_session *session, union perf_event *event); bool is_perf_magic(u64 magic); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 34803e33dc80..6a186b668303 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -15,6 +15,7 @@ struct addr_location; struct map_symbol; struct mem_info; struct branch_info; +struct branch_stack; struct block_info; struct symbol; struct ui_progress; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index aacffa2b0362..34cb380d19a3 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -14,7 +14,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "evlist.h" @@ -29,6 +28,7 @@ #include "auxtrace.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" #include "intel-bts.h" +#include "util/synthetic-events.h" #define MAX_TIMESTAMP (~0ULL) @@ -768,7 +768,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, int err; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == bts->pmu_type && evsel->ids) { + if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) { found = true; break; } @@ -795,7 +795,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 9b56fb74bedf..a1c9eb6d4f40 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/synthetic-events.h" #include "time-utils.h" #include 
"../arch/x86/include/uapi/asm/perf_regs.h" @@ -1704,7 +1705,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->id[0]; + u64 id = evsel->core.id[0]; u8 cpumode; if (intel_pt_skip_event(pt)) @@ -2719,7 +2720,7 @@ static void intel_pt_set_event_name(struct evlist *evlist, u64 id, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->id && evsel->id[0] == id) { + if (evsel->core.id && evsel->core.id[0] == id) { if (evsel->name) zfree(&evsel->name); evsel->name = strdup(name); @@ -2734,7 +2735,7 @@ static struct evsel *intel_pt_evsel(struct intel_pt *pt, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == pt->pmu_type && evsel->ids) + if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) return evsel; } @@ -2775,7 +2776,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; @@ -2902,7 +2903,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) return; evlist__for_each_entry(pt->session->evlist, evsel) { - if (evsel->core.attr.aux_output && evsel->id) { + if (evsel->core.attr.aux_output && evsel->core.id) { pt->sample_pebs = true; pt->pebs_evsel = evsel; return; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index b80f29bfc7bb..1bdf4c6ea3e5 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -15,7 +15,6 @@ #include #include "build-id.h" -#include "util.h" #include "event.h" #include "debug.h" #include "evlist.h" @@ -27,7 +26,6 @@ #include "jit.h" #include "jitdump.h" #include "genelf.h" -#include "../builtin.h" #include #include @@ -779,7 +777,7 @@ jit_process(struct perf_session *session, * track 
sample_type to compute id_all layout * perf sets the same sample type to all events as of now */ - first = perf_evlist__first(session->evlist); + first = evlist__first(session->evlist); jd.sample_type = first->core.attr.sample_type; *nbytes = 0; diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 46913637085b..6f0fa05b62b6 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -2,6 +2,8 @@ #ifndef __PERF_KVM_STAT_H #define __PERF_KVM_STAT_H +#ifdef HAVE_KVM_STAT_SUPPORT + #include "tool.h" #include "stat.h" #include "record.h" @@ -144,5 +146,7 @@ extern const int decode_str_len; extern const char *kvm_exit_reason; extern const char *kvm_entry_trace; extern const char *kvm_exit_trace; +#endif /* HAVE_KVM_STAT_SUPPORT */ +extern int kvm_add_default_arch_event(int *argc, const char **argv); #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 66756e6be111..6b4e5a0892f8 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -22,7 +22,6 @@ #define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP #include "unwind.h" -#include "debug.h" #include "libunwind-aarch64.h" #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> #include "../../arch/arm64/util/unwind-libunwind.c" diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index c5e568188e19..21c216c40a3b 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -22,7 +22,6 @@ #define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP #include "unwind.h" -#include "debug.h" #include "libunwind-x86.h" #include <../../../../arch/x86/include/uapi/asm/perf_regs.h> diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 55fb4b3b1157..8d04e3d070b1 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff 
--git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 397447066033..39062df02629 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -7,10 +7,10 @@ #include #include #include "compress.h" -#include "util.h" #include "debug.h" #include #include +#include #define BUFSIZE 8192 diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b4749d3eed08..70a9f8716a4b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ #include "linux/hash.h" #include "asm/bug.h" #include "bpf-event.h" +#include // page_size #include #include @@ -2609,21 +2610,6 @@ int machines__for_each_thread(struct machines *machines, return rc; } -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, - struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize) -{ - if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); - else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, - machine, data_mmap, - nr_threads_synthesize); - /* command specified */ - return 0; -} - pid_t machine__get_current_tid(struct machine *machine, int cpu) { int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index ffd391a925a6..18e13c0ccd6a 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -6,7 +6,6 @@ #include #include "map_groups.h" #include "dsos.h" -#include "event.h" #include "rwsem.h" struct addr_location; @@ -252,20 +251,6 @@ int machines__for_each_thread(struct machines *machines, int (*fn)(struct thread *thread, void *p), void *priv); -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, - struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int 
nr_threads_synthesize); -static inline -int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) -{ - return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); -} - pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h index 1e29ff903ca9..2c38e8c2d548 100644 --- a/tools/perf/util/memswap.h +++ b/tools/perf/util/memswap.h @@ -2,6 +2,13 @@ #ifndef PERF_MEMSWAP_H_ #define PERF_MEMSWAP_H_ +#include + +union u64_swap { + u64 val64; + u32 val32[2]; +}; + void mem_bswap_64(void *src, int byte_size); void mem_bswap_32(void *src, int byte_size); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 33c5b5495482..a35dc57d5995 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -12,6 +12,7 @@ #include #include #include +#include // sysconf() #ifdef HAVE_LIBNUMA_SUPPORT #include #endif @@ -20,25 +21,25 @@ #include "event.h" #include "mmap.h" #include "../perf.h" -#include "util.h" /* page_size */ +#include /* page_size */ -size_t perf_mmap__mmap_len(struct perf_mmap *map) +size_t perf_mmap__mmap_len(struct mmap *map) { - return map->mask + 1 + page_size; + return map->core.mask + 1 + page_size; } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, +static union perf_event *perf_mmap__read(struct mmap *map, u64 *startp, u64 end) { - unsigned char *data = map->base + page_size; + unsigned char *data = map->core.base + page_size; union perf_event *event = NULL; int diff = end - *startp; if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[*startp & map->mask]; + event = (union perf_event 
*)&data[*startp & map->core.mask]; size = event->header.size; if (size < sizeof(event->header) || diff < (int)size) @@ -48,20 +49,20 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { unsigned int offset = *startp; unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->event_copy; + void *dst = map->core.event_copy; do { - cpy = min(map->mask + 1 - (offset & map->mask), len); - memcpy(dst, &data[offset & map->mask], cpy); + cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); + memcpy(dst, &data[offset & map->core.mask], cpy); offset += cpy; dst += cpy; len -= cpy; } while (len); - event = (union perf_event *)map->event_copy; + event = (union perf_event *)map->core.event_copy; } *startp += size; @@ -82,55 +83,55 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, * } * perf_mmap__read_done() */ -union perf_event *perf_mmap__read_event(struct perf_mmap *map) +union perf_event *perf_mmap__read_event(struct mmap *map) { union perf_event *event; /* * Check if event was unmapped due to a POLLHUP/POLLERR. 
*/ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return NULL; /* non-overwirte doesn't pause the ringbuffer */ - if (!map->overwrite) - map->end = perf_mmap__read_head(map); + if (!map->core.overwrite) + map->core.end = perf_mmap__read_head(map); - event = perf_mmap__read(map, &map->start, map->end); + event = perf_mmap__read(map, &map->core.start, map->core.end); - if (!map->overwrite) - map->prev = map->start; + if (!map->core.overwrite) + map->core.prev = map->core.start; return event; } -static bool perf_mmap__empty(struct perf_mmap *map) +static bool perf_mmap__empty(struct mmap *map) { - return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; + return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } -void perf_mmap__get(struct perf_mmap *map) +void perf_mmap__get(struct mmap *map) { - refcount_inc(&map->refcnt); + refcount_inc(&map->core.refcnt); } -void perf_mmap__put(struct perf_mmap *map) +void perf_mmap__put(struct mmap *map) { - BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); - if (refcount_dec_and_test(&map->refcnt)) + if (refcount_dec_and_test(&map->core.refcnt)) perf_mmap__munmap(map); } -void perf_mmap__consume(struct perf_mmap *map) +void perf_mmap__consume(struct mmap *map) { - if (!map->overwrite) { - u64 old = map->prev; + if (!map->core.overwrite) { + u64 old = map->core.prev; perf_mmap__write_tail(map, old); } - if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) perf_mmap__put(map); } @@ -161,13 +162,13 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT -static int perf_mmap__aio_enabled(struct perf_mmap *map) +static int perf_mmap__aio_enabled(struct mmap *map) { return map->aio.nr_cblocks > 0; } #ifdef HAVE_LIBNUMA_SUPPORT -static int 
perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +static int perf_mmap__aio_alloc(struct mmap *map, int idx) { map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); @@ -179,7 +180,7 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) return 0; } -static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +static void perf_mmap__aio_free(struct mmap *map, int idx) { if (map->aio.data[idx]) { munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); @@ -187,7 +188,7 @@ static void perf_mmap__aio_free(struct perf_mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; @@ -207,7 +208,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi return 0; } #else /* !HAVE_LIBNUMA_SUPPORT */ -static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +static int perf_mmap__aio_alloc(struct mmap *map, int idx) { map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); if (map->aio.data[idx] == NULL) @@ -216,19 +217,19 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) return 0; } -static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +static void perf_mmap__aio_free(struct mmap *map, int idx) { zfree(&(map->aio.data[idx])); } -static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, +static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, int cpu __maybe_unused, int affinity __maybe_unused) { return 0; } #endif -static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) +static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp) { int delta_max, i, prio, ret; @@ -256,7 +257,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) pr_debug2("failed to 
allocate data buffer area, error %m"); return -1; } - ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); + ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity); if (ret == -1) return -1; /* @@ -282,7 +283,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) return 0; } -static void perf_mmap__aio_munmap(struct perf_mmap *map) +static void perf_mmap__aio_munmap(struct mmap *map) { int i; @@ -294,34 +295,34 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) zfree(&map->aio.aiocb); } #else /* !HAVE_AIO_SUPPORT */ -static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused) +static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused) { return 0; } -static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, +static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused, struct mmap_params *mp __maybe_unused) { return 0; } -static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) +static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) { } #endif -void perf_mmap__munmap(struct perf_mmap *map) +void perf_mmap__munmap(struct mmap *map) { perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, perf_mmap__mmap_len(map)); map->data = NULL; } - if (map->base != NULL) { - munmap(map->base, perf_mmap__mmap_len(map)); - map->base = NULL; - map->fd = -1; - refcount_set(&map->refcnt, 0); + if (map->core.base != NULL) { + munmap(map->core.base, perf_mmap__mmap_len(map)); + map->core.base = NULL; + map->core.fd = -1; + refcount_set(&map->core.refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -343,16 +344,16 @@ static void build_node_mask(int node, cpu_set_t *mask) } } -static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) +static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { CPU_ZERO(&map->affinity_mask); if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) - 
build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); + build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - CPU_SET(map->cpu, &map->affinity_mask); + CPU_SET(map->core.cpu, &map->affinity_mask); } -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) +int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { /* * The last one will be done at perf_mmap__consume(), so that we @@ -367,23 +368,23 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). */ - refcount_set(&map->refcnt, 2); - map->prev = 0; - map->mask = mp->mask; - map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + refcount_set(&map->core.refcnt, 2); + map->core.prev = 0; + map->core.mask = mp->mask; + map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, MAP_SHARED, fd, 0); - if (map->base == MAP_FAILED) { + if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - map->base = NULL; + map->core.base = NULL; return -1; } - map->fd = fd; - map->cpu = cpu; + map->core.fd = fd; + map->core.cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); - map->flush = mp->flush; + map->core.flush = mp->flush; map->comp_level = mp->comp_level; @@ -399,7 +400,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c } if (auxtrace_mmap__mmap(&map->auxtrace_mmap, - &mp->auxtrace_mp, map->base, fd)) + &mp->auxtrace_mp, map->core.base, fd)) return -1; return perf_mmap__aio_mmap(map, mp); @@ -440,25 +441,25 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) /* * Report the start and end of the available data in ringbuffer */ -static int __perf_mmap__read_init(struct perf_mmap *md) +static int __perf_mmap__read_init(struct mmap *md) { u64 head = 
perf_mmap__read_head(md); - u64 old = md->prev; - unsigned char *data = md->base + page_size; + u64 old = md->core.prev; + unsigned char *data = md->core.base + page_size; unsigned long size; - md->start = md->overwrite ? head : old; - md->end = md->overwrite ? old : head; + md->core.start = md->core.overwrite ? head : old; + md->core.end = md->core.overwrite ? old : head; - if ((md->end - md->start) < md->flush) + if ((md->core.end - md->core.start) < md->core.flush) return -EAGAIN; - size = md->end - md->start; - if (size > (unsigned long)(md->mask) + 1) { - if (!md->overwrite) { + size = md->core.end - md->core.start; + if (size > (unsigned long)(md->core.mask) + 1) { + if (!md->core.overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - md->prev = head; + md->core.prev = head; perf_mmap__consume(md); return -EAGAIN; } @@ -467,29 +468,29 @@ static int __perf_mmap__read_init(struct perf_mmap *md) * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) + if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) return -EINVAL; } return 0; } -int perf_mmap__read_init(struct perf_mmap *map) +int perf_mmap__read_init(struct mmap *map) { /* * Check if event was unmapped due to a POLLHUP/POLLERR. 
*/ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return -ENOENT; return __perf_mmap__read_init(map); } -int perf_mmap__push(struct perf_mmap *md, void *to, - int push(struct perf_mmap *map, void *to, void *buf, size_t size)) +int perf_mmap__push(struct mmap *md, void *to, + int push(struct mmap *map, void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); - unsigned char *data = md->base + page_size; + unsigned char *data = md->core.base + page_size; unsigned long size; void *buf; int rc = 0; @@ -498,12 +499,12 @@ int perf_mmap__push(struct perf_mmap *md, void *to, if (rc < 0) return (rc == -EAGAIN) ? 1 : -1; - size = md->end - md->start; + size = md->core.end - md->core.start; - if ((md->start & md->mask) + size != (md->end & md->mask)) { - buf = &data[md->start & md->mask]; - size = md->mask + 1 - (md->start & md->mask); - md->start += size; + if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) { + buf = &data[md->core.start & md->core.mask]; + size = md->core.mask + 1 - (md->core.start & md->core.mask); + md->core.start += size; if (push(md, to, buf, size) < 0) { rc = -1; @@ -511,16 +512,16 @@ int perf_mmap__push(struct perf_mmap *md, void *to, } } - buf = &data[md->start & md->mask]; - size = md->end - md->start; - md->start += size; + buf = &data[md->core.start & md->core.mask]; + size = md->core.end - md->core.start; + md->core.start += size; if (push(md, to, buf, size) < 0) { rc = -1; goto out; } - md->prev = head; + md->core.prev = head; perf_mmap__consume(md); out: return rc; @@ -529,16 +530,16 @@ out: /* * Mandatory for overwrite mode * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->prev. - * Need to correct the map->prev to head which is the end of next read. + * The last perf_mmap__read() will set tail to map->core.prev. + * Need to correct the map->core.prev to head which is the end of next read. 
*/ -void perf_mmap__read_done(struct perf_mmap *map) +void perf_mmap__read_done(struct mmap *map) { /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return; - map->prev = perf_mmap__read_head(map); + map->core.prev = perf_mmap__read_head(map); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 3857a49e8f96..e567c1c875bd 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -1,6 +1,7 @@ #ifndef __PERF_MMAP_H #define __PERF_MMAP_H 1 +#include #include #include #include @@ -15,22 +16,13 @@ struct aiocb; /** - * struct perf_mmap - perf's ring buffer mmap details + * struct mmap - perf's ring buffer mmap details * * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ -struct perf_mmap { - void *base; - int mask; - int fd; - int cpu; - refcount_t refcnt; - u64 prev; - u64 start; - u64 end; - bool overwrite; +struct mmap { + struct perf_mmap core; struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); #ifdef HAVE_AIO_SUPPORT struct { void **data; @@ -40,71 +32,42 @@ struct perf_mmap { } aio; #endif cpu_set_t affinity_mask; - u64 flush; void *data; int comp_level; }; -/* - * State machine of bkw_mmap_state: - * - * .________________(forbid)_____________. - * | V - * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY - * ^ ^ | ^ | - * | |__(forbid)____/ |___(forbid)___/| - * | | - * \_________________(3)_______________/ - * - * NOTREADY : Backward ring buffers are not ready - * RUNNING : Backward ring buffers are recording - * DATA_PENDING : We are required to collect data from backward ring buffers - * EMPTY : We have collected data from backward ring buffers. 
- * - * (0): Setup backward ring buffer - * (1): Pause ring buffers for reading - * (2): Read from ring buffers - * (3): Resume ring buffers for recording - */ -enum bkw_mmap_state { - BKW_MMAP_NOTREADY, - BKW_MMAP_RUNNING, - BKW_MMAP_DATA_PENDING, - BKW_MMAP_EMPTY, -}; - struct mmap_params { int prot, mask, nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu); -void perf_mmap__munmap(struct perf_mmap *map); +int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +void perf_mmap__munmap(struct mmap *map); -void perf_mmap__get(struct perf_mmap *map); -void perf_mmap__put(struct perf_mmap *map); +void perf_mmap__get(struct mmap *map); +void perf_mmap__put(struct mmap *map); -void perf_mmap__consume(struct perf_mmap *map); +void perf_mmap__consume(struct mmap *map); -static inline u64 perf_mmap__read_head(struct perf_mmap *mm) +static inline u64 perf_mmap__read_head(struct mmap *mm) { - return ring_buffer_read_head(mm->base); + return ring_buffer_read_head(mm->core.base); } -static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) { - ring_buffer_write_tail(md->base, tail); + ring_buffer_write_tail(md->core.base, tail); } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map); +union perf_event *perf_mmap__read_forward(struct mmap *map); -union perf_event *perf_mmap__read_event(struct perf_mmap *map); +union perf_event *perf_mmap__read_event(struct mmap *map); -int perf_mmap__push(struct perf_mmap *md, void *to, - int push(struct perf_mmap *map, void *to, void *buf, size_t size)); +int perf_mmap__push(struct mmap *md, void *to, + int push(struct mmap *map, void *to, void *buf, size_t size)); -size_t perf_mmap__mmap_len(struct perf_mmap *map); +size_t perf_mmap__mmap_len(struct mmap *map); -int perf_mmap__read_init(struct perf_mmap 
*md); -void perf_mmap__read_done(struct perf_mmap *map); +int perf_mmap__read_init(struct mmap *md); +void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 99be15dd2b6b..285d6f30d912 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -17,8 +17,26 @@ #include #include #include +#include #include +static const char *perf_ns__names[] = { + [NET_NS_INDEX] = "net", + [UTS_NS_INDEX] = "uts", + [IPC_NS_INDEX] = "ipc", + [PID_NS_INDEX] = "pid", + [USER_NS_INDEX] = "user", + [MNT_NS_INDEX] = "mnt", + [CGROUP_NS_INDEX] = "cgroup", +}; + +const char *perf_ns__name(unsigned int id) +{ + if (id >= ARRAY_SIZE(perf_ns__names)) + return "UNKNOWN"; + return perf_ns__names[id]; +} + struct namespaces *namespaces__new(struct perf_record_namespaces *event) { struct namespaces *namespaces; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 40edef56cb52..4b33f684eddd 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -66,4 +66,6 @@ static inline void __nsinfo__zput(struct nsinfo **nsip) #define nsinfo__zput(nsi) __nsinfo__zput(&nsi) +const char *perf_ns__name(unsigned int id); + #endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5ec21d21113c..b5e2adef49de 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -30,11 +30,12 @@ #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" -#include "cpumap.h" #include "probe-file.h" #include "asm/bug.h" #include "util/parse-branch-options.h" #include "metricgroup.h" +#include "util/evsel_config.h" +#include "util/event.h" #define MAX_NAME_LEN 100 @@ -335,7 +336,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = perf_cpu_map__get(cpus); evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->system_wide = pmu ? 
pmu->is_uncore : false; + evsel->core.system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -1936,7 +1937,7 @@ int parse_events(struct evlist *evlist, const char *str, perf_evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups += parse_state.nr_groups; - last = perf_evlist__last(evlist); + last = evlist__last(evlist); last->cmdline_group_boundary = true; return 0; @@ -2050,7 +2051,7 @@ foreach_evsel_in_last_glob(struct evlist *evlist, * So no need to WARN here, let *func do this. */ if (evlist->core.nr_entries > 0) - last = perf_evlist__last(evlist); + last = evlist__last(evlist); do { err = (*func)(last, arg); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index f1c36ed1cf36..48126ae4cd13 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -9,13 +9,11 @@ #define YYDEBUG 1 #include +#include #include -#include #include -#include "util.h" #include "pmu.h" #include "evsel.h" -#include "debug.h" #include "parse-events.h" #include "parse-events-bison.h" diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c index e635c594f773..7a0ab3507bd5 100644 --- a/tools/perf/util/perf-hooks.c +++ b/tools/perf/util/perf-hooks.c @@ -12,7 +12,6 @@ #include #include #include -#include "util/util.h" #include "util/debug.h" #include "util/perf-hooks.h" diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c new file mode 100644 index 000000000000..d4ad3f04923a --- /dev/null +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include "util/evsel_fprintf.h" + +struct bit_names { + int bit; + const char *name; +}; + +static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) +{ + bool first_bit = true; + int i = 0; + + do { + if (value & bits[i].bit) { + buf += 
scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); +} + +static void __p_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), + bit_name(WEIGHT), bit_name(PHYS_ADDR), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_branch_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } + struct bit_names bits[] = { + bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), + bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), + bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), + bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), + bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_read_format(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +#define BUF_SIZE 1024 + +#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) +#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) +#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) +#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) +#define 
p_read_format(val) __p_read_format(buf, BUF_SIZE, val) + +#define PRINT_ATTRn(_n, _f, _p) \ +do { \ + if (attr->_f) { \ + _p(attr->_f); \ + ret += attr__fprintf(fp, _n, buf, priv);\ + } \ +} while (0) + +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv) +{ + char buf[BUF_SIZE]; + int ret = 0; + + PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRf(size, p_unsigned); + PRINT_ATTRf(config, p_hex); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRf(sample_type, p_sample_type); + PRINT_ATTRf(read_format, p_read_format); + + PRINT_ATTRf(disabled, p_unsigned); + PRINT_ATTRf(inherit, p_unsigned); + PRINT_ATTRf(pinned, p_unsigned); + PRINT_ATTRf(exclusive, p_unsigned); + PRINT_ATTRf(exclude_user, p_unsigned); + PRINT_ATTRf(exclude_kernel, p_unsigned); + PRINT_ATTRf(exclude_hv, p_unsigned); + PRINT_ATTRf(exclude_idle, p_unsigned); + PRINT_ATTRf(mmap, p_unsigned); + PRINT_ATTRf(comm, p_unsigned); + PRINT_ATTRf(freq, p_unsigned); + PRINT_ATTRf(inherit_stat, p_unsigned); + PRINT_ATTRf(enable_on_exec, p_unsigned); + PRINT_ATTRf(task, p_unsigned); + PRINT_ATTRf(watermark, p_unsigned); + PRINT_ATTRf(precise_ip, p_unsigned); + PRINT_ATTRf(mmap_data, p_unsigned); + PRINT_ATTRf(sample_id_all, p_unsigned); + PRINT_ATTRf(exclude_host, p_unsigned); + PRINT_ATTRf(exclude_guest, p_unsigned); + PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); + PRINT_ATTRf(exclude_callchain_user, p_unsigned); + PRINT_ATTRf(mmap2, p_unsigned); + PRINT_ATTRf(comm_exec, p_unsigned); + PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); + PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); + PRINT_ATTRf(ksymbol, p_unsigned); + PRINT_ATTRf(bpf_event, p_unsigned); + PRINT_ATTRf(aux_output, p_unsigned); + + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + 
PRINT_ATTRf(bp_type, p_unsigned); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(branch_sample_type, p_branch_sample_type); + PRINT_ATTRf(sample_regs_user, p_hex); + PRINT_ATTRf(sample_stack_user, p_unsigned); + PRINT_ATTRf(clockid, p_signed); + PRINT_ATTRf(sample_regs_intr, p_hex); + PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); + + return ret; +} diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index fb597fa94234..5608da82ad23 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -20,7 +20,6 @@ #include "debug.h" #include "pmu.h" #include "parse-events.h" -#include "cpumap.h" #include "header.h" #include "pmu-events/pmu-events.h" #include "string2.h" diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b8e0967c5c21..91cab5f669d2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2331,6 +2331,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev) } } zfree(&tev->args); + tev->nargs = 0; } struct kprobe_blacklist_node { diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index d13db55a2feb..b659466ea498 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -16,6 +16,7 @@ #include "strlist.h" #include "strfilter.h" #include "debug.h" +#include "build-id.h" #include "dso.h" #include "color.h" #include "symbol.h" diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 505905fc21c5..cd9f95e5044e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1245,6 +1245,17 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, return n; } +static bool trace_event_finder_overlap(struct trace_event_finder *tf) +{ + int i; + + for (i = 0; i < tf->ntevs; i++) { + if (tf->pf.addr == tf->tevs[i].point.address) + return true; + } + return false; +} + /* Add a 
found probe point into trace event list */ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { @@ -1255,6 +1266,14 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) struct perf_probe_arg *args = NULL; int ret, i; + /* + * For some reason (e.g. different columns assigned to the same address) + * this callback can be called with an address that was already added. + * Ignore such duplicate calls. + */ + if (trace_event_finder_overlap(tf)) + return 0; + /* Check number of tevs */ if (tf->ntevs == tf->max_tevs) { pr_warning("Too many( > %d) probe point found.\n", diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index c6dd478956f1..9af183860fbd 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/python.c util/cap.c util/evlist.c util/evsel.c +util/perf_event_attr_fprintf.c util/cpumap.c util/memswap.c util/mmap.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 07ca4535e6f7..53f31053a27a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,17 +6,15 @@ #include #include #include -#include "debug.h" #include "evlist.h" #include "callchain.h" #include "evsel.h" #include "event.h" -#include "cpumap.h" #include "print_binary.h" #include "thread_map.h" #include "trace-event.h" #include "mmap.h" -#include "util.h" +#include #include "../perf-sys.h" #if PY_MAJOR_VERSION < 3 @@ -61,6 +59,8 @@ int parse_callchain_record(const char *arg __maybe_unused, */ int verbose; +int eprintf(int level, int var, const char *fmt, ...); + int eprintf(int level, int var, const char *fmt, ...) 
{ va_list args; @@ -884,7 +884,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist, static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) { - perf_evlist__exit(&pevlist->evlist); + evlist__exit(&pevlist->evlist); Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } @@ -899,7 +899,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages) < 0) { + if (evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -918,7 +918,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) return NULL; - n = perf_evlist__poll(evlist, timeout); + n = evlist__poll(evlist, timeout); if (n < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; @@ -935,17 +935,17 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, PyObject *list = PyList_New(0); int i; - for (i = 0; i < evlist->pollfd.nr; ++i) { + for (i = 0; i < evlist->core.pollfd.nr; ++i) { PyObject *file; #if PY_MAJOR_VERSION < 3 - FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); + FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; file = PyFile_FromFile(fp, "perf", "r", NULL); #else - file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, + file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 0); #endif if (file == NULL) @@ -984,14 +984,14 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, return Py_BuildValue("i", evlist->core.nr_entries); } -static struct perf_mmap *get_md(struct evlist *evlist, int cpu) +static struct mmap *get_md(struct evlist *evlist, int cpu) { int i; - for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *md = &evlist->mmap[i]; + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *md = &evlist->mmap[i]; - if (md->cpu == cpu) + if (md->core.cpu == cpu) 
return md; } @@ -1005,7 +1005,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, union perf_event *event; int sample_id_all = 1, cpu; static char *kwlist[] = { "cpu", "sample_id_all", NULL }; - struct perf_mmap *md; + struct mmap *md; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 286fe816c0f3..8579505c29a4 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,7 +2,6 @@ #include "debug.h" #include "evlist.h" #include "evsel.h" -#include "cpumap.h" #include "parse-events.h" #include #include @@ -10,7 +9,6 @@ #include #include #include -#include "util.h" #include "cloexec.h" #include "record.h" #include "../perf-sys.h" @@ -32,7 +30,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) if (parse_events(evlist, str, NULL)) goto out_delete; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); while (1) { fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); @@ -173,7 +171,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_sample_identifier = perf_can_sample_identifier(); sample_id = true; } else if (evlist->core.nr_entries > 1) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.sample_type == first->core.attr.sample_type) @@ -278,7 +276,7 @@ bool perf_evlist__can_select_event(struct evlist *evlist, const char *str) if (err) goto out_delete; - evsel = perf_evlist__last(temp_evlist); + evsel = evlist__last(temp_evlist); if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index 5e52e7baa7b6..f3d29d8ddc99 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: 
GPL-2.0 #include "util.h" #include "rwsem.h" diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 24a99909d8b3..6785cd87aa4d 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -151,7 +151,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "evlist.h" diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 4d9593e331ea..05b43ab4eeef 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -22,7 +22,6 @@ #include #include "debug.h" -#include "util.h" #include "session.h" #include "evlist.h" #include "color.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 666a56e88d8e..5d341efc3237 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -37,7 +37,6 @@ #include "../dso.h" #include "../callchain.h" #include "../evsel.h" -#include "../util.h" #include "../event.h" #include "../thread.h" #include "../comm.h" @@ -49,7 +48,6 @@ #include "map.h" #include "symbol.h" #include "thread_map.h" -#include "cpumap.h" #include "print_binary.h" #include "stat.h" #include "mem-events.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e9e4a04f15db..061bb4d6a3f5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -22,7 +22,6 @@ #include "symbol.h" #include "session.h" #include "tool.h" -#include "cpumap.h" #include "perf_regs.h" #include "asm/bug.h" #include "auxtrace.h" @@ -30,10 +29,11 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" -#include "util.h" #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" +#include +#include #ifdef HAVE_ZSTD_SUPPORT static int perf_session__process_compressed_event(struct perf_session *session, @@ -187,6 +187,7 @@ static int 
ordered_events__deliver_event(struct ordered_events *oe, struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool) { + int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); if (!session) @@ -201,13 +202,15 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_env__init(&session->header.env); if (data) { - if (perf_data__open(data)) + ret = perf_data__open(data); + if (ret < 0) goto out_delete; session->data = data; if (perf_data__is_read(data)) { - if (perf_session__open(session) < 0) + ret = perf_session__open(session); + if (ret < 0) goto out_delete; /* @@ -222,8 +225,11 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_evlist__init_trace_event_sample_raw(session->evlist); /* Open the directory data. */ - if (data->is_dir && perf_data__open_dir(data)) + if (data->is_dir) { + ret = perf_data__open_dir(data); + if (ret) goto out_delete; + } } } else { session->machines.host.env = &perf_env; @@ -256,7 +262,7 @@ struct perf_session *perf_session__new(struct perf_data *data, out_delete: perf_session__delete(session); out: - return NULL; + return ERR_PTR(ret); } static void perf_session__delete_threads(struct perf_session *session) @@ -1317,6 +1323,7 @@ static int deliver_sample_value(struct evlist *evlist, struct machine *machine) { struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); + struct evsel *evsel; if (sid) { sample->id = v->id; @@ -1336,7 +1343,8 @@ static int deliver_sample_value(struct evlist *evlist, if (!sample->period) return 0; - return tool->sample(tool, event, sample, sid->evsel, machine); + evsel = container_of(sid->evsel, struct evsel, core); + return tool->sample(tool, event, sample, evsel, machine); } static int deliver_sample_group(struct evlist *evlist, @@ -2412,73 +2420,3 @@ int perf_event__process_id_index(struct perf_session *session, } return 0; } - -int perf_event__synthesize_id_index(struct perf_tool *tool, - 
perf_event__handler_t process, - struct evlist *evlist, - struct machine *machine) -{ - union perf_event *ev; - struct evsel *evsel; - size_t nr = 0, i = 0, sz, max_nr, n; - int err; - - pr_debug2("Synthesizing id index\n"); - - max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / - sizeof(struct id_index_entry); - - evlist__for_each_entry(evlist, evsel) - nr += evsel->ids; - - n = nr > max_nr ? max_nr : nr; - sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); - ev = zalloc(sz); - if (!ev) - return -ENOMEM; - - ev->id_index.header.type = PERF_RECORD_ID_INDEX; - ev->id_index.header.size = sz; - ev->id_index.nr = n; - - evlist__for_each_entry(evlist, evsel) { - u32 j; - - for (j = 0; j < evsel->ids; j++) { - struct id_index_entry *e; - struct perf_sample_id *sid; - - if (i >= n) { - err = process(tool, ev, NULL, machine); - if (err) - goto out_err; - nr -= n; - i = 0; - } - - e = &ev->id_index.entries[i++]; - - e->id = evsel->id[j]; - - sid = perf_evlist__id2sid(evlist, e->id); - if (!sid) { - free(ev); - return -ENOENT; - } - - e->idx = sid->idx; - e->cpu = sid->cpu; - e->tid = sid->tid; - } - } - - sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry); - ev->id_index.header.size = sz; - ev->id_index.nr = nr; - - err = process(tool, ev, NULL, machine); -out_err: - free(ev); - - return err; -} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b7aa076ab6fd..b4c9428c18f0 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -138,9 +138,4 @@ int perf_session__deliver_synth_event(struct perf_session *session, int perf_event__process_id_index(struct perf_session *session, union perf_event *event); -int perf_event__synthesize_id_index(struct perf_tool *tool, - perf_event__handler_t process, - struct evlist *evlist, - struct machine *machine); - #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a2308eb77681..43d1d410854a 
100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2329,7 +2329,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) if (nr > evlist->core.nr_entries) return NULL; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); while (--nr > 0) evsel = perf_evsel__next(evsel); diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index adfcf1ff464c..d84ed8b6caaa 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -15,7 +15,7 @@ #include #include "srccode.h" #include "debug.h" -#include "util.h" +#include // page_size #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 70c87fdb2a43..2c41d47f6f83 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -738,6 +738,8 @@ static void generic_metric(struct perf_stat_config *config, char *n, *pn; expr__ctx_init(&pctx); + /* Must be first id entry */ + expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -776,8 +778,6 @@ static void generic_metric(struct perf_stat_config *config, expr__add_id(&pctx, n, avg_stats(stats)*scale); } - expr__add_id(&pctx, name, avg); - if (!metric_events[i]) { const char *p = metric_expr; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8f1ea27f976f..ebdd130557fb 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -4,6 +4,7 @@ #include #include #include "counts.h" +#include "cpumap.h" #include "debug.h" #include "header.h" #include "stat.h" @@ -161,6 +162,15 @@ static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) evsel->prev_raw_counts = NULL; } +static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) +{ + if (evsel->prev_raw_counts) { + evsel->prev_raw_counts->aggr.val = 0; + evsel->prev_raw_counts->aggr.ena = 0; + evsel->prev_raw_counts->aggr.run = 0; + } +} + static int 
perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); @@ -211,6 +221,14 @@ void perf_evlist__reset_stats(struct evlist *evlist) } } +void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + perf_evsel__reset_prev_raw_counts(evsel); +} + static void zero_per_pkg(struct evsel *counter) { if (counter->per_pkg_mask) @@ -318,7 +336,7 @@ static int process_counter_maps(struct perf_stat_config *config, int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { @@ -493,45 +511,3 @@ int create_perf_stat_counter(struct evsel *evsel, return perf_evsel__open_per_thread(evsel, evsel->core.threads); } - -int perf_stat_synthesize_config(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs) -{ - int err; - - if (attrs) { - err = perf_event__synthesize_attrs(tool, evlist, process); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - return err; - } - } - - err = perf_event__synthesize_extra_attr(tool, evlist, process, - attrs); - - err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_stat_config(tool, config, process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize config.\n"); - return err; - } - - return 0; -} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 14fe3e548229..edbeb2f63e8d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,8 +7,9 @@ #include #include #include "rblist.h" -#include 
"event.h" +struct perf_cpu_map; +struct perf_stat_config; struct timespec; struct stats { @@ -192,6 +193,7 @@ void perf_stat__collect_metric_expr(struct evlist *); int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct evlist *evlist); void perf_evlist__reset_stats(struct evlist *evlist); +void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); @@ -210,11 +212,6 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target); -int perf_stat_synthesize_config(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs); void perf_evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 582f4a69cd48..96f941e01681 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -17,11 +17,11 @@ #include #include #include +#include #include #include "env.h" #include "svghelper.h" -#include "cpumap.h" static u64 first_time, last_time; static u64 turbo_frequency, max_freq; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9428639872a6..66f4be1df573 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,6 +7,7 @@ #include #include +#include "dso.h" #include "map.h" #include "map_groups.h" #include "symbol.h" @@ -16,10 +17,12 @@ #include "machine.h" #include "vdso.h" #include "debug.h" -#include "util.h" +#include "util/copyfile.h" #include +#include #include #include +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7e2813ec9498..d6e99af263ec 100644 --- 
a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,8 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 #include "dso.h" #include "symbol.h" #include "symsrc.h" -#include "util.h" #include #include @@ -13,6 +11,7 @@ #include #include #include +#include static bool check_need_swap(int file_endian) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 765c75df2904..a8f80e427674 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,7 +19,7 @@ #include "build-id.h" #include "cap.h" #include "dso.h" -#include "util.h" +#include "util.h" // lsdir() #include "debug.h" #include "event.h" #include "machine.h" diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c new file mode 100644 index 000000000000..807cbca403a7 --- /dev/null +++ b/tools/perf/util/synthetic-events.c @@ -0,0 +1,1884 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/dso.h" +#include "util/event.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/map.h" +#include "util/map_symbol.h" +#include "util/branch.h" +#include "util/memswap.h" +#include "util/namespaces.h" +#include "util/session.h" +#include "util/stat.h" +#include "util/symbol.h" +#include "util/synthetic-events.h" +#include "util/target.h" +#include "util/time-utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // page_size +#include +#include +#include +#include +#include +#include +#include +#include +#include /* To get things like MAP_HUGETLB even on older libc headers */ +#include +#include +#include +#include +#include + +#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 + +unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; + +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) +{ + struct perf_sample synth_sample = { + .pid = -1, + .tid 
= -1, + .time = -1, + .stream_id = -1, + .cpu = -1, + .period = 1, + .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, + }; + + return process(tool, event, &synth_sample, machine); +}; + +/* + * Assumes that the first 4095 bytes of /proc/pid/status contain + * the comm, tgid and ppid. + */ +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid) +{ + char filename[PATH_MAX]; + char bf[4096]; + int fd; + size_t size = 0; + ssize_t n; + char *name, *tgids, *ppids; + + *tgid = -1; + *ppid = -1; + + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + pr_debug("couldn't open %s\n", filename); + return -1; + } + + n = read(fd, bf, sizeof(bf) - 1); + close(fd); + if (n <= 0) { + pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", + pid); + return -1; + } + bf[n] = '\0'; + + name = strstr(bf, "Name:"); + tgids = strstr(bf, "Tgid:"); + ppids = strstr(bf, "PPid:"); + + if (name) { + char *nl; + + name = skip_spaces(name + 5); /* strlen("Name:") */ + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + size = strlen(name); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else { + pr_debug("Name: string not found for pid %d\n", pid); + } + + if (tgids) { + tgids += 5; /* strlen("Tgid:") */ + *tgid = atoi(tgids); + } else { + pr_debug("Tgid: string not found for pid %d\n", pid); + } + + if (ppids) { + ppids += 5; /* strlen("PPid:") */ + *ppid = atoi(ppids); + } else { + pr_debug("PPid: string not found for pid %d\n", pid); + } + + return 0; +} + +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, + struct machine *machine, + pid_t *tgid, pid_t *ppid) +{ + size_t size; + + *ppid = -1; + + memset(&event->comm, 0, sizeof(event->comm)); + + if (machine__is_host(machine)) { + if (perf_event__get_comm_ids(pid, event->comm.comm, + sizeof(event->comm.comm), + tgid, ppid) != 0) { + return -1; + } + }
else { + *tgid = machine->pid; + } + + if (*tgid < 0) + return -1; + + event->comm.pid = *tgid; + event->comm.header.type = PERF_RECORD_COMM; + + size = strlen(event->comm.comm) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + memset(event->comm.comm + size, 0, machine->id_hdr_size); + event->comm.header.size = (sizeof(event->comm) - + (sizeof(event->comm.comm) - size) + + machine->id_hdr_size); + event->comm.tid = pid; + + return 0; +} + +pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid, ppid; + + if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + return -1; + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return tgid; +} + +static void perf_event__get_ns_link_info(pid_t pid, const char *ns, + struct perf_ns_link_info *ns_link_info) +{ + struct stat64 st; + char proc_ns[128]; + + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); + if (stat64(proc_ns, &st) == 0) { + ns_link_info->dev = st.st_dev; + ns_link_info->ino = st.st_ino; + } +} + +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine) +{ + u32 idx; + struct perf_ns_link_info *ns_link_info; + + if (!tool || !tool->namespace_events) + return 0; + + memset(&event->namespaces, 0, (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size)); + + event->namespaces.pid = tgid; + event->namespaces.tid = pid; + + event->namespaces.nr_namespaces = NR_NAMESPACES; + + ns_link_info = event->namespaces.link_info; + + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) + perf_event__get_ns_link_info(pid, perf_ns__name(idx), + &ns_link_info[idx]); + + event->namespaces.header.type = PERF_RECORD_NAMESPACES; + + event->namespaces.header.size = (sizeof(event->namespaces) + + 
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + +static int perf_event__synthesize_fork(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, pid_t ppid, + perf_event__handler_t process, + struct machine *machine) +{ + memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); + + /* + * for main thread set parent to ppid from status file. For other + * threads set parent pid to main thread. ie., assume main thread + * spawns all threads in a process + */ + if (tgid == pid) { + event->fork.ppid = ppid; + event->fork.ptid = ppid; + } else { + event->fork.ppid = tgid; + event->fork.ptid = tgid; + } + event->fork.pid = tgid; + event->fork.tid = pid; + event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; + + event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + +int perf_event__synthesize_mmap_events(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data) +{ + char filename[PATH_MAX]; + FILE *fp; + unsigned long long t; + bool truncation = false; + unsigned long long timeout = proc_map_timeout * 1000000ULL; + int rc = 0; + const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); + int hugetlbfs_mnt_len = hugetlbfs_mnt ? 
strlen(hugetlbfs_mnt) : 0; + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return -1; + } + + event->header.type = PERF_RECORD_MMAP2; + t = rdclock(); + + while (1) { + char bf[BUFSIZ]; + char prot[5]; + char execname[PATH_MAX]; + char anonstr[] = "//anon"; + unsigned int ino; + size_t size; + ssize_t n; + + if (fgets(bf, sizeof(bf), fp) == NULL) + break; + + if ((rdclock() - t) > timeout) { + pr_warning("Reading %s time out. " + "You may want to increase " + "the time limit by --proc-map-timeout\n", + filename); + truncation = true; + goto out; + } + + /* ensure null termination since stack will be reused. */ + strcpy(execname, ""); + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + + /* + * Anon maps don't have the execname. 
+ */ + if (n < 7) + continue; + + event->mmap2.ino = (u64)ino; + + /* + * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_USER; + else + event->header.misc = PERF_RECORD_MISC_GUEST_USER; + + /* map protection and flags bits */ + event->mmap2.prot = 0; + event->mmap2.flags = 0; + if (prot[0] == 'r') + event->mmap2.prot |= PROT_READ; + if (prot[1] == 'w') + event->mmap2.prot |= PROT_WRITE; + if (prot[2] == 'x') + event->mmap2.prot |= PROT_EXEC; + + if (prot[3] == 's') + event->mmap2.flags |= MAP_SHARED; + else + event->mmap2.flags |= MAP_PRIVATE; + + if (prot[2] != 'x') { + if (!mmap_data || prot[0] != 'r') + continue; + + event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; + } + +out: + if (truncation) + event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; + + if (!strcmp(execname, "")) + strcpy(execname, anonstr); + + if (hugetlbfs_mnt_len && + !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { + strcpy(execname, anonstr); + event->mmap2.flags |= MAP_HUGETLB; + } + + size = strlen(execname) + 1; + memcpy(event->mmap2.filename, execname, size); + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { + rc = -1; + break; + } + + if (truncation) + break; + } + + fclose(fp); + return rc; +} + +int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, + struct machine *machine) +{ + int rc = 0; + struct map *pos; + struct maps *maps = machine__kernel_maps(machine); + union perf_event *event = zalloc((sizeof(event->mmap) + + machine->id_hdr_size)); + if (event == NULL) { 
+ pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } + + event->header.type = PERF_RECORD_MMAP; + + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + + for (pos = maps__first(maps); pos; pos = map__next(pos)) { + size_t size; + + if (!__map__is_kmodule(pos)) + continue; + + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pid = machine->pid; + + memcpy(event->mmap.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { + rc = -1; + break; + } + } + + free(event); + return rc; +} + +static int __event__synthesize_thread(union perf_event *comm_event, + union perf_event *mmap_event, + union perf_event *fork_event, + union perf_event *namespaces_event, + pid_t pid, int full, perf_event__handler_t process, + struct perf_tool *tool, struct machine *machine, bool mmap_data) +{ + char filename[PATH_MAX]; + DIR *tasks; + struct dirent *dirent; + pid_t tgid, ppid; + int rc = 0; + + /* special case: only send one comm event using passed in pid */ + if (!full) { + tgid = perf_event__synthesize_comm(tool, comm_event, pid, + process, machine); + + if (tgid == -1) + return -1; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, + tgid, process, machine) < 0) + return -1; + + /* + * send mmap only for thread group leader + * see thread__init_map_groups + */ + if (pid == tgid && + 
perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data)) + return -1; + + return 0; + } + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(filename, sizeof(filename), "%s/proc/%d/task", + machine->root_dir, pid); + + tasks = opendir(filename); + if (tasks == NULL) { + pr_debug("couldn't open %s\n", filename); + return 0; + } + + while ((dirent = readdir(tasks)) != NULL) { + char *end; + pid_t _pid; + + _pid = strtol(dirent->d_name, &end, 10); + if (*end) + continue; + + rc = -1; + if (perf_event__prepare_comm(comm_event, _pid, machine, + &tgid, &ppid) != 0) + break; + + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + ppid, process, machine) < 0) + break; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, + tgid, process, machine) < 0) + break; + + /* + * Send the prepared comm event + */ + if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) + break; + + rc = 0; + if (_pid == pid) { + /* process the parent's maps too */ + rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data); + if (rc) + break; + } + } + + closedir(tasks); + return rc; +} + +int perf_event__synthesize_thread_map(struct perf_tool *tool, + struct perf_thread_map *threads, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data) +{ + union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; + int err = -1, thread, j; + + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; + + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct 
perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + + err = 0; + for (thread = 0; thread < threads->nr; ++thread) { + if (__event__synthesize_thread(comm_event, mmap_event, + fork_event, namespaces_event, + perf_thread_map__pid(threads, thread), 0, + process, tool, machine, + mmap_data)) { + err = -1; + break; + } + + /* + * comm.pid is set to thread group id by + * perf_event__synthesize_comm + */ + if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) { + bool need_leader = true; + + /* is thread group leader in thread_map? */ + for (j = 0; j < threads->nr; ++j) { + if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) { + need_leader = false; + break; + } + } + + /* if not, generate events for it */ + if (need_leader && + __event__synthesize_thread(comm_event, mmap_event, + fork_event, namespaces_event, + comm_event->comm.pid, 0, + process, tool, machine, + mmap_data)) { + err = -1; + break; + } + } + } + free(namespaces_event); +out_free_fork: + free(fork_event); +out_free_mmap: + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; +} + +static int __perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + struct dirent **dirent, + int start, + int num) +{ + union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; + int err = -1; + char *end; + pid_t pid; + int i; + + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; + + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + 
machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + + for (i = start; i < start + num; i++) { + if (!isdigit(dirent[i]->d_name[0])) + continue; + + pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); + /* only interested in proper numerical dirents */ + if (*end) + continue; + /* + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data); + } + err = 0; + + free(namespaces_event); +out_free_fork: + free(fork_event); +out_free_mmap: + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; +} + +struct synthesize_threads_arg { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + bool mmap_data; + struct dirent **dirent; + int num; + int start; +}; + +static void *synthesize_threads_worker(void *arg) +{ + struct synthesize_threads_arg *args = arg; + + __perf_event__synthesize_threads(args->tool, args->process, + args->machine, args->mmap_data, + args->dirent, + args->start, args->num); + return NULL; +} + +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int nr_threads_synthesize) +{ + struct synthesize_threads_arg *args = NULL; + pthread_t *synthesize_threads = NULL; + char proc_path[PATH_MAX]; + struct dirent **dirent; + int num_per_thread; + int m, n, i, j; + int thread_nr; + int base = 0; + int err = -1; + + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); + n = scandir(proc_path, &dirent, 0, alphasort); + if (n < 0) + return err; + + if (nr_threads_synthesize == UINT_MAX) + thread_nr = sysconf(_SC_NPROCESSORS_ONLN); + else + thread_nr = nr_threads_synthesize; + + if (thread_nr <= 1) { + err = __perf_event__synthesize_threads(tool, 
process, + machine, mmap_data, + dirent, base, n); + goto free_dirent; + } + if (thread_nr > n) + thread_nr = n; + + synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + if (synthesize_threads == NULL) + goto free_dirent; + + args = calloc(sizeof(*args), thread_nr); + if (args == NULL) + goto free_threads; + + num_per_thread = n / thread_nr; + m = n % thread_nr; /* the first m workers take one extra dirent each */ + for (i = 0; i < thread_nr; i++) { + args[i].tool = tool; + args[i].process = process; + args[i].machine = machine; + args[i].mmap_data = mmap_data; + args[i].dirent = dirent; + } + for (i = 0; i < m; i++) { + args[i].num = num_per_thread + 1; + args[i].start = i * args[i].num; + } + if (i != 0) + base = args[i-1].start + args[i-1].num; + for (j = i; j < thread_nr; j++) { + args[j].num = num_per_thread; + args[j].start = base + (j - i) * args[i].num; + } + + for (i = 0; i < thread_nr; i++) { + if (pthread_create(&synthesize_threads[i], NULL, + synthesize_threads_worker, &args[i])) + goto out_join; + } + err = 0; +out_join: /* here i is the number of threads created: thread_nr on success, else the index of the pthread_create() that failed */ + for (j = 0; j < i; j++) /* never join a slot whose thread was not created */ + pthread_join(synthesize_threads[j], NULL); + free(args); +free_threads: + free(synthesize_threads); +free_dirent: + for (i = 0; i < n; i++) + zfree(&dirent[i]); + free(dirent); + + return err; +} + +int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + size_t size; + struct map *map = machine__kernel_map(machine); + struct kmap *kmap; + int err; + union perf_event *event; + + if (map == NULL) + return -1; + + kmap = map__kmap(map); + if (!kmap->ref_reloc_sym) + return -1; + + /* + * We should get this from /sys/kernel/sections/.text, but till that is + * available use this, and after it is use this as a fallback for older + * kernels.
+ */ + event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); + if (event == NULL) { + pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } + + if (machine__is_host(machine)) { + /* + * kernel uses PERF_RECORD_MISC_USER for user space maps, + * see kernel/perf_event.c __perf_event_mmap + */ + event->header.misc = PERF_RECORD_MISC_KERNEL; + } else { + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + } + + size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); + event->mmap.pgoff = kmap->ref_reloc_sym->addr; + event->mmap.start = map->start; + event->mmap.len = map->end - event->mmap.start; + event->mmap.pid = machine->pid; + + err = perf_tool__process_synth_event(tool, event, machine, process); + free(event); + + return err; +} + +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int err; + + err = __perf_event__synthesize_kernel_mmap(tool, process, machine); + if (err < 0) + return err; + + return perf_event__synthesize_extra_kmaps(tool, process, machine); +} + +int perf_event__synthesize_thread_map2(struct perf_tool *tool, + struct perf_thread_map *threads, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event *event; + int i, err, size; + + size = sizeof(event->thread_map); + size += threads->nr * sizeof(event->thread_map.entries[0]); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = PERF_RECORD_THREAD_MAP; + event->header.size = size; + event->thread_map.nr = threads->nr; + + for (i = 0; i < threads->nr; i++) { + struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i]; + char 
*comm = perf_thread_map__comm(threads, i); + + if (!comm) + comm = (char *) ""; + + entry->pid = perf_thread_map__pid(threads, i); + strncpy((char *) &entry->comm, comm, sizeof(entry->comm)); /* NOTE(review): strncpy leaves entry->comm unterminated if comm fills it - confirm comm is always shorter */ + } + + err = process(tool, event, NULL, machine); + + free(event); + return err; +} + +static void synthesize_cpus(struct cpu_map_entries *cpus, + struct perf_cpu_map *map) +{ + int i; + + cpus->nr = map->nr; + + for (i = 0; i < map->nr; i++) + cpus->cpu[i] = map->map[i]; +} + +static void synthesize_mask(struct perf_record_record_cpu_map *mask, + struct perf_cpu_map *map, int max) +{ + int i; + + mask->nr = BITS_TO_LONGS(max); + mask->long_size = sizeof(long); + + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], mask->mask); +} + +static size_t cpus_size(struct perf_cpu_map *map) +{ + return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); +} + +static size_t mask_size(struct perf_cpu_map *map, int *max) +{ + int i; + + *max = 0; + + for (i = 0; i < map->nr; i++) { + /* bit position of the cpu is + 1 */ + int bit = map->map[i] + 1; + + if (bit > *max) + *max = bit; + } + + return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long); +} + +void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max) +{ + size_t size_cpus, size_mask; + bool is_dummy = perf_cpu_map__empty(map); + + /* + * Both array and mask data have variable size based + * on the number of cpus and their actual values. + * The size of the 'struct perf_record_cpu_map_data' is: + * + * array = size of 'struct cpu_map_entries' + + * number of cpus * sizeof(u16) + * + * mask = size of 'struct perf_record_record_cpu_map' + + * maximum cpu bit converted to size of longs + * + * and finally + the size of 'struct perf_record_cpu_map_data'.
+ */ + size_cpus = cpus_size(map); + size_mask = mask_size(map, max); + + if (is_dummy || (size_cpus < size_mask)) { + *size += size_cpus; + *type = PERF_CPU_MAP__CPUS; + } else { + *size += size_mask; + *type = PERF_CPU_MAP__MASK; + } + + *size += sizeof(struct perf_record_cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); + return zalloc(*size); +} + +void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, + u16 type, int max) +{ + data->type = type; + + switch (type) { + case PERF_CPU_MAP__CPUS: + synthesize_cpus((struct cpu_map_entries *) data->data, map); + break; + case PERF_CPU_MAP__MASK: + synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); + default: + break; + }; +} + +static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) +{ + size_t size = sizeof(struct perf_record_cpu_map); + struct perf_record_cpu_map *event; + int max; + u16 type; + + event = cpu_map_data__alloc(map, &size, &type, &max); + if (!event) + return NULL; + + event->header.type = PERF_RECORD_CPU_MAP; + event->header.size = size; + event->data.type = type; + + cpu_map_data__synthesize(&event->data, map, type, max); + return event; +} + +int perf_event__synthesize_cpu_map(struct perf_tool *tool, + struct perf_cpu_map *map, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_cpu_map *event; + int err; + + event = cpu_map_event__new(map); + if (!event) + return -ENOMEM; + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat_config(struct perf_tool *tool, + struct perf_stat_config *config, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat_config *event; + int size, i = 0, err; + + size = sizeof(*event); + size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0])); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = 
PERF_RECORD_STAT_CONFIG; + event->header.size = size; + event->nr = PERF_STAT_CONFIG_TERM__MAX; + +#define ADD(__term, __val) \ + event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \ + event->data[i].val = __val; \ + i++; + + ADD(AGGR_MODE, config->aggr_mode) + ADD(INTERVAL, config->interval) + ADD(SCALE, config->scale) + + WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, + "stat config terms unbalanced\n"); +#undef ADD + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat(struct perf_tool *tool, + u32 cpu, u32 thread, u64 id, + struct perf_counts_values *count, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat event; + + event.header.type = PERF_RECORD_STAT; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.id = id; + event.cpu = cpu; + event.thread = thread; + event.val = count->val; + event.ena = count->ena; + event.run = count->run; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +int perf_event__synthesize_stat_round(struct perf_tool *tool, + u64 evtime, u64 type, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat_round event; + + event.header.type = PERF_RECORD_STAT_ROUND; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.time = evtime; + event.type = type; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) +{ + size_t sz, result = sizeof(struct perf_record_sample); + + if (type & PERF_SAMPLE_IDENTIFIER) + result += sizeof(u64); + + if (type & PERF_SAMPLE_IP) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TID) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TIME) + result += sizeof(u64); + + if (type & PERF_SAMPLE_ADDR) + result += sizeof(u64); + + if (type & PERF_SAMPLE_ID) + result += sizeof(u64); + + if 
(type & PERF_SAMPLE_STREAM_ID) + result += sizeof(u64); + + if (type & PERF_SAMPLE_CPU) + result += sizeof(u64); + + if (type & PERF_SAMPLE_PERIOD) + result += sizeof(u64); + + if (type & PERF_SAMPLE_READ) { + result += sizeof(u64); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + result += sizeof(u64); + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + result += sizeof(u64); + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_GROUP) { + sz = sample->read.group.nr * + sizeof(struct sample_read_value); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_CALLCHAIN) { + sz = (sample->callchain->nr + 1) * sizeof(u64); + result += sz; + } + + if (type & PERF_SAMPLE_RAW) { + result += sizeof(u32); + result += sample->raw_size; + } + + if (type & PERF_SAMPLE_BRANCH_STACK) { + sz = sample->branch_stack->nr * sizeof(struct branch_entry); + sz += sizeof(u64); + result += sz; + } + + if (type & PERF_SAMPLE_REGS_USER) { + if (sample->user_regs.abi) { + result += sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + sz = sample->user_stack.size; + result += sizeof(u64); + if (sz) { + result += sz; + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_WEIGHT) + result += sizeof(u64); + + if (type & PERF_SAMPLE_DATA_SRC) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TRANSACTION) + result += sizeof(u64); + + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + result += sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_PHYS_ADDR) + result += sizeof(u64); + + return result; +} + +int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, + const struct perf_sample *sample) +{ + __u64 *array; + size_t sz; + /* + * used for 
cross-endian analysis. See git commit 65014ab3 + * for why this goofiness is needed. + */ + union u64_swap u; + + array = event->sample.array; + + if (type & PERF_SAMPLE_IDENTIFIER) { + *array = sample->id; + array++; + } + + if (type & PERF_SAMPLE_IP) { + *array = sample->ip; + array++; + } + + if (type & PERF_SAMPLE_TID) { + u.val32[0] = sample->pid; + u.val32[1] = sample->tid; + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_TIME) { + *array = sample->time; + array++; + } + + if (type & PERF_SAMPLE_ADDR) { + *array = sample->addr; + array++; + } + + if (type & PERF_SAMPLE_ID) { + *array = sample->id; + array++; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + *array = sample->stream_id; + array++; + } + + if (type & PERF_SAMPLE_CPU) { + u.val32[0] = sample->cpu; + u.val32[1] = 0; + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_PERIOD) { + *array = sample->period; + array++; + } + + if (type & PERF_SAMPLE_READ) { + if (read_format & PERF_FORMAT_GROUP) + *array = sample->read.group.nr; + else + *array = sample->read.one.value; + array++; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + *array = sample->read.time_enabled; + array++; + } + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + *array = sample->read.time_running; + array++; + } + + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_GROUP) { + sz = sample->read.group.nr * + sizeof(struct sample_read_value); + memcpy(array, sample->read.group.values, sz); + array = (void *)array + sz; + } else { + *array = sample->read.one.id; + array++; + } + } + + if (type & PERF_SAMPLE_CALLCHAIN) { + sz = (sample->callchain->nr + 1) * sizeof(u64); + memcpy(array, sample->callchain, sz); + array = (void *)array + sz; + } + + if (type & PERF_SAMPLE_RAW) { + u.val32[0] = sample->raw_size; + *array = u.val64; + array = (void *)array + sizeof(u32); + + memcpy(array, sample->raw_data, sample->raw_size); + array = (void *)array + sample->raw_size; + } + + 
if (type & PERF_SAMPLE_BRANCH_STACK) { + sz = sample->branch_stack->nr * sizeof(struct branch_entry); + sz += sizeof(u64); + memcpy(array, sample->branch_stack, sz); + array = (void *)array + sz; + } + + if (type & PERF_SAMPLE_REGS_USER) { + if (sample->user_regs.abi) { + *array++ = sample->user_regs.abi; + sz = hweight64(sample->user_regs.mask) * sizeof(u64); + memcpy(array, sample->user_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + sz = sample->user_stack.size; + *array++ = sz; + if (sz) { + memcpy(array, sample->user_stack.data, sz); + array = (void *)array + sz; + *array++ = sz; + } + } + + if (type & PERF_SAMPLE_WEIGHT) { + *array = sample->weight; + array++; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + *array = sample->data_src; + array++; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + *array = sample->transaction; + array++; + } + + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + *array++ = sample->intr_regs.abi; + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + memcpy(array, sample->intr_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + + if (type & PERF_SAMPLE_PHYS_ADDR) { + *array = sample->phys_addr; + array++; + } + + return 0; +} + +int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, + struct evlist *evlist, struct machine *machine) +{ + union perf_event *ev; + struct evsel *evsel; + size_t nr = 0, i = 0, sz, max_nr, n; + int err; + + pr_debug2("Synthesizing id index\n"); + + max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / + sizeof(struct id_index_entry); + + evlist__for_each_entry(evlist, evsel) + nr += evsel->core.ids; + + n = nr > max_nr ? 
max_nr : nr; + sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); + ev = zalloc(sz); + if (!ev) + return -ENOMEM; + + ev->id_index.header.type = PERF_RECORD_ID_INDEX; + ev->id_index.header.size = sz; + ev->id_index.nr = n; + + evlist__for_each_entry(evlist, evsel) { + u32 j; + + for (j = 0; j < evsel->core.ids; j++) { + struct id_index_entry *e; + struct perf_sample_id *sid; + + if (i >= n) { + err = process(tool, ev, NULL, machine); + if (err) + goto out_err; + nr -= n; + i = 0; + } + + e = &ev->id_index.entries[i++]; + + e->id = evsel->core.id[j]; + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) { + free(ev); + return -ENOENT; + } + + e->idx = sid->idx; + e->cpu = sid->cpu; + e->tid = sid->tid; + } + } + + sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry); + ev->id_index.header.size = sz; + ev->id_index.nr = nr; + + err = process(tool, ev, NULL, machine); +out_err: + free(ev); + + return err; +} + +int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, + struct target *target, struct perf_thread_map *threads, + perf_event__handler_t process, bool data_mmap, + unsigned int nr_threads_synthesize) +{ + if (target__has_task(target)) + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + else if (target__has_cpu(target)) + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, + nr_threads_synthesize); + /* command specified */ + return 0; +} + +int machine__synthesize_threads(struct machine *machine, struct target *target, + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize) +{ + return __machine__synthesize_threads(machine, NULL, target, threads, + perf_event__process, data_mmap, + nr_threads_synthesize); +} + +static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) +{ + struct perf_record_event_update *ev; + + size += sizeof(*ev); + size = 
PERF_ALIGN(size, sizeof(u64)); + + ev = zalloc(size); + if (ev) { + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = type; + ev->id = id; + } + return ev; +} + +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + size_t size = strlen(evsel->unit); + struct perf_record_event_update *ev; + int err; + + ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]); + if (ev == NULL) + return -ENOMEM; + + strlcpy(ev->data, evsel->unit, size + 1); + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + struct perf_record_event_update *ev; + struct perf_record_event_update_scale *ev_data; + int err; + + ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]); + if (ev == NULL) + return -ENOMEM; + + ev_data = (struct perf_record_event_update_scale *)ev->data; + ev_data->scale = evsel->scale; + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + struct perf_record_event_update *ev; + size_t len = strlen(evsel->name); + int err; + + ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]); + if (ev == NULL) + return -ENOMEM; + + strlcpy(ev->data, evsel->name, len + 1); + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + size_t size = sizeof(struct perf_record_event_update); + struct perf_record_event_update *ev; + int max, err; + u16 type; + + if (!evsel->core.own_cpus) + return 0; + + ev = 
cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); + if (!ev) + return -ENOMEM; + + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = PERF_EVENT_UPDATE__CPUS; + ev->id = evsel->core.id[0]; + + cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, + evsel->core.own_cpus, type, max); + + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, + perf_event__handler_t process) +{ + struct evsel *evsel; + int err = 0; + + evlist__for_each_entry(evlist, evsel) { + err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids, + evsel->core.id, process); + if (err) { + pr_debug("failed to create perf header attribute\n"); + return err; + } + } + + return err; +} + +static bool has_unit(struct evsel *evsel) +{ + return evsel->unit && *evsel->unit; +} + +static bool has_scale(struct evsel *evsel) +{ + return evsel->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, + perf_event__handler_t process, bool is_pipe) +{ + struct evsel *evsel; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, evsel) { + if (!evsel->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. 
+ */ + if (has_unit(evsel)) { + err = perf_event__synthesize_event_update_unit(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(evsel)) { + err = perf_event__synthesize_event_update_scale(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel scale.\n"); + return err; + } + } + + if (evsel->core.own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + +int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, + u32 ids, u64 *id, perf_event__handler_t process) +{ + union perf_event *ev; + size_t size; + int err; + + size = sizeof(struct perf_event_attr); + size = PERF_ALIGN(size, sizeof(u64)); + size += sizeof(struct perf_event_header); + size += ids * sizeof(u64); + + ev = zalloc(size); + + if (ev == NULL) + return -ENOMEM; + + ev->attr.attr = *attr; + memcpy(ev->attr.id, id, ids * sizeof(u64)); + + ev->attr.header.type = PERF_RECORD_HEADER_ATTR; + ev->attr.header.size = (u16)size; + + if (ev->attr.header.size == size) /* only emit when size survived the u16 cast above */ + err = process(tool, ev, NULL, NULL); + else + err = -E2BIG; + + free(ev); + + return err; +} + +int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, + perf_event__handler_t process) +{ + union perf_event ev; + struct tracing_data *tdata; + ssize_t size = 0, aligned_size = 0, padding; + struct feat_fd ff; + + /* + * We are going to store the size of the data followed + * by the data contents.
Since the fd descriptor is a pipe, + * we cannot seek back to store the size of the data once + * we know it. Instead we: + * + * - write the tracing data to the temp file + * - get/write the data size to pipe + * - write the tracing data from the temp file + * to the pipe + */ + tdata = tracing_data_get(&evlist->core.entries, fd, true); + if (!tdata) + return -1; + + memset(&ev, 0, sizeof(ev)); + + ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; + size = tdata->size; + aligned_size = PERF_ALIGN(size, sizeof(u64)); + padding = aligned_size - size; + ev.tracing_data.header.size = sizeof(ev.tracing_data); + ev.tracing_data.size = aligned_size; + + process(tool, &ev, NULL, NULL); + + /* + * The put function will copy all the tracing data + * stored in temp file to the pipe. + */ + tracing_data_put(tdata); + + ff = (struct feat_fd){ .fd = fd }; + if (write_padded(&ff, NULL, 0, padding)) + return -1; + + return aligned_size; +} + +int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, + perf_event__handler_t process, struct machine *machine) +{ + union perf_event ev; + size_t len; + + if (!pos->hit) + return 0; + + memset(&ev, 0, sizeof(ev)); + + len = pos->long_name_len + 1; + len = PERF_ALIGN(len, NAME_ALIGN); + memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); + ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; + ev.build_id.header.misc = misc; + ev.build_id.pid = machine->pid; + ev.build_id.header.size = sizeof(ev.build_id) + len; + memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); + + return process(tool, &ev, NULL, machine); +} + +int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, + struct evlist *evlist, perf_event__handler_t process, bool attrs) +{ + int err; + + if (attrs) { + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + err = 
perf_event__synthesize_extra_attr(tool, evlist, process, attrs); /* NOTE(review): this result is overwritten by the next assignment without being checked */ + err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize cpu map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(tool, config, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + return err; + } + + return 0; +} + +int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, + struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; + +int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, + struct evlist *evlist, perf_event__handler_t process) +{ + struct perf_header *header = &session->header; + struct perf_record_header_feature *fe; + struct feat_fd ff; + size_t sz, sz_hdr; + int feat, ret; + + sz_hdr = sizeof(fe->header); + sz = sizeof(union perf_event); + /* get a nice alignment */ + sz = PERF_ALIGN(sz, page_size); + + memset(&ff, 0, sizeof(ff)); + + ff.buf = malloc(sz); + if (!ff.buf) + return -ENOMEM; + + ff.size = sz - sz_hdr; + ff.ph = &session->header; + + for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { + if (!feat_ops[feat].synthesize) { + pr_debug("No record header feature for header :%d\n", feat); + continue; + } + + ff.offset = sizeof(*fe); + + ret = feat_ops[feat].write(&ff, evlist); + if (ret || ff.offset <= (ssize_t)sizeof(*fe)) { + pr_debug("Error writing feature\n"); + continue; + } + /* ff.buf may have changed due to realloc in do_write() */ + fe = ff.buf; + memset(fe, 0, sizeof(*fe)); + + fe->feat_id = feat; + fe->header.type = 
PERF_RECORD_HEADER_FEATURE; + fe->header.size = ff.offset; + + ret = process(tool, ff.buf, NULL, NULL); + if (ret) { + free(ff.buf); + return ret; + } + } + + /* Send HEADER_LAST_FEATURE mark. */ + fe = ff.buf; + fe->feat_id = HEADER_LAST_FEATURE; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = sizeof(*fe); + + ret = process(tool, ff.buf, NULL, NULL); + + free(ff.buf); + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h new file mode 100644 index 000000000000..baead0cdc381 --- /dev/null +++ b/tools/perf/util/synthetic-events.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SYNTHETIC_EVENTS_H +#define __PERF_SYNTHETIC_EVENTS_H + +#include +#include // pid_t +#include +#include + +struct auxtrace_record; +struct dso; +struct evlist; +struct evsel; +struct machine; +struct perf_counts_values; +struct perf_cpu_map; +struct perf_event_attr; +struct perf_event_mmap_page; +struct perf_sample; +struct perf_session; +struct perf_stat_config; +struct perf_thread_map; +struct perf_tool; +struct record_opts; +struct target; + +union perf_event; + +typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine); + +int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); +int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int 
perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine); +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); +int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); +int perf_event__synthesize_stat_round(struct perf_tool 
*tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); + +int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); + +size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); + +int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, + struct target *target, struct perf_thread_map *threads, + perf_event__handler_t process, bool data_mmap, + unsigned int nr_threads_synthesize); +int machine__synthesize_threads(struct machine *machine, struct target *target, + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize); + +#ifdef HAVE_AUXTRACE_SUPPORT +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, + struct perf_session *session, 
perf_event__handler_t process); + +#else // HAVE_AUXTRACE_SUPPORT + +#include + +static inline int +perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, + struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + perf_event__handler_t process __maybe_unused) +{ + return -EINVAL; +} +#endif // HAVE_AUXTRACE_SUPPORT + +#ifdef HAVE_LIBBPF_SUPPORT +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, + struct machine *machine, struct record_opts *opts); +#else // HAVE_LIBBPF_SUPPORT +static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + +#endif // __PERF_SYNTHETIC_EVENTS_H diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 565f7aef7e6c..a3db13dea937 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -6,8 +6,6 @@ */ #include "target.h" -#include "util.h" -#include "debug.h" #include #include diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 51fb574998bb..3dce2de9d005 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -5,7 +5,6 @@ * Refactored from builtin-top.c, see that files for further copyright notes. */ -#include "cpumap.h" #include "event.h" #include "evlist.h" #include "evsel.h" @@ -72,7 +71,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) } if (top->evlist->core.nr_entries == 1) { - struct evsel *first = perf_evlist__first(top->evlist); + struct evsel *first = evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->core.attr.sample_period, opts->freq ? 
"Hz" : ""); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d63d542b2cde..086e98ff42a3 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2008,2009, Steven Rostedt */ -#include "util.h" #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include // page_size #include "trace-event.h" #include diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index b6c0db068be0..8593d3c200c6 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -15,7 +15,6 @@ #include #include -#include "util.h" #include "trace-event.h" #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 01b9d89bf5bf..b3ee651e3d91 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -14,7 +14,6 @@ #include #include "trace-event.h" #include "machine.h" -#include "util.h" /* * global trace_event object used by trace_event__tp_format diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index e0c3af34ac8d..3c5a632ee57c 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -4,13 +4,12 @@ #include -#include "event.h" - struct perf_tsc_conversion { u16 time_shift; u32 time_mult; u64 time_zero; }; + struct perf_event_mmap_page; int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, @@ -20,13 +19,4 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -struct perf_event_mmap_page; -struct perf_tool; -struct machine; - -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); - -#endif +#endif // __PERF_TSC_H diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 9ece188ae48a..15f6e46d7124 100644 
--- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -17,7 +17,6 @@ #include "event.h" #include "perf_regs.h" #include "callchain.h" -#include "util.h" static char *debuginfo_path; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index ebdbb056510c..1800887b2255 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -37,7 +37,6 @@ #include "unwind.h" #include "map.h" #include "symbol.h" -#include "util.h" #include "debug.h" #include "asm/bug.h" #include "dso.h" diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 3949a60b00ae..196438ee4c9d 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -8,7 +8,6 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "util.h" -#include "debug.h" #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 32322a20a68b..5eda6e19c947 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -2,9 +2,7 @@ #include "util.h" #include "debug.h" #include "event.h" -#include "namespaces.h" #include -#include #include #include #include @@ -41,8 +39,6 @@ void perf_set_multithreaded(void) perf_singlethreaded = false; } -unsigned int page_size; - int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; @@ -234,138 +230,6 @@ out: return list; } -static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi) -{ - int err = -1; - char *line = NULL; - size_t n; - FILE *from_fp, *to_fp; - struct nscookie nsc; - - nsinfo__mountns_enter(nsi, &nsc); - from_fp = fopen(from, "r"); - nsinfo__mountns_exit(&nsc); - if (from_fp == NULL) - goto out; - - to_fp = fopen(to, "w"); - if (to_fp == NULL) - goto out_fclose_from; - - while (getline(&line, &n, from_fp) > 0) - if (fputs(line, to_fp) == EOF) - goto out_fclose_to; - err = 0; -out_fclose_to: - fclose(to_fp); - free(line); 
-out_fclose_from: - fclose(from_fp); -out: - return err; -} - -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) -{ - void *ptr; - loff_t pgoff; - - pgoff = off_in & ~(page_size - 1); - off_in -= pgoff; - - ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff); - if (ptr == MAP_FAILED) - return -1; - - while (size) { - ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out); - if (ret < 0 && errno == EINTR) - continue; - if (ret <= 0) - break; - - size -= ret; - off_in += ret; - off_out += ret; - } - munmap(ptr, off_in + size); - - return size ? -1 : 0; -} - -static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, - struct nsinfo *nsi) -{ - int fromfd, tofd; - struct stat st; - int err; - char *tmp = NULL, *ptr = NULL; - struct nscookie nsc; - - nsinfo__mountns_enter(nsi, &nsc); - err = stat(from, &st); - nsinfo__mountns_exit(&nsc); - if (err) - goto out; - err = -1; - - /* extra 'x' at the end is to reserve space for '.' */ - if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { - tmp = NULL; - goto out; - } - ptr = strrchr(tmp, '/'); - if (!ptr) - goto out; - ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1); - *ptr = '.'; - - tofd = mkstemp(tmp); - if (tofd < 0) - goto out; - - if (fchmod(tofd, mode)) - goto out_close_to; - - if (st.st_size == 0) { /* /proc? do it slowly... 
*/ - err = slow_copyfile(from, tmp, nsi); - goto out_close_to; - } - - nsinfo__mountns_enter(nsi, &nsc); - fromfd = open(from, O_RDONLY); - nsinfo__mountns_exit(&nsc); - if (fromfd < 0) - goto out_close_to; - - err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size); - - close(fromfd); -out_close_to: - close(tofd); - if (!err) - err = link(tmp, to); - unlink(tmp); -out: - free(tmp); - return err; -} - -int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi) -{ - return copyfile_mode_ns(from, to, 0755, nsi); -} - -int copyfile_mode(const char *from, const char *to, mode_t mode) -{ - return copyfile_mode_ns(from, to, mode, NULL); -} - -int copyfile(const char *from, const char *to) -{ - return copyfile_mode(from, to, 0755); -} - size_t hex_width(u64 v) { size_t n = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 45a5c6f20197..9969b8b46f7c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,14 +11,12 @@ #include #include #include -#include /* General helper functions */ void usage(const char *err) __noreturn; void die(const char *err, ...) 
__noreturn __printf(1, 2); struct dirent; -struct nsinfo; struct strlist; int mkdir_p(char *path, mode_t mode); @@ -26,15 +24,9 @@ int rm_rf(const char *path); int rm_rf_perf_data(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); bool lsdir_no_dot_filter(const char *name, struct dirent *d); -int copyfile(const char *from, const char *to); -int copyfile_mode(const char *from, const char *to, mode_t mode); -int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); size_t hex_width(u64 v); -extern unsigned int page_size; - int sysctl__max_stack(void); int fetch_kernel_version(unsigned int *puint, diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index e5e6599603f4..ba4b4395f35d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -11,7 +11,7 @@ #include "vdso.h" #include "dso.h" -#include "util.h" +#include #include "map.h" #include "symbol.h" #include "machine.h" diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 59d456f716e9..78d2297c1b67 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -7,11 +7,9 @@ #include #include #include +#include #include "util/compress.h" -#include "util/util.h" -#include "util/debug.h" - #define CHUNK_SIZE 16384 diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 25b43a8c2b15..c3feccb99ff5 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -126,9 +126,9 @@ endif # in the default INSTALL_HDR_PATH usr/include. 
khdr: ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif @@ -136,35 +136,35 @@ all: khdr @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_hotplug: hotplug @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ done; clean_hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; run_pstore_crash: - make -C pstore run_crash + $(MAKE) -C pstore run_crash # Use $BUILD as the default install root. 
$BUILD points to the # right output location for the following cases: @@ -184,7 +184,7 @@ ifdef INSTALL_PATH install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ done; @# Ask all targets to emit their test scripts @@ -203,7 +203,7 @@ ifdef INSTALL_PATH echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ echo -n "run_many" >> $(ALL_SCRIPT); \ - make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "" >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ done; @@ -216,7 +216,7 @@ endif clean: @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; .PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index 7717c0a09686..ac738500d17f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then exit_unsupported fi -echo "Test snapshot tigger" +echo "Test snapshot trigger" echo 0 > snapshot echo 1 > events/sched/sched_process_fork/enable ( echo "forked") diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index dc3346e090f5..5614222a6628 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ 
b/tools/testing/selftests/kvm/dirty_log_test.c @@ -19,8 +19,6 @@ #include "kvm_util.h" #include "processor.h" -#define DEBUG printf - #define VCPU_ID 1 /* The memory slot index to track dirty pages */ @@ -249,14 +247,12 @@ static void vm_dirty_log_verify(unsigned long *bmap) } static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code, - unsigned long type) + uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR, type); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); @@ -265,67 +261,35 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t max_gfn; unsigned long *bmap; - unsigned long type = 0; - switch (mode) { - case VM_MODE_P52V48_4K: - guest_pa_bits = 52; - guest_page_shift = 12; - break; - case VM_MODE_P52V48_64K: - guest_pa_bits = 52; - guest_page_shift = 16; - break; - case VM_MODE_P48V48_4K: - guest_pa_bits = 48; - guest_page_shift = 12; - break; - case VM_MODE_P48V48_64K: - guest_pa_bits = 48; - guest_page_shift = 16; - break; - case VM_MODE_P40V48_4K: - guest_pa_bits = 40; - guest_page_shift = 12; - break; - case VM_MODE_P40V48_64K: - guest_pa_bits = 40; - guest_page_shift = 16; - break; - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); - } - - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - -#ifdef __x86_64__ /* - * FIXME - * The x86_64 kvm selftests framework currently only supports a - 
* single PML4 which restricts the number of physical address - * bits we can change to 39. + * We reserve page table for 2 times of extra dirty mem which + * will definitely cover the original (1G+) test range. Here + * we do the calculation with 4K page size which is the + * smallest so the page number will be enough for all archs + * (e.g., 64K page size guest will need even less memory for + * page tables). */ - guest_pa_bits = 39; -#endif -#ifdef __aarch64__ - if (guest_pa_bits != 40) - type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); -#endif - max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; - guest_page_size = (1ul << guest_page_shift); + vm = create_vm(mode, VCPU_ID, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), + guest_code); + + guest_page_size = vm_get_page_size(vm); /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - + vm_get_page_shift(vm))) + 16; #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; @@ -335,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, !!((guest_num_pages * guest_page_size) % host_page_size); if (!phys_offset) { - guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm_get_max_gfn(vm) - + guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = phys_offset; @@ -351,8 +316,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); - #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; @@ -482,7 +445,7 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - 
vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 5463b7896a0a..29cccaf96baf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,6 +24,12 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +#ifndef NDEBUG +#define DEBUG(...) printf(__VA_ARGS__); +#else +#define DEBUG(...) +#endif + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 @@ -38,11 +44,14 @@ enum vm_guest_mode { VM_MODE_P48V48_64K, VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ NUM_VM_MODES, }; -#ifdef __aarch64__ +#if defined(__aarch64__) #define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#elif defined(__x86_64__) +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K #else #define VM_MODE_DEFAULT VM_MODE_P52V48_4K #endif @@ -60,8 +69,7 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -146,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned int 
vm_get_max_gfn(struct kvm_vm *vm); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 80d19740d2dc..0c17f2ee685e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, uint64_t msr_value); +uint32_t kvm_get_cpuid_max(void); +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 486400a97374..86036a59a668 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini case VM_MODE_P52V48_4K: TEST_ASSERT(false, "AArch64 does not support 4K sized pages " "with 52-bit physical address ranges"); + case VM_MODE_PXXV48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6e49bb039376..80a338b5403c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" +#include "processor.h" #include #include @@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } 
-static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) +static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) @@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) exit(KSFT_SKIP); } - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:48, VA-bits:48, 4K pages", + "PA-bits:48, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:ANY, VA-bits:48, 4K pages", }; _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type) +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); vm->mode = mode; - vm->type = type; - vm_open(vm, perm, type); + vm->type = 0; /* Setup mode specific traits. 
*/ switch (vm->mode) { @@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, vm->page_size = 0x10000; vm->page_shift = 16; break; + case VM_MODE_PXXV48_4K: +#ifdef __x86_64__ + kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); + TEST_ASSERT(vm->va_bits == 48, "Linear address width " + "(%d bits) not supported", vm->va_bits); + vm->pgtable_levels = 4; + vm->page_size = 0x1000; + vm->page_shift = 12; + DEBUG("Guest physical address width detected: %d\n", + vm->pa_bits); +#else + TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " + "non-x86 platforms"); +#endif + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } +#ifdef __aarch64__ + if (vm->pa_bits != 40) + vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); +#endif + + vm_open(vm, perm); + /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); sparsebit_set_num(vm->vpages_valid, @@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { - return _vm_create(mode, phy_pages, perm, 0); + return _vm_create(mode, phy_pages, perm); } /* @@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { struct userspace_mem_region *region; - vm_open(vmp, perm, vmp->type); + vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); @@ -1628,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm) return val == 'Y'; } + +unsigned int vm_get_page_size(struct kvm_vm *vm) +{ + return vm->page_size; +} + +unsigned int vm_get_page_shift(struct kvm_vm *vm) +{ + return vm->page_shift; +} + +unsigned int vm_get_max_gfn(struct kvm_vm *vm) +{ + return vm->max_gfn; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 0a5e487dbc50..c53dbc6bc568 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ 
b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -1157,3 +1157,25 @@ bool is_intel_cpu(void) chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } + +uint32_t kvm_get_cpuid_max(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + struct kvm_cpuid_entry2 *entry; + bool pae; + + /* SDM 4.1.4 */ + if (kvm_get_cpuid_max() < 0x80000008) { + pae = 
kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); + *pa_bits = pae ? 36 : 32; + *va_bits = 32; + } else { + entry = kvm_get_supported_cpuid_entry(0x80000008); + *pa_bits = entry->eax & 0xff; + *va_bits = (entry->eax >> 8) & 0xff; + } +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 4bfc9a90b1de..da4d89ad5419 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_end(va); asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index ee59831fbc98..443a2b54645b 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -26,6 +26,25 @@ static void guest_code(void) { } +static int smt_possible(void) +{ + char buf[16]; + FILE *f; + bool res = 1; + + f = fopen("/sys/devices/system/cpu/smt/control", "r"); + if (f) { + if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { + if (!strncmp(buf, "forceoff", 8) || + !strncmp(buf, "notsupported", 12)) + res = 0; + } + fclose(f); + } + + return res; +} + static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, int evmcs_enabled) { @@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && !entry->padding[2], "padding should be zero"); + if (entry->function == 0x40000004) { + int nononarchcs = !!(entry->eax & (1UL << 18)); + + TEST_ASSERT(nononarchcs == !smt_possible(), + "NoNonArchitecturalCoreSharing bit" + " doesn't reflect SMT setting"); + } + /* * If needed for debug: * fprintf(stdout, diff --git 
a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config index 0dd7700464a8..ad23100cb27c 100644 --- a/tools/testing/selftests/livepatch/config +++ b/tools/testing/selftests/livepatch/config @@ -1 +1,3 @@ +CONFIG_LIVEPATCH=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_TEST_LIVEPATCH=m diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6ef7f16c4cf5..7f8b5c8982e3 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -199,6 +199,11 @@ struct seccomp_notif_sizes { }; #endif +#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 +#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index bf401f725eef..1a5db1eb8ed5 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -2,4 +2,4 @@ include ../lib.mk TEST_PROGS := test_smoke.sh test_space.sh -TEST_FILES := tpm2.py tpm2_tests.py +TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index c2333c78cf04..afff120c7be6 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NL"; +static const char sopts[] = "bdehp:t:Tn:NLf:"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -31,6 +31,7 @@ static const struct option lopts[] = { {"pretimeout", required_argument, NULL, 'n'}, {"getpretimeout", no_argument, NULL, 'N'}, {"gettimeleft", no_argument, NULL, 'L'}, + {"file", required_argument, NULL, 'f'}, {NULL, no_argument, NULL, 0x0} }; @@ -69,16 +70,19 
@@ static void term(int sig) static void usage(char *progname) { printf("Usage: %s [options]\n", progname); - printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); - printf(" -d, --disable Turn off the watchdog timer\n"); - printf(" -e, --enable Turn on the watchdog timer\n"); - printf(" -h, --help Print the help message\n"); - printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); - printf(" -t, --timeout=T Set timeout to T seconds\n"); - printf(" -T, --gettimeout Get the timeout\n"); - printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); - printf(" -N, --getpretimeout Get the pretimeout\n"); - printf(" -L, --gettimeleft Get the time left until timer expires\n"); + printf(" -f, --file\t\tOpen watchdog device file\n"); + printf("\t\t\tDefault is /dev/watchdog\n"); + printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); + printf(" -d, --disable\t\tTurn off the watchdog timer\n"); + printf(" -e, --enable\t\tTurn on the watchdog timer\n"); + printf(" -h, --help\t\tPrint the help message\n"); + printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n", + DEFAULT_PING_RATE); + printf(" -t, --timeout=T\tSet timeout to T seconds\n"); + printf(" -T, --gettimeout\tGet the timeout\n"); + printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n"); + printf(" -N, --getpretimeout\tGet the pretimeout\n"); + printf(" -L, --gettimeleft\tGet the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); @@ -92,14 +96,20 @@ int main(int argc, char *argv[]) int ret; int c; int oneshot = 0; + char *file = "/dev/watchdog"; setbuf(stdout, NULL); - fd = open("/dev/watchdog", O_WRONLY); + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + if (c == 'f') + file = optarg; + } + + fd = open(file, O_WRONLY); if (fd == -1) { if (errno == ENOENT) - printf("Watchdog device not 
enabled.\n"); + printf("Watchdog device (%s) not found.\n", file); else if (errno == EACCES) printf("Run watchdog as root.\n"); else @@ -108,6 +118,8 @@ int main(int argc, char *argv[]) exit(-1); } + optind = 0; + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { switch (c) { case 'b': @@ -190,6 +202,9 @@ int main(int argc, char *argv[]) else printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); break; + case 'f': + /* Handled above */ + break; default: usage(argv[0]);