From 2145bb687e3fdb128066872fd952f4563d89177e Mon Sep 17 00:00:00 2001 From: Daire McNamara Date: Thu, 16 Dec 2021 14:00:21 +0000 Subject: [PATCH 001/380] dt-bindings: clk: microchip: Add Microchip PolarFire host binding Add device tree bindings for the Microchip PolarFire system clock controller Signed-off-by: Daire McNamara Signed-off-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211216140022.16146-2-conor.dooley@microchip.com Signed-off-by: Stephen Boyd --- .../bindings/clock/microchip,mpfs.yaml | 58 +++++++++++++++++++ .../dt-bindings/clock/microchip,mpfs-clock.h | 45 ++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/microchip,mpfs.yaml create mode 100644 include/dt-bindings/clock/microchip,mpfs-clock.h diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml new file mode 100644 index 000000000000..0c15afa2214c --- /dev/null +++ b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/microchip,mpfs.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PolarFire Clock Control Module Binding + +maintainers: + - Daire McNamara + +description: | + Microchip PolarFire clock control (CLKCFG) is an integrated clock controller, + which gates and enables all peripheral clocks. + + This device tree binding describes 33 gate clocks. Clocks are referenced by + user nodes by the CLKCFG node phandle and the clock index in the group, from + 0 to 32. + +properties: + compatible: + const: microchip,mpfs-clkcfg + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + '#clock-cells': + const: 1 + description: | + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. See include/dt-bindings/clock/microchip,mpfs-clock.h + for the full list of PolarFire clock IDs. + +required: + - compatible + - reg + - clocks + - '#clock-cells' + +additionalProperties: false + +examples: + # Clock Config node: + - | + #include + soc { + #address-cells = <2>; + #size-cells = <2>; + clkcfg: clock-controller@20002000 { + compatible = "microchip,mpfs-clkcfg"; + reg = <0x0 0x20002000 0x0 0x1000>; + clocks = <&ref>; + #clock-cells = <1>; + }; + }; diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h new file mode 100644 index 000000000000..73f2a9324857 --- /dev/null +++ b/include/dt-bindings/clock/microchip,mpfs-clock.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Daire McNamara, + * Copyright (C) 2020 Microchip Technology Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ +#define _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ + +#define CLK_CPU 0 +#define CLK_AXI 1 +#define CLK_AHB 2 + +#define CLK_ENVM 3 +#define CLK_MAC0 4 +#define CLK_MAC1 5 +#define CLK_MMC 6 +#define CLK_TIMER 7 +#define CLK_MMUART0 8 +#define CLK_MMUART1 9 +#define CLK_MMUART2 10 +#define CLK_MMUART3 11 +#define CLK_MMUART4 12 +#define CLK_SPI0 13 +#define CLK_SPI1 14 +#define CLK_I2C0 15 +#define CLK_I2C1 16 +#define CLK_CAN0 17 +#define CLK_CAN1 18 +#define CLK_USB 19 +#define CLK_RESERVED 20 +#define CLK_RTC 21 +#define CLK_QSPI 22 +#define CLK_GPIO0 23 +#define CLK_GPIO1 24 +#define CLK_GPIO2 25 +#define CLK_DDRC 26 +#define CLK_FIC0 27 +#define CLK_FIC1 28 +#define CLK_FIC2 29 +#define CLK_FIC3 30 +#define CLK_ATHENA 31 +#define CLK_CFM 32 + +#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */ From 36d014d37d59065087e51b8381e37993f1ca99bc Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 18:56:51 -0300 Subject: [PATCH 002/380] KVM: PPC: Book3S HV: Stop returning internal values to userspace Our kvm_arch_vcpu_ioctl_run currently returns the RESUME_HOST values to userspace, against the API of the KVM_RUN ioctl which returns 0 on success. Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125215655.1026224-2-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5..50414fb2a5ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1841,6 +1841,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #ifdef CONFIG_ALTIVEC out: #endif + + /* + * We're already returning to userspace, don't pass the + * RESUME_HOST flags along. + */ + if (r > 0) + r = 0; + vcpu_put(vcpu); return r; } From b99234b918c6e36b9aa0a5b2981e86b6bd11f8e2 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 18:56:52 -0300 Subject: [PATCH 003/380] KVM: PPC: Fix vmx/vsx mixup in mmio emulation The MMIO emulation code for vector instructions is duplicated between VSX and VMX. When emulating VMX we should check the VMX copy size instead of the VSX one. Fixes: acc9eb9305fe ("KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction ...") Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125215655.1026224-3-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 50414fb2a5ea..c2bd29e90314 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1499,7 +1499,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1596,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; From 3f831504482ab0d0d53d1966987959d1485345cc Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 18:56:53 -0300 Subject: [PATCH 004/380] KVM: PPC: mmio: Reject instructions that access more than mmio.data size The MMIO interface between the kernel and userspace uses a structure that supports a maximum of 8-bytes of data. Instructions that access more than that need to be emulated in parts. We currently don't have generic support for splitting the emulation in parts and each set of instructions needs to be explicitly included. There's already an error message being printed when a load or store exceeds the mmio.data buffer but we don't fail the emulation until later at kvmppc_complete_mmio_load and even then we allow userspace to make a partial copy of the data, which ends up overwriting some fields of the mmio structure. This patch makes the emulation fail earlier at kvmppc_handle_load|store, which will send a Program interrupt to the guest. This is better than allowing the guest to proceed with partial data. Note that this was caught in a somewhat artificial scenario using quadword instructions (lq/stq), there's no account of an actual guest in the wild running instructions that are not properly emulated. (While here, remove the "bad MMIO" messages. The caller already has an error message.) Signed-off-by: Fabiano Rosas Reviewed-by: Alexey Kardashevskiy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125215655.1026224-4-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c2bd29e90314..27fb2b70f631 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1114,10 +1114,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u64 gpr; - if (run->mmio.len > sizeof(gpr)) { - printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + if (run->mmio.len > sizeof(gpr)) return; - } if (!vcpu->arch.mmio_host_swabbed) { switch (run->mmio.len) { @@ -1236,10 +1234,8 @@ static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1325,10 +1321,8 @@ int kvmppc_handle_store(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; From 349fbfe9b918e6dea00734f07c0fbaf4c2e2df5e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 18:56:54 -0300 Subject: [PATCH 005/380] KVM: PPC: mmio: Return to guest after emulation failure If MMIO emulation fails we don't want to crash the whole guest by returning to userspace. The original commit bbf45ba57eae ("KVM: ppc: PowerPC 440 KVM implementation") added a todo: /* XXX Deliver Program interrupt to guest. */ and later the commit d69614a295ae ("KVM: PPC: Separate loadstore emulation from priv emulation") added the Program interrupt injection but in another file, so I'm assuming it was missed that this block needed to be altered. Also change the message to a ratelimited one since we're letting the guest run and it could flood the host logs. Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125215655.1026224-5-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27fb2b70f631..acb0d2a4bdb9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -307,9 +307,9 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) u32 last_inst; kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); - /* XXX Deliver Program interrupt to guest. */ - pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); - r = RESUME_HOST; + kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", + last_inst); + r = RESUME_GUEST; break; } default: From c1c8a66367a35aabbad9bd629b105b9fb49f2c1f Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 18:56:55 -0300 Subject: [PATCH 006/380] KVM: PPC: Book3s: mmio: Deliver DSI after emulation failure MMIO emulation can fail if the guest uses an instruction that we are not prepared to emulate. Since these instructions can be and most likely are valid ones, this is (slightly) closer to an access fault than to an illegal instruction, so deliver a Data Storage interrupt instead of a Program interrupt. BookE ignores bad faults, so it will keep using a Program interrupt because a DSI would cause a fault loop in the guest. Suggested-by: Nicholas Piggin Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125215655.1026224-6-farosas@linux.ibm.com --- arch/powerpc/kvm/emulate_loadstore.c | 10 +++------- arch/powerpc/kvm/powerpc.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 48272a9b9c30..cfc9114b87d0 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_FAIL; - int advance = 1; struct instruction_op op; /* this default type might be overwritten by subcategories */ @@ -98,6 +97,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); + vcpu->mmio_is_write = OP_IS_STORE(type); + switch (type) { case LOAD: { int instr_byte_swap = op.type & BYTEREV; @@ -355,15 +356,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } } - if (emulated == EMULATE_FAIL) { - advance = 0; - kvmppc_core_queue_program(vcpu, 0); - } - trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); /* Advance past emulated instruction. */ - if (advance) + if (emulated != EMULATE_FAIL) kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index acb0d2a4bdb9..82d889db2b6b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,6 +309,28 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", last_inst); + + /* + * Injecting a Data Storage here is a bit more + * accurate since the instruction that caused the + * access could still be a valid one. + */ + if (!IS_ENABLED(CONFIG_BOOKE)) { + ulong dsisr = DSISR_BADACCESS; + + if (vcpu->mmio_is_write) + dsisr |= DSISR_ISSTORE; + + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.vaddr_accessed, dsisr); + } else { + /* + * BookE does not send a SIGBUS on a bad + * fault, so use a Program interrupt instead + * to avoid a fault loop. + */ + kvmppc_core_queue_program(vcpu, 0); + } + r = RESUME_GUEST; break; } From 279d1a72c0f8021520f68ddb0a1346ff9ba1ea8c Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Thu, 6 Jan 2022 16:33:53 +0530 Subject: [PATCH 007/380] powerpc/xive: Export XIVE IPI information for online-only processors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cédric pointed out that XIVE IPI information exported via sysfs (debug/powerpc/xive) display empty lines for processors which are not online. Switch to using for_each_online_cpu() so that information is displayed for online-only processors. Reported-by: Cédric Le Goater Signed-off-by: Sachin Sant Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/164146703333.19039.10920919226094771665.sendpatchset@MacBook-Pro.local --- arch/powerpc/sysdev/xive/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d..32863b4daf72 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1791,7 +1791,7 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private) if (xive_ops->debug_show) xive_ops->debug_show(m, private); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) xive_debug_show_ipi(m, cpu); return 0; } From b2a6f6043577e09d51a4b5577fff9fc9f5b14b1c Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Wed, 27 Nov 2019 23:09:59 +0100 Subject: [PATCH 008/380] powerpc: add link stack flush mitigation status in debugfs. The link stack flush status is not visible in debugfs. It can be enabled even when count cache flush is disabled. Add separate file for its status. Signed-off-by: Michal Suchanek [mpe: Update for change to link_stack_flush_type] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191127220959.6208-1-msuchanek@suse.de --- arch/powerpc/kernel/security.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e159d4093d98..d96fd14bd7c9 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -747,14 +747,29 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } +static int link_stack_flush_get(void *data, u64 *val) +{ + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, arch_debugfs_dir, NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("link_stack_flush", 0600, + arch_debugfs_dir, NULL, + &fops_link_stack_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); From f529edd1b69ddf832c3257dcd34e15100038d6b7 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 12 Jan 2022 12:24:59 +0100 Subject: [PATCH 009/380] powerpc/e500/qemu-e500: allow core to idle without waiting This means an idle guest won't needlessly consume an entire core on the host, waiting for work to show up. Signed-off-by: Tobias Waldekranz Signed-off-by: Joachim Wiberg Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220112112459.1033754-1-troglobit@gmail.com --- arch/powerpc/platforms/85xx/qemu_e500.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index a4127b0b161f..4c4d577effd9 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -67,4 +67,9 @@ define_machine(qemu_e500) { .get_irq = mpic_get_coreint_irq, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif }; From 17846485dff91acce1ad47b508b633dffc32e838 Mon Sep 17 00:00:00 2001 From: Maxim Kiselev Date: Thu, 30 Dec 2021 18:11:21 +0300 Subject: [PATCH 010/380] powerpc: dts: t104xrdb: fix phy type for FMAN 4/5 T1040RDB has two RTL8211E-VB phys which requires setting of internal delays for correct work. Changing the phy-connection-type property to `rgmii-id` will fix this issue. Signed-off-by: Maxim Kiselev Reviewed-by: Maxim Kochetkov Reviewed-by: Vladimir Oltean Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211230151123.1258321-1-bigunclemax@gmail.com --- arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c0..bfe1ed5be337 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { From d5342fdd163ae0553a14820021a107e03eb1ea72 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 20 Dec 2021 14:40:36 +0100 Subject: [PATCH 011/380] powerpc: dts: Fix some I2C unit addresses The unit-address for the Maxim MAX1237 ADCs on XPedite5200 boards don't match the value in the "reg" property and cause a DTC warning. Signed-off-by: Thierry Reding Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211220134036.683309-1-thierry.reding@gmail.com --- arch/powerpc/boot/dts/xpedite5200.dts | 2 +- arch/powerpc/boot/dts/xpedite5200_xmon.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts index 840ea84bbb59..74b346f2d43c 100644 --- a/arch/powerpc/boot/dts/xpedite5200.dts +++ b/arch/powerpc/boot/dts/xpedite5200.dts @@ -132,7 +132,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts index 449fc1b5dc23..d491c7a8f979 100644 --- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts +++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts @@ -136,7 +136,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; From faf01aef0570757bfbf1d655e984742c1dd38068 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 11 Jan 2022 11:54:04 +1100 Subject: [PATCH 012/380] KVM: PPC: Merge powerpc's debugfs entry content into generic entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment KVM on PPC creates 4 types of entries under the kvm debugfs: 1) "%pid-%fd" per a KVM instance (for all platforms); 2) "vm%pid" (for PPC Book3s HV KVM); 3) "vm%u_vcpu%u_timing" (for PPC Book3e KVM); 4) "kvm-xive-%p" (for XIVE PPC Book3s KVM, the same for XICS); The problem with this is that multiple VMs per process is not allowed for 2) and 3) which makes it possible for userspace to trigger errors when creating duplicated debugfs entries. This merges all these into 1). This defines kvm_arch_create_kvm_debugfs() similar to kvm_arch_create_vcpu_debugfs(). This defines 2 hooks in kvmppc_ops that allow specific KVM implementations add necessary entries, this adds the _e500 suffix to kvmppc_create_vcpu_debugfs_e500() to make it clear what platform it is for. This makes use of already existing kvm_arch_create_vcpu_debugfs() on PPC. This removes no more used debugfs_dir pointers from PPC kvm_arch structs. This stops removing vcpu entries as once created vcpus stay around for the entire life of a VM and removed when the KVM instance is closed, see commit d56f5136b010 ("KVM: let kvm_destroy_vm_debugfs clean up vCPU debugfs directories"). Suggested-by: Fabiano Rosas Signed-off-by: Alexey Kardashevskiy Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220111005404.162219-1-aik@ozlabs.ru --- arch/powerpc/include/asm/kvm_host.h | 6 ++--- arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 31 ++++++++++---------------- arch/powerpc/kvm/book3s_xics.c | 13 ++--------- arch/powerpc/kvm/book3s_xive.c | 13 ++--------- arch/powerpc/kvm/book3s_xive_native.c | 13 ++--------- arch/powerpc/kvm/e500.c | 1 + arch/powerpc/kvm/e500mc.c | 1 + arch/powerpc/kvm/powerpc.c | 16 ++++++++++--- arch/powerpc/kvm/timing.c | 21 +++++------------ arch/powerpc/kvm/timing.h | 12 +++++----- 13 files changed, 51 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9bf60bf0816..faf301d0dec0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -26,6 +26,8 @@ #include #include +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -295,7 +297,6 @@ struct kvm_arch { bool dawr1_enabled; pgd_t *pgtable; u64 process_table; - struct dentry *debugfs_dir; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -673,7 +674,6 @@ struct kvm_vcpu_arch { u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_last_exit; - struct dentry *debugfs_exit_timing; #endif #ifdef CONFIG_PPC_BOOK3S @@ -831,8 +831,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ - - struct dentry *debugfs_dir; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a14dbcd1b8ce..c583d0c37f31 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,8 @@ struct kvmppc_ops { int (*svm_off)(struct kvm *kvm); int (*enable_dawr1)(struct kvm *kvm); bool (*hash_v3_possible)(void); + int (*create_vm_debugfs)(struct kvm *kvm); + int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 213232914367..0aeb51738ca9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm, &debugfs_htab_fops); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 8cebe5542256..e4ce2a35483f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1454,7 +1454,7 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, &debugfs_radix_fops); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9a..c7e44d75f8aa 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2767,20 +2767,17 @@ static const struct file_operations debugfs_timings_ops = { }; /* Create a debugfs directory for the vcpu */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - char buf[16]; - struct kvm *kvm = vcpu->kvm; - - snprintf(buf, sizeof(buf), "vcpu%u", id); - vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, &debugfs_timings_ops); + return 0; } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { + return 0; } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ @@ -2903,8 +2900,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); - debugfs_vcpu_init(vcpu, id); - return 0; } @@ -5223,7 +5218,6 @@ void kvmppc_free_host_rm_ops(void) static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; - char buf[32]; int ret; mutex_init(&kvm->arch.uvmem_lock); @@ -5356,15 +5350,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.smt_mode = 1; kvm->arch.emul_smt_mode = 1; - /* - * Create a debugfs directory for the VM - */ - snprintf(buf, sizeof(buf), "vm%d", current->pid); - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); + return 0; +} + +static int kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm) +{ kvmppc_mmu_debugfs_init(kvm); if (radix_enabled()) kvmhv_radix_debugfs_init(kvm); - return 0; } @@ -5379,8 +5372,6 @@ static void kvmppc_free_vcores(struct kvm *kvm) static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { - debugfs_remove_recursive(kvm->arch.debugfs_dir); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_deactivated(); @@ -6042,6 +6033,8 @@ static struct kvmppc_ops kvm_ops_hv = { .svm_off = kvmhv_svm_off, .enable_dawr1 = kvmhv_enable_dawr1, .hash_v3_possible = kvmppc_hash_v3_possible, + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv, + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9cc466006e8b..306c85e70eea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1016,19 +1016,10 @@ DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry, xics, &xics_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e216c068075d..37a56cbb1701 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2354,19 +2354,10 @@ DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry, xive, &xive_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_init(struct kvm_device *dev) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 561a5bfe0468..3c2b128e5f0f 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1259,19 +1259,10 @@ DEFINE_SHOW_ATTRIBUTE(xive_native_debug); static void xive_native_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry, xive, &xive_native_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_native_init(struct kvm_device *dev) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 7e8b69015d20..c8b2b4478545 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500_init(void) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1c189b5aadcc..fa0d8dbbe484 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500mc_init(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 82d889db2b6b..1d06b68739b0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -777,7 +777,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) rcuwait_init(&vcpu->arch.wait); vcpu->arch.waitp = &vcpu->arch.wait; - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; out_vcpu_uninit: @@ -794,8 +793,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - kvmppc_remove_vcpu_debugfs(vcpu); - switch (vcpu->arch.irq_type) { case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); @@ -2521,3 +2518,16 @@ int kvm_arch_init(void *opaque) } EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); + +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) +{ + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs) + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry); +} + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + if (kvm->arch.kvm_ops->create_vm_debugfs) + kvm->arch.kvm_ops->create_vm_debugfs(kvm); + return 0; +} diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index ba56a5cbba97..25071331f8c1 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -204,21 +204,10 @@ static const struct file_operations kvmppc_exit_timing_fops = { .release = single_release, }; -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) { - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, - vcpu, &kvmppc_exit_timing_fops); - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; + debugfs_create_file("timing", 0666, debugfs_dentry, + vcpu, &kvmppc_exit_timing_fops); + return 0; } diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index feef7885ba82..45817ab82bb4 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -14,8 +14,8 @@ #ifdef CONFIG_KVM_EXIT_TIMING void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry); static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) { @@ -26,9 +26,11 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) /* if exit timing is not configured there is no need to build the c file */ static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) +{ + return 0; +} static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} #endif /* CONFIG_KVM_EXIT_TIMING */ From 8e0f353a44ff3d65d933b8c0e4fb15dc89d46617 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Dec 2021 11:02:43 +0800 Subject: [PATCH 013/380] powerpc/kvm: no need to initialise statics to 0 Static variables do not need to be initialised to 0, because compiler will initialise all uninitialised statics to 0. Thus, remove the unneeded initialization. Signed-off-by: Jason Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211220030243.603435-1-wangborong@cdjrlc.com --- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index c3e31fef0be1..1ae09992c9ea 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -228,7 +228,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u16 sid_map_mask; - static int backwards_map = 0; + static int backwards_map; if (kvmppc_get_msr(vcpu) & MSR_PR) gvsid |= VSID_PR; From eddaa9a402758d379520f6511fb61e89990698aa Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 6 Jan 2022 17:13:39 +0100 Subject: [PATCH 014/380] powerpc/pseries: read the lpar name from the firmware The LPAR name may be changed after the LPAR has been started in the HMC. In that case lparstat command is not reporting the updated value because it reads it from the device tree which is read at boot time. However this value could be read from RTAS. Adding this value in the /proc/powerpc/lparcfg output allows to read the updated value. However the hypervisor, like Qemu/KVM, may not support this RTAS parameter. In that case the value reported in lparcfg is read from the device tree and so is not updated accordingly. Signed-off-by: Laurent Dufour Reviewed-by: Tyrel Datwyler Reviewed-by: Nathan Lynch [mpe: Drop doc-comment syntax, change RTAS/DT to lower case, use of_root to fix missing of_node_put(), use of_property_read_string()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220106161339.74656-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 87 ++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c7940fcfc911..2119c003fcf9 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m) seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); } +/* + * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to + * read the LPAR name, and the largest output data to 4000 + 2 bytes length. + */ +#define SPLPAR_LPAR_NAME_TOKEN 55 +#define GET_SYS_PARM_BUF_SIZE 4002 +#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE +#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE" +#endif + +/* + * Read the lpar name using the RTAS ibm,get-system-parameter call. + * + * The name read through this call is updated if changes are made by the end + * user on the hypervisor side. + * + * Some hypervisor (like Qemu) may not provide this value. In that case, a non + * null value is returned. + */ +static int read_rtas_lpar_name(struct seq_file *m) +{ + int rc, len, token; + union { + char raw_buffer[GET_SYS_PARM_BUF_SIZE]; + struct { + __be16 len; + char name[GET_SYS_PARM_BUF_SIZE-2]; + }; + } *local_buffer; + + token = rtas_token("ibm,get-system-parameter"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL); + if (!local_buffer) + return -ENOMEM; + + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, sizeof(*local_buffer)); + rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN, + __pa(rtas_data_buf), sizeof(*local_buffer)); + if (!rc) + memcpy(local_buffer->raw_buffer, rtas_data_buf, + sizeof(local_buffer->raw_buffer)); + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(rc)); + + if (!rc) { + /* Force end of string */ + len = min((int) be16_to_cpu(local_buffer->len), + (int) sizeof(local_buffer->name)-1); + local_buffer->name[len] = '\0'; + + seq_printf(m, "partition_name=%s\n", local_buffer->name); + } else + rc = -ENODATA; + + kfree(local_buffer); + return rc; +} + +/* + * Read the LPAR name from the Device Tree. + * + * The value read in the DT is not updated if the end-user is touching the LPAR + * name on the hypervisor side. + */ +static int read_dt_lpar_name(struct seq_file *m) +{ + const char *name; + + if (of_property_read_string(of_root, "ibm,partition-name", &name)) + return -ENOENT; + + seq_printf(m, "partition_name=%s\n", name); + return 0; +} + +static void read_lpar_name(struct seq_file *m) +{ + if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) + pr_err_once("Error can't get the LPAR name"); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) if (firmware_has_feature(FW_FEATURE_SPLPAR)) { /* this call handles the ibm,get-system-parameter contents */ + read_lpar_name(m); parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); From 5ebb74749202a25da4b3cc2eb15470225a05527c Mon Sep 17 00:00:00 2001 From: Maxim Kiselev Date: Fri, 21 Jan 2022 12:14:47 +0300 Subject: [PATCH 015/380] powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch On board rev A, the network interface labels for the switch ports written on the front panel are different than on rev B and later. This patch fixes network interface names for the switch ports according to labels that are written on the front panel of the board rev B. They start from ETH3 and end at ETH10. This patch also introduces a separate device tree for rev A. The main device tree is supposed to cover rev B and later. Fixes: e69eb0824d8c ("powerpc: dts: t1040rdb: add ports for Seville Ethernet switch") Signed-off-by: Maxim Kiselev Reviewed-by: Maxim Kochetkov Reviewed-by: Vladimir Oltean Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220121091447.3412907-1-bigunclemax@gmail.com --- arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 30 ++++++++++++++++++++ arch/powerpc/boot/dts/fsl/t1040rdb.dts | 8 +++--- 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 000000000000..73f8c998c64d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. + * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f5613..b6733e7e6580 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; From 2e7f1e2b30b5b8aa5de6547407c68670fd227ad8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Jan 2022 12:33:20 +1100 Subject: [PATCH 016/380] powerpc/64: Move paca allocation later in boot Mahesh & Sourabh identified two problems[1][2] with ppc64_bolted_size() and paca allocation. The first is that on a Radix capable machine but with "disable_radix" on the command line, there is a window during early boot where early_radix_enabled() is true, even though it will later become false. early_init_devtree: <- early_radix_enabled() = false early_init_dt_scan_cpus: <- early_radix_enabled() = false ... check_cpu_pa_features: <- early_radix_enabled() = false ... ^ <- early_radix_enabled() = TRUE allocate_paca: | <- early_radix_enabled() = TRUE ... | ppc64_bolted_size: | <- early_radix_enabled() = TRUE if (early_radix_enabled())| <- early_radix_enabled() = TRUE return ULONG_MAX; | ... | ... | <- early_radix_enabled() = TRUE ... | <- early_radix_enabled() = TRUE mmu_early_init_devtree() V ... <- early_radix_enabled() = false This causes ppc64_bolted_size() to return ULONG_MAX for the boot CPU's paca allocation, even though later it will return a different value. This is not currently a bug because the paca allocation is also limited by the RMA size, but that is very fragile. The second issue is that when using the Hash MMU, when we call ppc64_bolted_size() for the boot CPU's paca allocation, we have not yet detected whether 1T segments are available. That causes ppc64_bolted_size() to return 256MB, even if the machine can actually support up to 1T. This is usually OK, we generally have space below 256MB for one paca, but for a kdump kernel placed above 256MB it causes the boot to fail. At boot we cannot discover all the features of the machine instantaneously, so there will always be some periods where we have incomplete knowledge of the system. However both the above problems stem from the fact that we allocate the boot CPU's paca (and paca pointers array) before we decide which MMU we are using, or discover its exact features. Moving the paca allocation slightly later still can solve both the issues described above, and means for a normal boot we don't do any permanent allocations until after we've discovered the MMU. Note that although we move the boot CPU's paca allocation later, we still have a temporary paca (boot_paca) accessible via r13, so code that does read only access to paca fields is safe. The only risk is that some code writes to the boot_paca, and that write will then be lost when we switch away from the boot_paca later in early_setup(). The additional code that runs before the paca allocation is primarily mmu_early_init_devtree(), which is scanning the device tree and populating globals and cur_cpu_spec with MMU related flags. I do not see any additional code that writes to paca fields. [1]: https://lore.kernel.org/r/20211018084434.217772-2-sourabhjain@linux.ibm.com [2]: https://lore.kernel.org/r/20211018084434.217772-3-sourabhjain@linux.ibm.com Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124130544.408675-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3d30d40a0e9c..86c4f009563d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -352,6 +352,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; + // Pass the boot CPU's hard CPU id back to our caller + *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: @@ -388,9 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - allocate_paca(boot_cpuid); #endif - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); return 0; } @@ -714,6 +715,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { + u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -790,8 +792,6 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_paca_ptrs(); - DBG("Scanning CPUs ...\n"); dt_cpu_ftrs_scan(); @@ -799,7 +799,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -816,6 +816,11 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + // NB. paca is not installed until later in early_setup() + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); From ccafe7c20b7de330d9091a114c9985305759f1ee Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 25 Jan 2022 13:54:21 +0000 Subject: [PATCH 017/380] macintosh: macio_asic: remove useless cast for driver.name pci_driver name is const char pointer, so the cast it not necessary. Signed-off-by: Corentin Labbe Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125135421.4081740-1-clabbe@baylibre.com --- drivers/macintosh/macio_asic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c1fdf2896021..1943a007e2d5 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -756,7 +756,7 @@ MODULE_DEVICE_TABLE (pci, pci_ids); /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver macio_pci_driver = { - .name = (char *) "macio", + .name = "macio", .id_table = pci_ids, .probe = macio_pci_probe, From 961f649fb3ad9a9e384c695a050d776d970ddabd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 30 Jan 2022 18:39:18 +0000 Subject: [PATCH 018/380] powerpc/ptdump: Fix sparse warning in hashpagetable.c As reported by sparse: arch/powerpc/mm/ptdump/hashpagetable.c:264:29: warning: restricted __be64 degrades to integer arch/powerpc/mm/ptdump/hashpagetable.c:265:49: warning: restricted __be64 degrades to integer arch/powerpc/mm/ptdump/hashpagetable.c:267:36: warning: incorrect type in assignment (different base types) arch/powerpc/mm/ptdump/hashpagetable.c:267:36: expected unsigned long long [usertype] arch/powerpc/mm/ptdump/hashpagetable.c:267:36: got restricted __be64 [usertype] v arch/powerpc/mm/ptdump/hashpagetable.c:268:36: warning: incorrect type in assignment (different base types) arch/powerpc/mm/ptdump/hashpagetable.c:268:36: expected unsigned long long [usertype] arch/powerpc/mm/ptdump/hashpagetable.c:268:36: got restricted __be64 [usertype] r The values returned by plpar_pte_read_4() are CPU endian, not __be64, so assigning them to struct hash_pte confuses sparse. As a minimal fix open code a struct to hold the values with CPU endian types. Reported-by: kernel test robot Reported-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220202053039.691917-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/hashpagetable.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index c7f824d294b2..9a601587836b 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -238,7 +238,10 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { - struct hash_pte ptes[4]; + struct { + unsigned long v; + unsigned long r; + } ptes[4]; unsigned long vsid, vpn, hash, hpte_group, want_v; int i, j, ssize = mmu_kernel_ssize; long lpar_rc = 0; From 69ab6ac380a00244575de02c406dcb9491bf3368 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 12:57:32 -0300 Subject: [PATCH 019/380] KVM: PPC: Book3S HV: Check return value of kvmppc_radix_init The return of the function is being shadowed by the call to kvmppc_uvmem_init. Fixes: ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests") Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125155735.1018683-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c7e44d75f8aa..98ec92cf837b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6130,8 +6130,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + return r; + } r = kvmppc_uvmem_init(); if (r < 0) From c5d0d77b45265905bba2ce6e63c9a02bbd11c43c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 12:57:33 -0300 Subject: [PATCH 020/380] KVM: PPC: Book3S HV: Delay setting of kvm ops Delay the setting of kvm_hv_ops until after all init code has completed. This avoids leaving the ops still accessible if the init fails. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125155735.1018683-3-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 98ec92cf837b..ec56970175e2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6119,9 +6119,6 @@ static int kvmppc_book3s_init_hv(void) } #endif - kvm_ops_hv.owner = THIS_MODULE; - kvmppc_hv_ops = &kvm_ops_hv; - init_default_hcalls(); init_vcore_lists(); @@ -6137,10 +6134,15 @@ static int kvmppc_book3s_init_hv(void) } r = kvmppc_uvmem_init(); - if (r < 0) + if (r < 0) { pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); + return r; + } - return r; + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + + return 0; } static void kvmppc_book3s_exit_hv(void) From 175be7e5800e2782a7e38ee9e1b64633494c4b44 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 12:57:34 -0300 Subject: [PATCH 021/380] KVM: PPC: Book3S HV: Free allocated memory if module init fails The module's exit function is not called when the init fails, we need to do cleanup before returning. Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125155735.1018683-4-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ec56970175e2..c886557638a1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6096,7 +6096,7 @@ static int kvmppc_book3s_init_hv(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { r = kvm_init_subcore_bitmap(); if (r) - return r; + goto err; } /* @@ -6112,7 +6112,8 @@ static int kvmppc_book3s_init_hv(void) np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); if (!np) { pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); - return -ENODEV; + r = -ENODEV; + goto err; } /* presence of intc confirmed - node can be dropped again */ of_node_put(np); @@ -6125,12 +6126,12 @@ static int kvmppc_book3s_init_hv(void) r = kvmppc_mmu_hv_init(); if (r) - return r; + goto err; if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); if (r) - return r; + goto err; } r = kvmppc_uvmem_init(); @@ -6143,6 +6144,12 @@ static int kvmppc_book3s_init_hv(void) kvmppc_hv_ops = &kvm_ops_hv; return 0; + +err: + kvmhv_nested_exit(); + kvmppc_radix_exit(); + + return r; } static void kvmppc_book3s_exit_hv(void) From 4feb74aa64b35b21a4937f96d7a940adee286e5b Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 25 Jan 2022 12:57:35 -0300 Subject: [PATCH 022/380] KVM: PPC: Decrement module refcount if init_vm fails We increment the reference count for KVM-HV/PR before the call to kvmppc_core_init_vm. If that function fails we need to decrement the refcount. Also remove the check on kvm_ops->owner because try_module_get can handle a NULL module. Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220125155735.1018683-5-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1d06b68739b0..9772b176e406 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -453,6 +453,8 @@ int kvm_arch_check_processor_compat(void *opaque) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { struct kvmppc_ops *kvm_ops = NULL; + int r; + /* * if we have both HV and PR enabled, default is HV */ @@ -474,11 +476,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else goto err_out; - if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + if (!try_module_get(kvm_ops->owner)) return -ENOENT; kvm->arch.kvm_ops = kvm_ops; - return kvmppc_core_init_vm(kvm); + r = kvmppc_core_init_vm(kvm); + if (r) + module_put(kvm_ops->owner); + return r; err_out: return -EINVAL; } From b53c86105919d4136591e3bee198a4829c0f5062 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 20 Jan 2021 15:18:47 -0300 Subject: [PATCH 023/380] powerpc: Fix debug print in smp_setup_cpu_maps When figuring out the number of threads, the debug message prints "1 thread" for the first iteration of the loop, instead of the actual number of threads calculated from the length of the "ibm,ppc-interrupt-server#s" property. * /cpus/PowerPC,POWER8@20... ibm,ppc-interrupt-server#s -> 1 threads <--- WRONG thread 0 -> cpu 0 (hard id 32) thread 1 -> cpu 1 (hard id 33) thread 2 -> cpu 2 (hard id 34) thread 3 -> cpu 3 (hard id 35) thread 4 -> cpu 4 (hard id 36) thread 5 -> cpu 5 (hard id 37) thread 6 -> cpu 6 (hard id 38) thread 7 -> cpu 7 (hard id 39) * /cpus/PowerPC,POWER8@28... ibm,ppc-interrupt-server#s -> 8 threads thread 0 -> cpu 8 (hard id 40) thread 1 -> cpu 9 (hard id 41) thread 2 -> cpu 10 (hard id 42) thread 3 -> cpu 11 (hard id 43) thread 4 -> cpu 12 (hard id 44) thread 5 -> cpu 13 (hard id 45) thread 6 -> cpu 14 (hard id 46) thread 7 -> cpu 15 (hard id 47) (...) Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210120181847.952106-1-farosas@linux.ibm.com --- arch/powerpc/kernel/setup-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f8da937df918..518ae5aa9410 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -456,8 +456,8 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) { - DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", - nthreads); + DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n", + (len / sizeof(int))); } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); intserv = of_get_property(dn, "reg", &len); From a1c414093370ed50e5b952d96d4ae775c7a18420 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Sun, 21 Mar 2021 03:09:32 +0530 Subject: [PATCH 024/380] powerpc/epapr: Fix parmeters typo s/parmeters/parameters/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210320213932.22697-1-unixbhaskar@gmail.com --- arch/powerpc/include/asm/epapr_hcalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c99ba08a408d..cdf3c6df5123 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -65,7 +65,7 @@ * but the gcc inline assembly syntax does not allow us to specify registers * on the clobber list that are also on the input/output list. Therefore, * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. + * parameters ("+r" and "=r") passed to the hypercall. * * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a * general rule, 'x' is the number of parameters passed to the assembly From 925f76c55784fdc17ab41aecde06b30439ceb73a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 May 2020 09:12:56 +0000 Subject: [PATCH 025/380] powerpc/spufs: adjust list element pointer type Other uses of &gang->aff_list_head, eg in spufs_assert_affinity, indicate that the list elements have type spu_context, not spu as used here. Change the type of tmp accordingly. This has no impact on the execution, because tmp is not used in the body of the loop. Fixes: c5fc8d2a92461 ("[CELL] cell: add placement computation for scheduling of affinity contexts") Signed-off-by: Julia Lawall Reviewed-by: Jeremy Kerr Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1588929176-28527-1-git-send-email-Julia.Lawall@inria.fr --- arch/powerpc/platforms/cell/spufs/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 369206489895..d058f6233e66 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -340,8 +340,7 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, static void aff_set_ref_point_location(struct spu_gang *gang) { int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; + struct spu_context *tmp, *ctx; mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; lowest_offset = 0; From be7be1c6c6f8bd348f0d83abe7a8f0e21bdaeac8 Mon Sep 17 00:00:00 2001 From: Mamatha Inamdar Date: Thu, 24 Sep 2020 10:44:16 +0530 Subject: [PATCH 026/380] PCI: rpaphp: Add MODULE_DESCRIPTION This patch adds a brief MODULE_DESCRIPTION to rpadlpar_io kernel modules (descriptions taken from Kconfig file). Signed-off-by: Mamatha Inamdar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200924051343.16052.9571.stgit@localhost.localdomain --- drivers/pci/hotplug/rpadlpar_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 0a3c80ba66be..e6991ff67526 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void) module_init(rpadlpar_io_init); module_exit(rpadlpar_io_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots"); From d4be60fe66b7380530868ceebe549f8eebccacc5 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 2 Feb 2022 05:51:23 +0000 Subject: [PATCH 027/380] powerpc/module_64: use module_init_section instead of patching names Without this patch, module init sections are disabled by patching their names in arch-specific code when they're loaded (which prevents code in layout_sections from finding init sections). This patch uses the new arch-specific module_init_section instead. This allows modules that have .init_array sections to have the initialisers properly called (on load, before init). Without this patch, the initialisers are not called because .init_array is renamed to _init_array, and thus isn't found by code in find_module_sections(). Signed-off-by: Wedson Almeida Filho Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220202055123.2144842-1-wedsonaf@google.com --- arch/powerpc/kernel/module_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..6a45e6ddbe58 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -277,6 +277,12 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, return NULL; } +bool module_init_section(const char *name) +{ + /* We don't handle .init for the moment: always return false. */ + return false; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -286,7 +292,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* Find .toc and .stubs sections, symtab and strtab */ for (i = 1; i < hdr->e_shnum; i++) { - char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { @@ -298,10 +303,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); - /* We don't handle .init for the moment: rename to _init */ - while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) - p[0] = '_'; - if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), From 0198322379c25215b2778482bf1221743a76e2b5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 2 Feb 2022 09:48:37 +0530 Subject: [PATCH 028/380] powerpc/perf: Don't use perf_hw_context for trace IMC PMU Trace IMC (In-Memory collection counters) in powerpc is useful for application level profiling. For trace_imc, presently task context (task_ctx_nr) is set to perf_hw_context. But perf_hw_context should only be used for CPU PMU. See commit 26657848502b ("perf/core: Verify we have a single perf_hw_context PMU"). So for trace_imc, even though it is per thread PMU, it is preferred to use sw_context in order to be able to do application level monitoring. Hence change the task_ctx_nr to use perf_sw_context. Fixes: 012ae244845f ("powerpc/perf: Trace imc PMU functions") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan [mpe: Update subject & incorporate notes into change log, reflow comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220202041837.65968-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/imc-pmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c3..e7583fbcc8fa 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } From e414e2938ee26e734f19e92a60cd090ebaff37e6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Feb 2022 13:31:16 +0100 Subject: [PATCH 029/380] powerpc/xive: Add some error handling code to 'xive_spapr_init()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'xive_irq_bitmap_add()' can return -ENOMEM. In this case, we should free the memory already allocated and return 'false' to the caller. Also add an error path which undoes the 'tima = ioremap(...)' Signed-off-by: Christophe JAILLET Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/564998101804886b151235c8a9f93020923bfd2c.1643718324.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/sysdev/xive/spapr.c | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 928f95004501..29456c255f9f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -67,6 +67,17 @@ static int __init xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + kfree(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -803,7 +814,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; - int i; + int i, err; if (xive_spapr_disabled()) return false; @@ -828,23 +839,26 @@ bool __init xive_spapr_init(void) } if (!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { @@ -855,10 +869,16 @@ bool __init xive_spapr_init(void) /* Initialize XIVE core with our backend */ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + goto err_mem_free; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); + return false; } machine_arch_initcall(pseries, xive_core_debug_init); From dd75080aa8409ce10d50fb58981c6b59bf8707d3 Mon Sep 17 00:00:00 2001 From: Chen Jingwen Date: Wed, 29 Dec 2021 11:52:26 +0800 Subject: [PATCH 030/380] powerpc/kasan: Fix early region not updated correctly The shadow's page table is not updated when PTE_RPN_SHIFT is 24 and PAGE_SHIFT is 12. It not only causes false positives but also false negative as shown the following text. Fix it by bringing the logic of kasan_early_shadow_page_entry here. 1. False Positive: ================================================================== BUG: KASAN: vmalloc-out-of-bounds in pcpu_alloc+0x508/0xa50 Write of size 16 at addr f57f3be0 by task swapper/0/1 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.0-12267-gdebe436e77c7 #1 Call Trace: [c80d1c20] [c07fe7b8] dump_stack_lvl+0x4c/0x6c (unreliable) [c80d1c40] [c02ff668] print_address_description.constprop.0+0x88/0x300 [c80d1c70] [c02ff45c] kasan_report+0x1ec/0x200 [c80d1cb0] [c0300b20] kasan_check_range+0x160/0x2f0 [c80d1cc0] [c03018a4] memset+0x34/0x90 [c80d1ce0] [c0280108] pcpu_alloc+0x508/0xa50 [c80d1d40] [c02fd7bc] __kmem_cache_create+0xfc/0x570 [c80d1d70] [c0283d64] kmem_cache_create_usercopy+0x274/0x3e0 [c80d1db0] [c2036580] init_sd+0xc4/0x1d0 [c80d1de0] [c00044a0] do_one_initcall+0xc0/0x33c [c80d1eb0] [c2001624] kernel_init_freeable+0x2c8/0x384 [c80d1ef0] [c0004b14] kernel_init+0x24/0x170 [c80d1f10] [c001b26c] ret_from_kernel_thread+0x5c/0x64 Memory state around the buggy address: f57f3a80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f57f3b00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >f57f3b80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ f57f3c00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f57f3c80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ================================================================== 2. False Negative (with KASAN tests): ================================================================== Before fix: ok 45 - kmalloc_double_kzfree # vmalloc_oob: EXPECTATION FAILED at lib/test_kasan.c:1039 KASAN failure expected in "((volatile char *)area)[3100]", but none occurred not ok 46 - vmalloc_oob not ok 1 - kasan ================================================================== After fix: ok 1 - kasan Fixes: cbd18991e24fe ("powerpc/mm: Fix an Oops in kasan_mmu_init()") Cc: stable@vger.kernel.org # 5.4.x Signed-off-by: Chen Jingwen Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211229035226.59159-1-chenjingwen6@huawei.com --- arch/powerpc/mm/kasan/kasan_init_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692..f3e4d069e0ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); From 9872cbfb4558bf68219c5a8a65fd5c29b593323d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Jan 2022 07:15:12 +0000 Subject: [PATCH 031/380] powerpc/603: Remove outdated comment Since commit 84de6ab0e904 ("powerpc/603: don't handle PAGE_ACCESSED in TLB miss handlers.") page table is not updated anymore by TLB miss handlers. Remove the comment. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/38b1ffefd2146fa56bf8aa605d476ad9736bbb37.1643613296.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..7e27b44c1d89 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -504,10 +504,6 @@ DataLoadTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ @@ -586,10 +582,6 @@ DataStoreTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ From 4634bf4455fe26f07dabf97c3585c9ccb86353c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Jan 2022 07:17:57 +0000 Subject: [PATCH 032/380] powerpc/603: Clear C bit when PTE is read only On book3s/32 MMU, PP bits don't offer kernel RO protection, kernel pages are always RW. However, on the 603 a page fault is always generated when the C bit (change bit = dirty bit) is not set. Enforce kernel RO protection by clearing C bit in TLB miss handler when the page doesn't have _PAGE_RW flag. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bbb13848ff0100a76ee9ea95118058c30ae95f2c.1643613343.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 7e27b44c1d89..d489b965f9a6 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -507,7 +507,9 @@ DataLoadTLBMiss: /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION From 535bda36dbf2d271f59e06fe252c32eff452666d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Jan 2022 08:16:48 +0000 Subject: [PATCH 033/380] powerpc/nohash: Remove pte_same() arch/powerpc/include/asm/nohash/{32/64}/pgtable.h has #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) include/linux/pgtable.h has #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { return pte_val(pte_a) == pte_val(pte_b); } #endif Remove the powerpc version which is similar to the generic one. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/83c97bd58a3596ef1b0ff28b1e41fd492d005520.1643616989.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index d959c2a73fbf..a0525765c7bb 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -338,9 +338,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & _PAGE_ACCESSED; } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 2816d158280a..a441056b3eba 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -281,9 +281,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ From 4291d085b0b07a78403e845c187428b038c901cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 30 Jan 2022 10:29:34 +0000 Subject: [PATCH 034/380] powerpc/32s: Make pte_update() non atomic on 603 core On 603 core, TLB miss handler don't do any change to the page tables so pte_update() doesn't need to be atomic. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cc89d3c11fc9c742d0df3454a657a3a00be24046.1643538554.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 37 ++++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index f8b94f78403f..772e00dc4ef1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,28 +298,35 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p unsigned long clr, unsigned long set, int huge) { pte_basic_t old; - unsigned long tmp; - __asm__ __volatile__( + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long tmp; + + asm volatile( #ifndef CONFIG_PTE_64BIT -"1: lwarx %0, 0, %3\n" -" andc %1, %0, %4\n" + "1: lwarx %0, 0, %3\n" + " andc %1, %0, %4\n" #else -"1: lwarx %L0, 0, %3\n" -" lwz %0, -4(%3)\n" -" andc %1, %L0, %4\n" + "1: lwarx %L0, 0, %3\n" + " lwz %0, -4(%3)\n" + " andc %1, %L0, %4\n" #endif -" or %1, %1, %5\n" -" stwcx. %1, 0, %3\n" -" bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) + " or %1, %1, %5\n" + " stwcx. %1, 0, %3\n" + " bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) #ifndef CONFIG_PTE_64BIT - : "r" (p), + : "r" (p), #else - : "b" ((unsigned long)(p) + 4), + : "b" ((unsigned long)(p) + 4), #endif - "r" (clr), "r" (set), "m" (*p) - : "cc" ); + "r" (clr), "r" (set), "m" (*p) + : "cc" ); + } else { + old = pte_val(*p); + + *p = __pte((old & ~(pte_basic_t)clr) | set); + } return old; } From a084c44eaa6a618415e1bcb076c7e1c088bcd54c Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 26 Oct 2021 16:58:31 -0400 Subject: [PATCH 035/380] s390-vfio-ap: introduces s390 kernel debug feature for vfio_ap device driver Sets up an s390dbf debug log for the vfio_ap device driver for logging events occurring during the lifetime of the driver. Signed-off-by: Tony Krowiak Reviewed-by: Harald Freudenberger Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_debug.h | 32 +++++++++++++++++++++++++++++ drivers/s390/crypto/vfio_ap_drv.c | 21 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 drivers/s390/crypto/vfio_ap_debug.h diff --git a/drivers/s390/crypto/vfio_ap_debug.h b/drivers/s390/crypto/vfio_ap_debug.h new file mode 100644 index 000000000000..180156121421 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_debug.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2022 + * + * Author(s): Tony Krowiak + */ +#ifndef VFIO_AP_DEBUG_H +#define VFIO_AP_DEBUG_H + +#include + +#define DBF_ERR 3 /* error conditions */ +#define DBF_WARN 4 /* warning conditions */ +#define DBF_INFO 5 /* informational */ +#define DBF_DEBUG 6 /* for debugging only */ + +#define DBF_MAX_SPRINTF_ARGS 10 + +#define VFIO_AP_DBF(...) \ + debug_sprintf_event(vfio_ap_dbf_info, ##__VA_ARGS__) +#define VFIO_AP_DBF_ERR(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_ERR, ##__VA_ARGS__) +#define VFIO_AP_DBF_WARN(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_WARN, ##__VA_ARGS__) +#define VFIO_AP_DBF_INFO(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_INFO, ##__VA_ARGS__) +#define VFIO_AP_DBF_DBG(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__) + +extern debug_info_t *vfio_ap_dbf_info; + +#endif /* VFIO_AP_DEBUG_H */ diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index e043ae236630..82b6ffee06c5 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -14,6 +14,7 @@ #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_ROOT_NAME "vfio_ap" #define VFIO_AP_DEV_NAME "matrix" @@ -26,6 +27,7 @@ MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018"); MODULE_LICENSE("GPL v2"); struct ap_matrix_dev *matrix_dev; +debug_info_t *vfio_ap_dbf_info; /* Only type 10 adapters (CEX4 and later) are supported * by the AP matrix device driver @@ -250,10 +252,28 @@ static void vfio_ap_matrix_dev_destroy(void) root_device_unregister(root_device); } +static int __init vfio_ap_dbf_info_init(void) +{ + vfio_ap_dbf_info = debug_register("vfio_ap", 1, 1, + DBF_MAX_SPRINTF_ARGS * sizeof(long)); + + if (!vfio_ap_dbf_info) + return -ENOENT; + + debug_register_view(vfio_ap_dbf_info, &debug_sprintf_view); + debug_set_level(vfio_ap_dbf_info, DBF_WARN); + + return 0; +} + static int __init vfio_ap_init(void) { int ret; + ret = vfio_ap_dbf_info_init(); + if (ret) + return ret; + /* If there are no AP instructions, there is nothing to pass through. */ if (!ap_instructions_available()) return -ENODEV; @@ -284,6 +304,7 @@ static void __exit vfio_ap_exit(void) vfio_ap_mdev_unregister(); ap_driver_unregister(&vfio_ap_drv); vfio_ap_matrix_dev_destroy(); + debug_unregister(vfio_ap_dbf_info); } module_init(vfio_ap_init); From d5c49db21b743658db03d75cbf410596c2c90371 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 11 Jan 2022 10:19:16 -0500 Subject: [PATCH 036/380] MAINTAINERS: update file path for S390 VFIO AP DRIVER Changed the MAINTAINERS file to include the new drivers/s390/crypto/vfio_ap_debug.h file path. Signed-off-by: Tony Krowiak Acked-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69a2935daf6c..5e4d1be85c22 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16893,9 +16893,7 @@ L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: Documentation/s390/vfio-ap.rst -F: drivers/s390/crypto/vfio_ap_drv.c -F: drivers/s390/crypto/vfio_ap_ops.c -F: drivers/s390/crypto/vfio_ap_private.h +F: drivers/s390/crypto/vfio_ap* S390 VFIO-CCW DRIVER M: Eric Farman From 68f554b7d250b632d5fa17063f84c618c48b32a8 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Thu, 4 Nov 2021 16:41:37 -0400 Subject: [PATCH 037/380] s390/vfio-ap: add s390dbf logging to the handle_pqap function This patch adds s390dbf logging to the function that handles interception of the PQAP(AQIC) instruction. Several items of data are validated before ultimately calling the functions that execute the PQAP(AQIC) instruction on behalf of the guest to which the queue for which interrupts are being enabled or disabled is attached. Currently, the handle_pqap function sets status response code 01 (queue not available) in the status word that is normally returned from the PQAP(AQIC) instruction under the following conditions: * Set when the function pointer to the handler is not set in the kvm_s390_crypto object (i.e., the PQAP hook is not registered). * Set when the KVM pointer is not set in the ap_matrix_mdev object (i.e., the matrix mdev is not passed through to a guest). * Set when the queue for which interrupts are being enabled or disabled is either not bound to the vfio_ap device driver or not assigned to the matrix mdev. Setting the response code returned to userspace without also logging a message in the kernel makes it impossible to determine whether the response was due to an error detected by the vfio_ap device driver or because the response code was returned by the firmware in response to the PQAP(AQIC) instruction, so this patch logs a message to the s390dbf log for the vfio_ap device driver for each of the situations described above. Signed-off-by: Tony Krowiak Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 78 +++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 8 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index abc0b9b88386..537d7dd158a7 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" @@ -256,6 +258,48 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, return status; } +/** + * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array + * of big endian elements that can be passed by + * value to an s390dbf sprintf event function to + * format a UUID string. + * + * @guid: the object containing the little endian guid + * @uuid: a six-element array of long values that can be passed by value as + * arguments for a formatting string specifying a UUID. + * + * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf + * event functions if the memory for the passed string is available as long as + * the debug feature exists. Since a mediated device can be removed at any + * time, it's name can not be used because %s passes the reference to the string + * in memory and the reference will go stale once the device is removed . + * + * The s390dbf string formatting function allows a maximum of 9 arguments for a + * message to be displayed in the 'sprintf' view. In order to use the bytes + * comprising the mediated device's UUID to display the mediated device name, + * they will have to be converted into an array whose elements can be passed by + * value to sprintf. For example: + * + * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 } + * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804 + * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 } + * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx" + */ +static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) +{ + /* + * The input guid is ordered in little endian, so it needs to be + * reordered for displaying a UUID as a string. This specifies the + * guid indices in proper order. + */ + uuid[0] = le32_to_cpup((__le32 *)guid); + uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]); + uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]); + uuid[3] = *((__u16 *)&guid->b[8]); + uuid[4] = *((__u16 *)&guid->b[10]); + uuid[5] = *((__u32 *)&guid->b[12]); +} + /** * handle_pqap - PQAP instruction callback * @@ -281,30 +325,48 @@ static int handle_pqap(struct kvm_vcpu *vcpu) { uint64_t status; uint16_t apqn; + unsigned long uuid[6]; struct vfio_ap_queue *q; struct ap_queue_status qstatus = { .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; struct ap_matrix_mdev *matrix_mdev; - /* If we do not use the AIV facility just go to userland */ - if (!(vcpu->arch.sie_block->eca & ECA_AIV)) - return -EOPNOTSUPP; - apqn = vcpu->run->s.regs.gprs[0] & 0xffff; - mutex_lock(&matrix_dev->lock); - if (!vcpu->kvm->arch.crypto.pqap_hook) + /* If we do not use the AIV facility just go to userland */ + if (!(vcpu->arch.sie_block->eca & ECA_AIV)) { + VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n", + __func__, apqn, vcpu->arch.sie_block->eca); + + return -EOPNOTSUPP; + } + + mutex_lock(&matrix_dev->lock); + if (!vcpu->kvm->arch.crypto.pqap_hook) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n", + __func__, apqn); goto out_unlock; + } + matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, struct ap_matrix_mdev, pqap_hook); /* If the there is no guest using the mdev, there is nothing to do */ - if (!matrix_mdev->kvm) + if (!matrix_mdev->kvm) { + vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid); + VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n", + __func__, uuid[0], uuid[1], uuid[2], + uuid[3], uuid[4], uuid[5], apqn); goto out_unlock; + } q = vfio_ap_get_queue(matrix_mdev, apqn); - if (!q) + if (!q) { + VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n", + __func__, AP_QID_CARD(apqn), + AP_QID_QUEUE(apqn)); goto out_unlock; + } status = vcpu->run->s.regs.gprs[1]; From 783f0a3ccd79477b159c6ef2104f9cff765215d9 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 4 Jan 2022 15:44:13 -0500 Subject: [PATCH 038/380] s390/vfio-ap: add s390dbf logging to the vfio_ap_irq_enable function This patch adds s390dbf logging to the function that executes the PQAP(AQIC) instruction on behalf of the guest to which the queue for which interrupts are being enabled or disabled is attached. Currently, the vfio_ap_irq_enable function sets status response code 06 (notification indicator byte address (nib) invalid) in the status word when the vfio_pin_pages function - called to pin the page containing the nib - returns an error or a different number of pages pinned than requested. Setting the response code returned to userspace without also logging a message in the kernel makes it impossible to determine whether the response was due to an error detected by the vfio_ap device driver or because the response code was returned by the firmware in response to the PQAP(AQIC) instruction. In addition to logging a warning for the situation above, this patch adds the following: * A function to validate the nib address invoked prior to calling the vfio_pin_pages function. This allows for logging a message informing the reader of the reason the page containing the nib can not be pinned if the nib address is not valid. Response code 06 (invalid nib address) will be set in the status word returned to the guest from the instruction. * Checks the return value from the kvm_s390_gisc_register and logs a message informing the reader of the failure. Status response code 08 (invalid gisa) will be set in the status word returned to the guest from the PQAP(AQIC) instruction. * Checks the status response code returned from execution of the PQAP(AQIC) instruction and if it indicates an error, logs a message informing the reader. Signed-off-by: Tony Krowiak Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/vfio_ap_ops.c | 77 ++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 95b577754b35..714c7583af5e 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -47,6 +47,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESPONSE_BUSY 0x05 #define AP_RESPONSE_INVALID_ADDRESS 0x06 #define AP_RESPONSE_OTHERWISE_CHANGED 0x07 +#define AP_RESPONSE_INVALID_GISA 0x08 #define AP_RESPONSE_Q_FULL 0x10 #define AP_RESPONSE_NO_PENDING_REPLY 0x10 #define AP_RESPONSE_INDEX_TOO_BIG 0x11 diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 537d7dd158a7..7dc26365e29a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -185,12 +185,44 @@ end_free: return status; } +/** + * vfio_ap_validate_nib - validate a notification indicator byte (nib) address. + * + * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. + * @nib: the location for storing the nib address. + * @g_pfn: the location for storing the page frame number of the page containing + * the nib. + * + * When the PQAP(AQIC) instruction is executed, general register 2 contains the + * address of the notification indicator byte (nib) used for IRQ notification. + * This function parses the nib from gr2 and calculates the page frame + * number for the guest of the page containing the nib. The values are + * stored in @nib and @g_pfn respectively. + * + * The g_pfn of the nib is then validated to ensure the nib address is valid. + * + * Return: returns zero if the nib address is a valid; otherwise, returns + * -EINVAL. + */ +static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, + unsigned long *g_pfn) +{ + *nib = vcpu->run->s.regs.gprs[2]; + *g_pfn = *nib >> PAGE_SHIFT; + + if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) + return -EINVAL; + + return 0; +} + /** * vfio_ap_irq_enable - Enable Interruption for a APQN * * @q: the vfio_ap_queue holding AQIC parameters * @isc: the guest ISC to register with the GIB interface - * @nib: the notification indicator byte to pin. + * @vcpu: the vcpu object containing the registers specifying the parameters + * passed to the PQAP(AQIC) instruction. * * Pin the NIB saved in *q * Register the guest ISC to GIB interface and retrieve the @@ -206,22 +238,36 @@ end_free: */ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, - unsigned long nib) + struct kvm_vcpu *vcpu) { + unsigned long nib; struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + int nisc; struct kvm *kvm; unsigned long h_nib, g_pfn, h_pfn; int ret; - g_pfn = nib >> PAGE_SHIFT; + /* Verify that the notification indicator byte address is valid */ + if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { + VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, nib, g_pfn, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_ADDRESS; + return status; + } + ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: break; default: + VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," + "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, ret, nib, g_pfn, q->apqn); + status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; } @@ -231,7 +277,17 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; - aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc); + + nisc = kvm_s390_gisc_register(kvm, isc); + if (nisc < 0) { + VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", + __func__, nisc, isc, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_GISA; + return status; + } + + aqic_gisa.isc = nisc; aqic_gisa.ir = 1; aqic_gisa.gisa = (uint64_t)gisa >> 4; @@ -255,6 +311,16 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, break; } + if (status.response_code != AP_RESPONSE_NORMAL) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: " + "zone=%#x, ir=%#x, gisc=%#x, f=%#x," + "gisa=%#x, isc=%#x, apqn=%#04x\n", + __func__, status.response_code, + aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc, + aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc, + q->apqn); + } + return status; } @@ -372,8 +438,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) /* If IR bit(16) is set we enable the interrupt */ if ((status >> (63 - 16)) & 0x01) - qstatus = vfio_ap_irq_enable(q, status & 0x07, - vcpu->run->s.regs.gprs[2]); + qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu); else qstatus = vfio_ap_irq_disable(q); From f36e7c9845d998d1e4100b46cec9c678bff69a24 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Jan 2022 12:10:57 +0100 Subject: [PATCH 039/380] s390: remove invalid email address of Heiko Carstens Remove my old invalid email address which can be found in a couple of files. Instead of updating it, just remove my contact data completely from source files. We have git and other tools which allow to figure out who is responsible for what with recent contact data. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 1 - arch/s390/include/asm/crw.h | 1 - arch/s390/include/asm/nmi.h | 1 - arch/s390/include/asm/sclp.h | 1 - arch/s390/include/asm/smp.h | 1 - arch/s390/kernel/base.S | 3 +-- arch/s390/kernel/cache.c | 1 - arch/s390/kernel/early.c | 1 - arch/s390/kernel/entry.S | 1 - arch/s390/kernel/ftrace.c | 3 +-- arch/s390/kernel/head64.S | 1 - arch/s390/kernel/ipl.c | 1 - arch/s390/kernel/machine_kexec.c | 1 - arch/s390/kernel/mcount.S | 2 -- arch/s390/kernel/nmi.c | 1 - arch/s390/kernel/relocate_kernel.S | 3 +-- arch/s390/kernel/smp.c | 1 - arch/s390/kernel/stacktrace.c | 1 - arch/s390/kernel/topology.c | 1 - arch/s390/kvm/kvm-s390.c | 1 - arch/s390/lib/delay.c | 1 - arch/s390/mm/maccess.c | 2 -- arch/s390/mm/vmem.c | 1 - drivers/s390/char/sclp_cmd.c | 3 +-- drivers/s390/char/sclp_config.c | 1 - drivers/s390/cio/crw.c | 1 - tools/perf/arch/s390/util/dwarf-regs.c | 3 +-- 27 files changed, 5 insertions(+), 34 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 3a252d140c55..666692429db0 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * * There are 5 different IPL methods * 1) load the image directly into ram at address 0 and do an PSW restart diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index c6ebfd31f1db..97456d98fe76 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -5,7 +5,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_CRW_H diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 55c9051dddfd..292083083830 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_NMI_H diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c68ea35de498..947ef51526a0 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens */ #ifndef _ASM_S390_SCLP_H diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index f16f4d054ae2..7f5d4763357b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -3,7 +3,6 @@ * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , */ #ifndef __ASM_SMP_H #define __ASM_SMP_H diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index d255c69c1779..f7fe4033df36 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -3,8 +3,7 @@ * arch/s390/kernel/base.S * * Copyright IBM Corp. 2006, 2007 - * Author(s): Heiko Carstens - * Michael Holzheu + * Author(s): Michael Holzheu */ #include diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index d66825e53fce..8a9c3bf69f48 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -3,7 +3,6 @@ * Extract CPU cache information and expose them via sysfs. * * Copyright IBM Corp. 2012 - * Author(s): Heiko Carstens */ #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3cdf68c53614..b8cfac4918d9 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -2,7 +2,6 @@ /* * Copyright IBM Corp. 2007, 2009 * Author(s): Hongjie Yang , - * Heiko Carstens */ #define KMSG_COMPONENT "setup" diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 01bae1d51113..dc7347e43ec2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -6,7 +6,6 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Heiko Carstens */ #include diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 21d62d8b6b9a..61f72c7fb0da 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -4,8 +4,7 @@ * * Copyright IBM Corp. 2009,2014 * - * Author(s): Heiko Carstens , - * Martin Schwidefsky + * Author(s): Martin Schwidefsky */ #include diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 42f9a325a257..d7b8b6ad574d 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 5ad1dde23dc5..a93142785bbc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2005, 2012 * Author(s): Michael Holzheu - * Heiko Carstens * Volker Sameske */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index a16467b3825e..088d57a3083f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -3,7 +3,6 @@ * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, - * Heiko Carstens * Michael Holzheu */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 39bcc0e39a10..a06eb86c178f 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -2,8 +2,6 @@ /* * Copyright IBM Corp. 2008, 2009 * - * Author(s): Heiko Carstens , - * */ #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 651a51914e34..fc60e29b8690 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index fe396673e8a6..9438368c3632 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 2005 * - * Author(s): Rolf Adelsberger, - * Heiko Carstens + * Author(s): Rolf Adelsberger * */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2bad902d8437..b214af5164bd 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -5,7 +5,6 @@ * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index b7bb1981e9ee..7ee455e8e3d5 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -3,7 +3,6 @@ * Stack trace management functions * * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens */ #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 58f8291950cb..c6eecd4a5302 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "cpu" diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 577f1ead6a51..fd65ab42443f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -6,7 +6,6 @@ * * Author(s): Carsten Otte * Christian Borntraeger - * Heiko Carstens * Christian Ehrhardt * Jason J. Herne */ diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index bccbf394ae7e..f7f5adea8940 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 1999, 2008 * Author(s): Martin Schwidefsky , - * Heiko Carstens , */ #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 9663ce3625bc..c1ed1f51c25d 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -4,8 +4,6 @@ * * Copyright IBM Corp. 2009, 2015 * - * Author(s): Heiko Carstens , - * */ #include diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7d9705eeb02f..5410775639c5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens */ #include diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 998933e83610..15971997cfe2 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 2007,2012 * - * Author(s): Heiko Carstens , - * Peter Oberparleiter + * Author(s): Peter Oberparleiter */ #define KMSG_COMPONENT "sclp_cmd" diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index c365110f2dae..10383e936461 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "sclp_config" diff --git a/drivers/s390/cio/crw.c b/drivers/s390/cio/crw.c index fc285ca41141..7b02a6349c4d 100644 --- a/drivers/s390/cio/crw.c +++ b/drivers/s390/cio/crw.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index a8ace5cc6301..dfddb3099bfa 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -3,8 +3,7 @@ * Mapping of DWARF debug register numbers into register names. * * Copyright IBM Corp. 2010, 2017 - * Author(s): Heiko Carstens , - * Hendrik Brueckner + * Author(s): Hendrik Brueckner * */ From 98c0d24d1e7576a853b0812d95e599ba1a909e21 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sun, 23 Jan 2022 20:20:09 +0100 Subject: [PATCH 040/380] s390/ftrace: verify opcode before applying patch commit 72b3942a173c ("scripts: ftrace - move the sort-processing in ftrace_init") had the unexpected side effect that wrong code locations were patched. To prevent this from happening again, verify the opcode before patching it. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ftrace.c | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 61f72c7fb0da..1db9cc795034 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -164,31 +164,32 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } -static void brcl_disable(void *brcl) +static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) { - u8 op = 0x04; /* set mask field to zero */ + u16 old; + u8 op; - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + if (get_kernel_nofault(old, addr)) + return -EFAULT; + if (old != expected) + return -EINVAL; + /* set mask field to all ones or zeroes */ + op = enable ? 0xf4 : 0x04; + s390_kernel_write((char *)addr + 1, &op, sizeof(op)); + return 0; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - brcl_disable((void *)rec->ip); - return 0; -} - -static void brcl_enable(void *brcl) -{ - u8 op = 0xf4; /* set mask field to all ones */ - - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + /* Expect brcl 0xf,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - brcl_enable((void *)rec->ip); - return 0; + /* Expect brcl 0x0,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true); } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -261,14 +262,24 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - brcl_disable(ftrace_graph_caller); + int rc; + + /* Expect brc 0xf,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); + if (rc) + return rc; text_poke_sync_lock(); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - brcl_enable(ftrace_graph_caller); + int rc; + + /* Expect brc 0x0,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (rc) + return rc; text_poke_sync_lock(); return 0; } From 1f231e295024d88950c7e4b91f91a47bbeff1637 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 21 Jan 2022 10:44:25 +0100 Subject: [PATCH 041/380] s390/maccess: fix absolute lowcore virtual vs physical address confusion Due to historical reasons memcpy_absolute() and friend functions misuse the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Reviewed-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/mm/maccess.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index c1ed1f51c25d..d4d311cb2bb5 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -199,15 +199,15 @@ out: /* * Check if physical address is within prefix or zero page */ -static int is_swapped(unsigned long addr) +static int is_swapped(phys_addr_t addr) { - unsigned long lc; + phys_addr_t lc; int cpu; if (addr < sizeof(struct lowcore)) return 1; for_each_online_cpu(cpu) { - lc = (unsigned long) lowcore_ptr[cpu]; + lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; return 1; @@ -223,7 +223,8 @@ static int is_swapped(unsigned long addr) */ void *xlate_dev_mem_ptr(phys_addr_t addr) { - void *bounce = (void *) addr; + void *ptr = phys_to_virt(addr); + void *bounce = ptr; unsigned long size; cpus_read_lock(); @@ -232,7 +233,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) size = PAGE_SIZE - (addr & ~PAGE_MASK); bounce = (void *) __get_free_page(GFP_ATOMIC); if (bounce) - memcpy_absolute(bounce, (void *) addr, size); + memcpy_absolute(bounce, ptr, size); } preempt_enable(); cpus_read_unlock(); @@ -242,8 +243,8 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) /* * Free converted buffer for /dev/mem access (if necessary) */ -void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf) +void unxlate_dev_mem_ptr(phys_addr_t addr, void *ptr) { - if ((void *) addr != buf) - free_page((unsigned long) buf); + if (addr != virt_to_phys(ptr)) + free_page((unsigned long)ptr); } From 628c66942e233d73def54fa458641cfc96be6660 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 26 Jan 2022 13:47:58 +0100 Subject: [PATCH 042/380] s390/sclp_sdias: fix sclp_sdias_copy() virtual vs physical address confusion Due to historical reasons sclp_sdias_copy() misuses the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_sdias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 215d4b4a5ff5..e915a343fcf5 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -184,7 +184,7 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) sccb->evbuf.asa_size = SDIAS_ASA_SIZE_64; sccb->evbuf.event_status = 0; sccb->evbuf.blk_cnt = nr_blks; - sccb->evbuf.asa = (unsigned long)dest; + sccb->evbuf.asa = __pa(dest); sccb->evbuf.fbn = start_blk; sccb->evbuf.lbn = 0; sccb->evbuf.dbs = 1; From 9de209c7d584d6e06ad92f120d83d4f27c200497 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 26 Jan 2022 13:47:59 +0100 Subject: [PATCH 043/380] s390/dump: fix os_info virtual vs physical address confusion Due to historical reasons os_info handling functions misuse the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/os_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 4bef35b79b93..fa2be71b2c6b 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -45,7 +45,7 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size) */ void os_info_entry_add(int nr, void *ptr, u64 size) { - os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].addr = __pa(ptr); os_info.entry[nr].size = size; os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0); os_info.csum = os_info_csum(&os_info); @@ -62,7 +62,7 @@ void __init os_info_init(void) os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); + mem_assign_absolute(S390_lowcore.os_info, __pa(ptr)); } #ifdef CONFIG_CRASH_DUMP From ba2d394c60ad4b5d70cd449b9e6f6380c2e36a50 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Feb 2022 10:56:07 +0100 Subject: [PATCH 044/380] s390/lgr: use simple assignment instead of memcpy It is quite pointless to use memcpy to copy two bytes, besides that this construct will also partially remove type and size sanity checks. Therefore simply use an assignment. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/lgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 3b895971c3d0..6652e54cf3db 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -88,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) if (stsi(si, 2, 2, 2)) return; cpascii(lgr_info->name, si->name, sizeof(si->name)); - memcpy(&lgr_info->lpar_number, &si->lpar_number, - sizeof(lgr_info->lpar_number)); + lgr_info->lpar_number = si->lpar_number; } /* From 7c5ed82b800d8615cdda00729e7b62e5899f0b13 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 4 Feb 2022 14:26:01 +0530 Subject: [PATCH 045/380] powerpc: Set crashkernel offset to mid of RMA region On large config LPARs (having 192 and more cores), Linux fails to boot due to insufficient memory in the first memblock. It is due to the memory reservation for the crash kernel which starts at 128MB offset of the first memblock. This memory reservation for the crash kernel doesn't leave enough space in the first memblock to accommodate other essential system resources. The crash kernel start address was set to 128MB offset by default to ensure that the crash kernel get some memory below the RMA region which is used to be of size 256MB. But given that the RMA region size can be 512MB or more, setting the crash kernel offset to mid of RMA size will leave enough space for the kernel to allocate memory for other system resources. Since the above crash kernel offset change is only applicable to the LPAR platform, the LPAR feature detection is pushed before the crash kernel reservation. The rest of LPAR specific initialization will still be done during pseries_probe_fw_features as usual. This patch is dependent on changes to paca allocation for boot CPU. It expect boot CPU to discover 1T segment support which is introduced by the patch posted here: https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-January/239175.html Reported-by: Abdul haleem Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220204085601.107257-1-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 6 ++++++ arch/powerpc/kexec/core.c | 15 +++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 733e6ef36758..1f42aabbbab3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1313,6 +1313,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 8b68d9f91a03..abf5897ae88c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -134,11 +134,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif From d6a6c725a20467f52a41270bdaad9565c66f3b7a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Sep 2021 11:18:38 +0000 Subject: [PATCH 046/380] powerpc/machdep: Remove CONFIG_PPC_HAS_FEATURE_CALLS Last user was removed by commit 7bbd827750e6 ("[PATCH] ppc64: very basic desktop g5 sound support"). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/803779fffb4ee0801746b2173d37cea3b273f821.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/machdep.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e821037f74f0..75687e1f994a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,11 +10,6 @@ #include -/* We export this macro for external modules like Alsa to know if - * ppc_md.feature_call is implemented or not - */ -#define CONFIG_PPC_HAS_FEATURE_CALLS - struct pt_regs; struct pci_bus; struct device_node; From e6d03ac156db84422519aa8628efc210d24bf889 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Sep 2021 11:18:40 +0000 Subject: [PATCH 047/380] powerpc/machdep: Move sys_ctrler_t definition into pmac_feature.h sys_ctrler_t definitions are tied to pmac. Move it into pmac_feature.h Signed-off-by: Christophe Leroy [mpe: Move to pmac_feature.h to fix some build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7dd5ead4bbca749e2da089ff6fe2b1878d6bf40e.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/machdep.h | 15 --------------- arch/powerpc/include/asm/pmac_feature.h | 12 ++++++++++++ arch/powerpc/platforms/powermac/pmac.h | 2 ++ drivers/macintosh/via-cuda.c | 1 + sound/ppc/pmac.h | 1 + 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 75687e1f994a..06ac7ef07c85 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -230,21 +230,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -#ifdef CONFIG_PPC_PMAC -/* - * Power macintoshes have either a CUDA, PMU or SMU controlling - * system reset, power, NVRAM, RTC. - */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, - SYS_CTRLER_SMU = 3, -} sys_ctrler_t; -extern sys_ctrler_t sys_ctrler; - -#endif /* CONFIG_PPC_PMAC */ - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index e08e829261b6..2495866f2e97 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -401,5 +401,17 @@ extern u32 __iomem *uninorth_base; */ extern int pmac_get_uninorth_variant(void); +/* + * Power macintoshes have either a CUDA, PMU or SMU controlling + * system reset, power, NVRAM, RTC. + */ +typedef enum sys_ctrler_kind { + SYS_CTRLER_UNKNOWN = 0, + SYS_CTRLER_CUDA = 1, + SYS_CTRLER_PMU = 2, + SYS_CTRLER_SMU = 3, +} sys_ctrler_t; +extern sys_ctrler_t sys_ctrler; + #endif /* __ASM_POWERPC_PMAC_FEATURE_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 29d2036dcc9d..ba8d4e97095b 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -5,6 +5,8 @@ #include #include +#include + /* * Declaration for the various functions exported by the * pmac_* files. Mostly for use by pmac_setup diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index cd267392289c..3d0d0b9d471d 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -21,6 +21,7 @@ #ifdef CONFIG_PPC #include #include +#include #else #include #include diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h index a758caf689d2..b6f454130463 100644 --- a/sound/ppc/pmac.h +++ b/sound/ppc/pmac.h @@ -26,6 +26,7 @@ #include #include #include +#include /* maximum number of fragments */ #define PMAC_MAX_FRAGS 32 From fae65a9ac8fd2221dbf034019fa18d72b2b0c8e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Sep 2021 11:18:42 +0000 Subject: [PATCH 048/380] powerpc/mpc86xx_hpcn: Remove obsolete statement Comment says "Delete this in 2.6.27". Do so now. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a47bb6a69c68156bc2d555152dab5a23733856b7.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index b697918b727d..a6b8ffcbf01a 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -95,12 +95,6 @@ static int __init mpc86xx_hpcn_probe(void) if (of_machine_is_compatible("fsl,mpc8641hpcn")) return 1; /* Looks good */ - /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ - if (of_machine_is_compatible("mpc86xx")) { - pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); - return 1; - } - return 0; } From 66ada2907864cafa4578b92926cb8bc0a4bc8c9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Sep 2021 11:18:43 +0000 Subject: [PATCH 049/380] powerpc/corenet: Change criteria to set MPIC_ENABLE_COREINT Don't use ppc_md function comparison. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c8ef82ee5f2713f4c36eb5d2d49b0905c7472801.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/85xx/corenet_generic.c | 2 +- arch/powerpc/sysdev/mpic.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 8d6029099848..17ae75d62518 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -37,7 +37,7 @@ void __init corenet_gen_pic_init(void) unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET; - if (ppc_md.get_irq == mpic_get_coreint_irq) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE)) flags |= MPIC_ENABLE_COREINT; mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index d5cb48b61bbd..dbcbaa4c0663 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1404,10 +1404,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, * with device trees generated by older versions of QEMU. * fsl_version will be zero if MPIC_FSL is not set. */ - if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) { - WARN_ON(ppc_md.get_irq != mpic_get_coreint_irq); + if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) ppc_md.get_irq = mpic_get_irq; - } /* Reset */ From 12318163737cd8808d13faa6e2393774191a6182 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 30 Nov 2021 13:04:49 +0100 Subject: [PATCH 050/380] powerpc/32: Remove remaining .stabs annotations STABS debug format has been superseded long time ago by DWARF. Remove the few remaining .stabs annotations from old 32 bits code. Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/68932ec2ba6b868d35006b96e90f0890f3da3c05.1638273868.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/kernel/head_book3s_32.S | 3 --- arch/powerpc/lib/checksum_32.S | 3 --- arch/powerpc/lib/copy_32.S | 3 --- 4 files changed, 10 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f21e6bde17a1..c4b074263bf9 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -208,7 +208,6 @@ GLUE(.,name): n: #define _GLOBAL(n) \ - .stabs __stringify(n:F-1),N_FUN,0,0,n;\ .globl n; \ n: diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index d489b965f9a6..94f88dc8269d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,9 +50,6 @@ mtspr SPRN_DBAT##n##L,RB __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_book3s_32.S",N_SO,0,0,0f -0: _ENTRY(_stext); /* diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 27d9070617df..4541e8e29467 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -116,9 +116,6 @@ EXPORT_SYMBOL(__csum_partial) EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3bcf4786e4a..3e9c27c46331 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT From 27e21e8f128a56d3462f0fe2fd3a59c02cc002b1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 30 Nov 2021 13:04:50 +0100 Subject: [PATCH 051/380] powerpc/32: Remove _ENTRY() macro _ENTRY() is now redundant with _GLOBAL(). Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62a35f8dde2bb74c8d0d7a5430cce07a5a3a6fb6.1638273868.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 4 ---- arch/powerpc/kernel/head_40x.S | 18 +++++++++--------- arch/powerpc/kernel/head_44x.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 8 ++++---- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c4b074263bf9..3c06a33b5da4 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -203,10 +203,6 @@ GLUE(.,name): #else /* 32-bit */ -#define _ENTRY(n) \ - .globl n; \ -n: - #define _GLOBAL(n) \ .globl n; \ n: diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b6c6d1de5fd5..088f500896c7 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -53,8 +53,8 @@ * This is all going to change RSN when we add bi_recs....... -- Dan */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); mr r31,r3 /* save device tree ptr */ @@ -82,19 +82,19 @@ turn_on_mmu: */ . = 0xc0 crit_save: -_ENTRY(crit_r10) +_GLOBAL(crit_r10) .space 4 -_ENTRY(crit_r11) +_GLOBAL(crit_r11) .space 4 -_ENTRY(crit_srr0) +_GLOBAL(crit_srr0) .space 4 -_ENTRY(crit_srr1) +_GLOBAL(crit_srr1) .space 4 -_ENTRY(crit_r1) +_GLOBAL(crit_r1) .space 4 -_ENTRY(crit_dear) +_GLOBAL(crit_dear) .space 4 -_ENTRY(crit_esr) +_GLOBAL(crit_esr) .space 4 /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b73a56466903..f15cb9fdb692 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -52,8 +52,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0d073b9fd52c..0b05f2be66b9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -53,8 +53,8 @@ #define PAGE_SHIFT_8M 23 __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* MPC8xx * This port was done on an MBX board with an 860. Right now I only diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 94f88dc8269d..519b60695167 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,13 +50,13 @@ mtspr SPRN_DBAT##n##L,RB __HEAD -_ENTRY(_stext); +_GLOBAL(_stext); /* * _start is defined this way because the XCOFF loader in the OpenFirmware * on the powermac expects the entry point to be a procedure descriptor. */ -_ENTRY(_start); +_GLOBAL(_start); /* * These are here for legacy reasons, the kernel used to * need to look like a coff function entry for the pmac @@ -775,7 +775,7 @@ relocate_kernel: * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. */ -_ENTRY(copy_and_flush) +_GLOBAL(copy_and_flush) addi r5,r5,-4 addi r6,r6,-4 4: li r0,L1_CACHE_BYTES/4 @@ -1073,7 +1073,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -_ENTRY(update_bats) +_GLOBAL(update_bats) lis r4, 1f@h ori r4, r4, 1f@l tophys(r4, r4) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ac2b4dcf5fd3..f0db4f52bc00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -54,8 +54,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs @@ -154,7 +154,7 @@ _ENTRY(_start); * if needed */ -_ENTRY(__early_start) +_GLOBAL(__early_start) LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) lwz r20,0(r20) From 2f293651eca3eacaeb56747dede31edace7329d2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:02 +0000 Subject: [PATCH 052/380] livepatch: Fix build failure on 32 bits processors Trying to build livepatch on powerpc/32 results in: kernel/livepatch/core.c: In function 'klp_resolve_symbols': kernel/livepatch/core.c:221:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 221 | sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); | ^ kernel/livepatch/core.c:221:21: error: assignment to 'Elf32_Sym *' {aka 'struct elf32_sym *'} from incompatible pointer type 'Elf64_Sym *' {aka 'struct elf64_sym *'} [-Werror=incompatible-pointer-types] 221 | sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); | ^ kernel/livepatch/core.c: In function 'klp_apply_section_relocs': kernel/livepatch/core.c:312:35: error: passing argument 1 of 'klp_resolve_symbols' from incompatible pointer type [-Werror=incompatible-pointer-types] 312 | ret = klp_resolve_symbols(sechdrs, strtab, symndx, sec, sec_objname); | ^~~~~~~ | | | Elf32_Shdr * {aka struct elf32_shdr *} kernel/livepatch/core.c:193:44: note: expected 'Elf64_Shdr *' {aka 'struct elf64_shdr *'} but argument is of type 'Elf32_Shdr *' {aka 'struct elf32_shdr *'} 193 | static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, | ~~~~~~~~~~~~^~~~~~~ Fix it by using the right types instead of forcing 64 bits types. Fixes: 7c8e2bdd5f0d ("livepatch: Apply vmlinux-specific KLP relocations early") Signed-off-by: Christophe Leroy Acked-by: Petr Mladek Acked-by: Joe Lawrence Acked-by: Miroslav Benes Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5288e11b018a762ea3351cc8fb2d4f15093a4457.1640017960.git.christophe.leroy@csgroup.eu --- kernel/livepatch/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f..bc475e62279d 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); From 0c850965d6909d39fd69d6a3602bb62b48cad417 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:09 +0000 Subject: [PATCH 053/380] powerpc/module_32: Fix livepatching for RO modules Livepatching a loaded module involves applying relocations through apply_relocate_add(), which attempts to write to read-only memory when CONFIG_STRICT_MODULE_RWX=y. R_PPC_ADDR16_LO, R_PPC_ADDR16_HI, R_PPC_ADDR16_HA and R_PPC_REL24 are the types generated by the kpatch-build userspace tool or klp-convert kernel tree observed applying a relocation to a post-init module. Use patch_instruction() to patch those relocations. Commit 8734b41b3efe ("powerpc/module_64: Fix livepatching for RO modules") did similar change in module_64. Signed-off-by: Christophe Leroy Acked-by: Joe Lawrence Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d5697157cb7dba3927e19aa17c915a83bc550bb2.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/module_32.c | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index a491ad481d85..a0432ef46967 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,6 +18,7 @@ #include #include #include +#include /* Count how many different relocations (different symbol, different addend) */ @@ -174,15 +175,25 @@ static uint32_t do_plt_call(void *location, entry++; } - entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); - entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); - entry->jump[2] = PPC_RAW_MTCTR(_R12); - entry->jump[3] = PPC_RAW_BCTR(); + if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val))))) + return 0; + if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))))) + return 0; + if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12)))) + return 0; + if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR()))) + return 0; pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } +static int patch_location_16(uint32_t *loc, u16 value) +{ + loc = PTR_ALIGN_DOWN(loc, sizeof(u32)); + return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value)); +} + int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -216,37 +227,42 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_PPC_ADDR16_LO: /* Low half of the symbol */ - *(uint16_t *)location = value; + if (patch_location_16(location, PPC_LO(value))) + return -EFAULT; break; case R_PPC_ADDR16_HI: /* Higher half of the symbol */ - *(uint16_t *)location = (value >> 16); + if (patch_location_16(location, PPC_HI(value))) + return -EFAULT; break; case R_PPC_ADDR16_HA: - /* Sign-adjusted lower 16 bits: PPC ELF ABI says: - (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. - This is the same, only sane. - */ - *(uint16_t *)location = (value + 0x8000) >> 16; + if (patch_location_16(location, PPC_HA(value))) + return -EFAULT; break; case R_PPC_REL24: if ((int)(value - (uint32_t)location) < -0x02000000 - || (int)(value - (uint32_t)location) >= 0x02000000) + || (int)(value - (uint32_t)location) >= 0x02000000) { value = do_plt_call(location, value, sechdrs, module); + if (!value) + return -EFAULT; + } /* Only replace bits 2 through 26 */ pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); pr_debug("Location before: %08X.\n", *(uint32_t *)location); - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); + + if (patch_instruction(location, ppc_inst(value))) + return -EFAULT; + pr_debug("Location after: %08X.\n", *(uint32_t *)location); pr_debug("ie. jump to %08X+%08X = %08X\n", From a4520b25276500f1abcfc55d24f1251b7b08eff6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:12 +0000 Subject: [PATCH 054/380] powerpc/ftrace: Add support for livepatch to PPC32 PPC64 needs some special logic to properly set up the TOC. See commit 85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le") for details. PPC32 doesn't have TOC so it doesn't need that logic, so adding LIVEPATCH support is straight forward. Add CONFIG_LIVEPATCH_64 and move livepatch stack logic into that item. Livepatch sample modules all work. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/63cb094125b6a6038c65eeac2abaabbabe63addd.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 6 +++++- arch/powerpc/include/asm/livepatch.h | 8 +++++--- arch/powerpc/include/asm/thread_info.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e1..1d027ce94ab1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -9,6 +9,10 @@ config 64BIT bool default y if PPC64 +config LIVEPATCH_64 + def_bool PPC64 + depends on LIVEPATCH + config MMU bool default y @@ -221,7 +225,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..37af961eb74c 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -23,12 +23,14 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) static inline unsigned long klp_get_ftrace_location(unsigned long faddr) { /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. + * Live patch works on PPC32 and only with -mprofile-kernel on PPC64. In + * both cases, the ftrace location is always within the first 16 bytes. */ return ftrace_location_range(faddr, faddr + 16); } +#endif /* CONFIG_LIVEPATCH */ +#ifdef CONFIG_LIVEPATCH_64 static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ @@ -36,6 +38,6 @@ static inline void klp_init_thread_info(struct task_struct *p) } #else static inline void klp_init_thread_info(struct task_struct *p) { } -#endif /* CONFIG_LIVEPATCH */ +#endif #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d6e649b3c70b..125328d1b980 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { unsigned int cpu; #endif unsigned long local_flags; /* private flags for thread */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 unsigned long *livepatch_sp; #endif #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7582f3e3a330..eec536aef83a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) OFFSET(TASK_CPU, task_struct, thread_info.cpu); #endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif From 7875bc9b07cde868784195e215f4deaa0fa928a2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:16 +0000 Subject: [PATCH 055/380] powerpc/ftrace: Don't save again LR in ftrace_regs_caller() on PPC32 PPC32 mcount() caller already saves LR on stack, no need to save it again. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eadcfc770b4f1e35535ffb85e28e858a2c31dec4.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_32.S | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 0a02c0cb12d9..7e2fd729116b 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -53,9 +53,6 @@ _GLOBAL(ftrace_stub) blr _GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - stw r0,LRSAVE(r1) - /* Create our stack frame + pt_regs */ stwu r1,-INT_FRAME_SIZE(r1) From 7bdb478c1d15cfd3a92db6331cb2d3dd3a8b9436 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:19 +0000 Subject: [PATCH 056/380] powerpc/ftrace: Simplify PPC32's return_to_handler() return_to_handler() was copied from PPC64. For PPC32 it just needs to save r3 and r4, and doesn't require any nop after the bl. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aab39b77b34fb2c4ed08ed01c547b6ed13643788.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_32.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 7e2fd729116b..95ffea2bdc29 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -162,22 +162,18 @@ _GLOBAL(ftrace_graph_caller) _GLOBAL(return_to_handler) /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) bl ftrace_return_to_handler - nop /* return value has real return address */ mtlr r3 - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 /* Jump back to real return address */ blr From d95bf254be5f74c1e4c8f7cb64e2e21b9cc91717 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:22 +0000 Subject: [PATCH 057/380] powerpc/ftrace: Prepare PPC32's ftrace_caller() for CONFIG_DYNAMIC_FTRACE_WITH_ARGS In order to implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS, change ftrace_caller() stack layout to match struct pt_regs. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/da9734eba504998fb914aca12131c9f6bf6120a8.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 39 +-------------------------- arch/powerpc/kernel/trace/ftrace_32.S | 29 +++++++++++++++++--- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index debe8c4f7062..b3f6184f77ea 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,44 +10,7 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR -#ifdef __ASSEMBLY__ - -/* Based off of objdump output from glibc */ - -#define MCOUNT_SAVE_FRAME \ - stwu r1,-48(r1); \ - stw r3, 12(r1); \ - stw r4, 16(r1); \ - stw r5, 20(r1); \ - stw r6, 24(r1); \ - mflr r3; \ - lwz r4, 52(r1); \ - mfcr r5; \ - stw r7, 28(r1); \ - stw r8, 32(r1); \ - stw r9, 36(r1); \ - stw r10,40(r1); \ - stw r3, 44(r1); \ - stw r5, 8(r1) - -#define MCOUNT_RESTORE_FRAME \ - lwz r6, 8(r1); \ - lwz r0, 44(r1); \ - lwz r3, 12(r1); \ - mtctr r0; \ - lwz r4, 16(r1); \ - mtcr r6; \ - lwz r5, 20(r1); \ - lwz r6, 24(r1); \ - lwz r0, 52(r1); \ - lwz r7, 28(r1); \ - lwz r8, 32(r1); \ - mtlr r0; \ - lwz r9, 36(r1); \ - lwz r10,40(r1); \ - addi r1, r1, 48 - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 95ffea2bdc29..c4055b41af5f 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -27,17 +27,38 @@ _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ + stwu r1, -INT_FRAME_SIZE(r1) + + SAVE_GPRS(3, 10, r1) + + addi r8, r1, INT_FRAME_SIZE + stw r8, GPR1(r1) + + mflr r3 + stw r3, _NIP(r1) subi r3, r3, MCOUNT_INSN_SIZE + + stw r0, _LINK(r1) + mr r4, r0 + lis r5,function_trace_op@ha lwz r5,function_trace_op@l(r5) - li r6, 0 + + addi r6, r1, STACK_FRAME_OVERHEAD .globl ftrace_call ftrace_call: bl ftrace_stub nop - MCOUNT_RESTORE_FRAME + + lwz r3, _NIP(r1) + mtctr r3 + + REST_GPRS(3, 10, r1) + + lwz r0, _LINK(r1) + mtlr r0 + + addi r1, r1, INT_FRAME_SIZE ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call From c75388a8ceffbf1bf72c61afe66a72e58aa20c74 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:25 +0000 Subject: [PATCH 058/380] powerpc/ftrace: Prepare PPC64's ftrace_caller() for CONFIG_DYNAMIC_FTRACE_WITH_ARGS In order to implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS, change ftrace_caller() to handle LIVEPATCH the same way as frace_caller_regs(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/850817333cc76593699032e8e9a70d8c36e1af1e.1640017960.git.christophe.leroy@csgroup.eu --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index d636fc755f60..f6f787819273 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -172,14 +172,19 @@ _GLOBAL(ftrace_caller) addi r3, r3, function_trace_op@toc@l ld r5, 0(r3) +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) + mr r14,r7 /* remember old NIP */ +#endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE /* Put the original return address in r4 as parent_ip */ + std r0, _LINK(r1) mr r4, r0 - /* Set pt_regs to NULL */ - li r6, 0 + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_call @@ -189,6 +194,10 @@ ftrace_call: ld r3, _NIP(r1) mtctr r3 +#ifdef CONFIG_LIVEPATCH_64 + cmpd r14, r3 /* has NIP been altered? */ + REST_GPR(14, r1) +#endif /* Restore gprs */ REST_GPRS(3, 10, r1) @@ -196,13 +205,17 @@ ftrace_call: /* Restore callee's TOC */ ld r2, 24(r1) + /* Restore possibly modified LR */ + ld r0, _LINK(r1) + mtlr r0 + /* Pop our stack frame */ addi r1, r1, SWITCH_FRAME_SIZE - /* Reload original LR */ - ld r0, LRSAVE(r1) - mtlr r0 - +#ifdef CONFIG_LIVEPATCH_64 + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif /* Handle function_graph or go back */ b ftrace_caller_common From 40b035efe288f42bbf4483236cde652584ccb64e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:28 +0000 Subject: [PATCH 059/380] powerpc/ftrace: Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS. It accelerates the call of livepatching. Also note that powerpc being the last one to convert to CONFIG_DYNAMIC_FTRACE_WITH_ARGS, it will now be possible to remove klp_arch_set_pc() on all architectures. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5831f711a778fcd6eb51eb5898f1faae4378b35b.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 17 +++++++++++++++++ arch/powerpc/include/asm/livepatch.h | 4 +--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1d027ce94ab1..2a851d31f120 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -201,6 +201,7 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b3f6184f77ea..45c3d6f11daa 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -22,6 +22,23 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) struct dyn_arch_ftrace { struct module *mod; }; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &fregs->regs; +} + +static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, + unsigned long ip) +{ + regs_set_return_ip(&fregs->regs, ip); +} +#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 37af961eb74c..6f10de6af6e3 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -14,9 +14,7 @@ #ifdef CONFIG_LIVEPATCH static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs_set_return_ip(regs, ip); + ftrace_instruction_pointer_set(fregs, ip); } #define klp_get_ftrace_location klp_get_ftrace_location From 0c81ed5ed43863d313cf253b0ebada6ea2f17676 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:31 +0000 Subject: [PATCH 060/380] powerpc/ftrace: Refactor ftrace_{en/dis}able_ftrace_graph_caller ftrace_enable_ftrace_graph_caller() and ftrace_disable_ftrace_graph_caller() have common code. They will have even more common code after following patch. Refactor into a single ftrace_modify_ftrace_graph_caller() function. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f37785a531f1a8f201e1b3da45997a5c77e9d820.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 80b6285769f2..ce673764cb69 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -910,30 +910,27 @@ int __init ftrace_dyn_arch_init(void) extern void ftrace_graph_call(void); extern void ftrace_graph_stub(void); -int ftrace_enable_ftrace_graph_caller(void) +static int ftrace_modify_ftrace_graph_caller(bool enable) { unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); ppc_inst_t old, new; - old = ftrace_call_replace(ip, stub, 0); - new = ftrace_call_replace(ip, addr, 0); + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? addr : stub, 0); return ftrace_modify_code(ip, old, new); } +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(true); +} + int ftrace_disable_ftrace_graph_caller(void) { - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - old = ftrace_call_replace(ip, addr, 0); - new = ftrace_call_replace(ip, stub, 0); - - return ftrace_modify_code(ip, old, new); + return ftrace_modify_ftrace_graph_caller(false); } /* From 830213786c498b0c488fedd2abc15a7ce442b42f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:35 +0000 Subject: [PATCH 061/380] powerpc/ftrace: directly call of function graph tracer by ftrace caller Modify function graph tracer to be handled directly by the standard ftrace caller. This is made possible as powerpc now supports CONFIG_DYNAMIC_FTRACE_WITH_ARGS. This change simplifies the call of function graph ftrace. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/04d196585ff81bde06a000bd9c633a33a5b21130.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 6 ++ arch/powerpc/kernel/trace/ftrace.c | 11 ++++ arch/powerpc/kernel/trace/ftrace_32.S | 53 +-------------- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 64 +------------------ 4 files changed, 20 insertions(+), 114 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 45c3d6f11daa..70b457097098 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -38,6 +38,12 @@ static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *f { regs_set_return_ip(&fregs->regs, ip); } + +struct ftrace_ops; + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ce673764cb69..74a176e394ef 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -917,6 +917,9 @@ static int ftrace_modify_ftrace_graph_caller(bool enable) unsigned long stub = (unsigned long)(&ftrace_graph_stub); ppc_inst_t old, new; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + old = ftrace_call_replace(ip, enable ? stub : addr, 0); new = ftrace_call_replace(ip, enable ? addr : stub, 0); @@ -955,6 +958,14 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, out: return parent; } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + fregs->regs.link = prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); +} +#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef PPC64_ELF_ABI_v1 diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index c4055b41af5f..2b425da97a6b 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -59,13 +59,6 @@ ftrace_call: mtlr r0 addi r1, r1, INT_FRAME_SIZE -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif /* old link register ends up in ctr reg */ bctr @@ -135,52 +128,10 @@ ftrace_regs_call: /* Pop our stack frame */ addi r1, r1, INT_FRAME_SIZE - - b ftrace_caller_common - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stwu r1,-48(r1) - stw r3, 12(r1) - stw r4, 16(r1) - stw r5, 20(r1) - stw r6, 24(r1) - stw r7, 28(r1) - stw r8, 32(r1) - stw r9, 36(r1) - stw r10,40(r1) - - addi r5, r1, 48 - mfctr r4 /* ftrace_caller has moved local addr here */ - stw r4, 44(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - stw r3,52(r1) - mtlr r3 - lwz r0,44(r1) - mtctr r0 - - lwz r3, 12(r1) - lwz r4, 16(r1) - lwz r5, 20(r1) - lwz r6, 24(r1) - lwz r7, 28(r1) - lwz r8, 32(r1) - lwz r9, 36(r1) - lwz r10,40(r1) - - addi r1, r1, 48 - + /* old link register ends up in ctr reg */ bctr +#ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ stwu r1, -16(r1) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f6f787819273..6071e0122797 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -124,15 +124,6 @@ ftrace_regs_call: /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - bctr /* jump after _mcount site */ _GLOBAL(ftrace_stub) @@ -216,8 +207,7 @@ ftrace_call: /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - /* Handle function_graph or go back */ - b ftrace_caller_common + bctr /* jump after _mcount site */ #ifdef CONFIG_LIVEPATCH /* @@ -286,55 +276,3 @@ livepatch_handler: /* Return to original caller of live patched function */ blr #endif /* CONFIG_LIVEPATCH */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stdu r1, -112(r1) - /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addi r5, r1, 112 - mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR to this. - */ - mtlr r3 - - ld r0, 40(r1) - mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - addi r1, r1, 112 - mflr r0 - std r0, LRSAVE(r1) - bctr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From 41315494beed087011f256b4f1439bb3d8236904 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:40 +0000 Subject: [PATCH 062/380] powerpc/ftrace: Prepare ftrace_64_mprofile.S for reuse by PPC32 PPC64 mprofile versions and PPC32 are very similar. Modify PPC64 version so that if can be reused for PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/82a732915dc71ee766e31809350939331944006d.1640017960.git.christophe.leroy@csgroup.eu --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 73 +++++++++++++------ 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 6071e0122797..56da60e98327 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -34,13 +34,16 @@ */ _GLOBAL(ftrace_regs_caller) /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) +#ifdef CONFIG_MPROFILE_KERNEL + PPC_STL r0,LRSAVE(r1) +#endif /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) +#ifdef CONFIG_PPC64 SAVE_GPRS(2, 11, r1) /* Ok to continue? */ @@ -49,10 +52,13 @@ _GLOBAL(ftrace_regs_caller) beq ftrace_no_trace SAVE_GPRS(12, 31, r1) +#else + stmw r2, GPR2(r1) +#endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE - std r8, GPR1(r1) + PPC_STL r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -63,10 +69,11 @@ _GLOBAL(ftrace_regs_caller) /* Get the _mcount() call site out of LR */ mflr r7 /* Save it as pt_regs->nip */ - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) /* Save the read LR in pt_regs->link */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, 24(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ @@ -74,8 +81,12 @@ _GLOBAL(ftrace_regs_caller) addis r3,r2,function_trace_op@toc@ha addi r3,r3,function_trace_op@toc@l ld r5,0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 mr r14,r7 /* remember old NIP */ #endif /* Calculate ip from nip-4 into r3 for call below */ @@ -85,10 +96,10 @@ _GLOBAL(ftrace_regs_caller) mr r4, r0 /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) + PPC_STL r8, _MSR(r1) + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + PPC_STL r11, _CCR(r1) /* Load &pt_regs in r6 for call below */ addi r6, r1 ,STACK_FRAME_OVERHEAD @@ -100,27 +111,32 @@ ftrace_regs_call: nop /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) + PPC_LL r3, _NIP(r1) mtctr r3 -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif /* Restore gprs */ - REST_GPR(0, r1) +#ifdef CONFIG_PPC64 REST_GPRS(2, 31, r1) +#else + lmw r2, GPR2(r1) +#endif /* Restore possibly modified LR */ - ld r0, _LINK(r1) + PPC_LL r0, _LINK(r1) mtlr r0 +#ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, 24(r1) +#endif /* Pop our stack frame */ addi r1, r1, SWITCH_FRAME_SIZE -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif @@ -129,6 +145,7 @@ ftrace_regs_call: _GLOBAL(ftrace_stub) blr +#ifdef CONFIG_PPC64 ftrace_no_trace: mflr r3 mtctr r3 @@ -136,25 +153,31 @@ ftrace_no_trace: addi r1, r1, SWITCH_FRAME_SIZE mtlr r0 bctr +#endif _GLOBAL(ftrace_caller) /* Save the original return address in A's stack frame */ - std r0, LRSAVE(r1) +#ifdef CONFIG_MPROFILE_KERNEL + PPC_STL r0, LRSAVE(r1) +#endif /* Create our stack frame + pt_regs */ - stdu r1, -SWITCH_FRAME_SIZE(r1) + PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif /* Get the _mcount() call site out of LR */ mflr r7 - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, 24(r1) ld r2, PACATOC(r13) /* get kernel TOC in r2 */ @@ -162,6 +185,10 @@ _GLOBAL(ftrace_caller) addis r3, r2, function_trace_op@toc@ha addi r3, r3, function_trace_op@toc@l ld r5, 0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif #ifdef CONFIG_LIVEPATCH_64 SAVE_GPR(14, r1) @@ -171,7 +198,7 @@ _GLOBAL(ftrace_caller) subi r3, r7, MCOUNT_INSN_SIZE /* Put the original return address in r4 as parent_ip */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) mr r4, r0 /* Load &pt_regs in r6 for call below */ @@ -183,7 +210,7 @@ ftrace_call: bl ftrace_stub nop - ld r3, _NIP(r1) + PPC_LL r3, _NIP(r1) mtctr r3 #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ @@ -193,11 +220,13 @@ ftrace_call: /* Restore gprs */ REST_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, 24(r1) +#endif /* Restore possibly modified LR */ - ld r0, _LINK(r1) + PPC_LL r0, _LINK(r1) mtlr r0 /* Pop our stack frame */ @@ -209,7 +238,7 @@ ftrace_call: #endif bctr /* jump after _mcount site */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As * such it can only clobber registers which are volatile and used in From a0b8cd5c223042efe764864b8dd9396ee127c763 Mon Sep 17 00:00:00 2001 From: Wenli Looi Date: Sun, 23 Jan 2022 23:13:31 -0800 Subject: [PATCH 063/380] MIPS: ath79: add support for QCN550x Adds support for QCN550x, which appears to be very similar to QCA956x. Signed-off-by: Wenli Looi Signed-off-by: Thomas Bogendoerfer --- arch/mips/ath79/early_printk.c | 1 + arch/mips/ath79/setup.c | 6 ++++++ arch/mips/include/asm/mach-ath79/ar71xx_regs.h | 1 + 3 files changed, 8 insertions(+) diff --git a/arch/mips/ath79/early_printk.c b/arch/mips/ath79/early_printk.c index 782732cd1a2b..8751d067f98f 100644 --- a/arch/mips/ath79/early_printk.c +++ b/arch/mips/ath79/early_printk.c @@ -121,6 +121,7 @@ static void prom_putchar_init(void) case REV_ID_MAJOR_QCA9558: case REV_ID_MAJOR_TP9343: case REV_ID_MAJOR_QCA956X: + case REV_ID_MAJOR_QCN550X: _prom_putchar = prom_putchar_ar71xx; break; diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 0ac435fe2dc9..45d4d717e47c 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -168,6 +168,12 @@ static void __init ath79_detect_sys_type(void) rev = id & QCA956X_REV_ID_REVISION_MASK; break; + case REV_ID_MAJOR_QCN550X: + ath79_soc = ATH79_SOC_QCA956X; + chip = "550X"; + rev = id & QCA956X_REV_ID_REVISION_MASK; + break; + case REV_ID_MAJOR_TP9343: ath79_soc = ATH79_SOC_TP9343; chip = "9343"; diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index 1f9e571af67c..5f837060724e 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -862,6 +862,7 @@ #define REV_ID_MAJOR_QCA9558 0x1130 #define REV_ID_MAJOR_TP9343 0x0150 #define REV_ID_MAJOR_QCA956X 0x1150 +#define REV_ID_MAJOR_QCN550X 0x2170 #define AR71XX_REV_ID_MINOR_MASK 0x3 #define AR71XX_REV_ID_MINOR_AR7130 0x0 From 0e96ea5c3eb5904e5dc2f3d414e2aba361933b87 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Jan 2022 15:19:24 -0700 Subject: [PATCH 064/380] MIPS: Loongson64: Clean up use of cc-ifversion This Makefile checks that GCC is 4.9 or newer, which is redundant after commit 76ae847497bc ("Documentation: raise minimum supported version of GCC to 5.1"), so cc-option can be removed. Clang does not support -march=loongson3a so it needs to continue to use -march=mips64r2, which no longer needs cc-option because all supported clang versions recognize this flag. -march=loonson3a can be used unconditionally with GCC because the minimum supported GCC version has been bumped past 4.9.0, which won't have the bug mentioned in the comment. The _MIPS_ISA macro redefinition can be removed at the same time for the same reason. Suggested-by: Jiaxun Yang Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/Platform | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 3e660d6d3c2b..f2a08a185ee2 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -5,24 +5,9 @@ cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap -# -# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a -# as MIPS64 R2; older versions as just R1. This leaves the possibility open -# that GCC might generate R2 code for -march=loongson3a which then is rejected -# by GAS. The cc-option can't probe for this behaviour so -march=loongson3a -# can't easily be used safely within the kbuild framework. -# -ifeq ($(call cc-ifversion, -ge, 0409, y), y) - ifeq ($(call ld-ifversion, -ge, 22500, y), y) - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) - else - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) - endif -else - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) +ifdef CONFIG_CPU_LOONGSON64 +cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a +cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2 endif # Some -march= flags enable MMI instructions, and GCC complains about that From d49fc69293f2022785f6ee1c3c7d565271abe393 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Jan 2022 15:19:25 -0700 Subject: [PATCH 065/380] MIPS: Loongson{2ef,64}: Wrap -mno-branch-likely with cc-option This flag is not supported by clang, which results in a warning: clang-14: warning: argument unused during compilation: '-mno-branch-likely' [-Wunused-command-line-argument] This breaks cc-option, which adds -Werror to make this warning fatal and catch flags that are not supported. Wrap this flag in cc-option so that it does not cause cc-option to fail, which can cause randconfigs to be really noisy, due to warnings not getting disabled that should be. Additionally, move the cc-option check to Kconfig so that the check is done at configuration time, rather than build time, as builds with no configuration change will be quicker because the cc-option call will not need to happen in those instances. Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 4 ++++ arch/mips/loongson2ef/Platform | 3 ++- arch/mips/loongson64/Platform | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 058446f01487..da6b3e0501a7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3202,6 +3202,10 @@ config MIPS32_N32 If unsure, say N. +config CC_HAS_MNO_BRANCH_LIKELY + def_bool y + depends on $(cc-option,-mno-branch-likely) + menu "Power management options" config ARCH_HIBERNATION_POSSIBLE diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 50e659aca543..eebabf9df6ac 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -41,6 +41,7 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely +cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef +cflags-$(CONFIG_CC_HAS_MNO_BRANCH_LIKELY) += -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index f2a08a185ee2..473404cae1c4 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -18,5 +18,6 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely +cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 +cflags-$(CONFIG_CC_HAS_MNO_BRANCH_LIKELY) += -mno-branch-likely load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 From e0a8b93efa2382d370be44bf289157de7e5dacb4 Mon Sep 17 00:00:00 2001 From: Nemanja Rakovic Date: Mon, 31 Jan 2022 11:17:09 +0100 Subject: [PATCH 066/380] mips: Enable KCSAN This patch enables KCSAN for the 64-bit version. Updated rules for the incompatible compilation units (vdso, boot/compressed). Signed-off-by: Nemanja Rakovic Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 1 + arch/mips/boot/compressed/Makefile | 1 + arch/mips/vdso/Makefile | 3 +++ 3 files changed, 5 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index da6b3e0501a7..c6672547ac5e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -101,6 +101,7 @@ config MIPS select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT + select HAVE_ARCH_KCSAN if 64BIT config MIPS_FIXUP_BIGPHYS_ADDR bool diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 5a15d51e8884..a35f78212ea9 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -38,6 +38,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d65f55f67e19..f72658b3a53f 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +# Sanitizer runtimes are unavailable and cannot be linked here. + KCSAN_SANITIZE := n + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT From 98b64741d61124a12fb05a7595acb1fd6c1dc55d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 7 Feb 2022 23:50:48 +0100 Subject: [PATCH 067/380] iommu/arm-smmu-v3: Avoid open coded arithmetic in memory allocation kmalloc_array()/kcalloc() should be used to avoid potential overflow when a multiplication is needed to compute the size of the requested memory. So turn a devm_kzalloc()+explicit size computation into an equivalent devm_kcalloc(). Signed-off-by: Christophe JAILLET Acked-by: Robin Murphy Link: https://lore.kernel.org/r/3f7b9b202c6b6f5edc234ab7af5f208fbf8bc944.1644274051.git.christophe.jaillet@wanadoo.fr Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6dc6d8b6b368..14d06aad0726 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2981,10 +2981,10 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) { unsigned int i; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents; void *strtab = smmu->strtab_cfg.strtab; - cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL); + cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, + sizeof(*cfg->l1_desc), GFP_KERNEL); if (!cfg->l1_desc) return -ENOMEM; From fcdeb8c34043a192c881b0594b26d195d5e57997 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 7 Feb 2022 23:47:55 +0100 Subject: [PATCH 068/380] iommu/arm-smmu-v3: Simplify memory allocation Use devm_bitmap_zalloc() instead of hand writing it. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/598bd905103dcbe5653a54bb0dfb5a8597728214.1644274051.git.christophe.jaillet@wanadoo.fr Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 22 +++++---------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 14d06aad0726..bbc4eeb42811 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2911,32 +2911,20 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static void arm_smmu_cmdq_free_bitmap(void *data) -{ - unsigned long *bitmap = data; - bitmap_free(bitmap); -} - static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) { - int ret = 0; struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; - atomic_long_t *bitmap; atomic_set(&cmdq->owner_prod, 0); atomic_set(&cmdq->lock, 0); - bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL); - if (!bitmap) { - dev_err(smmu->dev, "failed to allocate cmdq bitmap\n"); - ret = -ENOMEM; - } else { - cmdq->valid_map = bitmap; - devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap); - } + cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, + GFP_KERNEL); + if (!cmdq->valid_map) + return -ENOMEM; - return ret; + return 0; } static int arm_smmu_init_queues(struct arm_smmu_device *smmu) From 93665e0275a2e2badfcfca5ada3b78b22df4a01a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 5 Jan 2022 10:16:19 +0000 Subject: [PATCH 069/380] iommu/arm-smmu: Add missing pm_runtime_disable() in qcom_iommu_device_probe If the probe fails, we should use pm_runtime_disable() to balance pm_runtime_enable(). Add missing pm_runtime_disable() for error handling. Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220105101619.29108-1-linmq006@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index b91874cb6cf3..2f227bc88bd9 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -827,20 +827,20 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) ret = devm_of_platform_populate(dev); if (ret) { dev_err(dev, "Failed to populate iommu contexts\n"); - return ret; + goto err_pm_disable; } ret = iommu_device_sysfs_add(&qcom_iommu->iommu, dev, NULL, dev_name(dev)); if (ret) { dev_err(dev, "Failed to register iommu in sysfs\n"); - return ret; + goto err_pm_disable; } ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_pm_disable; } bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); @@ -852,6 +852,10 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) } return 0; + +err_pm_disable: + pm_runtime_disable(dev); + return ret; } static int qcom_iommu_device_remove(struct platform_device *pdev) From 30de2b541af98179780054836b48825fcfba4408 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Wed, 19 Jan 2022 07:07:54 +0000 Subject: [PATCH 070/380] iommu/arm-smmu-v3: fix event handling soft lockup During event processing, events are read from the event queue one by one until the queue is empty.If the master device continuously requests address access at the same time and the SMMU generates events, the cyclic processing of the event takes a long time and softlockup warnings may be reported. arm-smmu-v3 arm-smmu-v3.34.auto: event 0x0a received: arm-smmu-v3 arm-smmu-v3.34.auto: 0x00007f220000280a arm-smmu-v3 arm-smmu-v3.34.auto: 0x000010000000007e arm-smmu-v3 arm-smmu-v3.34.auto: 0x00000000034e8670 watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [irq/268-arm-smm:247] Call trace: _dev_info+0x7c/0xa0 arm_smmu_evtq_thread+0x1c0/0x230 irq_thread_fn+0x30/0x80 irq_thread+0x128/0x210 kthread+0x134/0x138 ret_from_fork+0x10/0x1c Kernel panic - not syncing: softlockup: hung tasks Fix this by calling cond_resched() after the event information is printed. Signed-off-by: Zhou Guanghui Link: https://lore.kernel.org/r/20220119070754.26528-1-zhouguanghui1@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bbc4eeb42811..e92974173861 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1558,6 +1558,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); + cond_resched(); } /* From c47c7ab9b53635860c6b48736efdd22822d726d7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Feb 2022 12:17:26 -0700 Subject: [PATCH 071/380] MIPS: Malta: Enable BLK_DEV_INITRD This configuration is useful for boot testing malta_defconfig in QEMU with just a simple cpio initrd, instead of a full ext4 rootfs. This results in an increase of ~164KB of vmlinux (with GCC 11.2.0): $ diskus vmlinux.before 11.19 MB (11,194,368 bytes) $ diskus vmlinux.after 11.36 MB (11,358,208 bytes) This size increase comes from the fact that usr/Kconfig is sourced when CONFIG_BLK_DEV_INITRD is enabled, which defaults to supporting several decompression algorithms for compressed initrds. This seems like a reasonable tradeoff but these configurations could be disabled in the future if there are complaints about the size increase. Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/configs/malta_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 3321bb576944..3456ac8ded6c 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -4,6 +4,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_NAMESPACES=y CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y From dc306186a130c6d9feb0aabc1c71b8ed1674a3bf Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 29 Jan 2022 08:38:56 +0100 Subject: [PATCH 072/380] s390/dump: fix old lowcore virtual vs physical address confusion Virtual addresses of vmcore_info and os_info members are wrongly passed to copy_oldmem_kernel(), while the function expects physical address of the source. Instead, __pa() macro should have been applied. Yet, use of __pa() macro could be somehow confusing, since copy_oldmem_kernel() may treat the source as an offset, not as a direct physical address (that depens from the oldmem availability and location). Fix the virtual vs physical address confusion and make the way the old lowcore is read consistent across all sources. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/asm-offsets.c | 2 ++ arch/s390/kernel/crash_dump.c | 2 +- arch/s390/kernel/os_info.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 8e00bb228662..a496b08ea5d1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -131,6 +131,8 @@ int main(void) OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); + OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); + OFFSET(__LC_OS_INFO, lowcore, os_info); /* hardware defined lowcore locations 0x1000 - 0x18ff */ OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index af8202121642..a62bee83a88b 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -432,7 +432,7 @@ static void *get_vmcoreinfo_old(unsigned long *size) Elf64_Nhdr note; void *addr; - if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, (void *)__LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(¬e, addr, sizeof(note))) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index fa2be71b2c6b..a2b08d3f53ec 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * OS info structure has to be page aligned @@ -123,7 +124,7 @@ static void os_info_old_init(void) return; if (!oldmem_data.start) goto fail; - if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, (void *)__LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; From 303fd988ed644c7daa260410f3ac99266573557d Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 29 Jan 2022 09:24:50 +0100 Subject: [PATCH 073/380] s390/maccess: fix semantics of memcpy_real() and its callers There is a confusion with regard to the source address of memcpy_real() and calling functions. While the declared type for a source assumes a virtual address, in fact it always called with physical address of the source. This confusion led to bugs in copy_oldmem_kernel() and copy_oldmem_user() functions, where __pa() macro applied mistakenly to physical addresses. It does not lead to a real issue, since virtual and physical addresses are currently the same. Fix both the bugs and memcpy_real() prototype by making type of source address consistent to the function name and the way it actually used. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/os_info.h | 2 +- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/uaccess.h | 2 +- arch/s390/kernel/crash_dump.c | 58 +++++++++++++++---------------- arch/s390/kernel/os_info.c | 7 ++-- arch/s390/kernel/smp.c | 2 +- arch/s390/mm/maccess.c | 4 +-- drivers/s390/char/zcore.c | 3 +- 8 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 3c89279d2a4b..147a8d547ef9 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -39,7 +39,7 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_oldmem_kernel(void *dst, void *src, size_t count); +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5581b64a4236..8fd9772c7370 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -317,7 +317,7 @@ extern void (*s390_base_pgm_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int memcpy_real(void *, void *, size_t); +extern int memcpy_real(void *, unsigned long, size_t); extern void memcpy_absolute(void *, void *, size_t); #define mem_assign_absolute(dest, val) do { \ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d74e26b48604..f14f4ade15a9 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -279,7 +279,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -int copy_to_user_real(void __user *dest, void *src, unsigned long count); +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); #define HAVE_GET_KERNEL_NOFAULT diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a62bee83a88b..69819b765250 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -132,28 +132,27 @@ static inline void *load_real_addr(void *addr) /* * Copy memory of the old, dumped system to a kernel space virtual address */ -int copy_oldmem_kernel(void *dst, void *src, size_t count) +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; void *ra; int rc; while (count) { - from = __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_kernel(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_kernel(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } @@ -163,7 +162,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) } else { ra = dst; } - if (memcpy_real(ra, (void *) from, len)) + if (memcpy_real(ra, src, len)) return -EFAULT; } dst += len; @@ -176,31 +175,30 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) /* * Copy memory of the old, dumped system to a user space virtual address */ -static int copy_oldmem_user(void __user *dst, void *src, size_t count) +static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; int rc; while (count) { - from = __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_user(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_user(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } - rc = copy_to_user_real(dst, (void *) from, count); + rc = copy_to_user_real(dst, src, count); if (rc) return rc; } @@ -217,12 +215,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - void *src; + unsigned long src; int rc; if (!csize) return 0; - src = (void *) (pfn << PAGE_SHIFT) + offset; + src = pfn_to_phys(pfn) + offset; if (userbuf) rc = copy_oldmem_user((void __force __user *) buf, src, csize); else @@ -429,10 +427,10 @@ static void *nt_prpsinfo(void *ptr) static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; + unsigned long addr; Elf64_Nhdr note; - void *addr; - if (copy_oldmem_kernel(&addr, (void *)__LC_VMCORE_INFO, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(¬e, addr, sizeof(note))) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index a2b08d3f53ec..6b5b64e67eee 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -91,7 +91,7 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_oldmem_kernel(buf_align, (void *) addr, size)) { + if (copy_oldmem_kernel(buf_align, addr, size)) { msg = "copy failed"; goto fail_free; } @@ -124,15 +124,14 @@ static void os_info_old_init(void) return; if (!oldmem_data.start) goto fail; - if (copy_oldmem_kernel(&addr, (void *)__LC_OS_INFO, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_oldmem_kernel(os_info_old, (void *) addr, - sizeof(*os_info_old))) + if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b214af5164bd..4f0e9f412f27 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -670,7 +670,7 @@ static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, bool is_boot_cpu, void *regs) { if (is_boot_cpu) - copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); else __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs)); save_area_add_regs(sa, regs); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d4d311cb2bb5..4cc5020f4e18 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -121,7 +121,7 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, /* * Copy memory in real mode (kernel to kernel) */ -int memcpy_real(void *dest, void *src, size_t count) +int memcpy_real(void *dest, unsigned long src, size_t count) { unsigned long _dest = (unsigned long)dest; unsigned long _src = (unsigned long)src; @@ -173,7 +173,7 @@ void memcpy_absolute(void *dest, void *src, size_t count) /* * Copy memory from kernel (real) to user (virtual) */ -int copy_to_user_real(void __user *dest, void *src, unsigned long count) +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) { int offs = 0, size, rc; char *buf; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3ba2d934a3e8..516783ba950f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -229,8 +229,7 @@ static int __init zcore_reipl_init(void) rc = memcpy_hsa_kernel(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); else - rc = memcpy_real(zcore_ipl_block, (void *) ipib_info.ipib, - PAGE_SIZE); + rc = memcpy_real(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); if (rc || (__force u32)csum_partial(zcore_ipl_block, zcore_ipl_block->hdr.len, 0) != ipib_info.checksum) { TRACE("Checksum does not match\n"); From f413f685c6c094a7b968f66ca2e8512720807203 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Feb 2022 14:02:18 +0100 Subject: [PATCH 074/380] s390/mm: use CRST_ALLOC_ORDER instead of number Use CRST_ALLOC_ORDER to make it more obvious what the order means, and also to be consistent with other code, e.g. the vmemmap code. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/pgalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index fd35c1a0213b..2de48b2c1b04 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -53,17 +53,17 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_KERNEL, 2); + struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return NULL; - arch_set_page_dat(page, 2); + arch_set_page_dat(page, CRST_ALLOC_ORDER); return (unsigned long *) page_to_virt(page); } void crst_table_free(struct mm_struct *mm, unsigned long *table) { - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); } static void __crst_table_upgrade(void *arg) @@ -403,7 +403,7 @@ void __tlb_remove_table(void *_table) switch (half) { case 0x00U: /* pmd, pud, or p4d */ - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); return; case 0x01U: /* lower 2K of a 4K page table */ case 0x02U: /* higher 2K of a 4K page table */ From 4ee83a2cfbc46c13f2a08fe6d48dbcede53cdbf8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Dec 2021 16:38:44 +0000 Subject: [PATCH 075/380] powerpc/ftrace: Remove ftrace_32.S Functions in ftrace_32.S are common with PPC64. Reuse the ones defined for PPC64 with slight modification when required. Signed-off-by: Christophe Leroy [mpe: Squash in fixup diff from Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5e837fc190504c4ef834272e70d60ae33f175d49.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/Makefile | 6 +- arch/powerpc/kernel/trace/ftrace_32.S | 152 ------------------ .../trace/{ftrace_64.S => ftrace_low.S} | 14 ++ ...ftrace_64_mprofile.S => ftrace_mprofile.S} | 0 4 files changed, 17 insertions(+), 155 deletions(-) delete mode 100644 arch/powerpc/kernel/trace/ftrace_32.S rename arch/powerpc/kernel/trace/{ftrace_64.S => ftrace_low.S} (85%) rename arch/powerpc/kernel/trace/{ftrace_64_mprofile.S => ftrace_mprofile.S} (100%) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 858503775c58..542aa7a8b2b4 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -8,13 +8,13 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o else obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o endif +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S deleted file mode 100644 index 2b425da97a6b..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from entry_32.S - */ - -#include -#include -#include -#include -#include -#include -#include - -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r12 to play with. We use r12 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. - */ - mflr r12 - mtctr r12 - mtlr r0 - bctr -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_caller) - stwu r1, -INT_FRAME_SIZE(r1) - - SAVE_GPRS(3, 10, r1) - - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - mflr r3 - stw r3, _NIP(r1) - subi r3, r3, MCOUNT_INSN_SIZE - - stw r0, _LINK(r1) - mr r4, r0 - - lis r5,function_trace_op@ha - lwz r5,function_trace_op@l(r5) - - addi r6, r1, STACK_FRAME_OVERHEAD -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - lwz r3, _NIP(r1) - mtctr r3 - - REST_GPRS(3, 10, r1) - - lwz r0, _LINK(r1) - mtlr r0 - - addi r1, r1, INT_FRAME_SIZE - /* old link register ends up in ctr reg */ - bctr - - -_GLOBAL(ftrace_stub) - blr - -_GLOBAL(ftrace_regs_caller) - /* Create our stack frame + pt_regs */ - stwu r1,-INT_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - stw r0, GPR0(r1) - stmw r2, GPR2(r1) - - /* Save previous stack pointer (r1) */ - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - stw r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - stw r0, _LINK(r1) - - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - stw r8, _MSR(r1) - stw r9, _CTR(r1) - stw r10, _XER(r1) - stw r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - - /* Load ctr with the possibly modified NIP */ - lwz r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - lmw r2, GPR2(r1) - - /* Restore possibly modified LR */ - lwz r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, INT_FRAME_SIZE - /* old link register ends up in ctr reg */ - bctr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -16(r1) - stw r3, 8(r1) - stw r4, 12(r1) - - bl ftrace_return_to_handler - - /* return value has real return address */ - mtlr r3 - - lwz r3, 8(r1) - lwz r4, 12(r1) - addi r1, r1, 16 - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_low.S similarity index 85% rename from arch/powerpc/kernel/trace/ftrace_64.S rename to arch/powerpc/kernel/trace/ftrace_low.S index 25e5b9e47c06..0bddf1fa6636 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -10,6 +10,7 @@ #include #include +#ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: @@ -21,6 +22,7 @@ ftrace_tramp_text: ftrace_tramp_init: .space 64 .popsection +#endif _GLOBAL(mcount) _GLOBAL(_mcount) @@ -33,6 +35,7 @@ EXPORT_SYMBOL(_mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ +#ifdef CONFIG_PPC64 std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -46,6 +49,11 @@ _GLOBAL(return_to_handler) * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif bl ftrace_return_to_handler nop @@ -53,11 +61,17 @@ _GLOBAL(return_to_handler) /* return value has real return address */ mtlr r3 +#ifdef CONFIG_PPC64 ld r1, 0(r1) ld r4, -32(r1) ld r3, -24(r1) ld r2, -16(r1) ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif /* Jump back to real return address */ blr diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S similarity index 100% rename from arch/powerpc/kernel/trace/ftrace_64_mprofile.S rename to arch/powerpc/kernel/trace/ftrace_mprofile.S From a4c182ecf33584b9b2d1aa9dad073014a504c01f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Dec 2021 11:07:33 +0000 Subject: [PATCH 076/380] powerpc/set_memory: Avoid spinlock recursion in change_page_attr() Commit 1f9ad21c3b38 ("powerpc/mm: Implement set_memory() routines") included a spin_lock() to change_page_attr() in order to safely perform the three step operations. But then commit 9f7853d7609d ("powerpc/mm: Fix set_memory_*() against concurrent accesses") modify it to use pte_update() and do the operation safely against concurrent access. In the meantime, Maxime reported some spinlock recursion. [ 15.351649] BUG: spinlock recursion on CPU#0, kworker/0:2/217 [ 15.357540] lock: init_mm+0x3c/0x420, .magic: dead4ead, .owner: kworker/0:2/217, .owner_cpu: 0 [ 15.366563] CPU: 0 PID: 217 Comm: kworker/0:2 Not tainted 5.15.0+ #523 [ 15.373350] Workqueue: events do_free_init [ 15.377615] Call Trace: [ 15.380232] [e4105ac0] [800946a4] do_raw_spin_lock+0xf8/0x120 (unreliable) [ 15.387340] [e4105ae0] [8001f4ec] change_page_attr+0x40/0x1d4 [ 15.393413] [e4105b10] [801424e0] __apply_to_page_range+0x164/0x310 [ 15.400009] [e4105b60] [80169620] free_pcp_prepare+0x1e4/0x4a0 [ 15.406045] [e4105ba0] [8016c5a0] free_unref_page+0x40/0x2b8 [ 15.411979] [e4105be0] [8018724c] kasan_depopulate_vmalloc_pte+0x6c/0x94 [ 15.418989] [e4105c00] [801424e0] __apply_to_page_range+0x164/0x310 [ 15.425451] [e4105c50] [80187834] kasan_release_vmalloc+0xbc/0x134 [ 15.431898] [e4105c70] [8015f7a8] __purge_vmap_area_lazy+0x4e4/0xdd8 [ 15.438560] [e4105d30] [80160d10] _vm_unmap_aliases.part.0+0x17c/0x24c [ 15.445283] [e4105d60] [801642d0] __vunmap+0x2f0/0x5c8 [ 15.450684] [e4105db0] [800e32d0] do_free_init+0x68/0x94 [ 15.456181] [e4105dd0] [8005d094] process_one_work+0x4bc/0x7b8 [ 15.462283] [e4105e90] [8005d614] worker_thread+0x284/0x6e8 [ 15.468227] [e4105f00] [8006aaec] kthread+0x1f0/0x210 [ 15.473489] [e4105f40] [80017148] ret_from_kernel_thread+0x14/0x1c Remove the read / modify / write sequence to make the operation atomic and remove the spin_lock() in change_page_attr(). To do the operation atomically, we can't use pte modification helpers anymore. Because all platforms have different combination of bits, it is not easy to use those bits directly. But all have the _PAGE_KERNEL_{RO/ROX/RW/RWX} set of flags. All we need it to compare two sets to know which bits are set or cleared. For instance, by comparing _PAGE_KERNEL_ROX and _PAGE_KERNEL_RO you know which bit gets cleared and which bit get set when changing exec permission. Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/all/20211212112152.GA27070@sakura/ Link: https://lore.kernel.org/r/43c3c76a1175ae6dc1a3d3b5c3f7ecb48f683eea.1640344012.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/pageattr.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index edea388e9d3f..8812454e70ff 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -15,12 +15,14 @@ #include +static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr, + unsigned long old, unsigned long new) +{ + return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0); +} + /* - * Updates the attributes of a page in three steps: - * - * 1. take the page_table_lock - * 2. install the new entry with the updated attributes - * 3. flush the TLB + * Updates the attributes of a page atomically. * * This sequence is safe against concurrent updates, and also allows updating the * attributes of a page currently being executed or accessed. @@ -28,41 +30,33 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { long action = (long)data; - pte_t pte; - spin_lock(&init_mm.page_table_lock); - - pte = ptep_get(ptep); - - /* modify the PTE bits as desired, then apply */ + /* modify the PTE bits as desired */ switch (action) { case SET_MEMORY_RO: - pte = pte_wrprotect(pte); + /* Don't clear DIRTY bit */ + pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO); break; case SET_MEMORY_RW: - pte = pte_mkwrite(pte_mkdirty(pte)); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW); break; case SET_MEMORY_NX: - pte = pte_exprotect(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO); break; case SET_MEMORY_X: - pte = pte_mkexec(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); break; default: WARN_ON_ONCE(1); break; } - pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); - /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - spin_unlock(&init_mm.page_table_lock); - return 0; } From f222ab83df92acf72691a2021e1f0d99880dcdf1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Dec 2021 11:07:40 +0000 Subject: [PATCH 077/380] powerpc: Add set_memory_{p/np}() and remove set_memory_attr() set_memory_attr() was implemented by commit 4d1755b6a762 ("powerpc/mm: implement set_memory_attr()") because the set_memory_xx() couldn't be used at that time to modify memory "on the fly" as explained it the commit. But set_memory_attr() uses set_pte_at() which leads to warnings when CONFIG_DEBUG_VM is selected, because set_pte_at() is unexpected for updating existing page table entries. The check could be bypassed by using __set_pte_at() instead, as it was the case before commit c988cfd38e48 ("powerpc/32: use set_memory_attr()") but since commit 9f7853d7609d ("powerpc/mm: Fix set_memory_*() against concurrent accesses") it is now possible to use set_memory_xx() functions to update page table entries "on the fly" because the update is now atomic. For DEBUG_PAGEALLOC we need to clear and set back _PAGE_PRESENT. Add set_memory_np() and set_memory_p() for that. Replace all uses of set_memory_attr() by the relevant set_memory_xx() and remove set_memory_attr(). Fixes: c988cfd38e48 ("powerpc/32: use set_memory_attr()") Cc: stable@vger.kernel.org Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Tested-by: Maxime Bizon Reviewed-by: Russell Currey Depends-on: 9f7853d7609d ("powerpc/mm: Fix set_memory_*() against concurrent accesses") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cda2b44b55c96f9ac69fa92e68c01084ec9495c5.1640344012.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/set_memory.h | 12 ++++++++- arch/powerpc/mm/pageattr.c | 39 +++++---------------------- arch/powerpc/mm/pgtable_32.c | 24 ++++++++--------- 3 files changed, 28 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f7920..7ebc807aa8cc 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 8812454e70ff..85753e32a4de 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -46,6 +46,12 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) case SET_MEMORY_X: pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); + break; default: WARN_ON_ONCE(1); break; @@ -90,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. - */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2..f71ededdc02a 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -152,18 +154,14 @@ void mark_rodata_ro(void) return; } - numpages = PFN_UP((unsigned long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +177,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ From a8936569a07bf27cc9cfc2a39a1e5ea91273b2d4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Jan 2022 12:29:42 +0000 Subject: [PATCH 078/380] powerpc/bpf: Always reallocate BPF_REG_5, BPF_REG_AX and TMP_REG when possible BPF_REG_5, BPF_REG_AX and TMP_REG are mapped on non volatile registers because there are not enough volatile registers, but they don't need to be preserved on function calls. So when some volatile registers become available, those registers can always be reallocated regardless of whether SEEN_FUNC is set or not. Suggested-by: Naveen N. Rao Signed-off-by: Christophe Leroy Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b04c246874b716911139c04bc004b3b14eed07ef.1641817763.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 3 --- arch/powerpc/net/bpf_jit_comp32.c | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b20a2a83a6e7..b75507fc8f6b 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -127,9 +127,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ -#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ - #ifdef CONFIG_PPC64 extern const int b2p[MAX_BPF_JIT_REG + 2]; #else diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index cf8dd8aea386..43643f1c1034 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -77,14 +77,22 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ +#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ + void bpf_jit_realloc_regs(struct codegen_context *ctx) { - if (ctx->seen & SEEN_FUNC) - return; + unsigned int nvreg_mask; - while (ctx->seen & SEEN_NVREG_MASK && + if (ctx->seen & SEEN_FUNC) + nvreg_mask = SEEN_NVREG_TEMP_MASK; + else + nvreg_mask = SEEN_NVREG_FULL_MASK; + + while (ctx->seen & nvreg_mask && (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { - int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); int i; From 0670010f3b10aeaad0dfdf0dad0bcd020fc70eb5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Jan 2022 10:06:39 +0000 Subject: [PATCH 079/380] powerpc/32s: Enable STRICT_MODULE_RWX for the 603 core The book3s/32 MMU doesn't support per page execution protection and doesn't support RO protection for kernel pages. However, on the 603 which implements software loaded TLBs, execution protection is honored by the TLB Miss handler which doesn't load Instruction TLB for non executable pages. And RO protection is honored by clearing the C bit for RO pages, leading to DSI. So on the 603, STRICT_MODULE_RWX is possible without much effort. Don't disable STRICT_MODULE_RWX on book3s/32 and print a warning in case STRICT_MODULE_RWX has been selected and the platform has a Hardware HASH MMU. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1e6162f334167e75f1140082932e3a354b16daba.1642413973.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/mm/pgtable_32.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2a851d31f120..28e4047e99e8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,7 +135,7 @@ config PPC select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE - select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f71ededdc02a..a56ade39dc68 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -148,6 +148,9 @@ void mark_rodata_ro(void) { unsigned long numpages; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) + pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); From 9d44d1bd93b9a881f407b3202dc13fbd85fb5f1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 07:58:47 +0000 Subject: [PATCH 080/380] powerpc: Use the newly added is_tsk_32bit_task() macro Two places deserve using the macro is_tsk_32bit_task() added by commit 252745240ba0 ("powerpc/audit: Fix syscall_get_arch()") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7304a889dbe885aefad8a8333673c81ee4b8f7a6.1642751874.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/ptrace/ptrace-view.c | 2 +- arch/powerpc/perf/perf_regs.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index b8be1d6668b5..f15bc78caf71 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -841,7 +841,7 @@ static const struct user_regset_view user_ppc_compat_view = { const struct user_regset_view *task_user_regset_view(struct task_struct *task) { - if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) + if (IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task)) return &user_ppc_compat_view; return &user_ppc_native_view; } diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 51d31b65e423..350dccb0143c 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -134,12 +134,10 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { -#ifdef CONFIG_PPC64 - if (!test_tsk_thread_flag(task, TIF_32BIT)) - return PERF_SAMPLE_REGS_ABI_64; + if (is_tsk_32bit_task(task)) + return PERF_SAMPLE_REGS_ABI_32; else -#endif - return PERF_SAMPLE_REGS_ABI_32; + return PERF_SAMPLE_REGS_ABI_64; } void perf_get_regs_user(struct perf_regs *regs_user, From 67484e0de9c93b4a9187bb49f45dfdaa8dc03c0b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 08:06:27 +0000 Subject: [PATCH 081/380] powerpc/lib/sstep: Use l1_dcache_bytes() instead of opencoding Don't opencode dcache size retrieval based on whether that's ppc32 or ppc64. Use l1_dcache_bytes() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6c608fd4795e2d8ea1a0a449405a0087f76d8bb3.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..b7316d697d80 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1065,14 +1065,11 @@ Efault: int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long size; + unsigned long size = l1_dcache_bytes(); #ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; if (!(regs->msr & MSR_64BIT)) ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; #endif ea &= ~(size - 1); if (!address_ok(regs, ea, size)) From 7c3bba91999075f4cfcab0542e4eb74d2d63554b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 08:06:32 +0000 Subject: [PATCH 082/380] powerpc/lib/sstep: Remove unneeded #ifdef __powerpc64__ MSR_64BIT is always defined, no need to hide code using MSR_64BIT inside an #ifdef __powerpc64__ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ee61b693bc7e046eed1abb7a34909eb4878a9442.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b7316d697d80..4aabe3854484 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -75,10 +75,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -1067,10 +1065,8 @@ int emulate_dcbz(unsigned long ea, struct pt_regs *regs) int err; unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) ea &= 0xffffffffUL; -#endif ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ -1136,10 +1132,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -1170,12 +1164,10 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) { val = (unsigned int) val; val1 = (unsigned int) val1; } -#endif op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; From 6836f099039e6c72fb548bf527345aa4345c3308 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 08:06:38 +0000 Subject: [PATCH 083/380] powerpc/lib/sstep: use truncate_if_32bit() Use truncate_if_32bit() when possible instead of open coding. truncate_if_32bit() returns an unsigned long, so don't use it when a signed value is expected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7e1c07123f13156d4a27991a2e2694fb584bc068.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 4aabe3854484..ca38d026fd88 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1065,8 +1065,7 @@ int emulate_dcbz(unsigned long ea, struct pt_regs *regs) int err; unsigned long size = l1_dcache_bytes(); - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; + ea = truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ -1164,10 +1163,8 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; From f061fb03ee611c5657010ee4fa2a3fa64dfe3bd0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 16:30:21 +0000 Subject: [PATCH 084/380] powerpc/vdso: augment VDSO32 functions to support 64 bits build VDSO64 cacheflush.S datapage.S gettimeofday.S and vgettimeofday.c are very similar to their VDSO32 counterpart. VDSO32 counterpart is already more complete than the VDSO64 version as it supports both PPC32 vdso and 32 bits VDSO for PPC64. Use compat macros wherever necessary in PPC32 files so that they can also be used to build VDSO64. vdso64/note.S is already a link to vdso32/note.S so no change is required. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c2cbb8f046b7efc251053521dc39b752795e26b7.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-compat.h | 2 ++ arch/powerpc/kernel/vdso32/cacheflush.S | 4 ++-- arch/powerpc/kernel/vdso32/datapage.S | 10 ++++++-- arch/powerpc/kernel/vdso32/getcpu.S | 4 ++-- arch/powerpc/kernel/vdso32/gettimeofday.S | 8 +++++-- arch/powerpc/kernel/vdso32/vgettimeofday.c | 27 +++++++++++++++++----- 6 files changed, 41 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2b736d9fbb1b..2bc53c646ccd 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -21,6 +21,7 @@ #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) +#define PPC_SRL stringify_in_c(srd) #define PPC_LR_STKOFF 16 #define PPC_MIN_STKFRM 112 @@ -54,6 +55,7 @@ #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) +#define PPC_SRL stringify_in_c(srw) #define PPC_LR_STKOFF 4 #define PPC_MIN_STKFRM 16 diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index f340e82d1981..d4e43ab2d5df 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) add r8,r8,r5 /* ensure we get enough */ #ifdef CONFIG_PPC64 lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ #else srwi. r8, r8, L1_CACHE_SHIFT mr r7, r6 @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ #endif diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 65244416ab94..db8e167f0166 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -30,11 +30,15 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mr. r4,r3 get_datapage r3 mtlr r12 +#ifdef __powerpc64__ + addi r3,r3,CFG_SYSCALL_MAP64 +#else addi r3,r3,CFG_SYSCALL_MAP32 +#endif + crclr cr0*4+so beqlr li r0,NR_syscalls stw r0,0(r4) - crclr cr0*4+so blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) @@ -49,8 +53,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) mflr r12 .cfi_register lr,r12 get_datapage r3 +#ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) - lwz r3,CFG_TB_TICKS_PER_SEC(r3) +#endif + PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index ff5e214fec41..8e08ccf19062 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -19,8 +19,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpwi cr0,r3,0 - cmpwi cr1,r4,0 + PPC_LCMPI cr0,r3,0 + PPC_LCMPI cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index d21d08140a5e..c875312274aa 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO + * Userland implementation of gettimeofday() for processes + * for use in the vDSO * * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, * IBM Corp. @@ -41,9 +41,11 @@ V_FUNCTION_END(__kernel_clock_gettime) * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); * */ +#ifndef __powerpc64__ V_FUNCTION_BEGIN(__kernel_clock_gettime64) cvdso_call __c_kernel_clock_gettime64 V_FUNCTION_END(__kernel_clock_gettime64) +#endif /* * Exact prototype of clock_getres() @@ -69,6 +71,7 @@ V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ /* function, just beyond the end of the integer restore area. */ +#ifndef __powerpc64__ _GLOBAL(_restgpr_31_x) _GLOBAL(_rest32gpr_31_x) lwz r0,4(r11) @@ -76,3 +79,4 @@ _GLOBAL(_rest32gpr_31_x) mtlr r0 mr r1,r11 blr +#endif diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c index 65fb03fb1731..55a287c9a736 100644 --- a/arch/powerpc/kernel/vdso32/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -2,8 +2,22 @@ /* * Powerpc userspace implementations of gettimeofday() and similar. */ +#include #include +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} +#else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, const struct vdso_data *vd) { @@ -16,18 +30,19 @@ int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, return __cvdso_clock_gettime_data(vd, clock, ts); } +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} +#endif + int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, const struct vdso_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_time32_data(vd, clock_id, res); -} - __kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) { return __cvdso_time_data(vd, time); From d88378d8d2c776154c6b606f2a423a81d7795f6f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 16:30:23 +0000 Subject: [PATCH 085/380] powerpc/vdso: Rework VDSO32 makefile to add a prefix to object files In order to merge vdso32 and vdso64 build in following patch, rework Makefile is order to add -32 suffix to VDSO32 object files. Also change sigtramp.S to sigtramp32.S as VDSO64 sigtramp.S is too different to be squashed into VDSO32 sigtramp.S at the first place. gen_vdso_offsets.sh also becomes gen_vdso32_offsets.sh Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c421b704a57b228e75a891512568339c53667ad.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32/Makefile | 47 +++++++++---------- ..._vdso_offsets.sh => gen_vdso32_offsets.sh} | 0 .../vdso32/{sigtramp.S => sigtramp32.S} | 0 3 files changed, 21 insertions(+), 26 deletions(-) rename arch/powerpc/kernel/vdso32/{gen_vdso_offsets.sh => gen_vdso32_offsets.sh} (100%) rename arch/powerpc/kernel/vdso32/{sigtramp.S => sigtramp32.S} (100%) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 7d9a6fee0e3d..7d7b38d90ca5 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -5,15 +5,16 @@ ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) + CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc endif # Build rules @@ -24,13 +25,7 @@ else VDSOCC := $(CC) endif -CC32FLAGS := -ifdef CONFIG_PPC64 -CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) -endif - -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n @@ -38,36 +33,36 @@ KCOV_INSTRUMENT := n UBSAN_SANITIZE := n KASAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both -asflags-y := -D__VDSO32__ -s +ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both + +CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 +AS32FLAGS := -D__VDSO32__ -s -obj-y += vdso32_wrapper.o targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE $(call if_changed,vdso32ld_and_check) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE +$(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) -$(obj)/vgettimeofday.o: %.o: %.c FORCE +$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ +gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +quiet_cmd_vdso32sym = VDSO32SYM $@ + cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) + $(call if_changed,vdso32sym) # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp32.S similarity index 100% rename from arch/powerpc/kernel/vdso32/sigtramp.S rename to arch/powerpc/kernel/vdso32/sigtramp32.S From fd1feade75fb1a9275c39d76c5ccdbbbe6b37aa3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 16:30:27 +0000 Subject: [PATCH 086/380] powerpc/vdso: Merge vdso64 and vdso32 into a single directory merge vdso64 into vdso32 and rename it vdso. Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4dbe05cc130f6a0858d09ac72e436c373cb08b70.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 4 +- arch/powerpc/kernel/Makefile | 6 +- .../kernel/{vdso32 => vdso}/.gitignore | 2 + arch/powerpc/kernel/{vdso32 => vdso}/Makefile | 35 +++++++++ .../kernel/{vdso32 => vdso}/cacheflush.S | 0 .../kernel/{vdso32 => vdso}/datapage.S | 0 .../{vdso32 => vdso}/gen_vdso32_offsets.sh | 0 .../gen_vdso64_offsets.sh} | 0 arch/powerpc/kernel/{vdso32 => vdso}/getcpu.S | 0 .../kernel/{vdso32 => vdso}/gettimeofday.S | 0 arch/powerpc/kernel/{vdso32 => vdso}/note.S | 0 .../kernel/{vdso32 => vdso}/sigtramp32.S | 0 .../{vdso64/sigtramp.S => vdso/sigtramp64.S} | 0 .../kernel/{vdso32 => vdso}/vdso32.lds.S | 0 .../kernel/{vdso64 => vdso}/vdso64.lds.S | 0 .../kernel/{vdso32 => vdso}/vgettimeofday.c | 0 arch/powerpc/kernel/vdso32_wrapper.S | 2 +- arch/powerpc/kernel/vdso64/.gitignore | 3 - arch/powerpc/kernel/vdso64/Makefile | 56 -------------- arch/powerpc/kernel/vdso64/cacheflush.S | 75 ------------------- arch/powerpc/kernel/vdso64/datapage.S | 59 --------------- arch/powerpc/kernel/vdso64/getcpu.S | 33 -------- arch/powerpc/kernel/vdso64/gettimeofday.S | 58 -------------- arch/powerpc/kernel/vdso64/note.S | 1 - arch/powerpc/kernel/vdso64/vgettimeofday.c | 29 ------- arch/powerpc/kernel/vdso64_wrapper.S | 2 +- 26 files changed, 44 insertions(+), 321 deletions(-) rename arch/powerpc/kernel/{vdso32 => vdso}/.gitignore (72%) rename arch/powerpc/kernel/{vdso32 => vdso}/Makefile (56%) rename arch/powerpc/kernel/{vdso32 => vdso}/cacheflush.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/datapage.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/gen_vdso32_offsets.sh (100%) rename arch/powerpc/kernel/{vdso64/gen_vdso_offsets.sh => vdso/gen_vdso64_offsets.sh} (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/getcpu.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/gettimeofday.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/note.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/sigtramp32.S (100%) rename arch/powerpc/kernel/{vdso64/sigtramp.S => vdso/sigtramp64.S} (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/vdso32.lds.S (100%) rename arch/powerpc/kernel/{vdso64 => vdso}/vdso64.lds.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/vgettimeofday.c (100%) delete mode 100644 arch/powerpc/kernel/vdso64/.gitignore delete mode 100644 arch/powerpc/kernel/vdso64/Makefile delete mode 100644 arch/powerpc/kernel/vdso64/cacheflush.S delete mode 100644 arch/powerpc/kernel/vdso64/datapage.S delete mode 100644 arch/powerpc/kernel/vdso64/getcpu.S delete mode 100644 arch/powerpc/kernel/vdso64/gettimeofday.S delete mode 100644 arch/powerpc/kernel/vdso64/note.S delete mode 100644 arch/powerpc/kernel/vdso64/vgettimeofday.c diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..ddc5a706760a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -421,9 +421,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h) $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h) endif archprepare: checkbin diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4d7829399570..4ddd161aef32 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -194,8 +194,8 @@ targets += prom_init_check clean-files := vmlinux.lds # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg -$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg +$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg +$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg # for cleaning -subdir- += vdso32 vdso64 +subdir- += vdso diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso/.gitignore similarity index 72% rename from arch/powerpc/kernel/vdso32/.gitignore rename to arch/powerpc/kernel/vdso/.gitignore index 824b863ec6bd..dd9bdd67758b 100644 --- a/arch/powerpc/kernel/vdso32/.gitignore +++ b/arch/powerpc/kernel/vdso/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only vdso32.lds vdso32.so.dbg +vdso64.lds +vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso/Makefile similarity index 56% rename from arch/powerpc/kernel/vdso32/Makefile rename to arch/powerpc/kernel/vdso/Makefile index 7d7b38d90ca5..954974287ee7 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -6,6 +6,7 @@ ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_ include $(srctree)/lib/vdso/Makefile obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o +obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) @@ -15,6 +16,17 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. + CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) endif # Build rules @@ -27,6 +39,8 @@ endif targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n @@ -38,26 +52,42 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 AS32FLAGS := -D__VDSO32__ -s +CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ -s + targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc +targets += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE $(call if_changed,vdso32ld_and_check) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE + $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj-vdso64): %-64.o: %.S FORCE + $(call if_changed_dep,vdso64as) +$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ +gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +quiet_cmd_vdso64sym = VDSO64SYM $@ + cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE $(call if_changed,vdso32sym) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdso64sym) # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ @@ -66,3 +96,8 @@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S similarity index 100% rename from arch/powerpc/kernel/vdso32/cacheflush.S rename to arch/powerpc/kernel/vdso/cacheflush.S diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S similarity index 100% rename from arch/powerpc/kernel/vdso32/datapage.S rename to arch/powerpc/kernel/vdso/datapage.S diff --git a/arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S similarity index 100% rename from arch/powerpc/kernel/vdso32/getcpu.S rename to arch/powerpc/kernel/vdso/getcpu.S diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S similarity index 100% rename from arch/powerpc/kernel/vdso32/gettimeofday.S rename to arch/powerpc/kernel/vdso/gettimeofday.S diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S similarity index 100% rename from arch/powerpc/kernel/vdso32/note.S rename to arch/powerpc/kernel/vdso/note.S diff --git a/arch/powerpc/kernel/vdso32/sigtramp32.S b/arch/powerpc/kernel/vdso/sigtramp32.S similarity index 100% rename from arch/powerpc/kernel/vdso32/sigtramp32.S rename to arch/powerpc/kernel/vdso/sigtramp32.S diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S similarity index 100% rename from arch/powerpc/kernel/vdso64/sigtramp.S rename to arch/powerpc/kernel/vdso/sigtramp64.S diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso32/vdso32.lds.S rename to arch/powerpc/kernel/vdso/vdso32.lds.S diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso64/vdso64.lds.S rename to arch/powerpc/kernel/vdso/vdso64.lds.S diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c similarity index 100% rename from arch/powerpc/kernel/vdso32/vgettimeofday.c rename to arch/powerpc/kernel/vdso/vgettimeofday.c diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..10f92f265d51 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -7,7 +7,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg" .balign PAGE_SIZE vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore deleted file mode 100644 index 84151a7ba31d..000000000000 --- a/arch/powerpc/kernel/vdso64/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -vdso64.lds -vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile deleted file mode 100644 index 3c5baaa6f1e7..000000000000 --- a/arch/powerpc/kernel/vdso64/Makefile +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile - -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both - -# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true -# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is -# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code -# generation is minimal, it will just use r29 instead. -ccflags-y += $(call cc-option, -ffixed-r30) - -asflags-y := -D__VDSO64__ -s - -targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso64ld_and_check) - -# Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S deleted file mode 100644 index 76c3c8cf8ece..000000000000 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * vDSO provided cache flush routines - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include -#include -#include -#include -#include - - .text - -/* - * Default "generic" version of __kernel_sync_dicache. - * - * void __kernel_sync_dicache(unsigned long start, unsigned long end) - * - * Flushes the data cache & invalidate the instruction cache for the - * provided range [start, end[ - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache) - .cfi_startproc -BEGIN_FTR_SECTION - b 3f -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - mflr r12 - .cfi_register lr,r12 - get_datapage r10 - mtlr r12 - .cfi_restore lr - - lwz r7,CFG_DCACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,CFG_ICACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - li r3,0 - blr -3: - crclr cr0*4+so - sync - isync - li r3,0 - blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S deleted file mode 100644 index 00760dc69d68..000000000000 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Access to the shared data page by the vDSO & syscall map - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. - */ - -#include -#include -#include -#include -#include -#include - - .text - -/* - * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; - * - * returns a pointer to the syscall map. the map is agnostic to the - * size of "long", unlike kernel bitops, it stores bits from top to - * bottom so that memory actually contains a linear bitmap - * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of - * 32 bits int at N >> 5. - */ -V_FUNCTION_BEGIN(__kernel_get_syscall_map) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - mr r4,r3 - get_datapage r3 - mtlr r12 - addi r3,r3,CFG_SYSCALL_MAP64 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - li r0,NR_syscalls - stw r0,0(r4) - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_syscall_map) - - -/* - * void unsigned long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - get_datapage r3 - ld r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S deleted file mode 100644 index 12bbf236cdc4..000000000000 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * - * Copyright (C) IBM Corporation, 2012 - * - * Author: Anton Blanchard - */ -#include -#include - - .text -/* - * Exact prototype of getcpu - * - * int __kernel_getcpu(unsigned *cpu, unsigned *node); - * - */ -V_FUNCTION_BEGIN(__kernel_getcpu) - .cfi_startproc - mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 - clrlwi r6,r5,16 - rlwinm r7,r5,16,31-15,31-0 - beq cr0,1f - stw r6,0(r3) -1: beq cr1,2f - stw r7,0(r4) -2: crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S deleted file mode 100644 index d7a7bfb51081..000000000000 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include -#include -#include -#include -#include -#include -#include - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - cvdso_call __c_kernel_gettimeofday -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - cvdso_call __c_kernel_clock_gettime -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - cvdso_call __c_kernel_clock_getres -V_FUNCTION_END(__kernel_clock_getres) - -/* - * Exact prototype of time() - * - * time_t time(time *t); - * - */ -V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time -V_FUNCTION_END(__kernel_time) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S deleted file mode 100644 index dc2a509f7e8a..000000000000 --- a/arch/powerpc/kernel/vdso64/note.S +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c deleted file mode 100644 index 5b5500058344..000000000000 --- a/arch/powerpc/kernel/vdso64/vgettimeofday.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Powerpc userspace implementations of gettimeofday() and similar. - */ -#include -#include - -int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) -{ - return __cvdso_clock_gettime_data(vd, clock, ts); -} - -int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) -{ - return __cvdso_gettimeofday_data(vd, tv, tz); -} - -int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_data(vd, clock_id, res); -} - -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) -{ - return __cvdso_time_data(vd, time); -} diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..839d1a61411d 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -7,7 +7,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg" .balign PAGE_SIZE vdso64_end: From 9b97bea90072a075363a200dd7b54ad4a24e9491 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 16:30:30 +0000 Subject: [PATCH 087/380] powerpc/vdso: Remove cvdso_call_time macro cvdso_call_time macro is very similar to cvdso_call macro. Add a call_time argument to cvdso_call which is 0 by default and set to 1 when using cvdso_call to call __c_kernel_time(). Return returned value as is with CR[SO] cleared when it is used for time(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/837a260ad86fc1ce297a562c2117fd69be5f7b5c.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 37 ++++++-------------- arch/powerpc/kernel/vdso/gettimeofday.S | 2 +- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 1faff0be1111..df00e91c9a90 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -9,12 +9,12 @@ #include /* - * The macros sets two stack frames, one for the caller and one for the callee + * The macro sets two stack frames, one for the caller and one for the callee * because there are no requirement for the caller to set a stack frame when * calling VDSO so it may have omitted to set one, especially on PPC64 */ -.macro cvdso_call funct +.macro cvdso_call funct call_time=0 .cfi_startproc PPC_STLU r1, -PPC_MIN_STKFRM(r1) mflr r0 @@ -25,45 +25,28 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif get_datapage r5 + .ifeq \call_time addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif + .ifeq \call_time cmpwi r3, 0 + .endif mtlr r0 .cfi_restore lr addi r1, r1, 2 * PPC_MIN_STKFRM crclr so + .ifeq \call_time beqlr+ crset so neg r3, r3 - blr - .cfi_endproc -.endm - -.macro cvdso_call_time funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r4 - addi r4, r4, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - crclr so - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM + .endif blr .cfi_endproc .endm diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index c875312274aa..397f290015bc 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -65,7 +65,7 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time + cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ From 692b21d78046851e75dc25bba773189c670b49c2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Jan 2022 16:30:34 +0000 Subject: [PATCH 088/380] powerpc/vdso: Move cvdso_call macro into gettimeofday.S Now that gettimeofday.S is unique, move cvdso_call macro into that file which is the only user. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/72720359d4c58e3a3b96dd74952741225faac3de.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 52 +------------------- arch/powerpc/kernel/vdso/gettimeofday.S | 44 ++++++++++++++++- 2 files changed, 45 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index df00e91c9a90..f0a4cf01e85c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,57 +2,9 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#ifndef __ASSEMBLY__ + #include - -#ifdef __ASSEMBLY__ - -#include - -/* - * The macro sets two stack frames, one for the caller and one for the callee - * because there are no requirement for the caller to set a stack frame when - * calling VDSO so it may have omitted to set one, especially on PPC64 - */ - -.macro cvdso_call funct call_time=0 - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r5 - .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET - .else - addi r4, r5, VDSO_DATA_OFFSET - .endif - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - .ifeq \call_time - cmpwi r3, 0 - .endif - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - crclr so - .ifeq \call_time - beqlr+ - crset so - neg r3, r3 - .endif - blr - .cfi_endproc -.endm - -#else - #include #include #include diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 397f290015bc..eb9c81e1c218 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -12,7 +12,49 @@ #include #include #include -#include + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct call_time=0 + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + .ifeq \call_time + addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + .ifeq \call_time + cmpwi r3, 0 + .endif + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + .ifeq \call_time + beqlr+ + crset so + neg r3, r3 + .endif + blr + .cfi_endproc +.endm .text /* From 92e6dc257bd5771cf8db662e4d371fdb58fcbf43 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 7 Feb 2022 16:12:47 -0600 Subject: [PATCH 089/380] powerpc/pseries: make pseries_devicetree_update() static pseries_devicetree_update() has only one call site, in the same file in which it is defined. Make it static. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220207221247.354454-1-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/platforms/pseries/mobility.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 82e5b055fa2a..00531af17ce0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -274,7 +274,6 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); -extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 85033f392c78..94077fa91959 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -265,7 +265,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) return rc; } -int pseries_devicetree_update(s32 scope) +static int pseries_devicetree_update(s32 scope) { char *rtas_buf; __be32 *data; From 2504e5b9827f7fc76ed0e4593adc852ac7a19851 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 13 Sep 2021 11:47:20 +0530 Subject: [PATCH 090/380] selftests/powerpc/copyloops: Add memmove_64 test While debugging an issue, we wanted to check whether the arch specific kernel memmove implementation is correct. This selftest could help test that. Suggested-by: Aneesh Kumar K.V Suggested-by: Vaibhav Jain Signed-off-by: Ritesh Harjani Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/57242c1fe7aba6b7f0fcd0490303bfd5f222ee00.1631512686.git.riteshh@linux.ibm.com --- .../selftests/powerpc/copyloops/.gitignore | 1 + .../selftests/powerpc/copyloops/Makefile | 9 ++- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 1 + .../selftests/powerpc/copyloops/mem_64.S | 1 + .../powerpc/copyloops/memcpy_stubs.S | 8 +++ .../powerpc/copyloops/memmove_validate.c | 58 +++++++++++++++++++ 6 files changed, 77 insertions(+), 1 deletion(-) create mode 120000 tools/testing/selftests/powerpc/copyloops/mem_64.S create mode 100644 tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S create mode 100644 tools/testing/selftests/powerpc/copyloops/memmove_validate.c diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 994b11af765c..7283e8b07b75 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -13,3 +13,4 @@ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 copy_mc_64 +memmove_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 3095b1f1c02b..77594e697f2f 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -13,7 +13,8 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ - copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ + memmove_64 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ -D COPY_LOOP=test___copy_tofrom_user_base \ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \ -o $@ $^ + +$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \ + memcpy_stubs.S + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D TEST_MEMMOVE=test_memmove \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 58c1cef3e399..003e1b3d9300 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -26,6 +26,7 @@ #define _GLOBAL(A) FUNC_START(test_ ## A) #define _GLOBAL_TOC(A) _GLOBAL(A) #define _GLOBAL_TOC_KASAN(A) _GLOBAL(A) +#define _GLOBAL_KASAN(A) _GLOBAL(A) #define PPC_MTOCRF(A, B) mtocrf A, B diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S new file mode 120000 index 000000000000..db254c9a5f5c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/mem_64.S \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S new file mode 100644 index 000000000000..d9baa832fa49 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +FUNC_START(memcpy) + b test_memcpy + +FUNC_START(backwards_memcpy) + b test_backwards_memcpy diff --git a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c new file mode 100644 index 000000000000..1a23218b5757 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "utils.h" + +void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n); + +#define BUF_LEN 65536 +#define MAX_OFFSET 512 + +size_t max(size_t a, size_t b) +{ + if (a >= b) + return a; + return b; +} + +static int testcase_run(void) +{ + size_t i, src_off, dst_off, len; + + char *usermap = memalign(BUF_LEN, BUF_LEN); + char *kernelmap = memalign(BUF_LEN, BUF_LEN); + + assert(usermap != NULL); + assert(kernelmap != NULL); + + memset(usermap, 0, BUF_LEN); + memset(kernelmap, 0, BUF_LEN); + + for (i = 0; i < BUF_LEN; i++) { + usermap[i] = i & 0xff; + kernelmap[i] = i & 0xff; + } + + for (src_off = 0; src_off < MAX_OFFSET; src_off++) { + for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { + for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) { + + memmove(usermap + dst_off, usermap + src_off, len); + TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len); + if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) { + printf("memmove failed at %ld %ld %ld\n", + src_off, dst_off, len); + abort(); + } + } + } + } + return 0; +} + +int main(void) +{ + return test_harness(testcase_run, "memmove"); +} From 2354ad252b66695be02f4acd18e37bf6264f0464 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 11 Feb 2022 12:22:15 +0530 Subject: [PATCH 091/380] powerpc/mm: Update default hugetlb size early commit: d9c234005227 ("Do not depend on MAX_ORDER when grouping pages by mobility") introduced pageblock_order which will be used to group pages better. The kernel now groups pages based on the value of HPAGE_SHIFT. Hence HPAGE_SHIFT should be set before we call set_pageblock_order. set_pageblock_order happens early in the boot and default hugetlb page size should be initialized before that to compute the right pageblock_order value. Currently, default hugetlbe page size is set via arch_initcalls which happens late in the boot as shown via the below callstack: [c000000007383b10] [c000000001289328] hugetlbpage_init+0x2b8/0x2f8 [c000000007383bc0] [c0000000012749e4] do_one_initcall+0x14c/0x320 [c000000007383c90] [c00000000127505c] kernel_init_freeable+0x410/0x4e8 [c000000007383da0] [c000000000012664] kernel_init+0x30/0x15c [c000000007383e10] [c00000000000cf14] ret_from_kernel_thread+0x5c/0x64 and the pageblock_order initialization is done early during the boot. [c0000000018bfc80] [c0000000012ae120] set_pageblock_order+0x50/0x64 [c0000000018bfca0] [c0000000012b3d94] sparse_init+0x188/0x268 [c0000000018bfd60] [c000000001288bfc] initmem_init+0x28c/0x328 [c0000000018bfe50] [c00000000127b370] setup_arch+0x410/0x480 [c0000000018bfed0] [c00000000127401c] start_kernel+0xb8/0x934 [c0000000018bff90] [c00000000000d984] start_here_common+0x1c/0x98 delaying default hugetlb page size initialization implies the kernel will initialize pageblock_order to (MAX_ORDER - 1) which is not an optimal value for mobility grouping. IIUC we always had this issue. But it was not a problem for hash translation mode because (MAX_ORDER - 1) is the same as HUGETLB_PAGE_ORDER (8) in the case of hash (16MB). With radix, HUGETLB_PAGE_ORDER will be 5 (2M size) and hence pageblock_order should be 5 instead of 8. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220211065215.101767-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/hugetlb.h | 5 ++++- arch/powerpc/mm/book3s64/hugetlbpage.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 5 +---- arch/powerpc/mm/init_64.c | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 962708fa1017..6a1a1ac5743b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void __init hugetlbpage_init_default(void); +void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); @@ -76,6 +76,9 @@ static inline void __init gigantic_hugetlb_cma_reserve(void) { } +static inline void __init hugetlbpage_init_defaultsize(void) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index ea8f83afb0ae..3bc0eb21b2a0 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -150,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void __init hugetlbpage_init_default(void) +void __init hugetlbpage_init_defaultsize(void) { /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index ddead41e2194..b642a5a8668f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -664,10 +664,7 @@ static int __init hugetlbpage_init(void) configured = true; } - if (configured) { - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); - } else + if (!configured) pr_info("Failed to initialize. Disabling HugeTLB"); return 0; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 35f46bf54281..83c0ee9fbf05 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -513,6 +514,9 @@ void __init mmu_early_init_devtree(void) } else hash__early_init_devtree(); + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_defaultsize(); + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) panic("kernel does not support any MMU type offered by platform"); From 5a72345e6a78120368fcc841b570331b6c5a50da Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 11 Feb 2022 17:32:37 +1100 Subject: [PATCH 092/380] powerpc: Fix STACKTRACE=n build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our skiroot_defconfig doesn't enable FTRACE, and so doesn't get STACKTRACE enabled either. That leads to a build failure since commit 1614b2b11fab ("arch: Make ARCH_STACKWALK independent of STACKTRACE") made stacktrace.c build even when STACKTRACE=n. arch/powerpc/kernel/stacktrace.c: In function ‘handle_backtrace_ipi’: arch/powerpc/kernel/stacktrace.c:171:2: error: implicit declaration of function ‘nmi_cpu_backtrace’ 171 | nmi_cpu_backtrace(regs); | ^~~~~~~~~~~~~~~~~ arch/powerpc/kernel/stacktrace.c: In function ‘arch_trigger_cpumask_backtrace’: arch/powerpc/kernel/stacktrace.c:226:2: error: implicit declaration of function ‘nmi_trigger_cpumask_backtrace’ 226 | nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens because our headers haven't defined arch_trigger_cpumask_backtrace, which causes lib/nmi_backtrace.c not to build nmi_cpu_backtrace(). The code in question doesn't actually depend on STACKTRACE=y, that was just added because arch_trigger_cpumask_backtrace() lived in stacktrace.c for convenience. So drop the dependency on CONFIG_STACKTRACE, that causes lib/nmi_backtrace.c to build nmi_cpu_backtrace() etc. and fixes the build. Fixes: 1614b2b11fab ("arch: Make ARCH_STACKWALK independent of STACKTRACE") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220212111349.2806972-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 160abcb8e9fa..ea0e487f87b1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) +#ifdef CONFIG_NMI_IPI extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace From 38a1756861b8fc2ea9afb93e231194c642a4e261 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 13 Feb 2022 10:02:41 +0100 Subject: [PATCH 093/380] powerpc: Don't allow the use of EMIT_BUG_ENTRY with BUGFLAG_WARNING Warnings in assembly must use EMIT_WARN_ENTRY in order to generate the necessary entry in exception table. Check in EMIT_BUG_ENTRY that flags don't include BUGFLAG_WARNING. This change avoids problems like the one fixed by commit fd1eaaaaa686 ("powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings"). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ddcb422102a37eb45f57694c7ef0ec6187964dff.1644742951.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bug.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 02c08d1492f8..ecbae1832de3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -11,7 +11,7 @@ #ifdef __ASSEMBLY__ #include #ifdef CONFIG_DEBUG_BUGVERBOSE -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags @@ -22,7 +22,7 @@ .previous .endm #else -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b .short \flags @@ -33,7 +33,14 @@ .macro EMIT_WARN_ENTRY addr,file,line,flags EX_TABLE(\addr,\addr+4) - EMIT_BUG_ENTRY \addr,\file,\line,\flags + __EMIT_BUG_ENTRY \addr,\file,\line,\flags +.endm + +.macro EMIT_BUG_ENTRY addr,file,line,flags + .if \flags & 1 /* BUGFLAG_WARNING */ + .err /* Use EMIT_WARN_ENTRY for warnings */ + .endif + __EMIT_BUG_ENTRY \addr,\file,\line,\flags .endm #else /* !__ASSEMBLY__ */ From f266c11bce79ea793a741e3da6dfe4d6007df8df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Feb 2022 19:11:40 +0100 Subject: [PATCH 094/380] iommu/vtd: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1807113.tdWV9SEqCh@kreacher Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 915bff76fe96..6d10be50ec30 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -789,7 +789,8 @@ static int __init dmar_acpi_dev_scope_init(void) andd->device_name); continue; } - if (acpi_bus_get_device(h, &adev)) { + adev = acpi_fetch_acpi_dev(h); + if (!adev) { pr_err("Failed to get device for ACPI object %s\n", andd->device_name); continue; From 1fdbbfd5099f797a4dac05e7ef0192ba4a9c39b4 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 6 Jan 2022 10:43:02 +0800 Subject: [PATCH 095/380] iommu/ipmmu-vmsa: Check for error num after setting mask Because of the possible failure of the dma_supported(), the dma_set_mask_and_coherent() may return error num. Therefore, it should be better to check it and return the error if fails. Fixes: 1c894225bf5b ("iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master") Signed-off-by: Jiasheng Jiang Reviewed-by: Nikita Yushchenko Link: https://lore.kernel.org/r/20220106024302.2574180-1-jiasheng@iscas.ac.cn Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ca752bdc710f..61bd9a3004ed 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1006,7 +1006,9 @@ static int ipmmu_probe(struct platform_device *pdev) bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From da9f8386d6b7f2bb01098913c64b2b246f39cfce Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 8 Feb 2022 09:20:29 +0900 Subject: [PATCH 096/380] dt-bindings: iommu: renesas,ipmmu-vmsa: add r8a779f0 support Document the compatible values for the IPMMU-VMSA blocks in the Renesas R-Car S4-8 (R8A779F0) SoC and R-Car Gen4. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220208002030.1319984-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml index ce0c715205c6..5159a87f3fa7 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml @@ -44,6 +44,10 @@ properties: - renesas,ipmmu-r8a77990 # R-Car E3 - renesas,ipmmu-r8a77995 # R-Car D3 - renesas,ipmmu-r8a779a0 # R-Car V3U + - items: + - enum: + - renesas,ipmmu-r8a779f0 # R-Car S4-8 + - const: renesas,rcar-gen4-ipmmu-vmsa # R-Car Gen4 reg: maxItems: 1 From ae684caf465b7d6a6bd917049ee3749ae4b19435 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 8 Feb 2022 09:20:30 +0900 Subject: [PATCH 097/380] iommu/ipmmu-vmsa: Add support for R-Car Gen4 Add support for R-Car Gen4 like r8a779f0 (R-Car S4-8). The IPMMU hardware design of r8a779f0 is the same as r8a779a0. So, rename "r8a779a0" to "rcar_gen4". Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220208002030.1319984-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 61bd9a3004ed..4c3217b1a870 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -719,6 +719,7 @@ static int ipmmu_init_platform_device(struct device *dev, static const struct soc_device_attribute soc_needs_opt_in[] = { { .family = "R-Car Gen3", }, + { .family = "R-Car Gen4", }, { .family = "RZ/G2", }, { /* sentinel */ } }; @@ -743,7 +744,7 @@ static bool ipmmu_device_is_allowed(struct device *dev) unsigned int i; /* - * R-Car Gen3 and RZ/G2 use the allow list to opt-in devices. + * R-Car Gen3/4 and RZ/G2 use the allow list to opt-in devices. * For Other SoCs, this returns true anyway. */ if (!soc_device_match(soc_needs_opt_in)) @@ -926,7 +927,7 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = { .utlb_offset_base = 0, }; -static const struct ipmmu_features ipmmu_features_r8a779a0 = { +static const struct ipmmu_features ipmmu_features_rcar_gen4 = { .use_ns_alias_offset = false, .has_cache_leaf_nodes = true, .number_of_contexts = 16, @@ -982,7 +983,10 @@ static const struct of_device_id ipmmu_of_ids[] = { .data = &ipmmu_features_rcar_gen3, }, { .compatible = "renesas,ipmmu-r8a779a0", - .data = &ipmmu_features_r8a779a0, + .data = &ipmmu_features_rcar_gen4, + }, { + .compatible = "renesas,rcar-gen4-ipmmu", + .data = &ipmmu_features_rcar_gen4, }, { /* Terminator */ }, From 114a6f5015df8eda0cf37302914cb15a3da3f086 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 10 Feb 2022 12:29:05 +0000 Subject: [PATCH 098/380] iommu: Remove trivial ops->capable implementations Implementing ops->capable to always return false is pointless since it's the default behaviour anyway. Clean up the unnecessary implementations. Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/8413578c6f8a7cf75530b00cba8f10f5b88f8517.1644495614.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 6 ------ drivers/iommu/tegra-gart.c | 6 ------ drivers/iommu/tegra-smmu.c | 6 ------ 3 files changed, 18 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 3a38352b603f..6a2e511edb85 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -558,11 +558,6 @@ fail: return ret; } -static bool msm_iommu_capable(enum iommu_cap cap) -{ - return false; -} - static void print_ctx_regs(void __iomem *base, int ctx) { unsigned int fsr = GET_FSR(base, ctx); @@ -672,7 +667,6 @@ fail: } static struct iommu_ops msm_iommu_ops = { - .capable = msm_iommu_capable, .domain_alloc = msm_iommu_domain_alloc, .domain_free = msm_iommu_domain_free, .attach_dev = msm_iommu_attach_dev, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6a358f92c7e5..bbd287d19324 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -238,11 +238,6 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, return pte & GART_PAGE_MASK; } -static bool gart_iommu_capable(enum iommu_cap cap) -{ - return false; -} - static struct iommu_device *gart_iommu_probe_device(struct device *dev) { if (!dev_iommu_fwspec_get(dev)) @@ -276,7 +271,6 @@ static void gart_iommu_sync(struct iommu_domain *domain, } static const struct iommu_ops gart_iommu_ops = { - .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, .domain_free = gart_iommu_domain_free, .attach_dev = gart_iommu_attach_dev, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e900e3c46903..43df44f918a1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -272,11 +272,6 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) clear_bit(id, smmu->asids); } -static bool tegra_smmu_capable(enum iommu_cap cap) -{ - return false; -} - static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) { struct tegra_smmu_as *as; @@ -967,7 +962,6 @@ static int tegra_smmu_of_xlate(struct device *dev, } static const struct iommu_ops tegra_smmu_ops = { - .capable = tegra_smmu_capable, .domain_alloc = tegra_smmu_domain_alloc, .domain_free = tegra_smmu_domain_free, .attach_dev = tegra_smmu_attach_dev, From 6efd3b8356693898e87610e52bfecd81c70a419b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Feb 2022 20:16:41 +0000 Subject: [PATCH 099/380] iommu/rockchip: : Use standard driver registration It's been a long time since there was any reason to register IOMMU drivers early. Convert to the standard platform driver helper. CC: Heiko Stuebner Signed-off-by: Robin Murphy Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/c08d58bff340da6a829e76d66d2fa090a9718384.1644005728.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7f23ad61c094..204a93a72572 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1407,9 +1407,4 @@ static struct platform_driver rk_iommu_driver = { .suppress_bind_attrs = true, }, }; - -static int __init rk_iommu_init(void) -{ - return platform_driver_register(&rk_iommu_driver); -} -subsys_initcall(rk_iommu_init); +builtin_platform_driver(rk_iommu_driver); From 6b813e0e48d77bc026aaea6683fc6e74f5695ab4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Feb 2022 20:16:40 +0000 Subject: [PATCH 100/380] iommu/msm: Use standard driver registration It's been a long time since there was any reason to register IOMMU drivers early. Convert to the standard platform driver helper. CC: Andy Gross CC: Bjorn Andersson Signed-off-by: Robin Murphy Link: https://lore.kernel.org/all/05ca5e1b29bdd350f4e20b9ceb031a2c281e23d2.1644005728.git.robin.murphy@arm.com/ Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 3a38352b603f..06bde6b66732 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -831,16 +831,4 @@ static struct platform_driver msm_iommu_driver = { .probe = msm_iommu_probe, .remove = msm_iommu_remove, }; - -static int __init msm_iommu_driver_init(void) -{ - int ret; - - ret = platform_driver_register(&msm_iommu_driver); - if (ret != 0) - pr_err("Failed to register IOMMU driver\n"); - - return ret; -} -subsys_initcall(msm_iommu_driver_init); - +builtin_platform_driver(msm_iommu_driver); From a063158b20afea3cf0dfebea4a17c4e985fb6017 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Sun, 6 Feb 2022 21:29:45 +0100 Subject: [PATCH 101/380] iommu/msm: Simplify with dev_err_probe() Use the dev_err_probe() helper to simplify error handling during probe. This also handle scenario, when EDEFER is returned and useless error is printed. Fixes warnings as: msm_iommu 7500000.iommu: could not get smmu_pclk Signed-off-by: David Heidelberg Link: https://lore.kernel.org/r/20220206202945.465195-1-david@ixit.cz Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 06bde6b66732..e98de16c1a42 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -710,36 +710,32 @@ static int msm_iommu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&iommu->ctx_list); iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); - if (IS_ERR(iommu->pclk)) { - dev_err(iommu->dev, "could not get smmu_pclk\n"); - return PTR_ERR(iommu->pclk); - } + if (IS_ERR(iommu->pclk)) + return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk), + "could not get smmu_pclk\n"); ret = clk_prepare(iommu->pclk); - if (ret) { - dev_err(iommu->dev, "could not prepare smmu_pclk\n"); - return ret; - } + if (ret) + return dev_err_probe(iommu->dev, ret, + "could not prepare smmu_pclk\n"); iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); if (IS_ERR(iommu->clk)) { - dev_err(iommu->dev, "could not get iommu_clk\n"); clk_unprepare(iommu->pclk); - return PTR_ERR(iommu->clk); + return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk), + "could not get iommu_clk\n"); } ret = clk_prepare(iommu->clk); if (ret) { - dev_err(iommu->dev, "could not prepare iommu_clk\n"); clk_unprepare(iommu->pclk); - return ret; + return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n"); } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); iommu->base = devm_ioremap_resource(iommu->dev, r); if (IS_ERR(iommu->base)) { - dev_err(iommu->dev, "could not get iommu base\n"); - ret = PTR_ERR(iommu->base); + ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n"); goto fail; } ioaddr = r->start; From 32e92d9f6f876721cc4f565f8369d542b6266380 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 3 Feb 2022 17:59:20 +0800 Subject: [PATCH 102/380] iommu/iova: Separate out rcache init Currently the rcache structures are allocated for all IOVA domains, even if they do not use "fast" alloc+free interface. This is wasteful of memory. In addition, fails in init_iova_rcaches() are not handled safely, which is less than ideal. Make "fast" users call a separate rcache init explicitly, which includes error checking. Signed-off-by: John Garry Reviewed-by: Robin Murphy Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1643882360-241739-1-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 4 ++ drivers/iommu/iova.c | 73 ++++++++++++++++++++++------ drivers/vdpa/vdpa_user/iova_domain.c | 11 +++++ include/linux/iova.h | 15 ++---- 4 files changed, 77 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d85d54f2b549..b22034975301 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -525,6 +525,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; struct iova_domain *iovad; + int ret; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -559,6 +560,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, } init_iova_domain(iovad, 1UL << order, base_pfn); + ret = iova_domain_init_rcaches(iovad); + if (ret) + return ret; /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b28c9435b898..7e9c3a97c040 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -15,13 +15,14 @@ /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); -static void init_iova_rcaches(struct iova_domain *iovad); static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); @@ -64,8 +65,6 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); rb_insert_color(&iovad->anchor.node, &iovad->rbroot); - cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead); - init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -488,6 +487,13 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +static void iova_domain_free_rcaches(struct iova_domain *iovad) +{ + cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + free_iova_rcaches(iovad); +} + /** * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. @@ -497,9 +503,9 @@ void put_iova_domain(struct iova_domain *iovad) { struct iova *iova, *tmp; - cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, - &iovad->cpuhp_dead); - free_iova_rcaches(iovad); + if (iovad->rcaches) + iova_domain_free_rcaches(iovad); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); } @@ -608,6 +614,7 @@ EXPORT_SYMBOL_GPL(reserve_iova); */ #define IOVA_MAG_SIZE 128 +#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ struct iova_magazine { unsigned long size; @@ -620,6 +627,13 @@ struct iova_cpu_rcache { struct iova_magazine *prev; }; +struct iova_rcache { + spinlock_t lock; + unsigned long depot_size; + struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_cpu_rcache __percpu *cpu_rcaches; +}; + static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { return kzalloc(sizeof(struct iova_magazine), flags); @@ -693,28 +707,54 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; } -static void init_iova_rcaches(struct iova_domain *iovad) +int iova_domain_init_rcaches(struct iova_domain *iovad) { - struct iova_cpu_rcache *cpu_rcache; - struct iova_rcache *rcache; unsigned int cpu; - int i; + int i, ret; + + iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE, + sizeof(struct iova_rcache), + GFP_KERNEL); + if (!iovad->rcaches) + return -ENOMEM; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); rcache->depot_size = 0; - rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); - if (WARN_ON(!rcache->cpu_rcaches)) - continue; + rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), + cache_line_size()); + if (!rcache->cpu_rcaches) { + ret = -ENOMEM; + goto out_err; + } for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_init(&cpu_rcache->lock); cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); + if (!cpu_rcache->loaded || !cpu_rcache->prev) { + ret = -ENOMEM; + goto out_err; + } } } + + ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + if (ret) + goto out_err; + return 0; + +out_err: + free_iova_rcaches(iovad); + return ret; } +EXPORT_SYMBOL_GPL(iova_domain_init_rcaches); /* * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and @@ -831,7 +871,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, { unsigned int log_size = order_base_2(size); - if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches) return 0; return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); @@ -849,6 +889,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; + if (!rcache->cpu_rcaches) + break; for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); iova_magazine_free(cpu_rcache->loaded); @@ -858,6 +900,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) for (j = 0; j < rcache->depot_size; ++j) iova_magazine_free(rcache->depot[j]); } + + kfree(iovad->rcaches); + iovad->rcaches = NULL; } /* diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 2b1143f11d8f..22f7d43f8a68 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -480,6 +480,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) struct file *file; struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; + int ret; bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; if (iova_limit <= bounce_size) @@ -513,10 +514,20 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) spin_lock_init(&domain->iotlb_lock); init_iova_domain(&domain->stream_iovad, PAGE_SIZE, IOVA_START_PFN); + ret = iova_domain_init_rcaches(&domain->stream_iovad); + if (ret) + goto err_iovad_stream; init_iova_domain(&domain->consistent_iovad, PAGE_SIZE, bounce_pfns); + ret = iova_domain_init_rcaches(&domain->consistent_iovad); + if (ret) + goto err_iovad_consistent; return domain; +err_iovad_consistent: + put_iova_domain(&domain->stream_iovad); +err_iovad_stream: + fput(file); err_file: vfree(domain->bounce_maps); err_map: diff --git a/include/linux/iova.h b/include/linux/iova.h index cea79cb9f26c..320a70e40233 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -21,18 +21,8 @@ struct iova { unsigned long pfn_lo; /* Lowest allocated pfn */ }; -struct iova_magazine; -struct iova_cpu_rcache; -#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ - -struct iova_rcache { - spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; - struct iova_cpu_rcache __percpu *cpu_rcaches; -}; +struct iova_rcache; /* holds all the iova translations for a domain */ struct iova_domain { @@ -46,7 +36,7 @@ struct iova_domain { unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + struct iova_rcache *rcaches; struct hlist_node cpuhp_dead; }; @@ -102,6 +92,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +int iova_domain_init_rcaches(struct iova_domain *iovad); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else From 67ff2f262619d7bb5101525672ead4f2a437a40f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 26 Jan 2022 00:00:12 +0800 Subject: [PATCH 103/380] riscv: mm: init: mark satp_mode __ro_after_init satp_mode is never modified after init, so it can be marked as __ro_after_init. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index cf4d018b7d66..332a31978381 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -38,9 +38,9 @@ EXPORT_SYMBOL(kernel_map); #endif #ifdef CONFIG_64BIT -u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39; +u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39; #else -u64 satp_mode = SATP_MODE_32; +u64 satp_mode __ro_after_init = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); From d10efa21a9374829a662ec1377237e85f430c024 Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Thu, 27 Jan 2022 10:48:41 +0800 Subject: [PATCH 104/380] riscv: mm: Control p4d's folding by pgtable_l5_enabled To determine pgtable level at boot time, we can not use helper functions in include/asm-generic/pgtable-nop4d.h and must implement these functions. This patch uses pgtable_l5_enabled variable instead of including pgtable-nop4d.h to controle p4d's folding, and implements corresponding helper functions. Signed-off-by: Qinglin Pan Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 49 +++++++++++++ arch/riscv/include/asm/pgtable-64.h | 106 +++++++++++++++++++++++++++- arch/riscv/include/asm/pgtable.h | 4 +- arch/riscv/mm/init.c | 2 + 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 11823004b87a..947f23d7b6af 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, } } +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); + + set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, + p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); + + set_pgd_safe(pgd, + __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + #define pud_alloc_one pud_alloc_one static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) } #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) + +#define p4d_alloc_one p4d_alloc_one +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + if (pgtable_l5_enabled) { + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (p4d_t *)get_zeroed_page(gfp); + } + + return NULL; +} + +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + free_page((unsigned long)p4d); +} + +#define p4d_free p4d_free +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (pgtable_l5_enabled) + __p4d_free(mm, p4d); +} + +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index bbbdd66e5e2f..7e246e9f8d70 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -9,16 +9,24 @@ #include extern bool pgtable_l4_enabled; +extern bool pgtable_l5_enabled; #define PGDIR_SHIFT_L3 30 #define PGDIR_SHIFT_L4 39 +#define PGDIR_SHIFT_L5 48 #define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) -#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3) +#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \ + (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)) /* Size of region mapped by a page global directory */ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +/* p4d is folded into pgd in case of 4-level page table */ +#define P4D_SHIFT 39 +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + /* pud is folded into pgd in case of 3-level page table */ #define PUD_SHIFT 30 #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) @@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled; #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) +/* Page 4th Directory entry */ +typedef struct { + unsigned long p4d; +} p4d_t; + +#define p4d_val(x) ((x).p4d) +#define __p4d(x) ((p4d_t) { (x) }) +#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t)) + /* Page Upper Directory entry */ typedef struct { unsigned long pud; @@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud) return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); } +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + if (pgtable_l5_enabled) + return false; + + return true; +} + #define mm_pud_folded mm_pud_folded static inline bool mm_pud_folded(struct mm_struct *mm) { @@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e)) + static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { if (pgtable_l4_enabled) @@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d) set_p4d(p4d, __p4d(0)); } +static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) +{ + return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); +} + +static inline unsigned long _p4d_pfn(p4d_t p4d) +{ + return p4d_val(p4d) >> _PAGE_PFN_SHIFT; +} + static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) @@ -173,6 +212,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } +#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d)) static inline struct page *p4d_page(p4d_t p4d) { @@ -190,4 +230,68 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) return (pud_t *)p4d; } +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (pgtable_l5_enabled) + *pgdp = pgd; + else + set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); +} + +static inline int pgd_none(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (pgd_val(pgd) == 0); + + return 0; +} + +static inline int pgd_present(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (pgd_val(pgd) & _PAGE_PRESENT); + + return 1; +} + +static inline int pgd_bad(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return !pgd_present(pgd); + + return 0; +} + +static inline void pgd_clear(pgd_t *pgd) +{ + if (pgtable_l5_enabled) + set_pgd(pgd, __pgd(0)); +} + +static inline p4d_t *pgd_pgtable(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + + return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); +} +#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd)) + +static inline struct page *pgd_page(pgd_t pgd) +{ + return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); +} +#define pgd_page(pgd) pgd_page(pgd) + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +#define p4d_offset p4d_offset +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + if (pgtable_l5_enabled) + return pgd_pgtable(*pgd) + p4d_index(address); + + return (p4d_t *)pgd; +} + #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7e949f25c933..58fece3133e9 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -62,7 +62,8 @@ * position vmemmap directly below the VMALLOC region. */ #ifdef CONFIG_64BIT -#define VA_BITS (pgtable_l4_enabled ? 48 : 39) +#define VA_BITS (pgtable_l5_enabled ? \ + 57 : (pgtable_l4_enabled ? 48 : 39)) #else #define VA_BITS 32 #endif @@ -102,7 +103,6 @@ #ifndef __ASSEMBLY__ -#include #include #include #include diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index cf4d018b7d66..9e4ed02e3d56 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -45,7 +45,9 @@ u64 satp_mode = SATP_MODE_32; EXPORT_SYMBOL(satp_mode); bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = false; EXPORT_SYMBOL(pgtable_l4_enabled); +EXPORT_SYMBOL(pgtable_l5_enabled); phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); From 677b9eb8810edc6c616a699018a83e24ed0cccab Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Thu, 27 Jan 2022 10:48:42 +0800 Subject: [PATCH 105/380] riscv: mm: Prepare pt_ops helper functions for sv57 This patch prepare some pt_ops helper functions which will be used in creating sv57 mappings during boot time. Signed-off-by: Qinglin Pan Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fixmap.h | 1 + arch/riscv/include/asm/pgtable.h | 2 + arch/riscv/mm/init.c | 131 +++++++++++++++++++++++++++---- 3 files changed, 119 insertions(+), 15 deletions(-) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 58a718573ad6..3cfece8b6568 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -25,6 +25,7 @@ enum fixed_addresses { FIX_PTE, FIX_PMD, FIX_PUD, + FIX_P4D, FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 58fece3133e9..351cabcd528e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -133,6 +133,8 @@ struct pt_alloc_ops { phys_addr_t (*alloc_pmd)(uintptr_t va); pud_t *(*get_pud_virt)(phys_addr_t pa); phys_addr_t (*alloc_pud)(uintptr_t va); + p4d_t *(*get_p4d_virt)(phys_addr_t pa); + phys_addr_t (*alloc_p4d)(uintptr_t va); #endif }; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9e4ed02e3d56..425377c97c40 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -229,6 +229,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); @@ -319,6 +320,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) #endif /* CONFIG_XIP_KERNEL */ +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d)) +#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d)) +#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d)) +#endif /* CONFIG_XIP_KERNEL */ + static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); @@ -433,6 +444,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va) return __pa(vaddr); } +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa) +{ + return (p4d_t *)((uintptr_t)pa); +} + +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_P4D); + return (p4d_t *)set_fixmap_offset(FIX_P4D, pa); +} + +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) +{ + return (p4d_t *)__va(pa); +} + +static phys_addr_t __init alloc_p4d_early(uintptr_t va) +{ + /* Only one P4D is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_p4d; +} + +static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_p4d_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + static void __init create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -460,21 +509,55 @@ static void __init create_pud_mapping(pud_t *pudp, create_pmd_mapping(nextp, va, pa, sz, prot); } -#define pgd_next_t pud_t -#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \ - pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)) -#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \ - pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa)) +static void __init create_p4d_mapping(p4d_t *p4dp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pud_t *nextp; + phys_addr_t next_phys; + uintptr_t p4d_index = p4d_index(va); + + if (sz == P4D_SIZE) { + if (p4d_val(p4dp[p4d_index]) == 0) + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot); + return; + } + + if (p4d_val(p4dp[p4d_index]) == 0) { + next_phys = pt_ops.alloc_pud(va); + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pud_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); + nextp = pt_ops.get_pud_virt(next_phys); + } + + create_pud_mapping(nextp, va, pa, sz, prot); +} + +#define pgd_next_t p4d_t +#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \ + pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \ + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))) +#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \ + pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \ + pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa))) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + (pgtable_l5_enabled ? \ + create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \ (pgtable_l4_enabled ? \ - create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \ - create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)) -#define fixmap_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd) -#define trampoline_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd) -#define early_dtb_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd) + create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \ + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))) +#define fixmap_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)) +#define trampoline_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +#define early_dtb_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) @@ -483,6 +566,7 @@ static void __init create_pud_mapping(pud_t *pudp, create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next ((uintptr_t)fixmap_pte) #define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) #endif /* __PAGETABLE_PMD_FOLDED */ @@ -693,10 +777,13 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); - if (pgtable_l4_enabled) { + if (pgtable_l5_enabled) + create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); + + if (pgtable_l4_enabled) create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); - } if (IS_ENABLED(CONFIG_64BIT)) { create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, @@ -732,6 +819,8 @@ void __init pt_ops_set_early(void) pt_ops.get_pmd_virt = get_pmd_virt_early; pt_ops.alloc_pud = alloc_pud_early; pt_ops.get_pud_virt = get_pud_virt_early; + pt_ops.alloc_p4d = alloc_p4d_early; + pt_ops.get_p4d_virt = get_p4d_virt_early; #endif } @@ -752,6 +841,8 @@ void __init pt_ops_set_fixmap(void) pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); #endif } @@ -768,6 +859,8 @@ void __init pt_ops_set_late(void) pt_ops.get_pmd_virt = get_pmd_virt_late; pt_ops.alloc_pud = alloc_pud_late; pt_ops.get_pud_virt = get_pud_virt_late; + pt_ops.alloc_p4d = alloc_p4d_late; + pt_ops.get_p4d_virt = get_p4d_virt_late; #endif } @@ -828,6 +921,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED + /* Setup fixmap P4D and PUD */ + if (pgtable_l5_enabled) + create_p4d_mapping(fixmap_p4d, FIXADDR_START, + (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE); /* Setup fixmap PUD and PMD */ if (pgtable_l4_enabled) create_pud_mapping(fixmap_pud, FIXADDR_START, @@ -837,6 +934,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); if (pgtable_l4_enabled) create_pud_mapping(trampoline_pud, kernel_map.virt_addr, (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); @@ -938,6 +1038,7 @@ static void __init setup_vm_final(void) clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); clear_fixmap(FIX_PUD); + clear_fixmap(FIX_P4D); /* Move to swapper page table */ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); From 011f09d1205285b215003a6c65408609ef78abac Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Thu, 27 Jan 2022 10:48:43 +0800 Subject: [PATCH 106/380] riscv: mm: Set sv57 on defaultly This patch sets sv57 on defaultly if CONFIG_64BIT. And do fallback to try to set sv48 on boot time if sv57 is not supported in current hardware. Signed-off-by: Qinglin Pan Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 4 ++-- arch/riscv/include/asm/csr.h | 1 + arch/riscv/include/asm/page.h | 1 + arch/riscv/kernel/cpu.c | 4 +++- arch/riscv/mm/init.c | 35 +++++++++++++++++++++++++++-------- 5 files changed, 34 insertions(+), 11 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 5adcbd9b5e88..81682a152800 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -151,7 +151,7 @@ config PAGE_OFFSET hex default 0xC0000000 if 32BIT default 0x80000000 if 64BIT && !MMU - default 0xffffaf8000000000 if 64BIT + default 0xff60000000000000 if 64BIT config KASAN_SHADOW_OFFSET hex @@ -202,7 +202,7 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int - default 4 if 64BIT + default 5 if 64BIT default 2 config LOCKDEP_SUPPORT diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index ae711692eec9..299abdef0cd6 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -47,6 +47,7 @@ #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) #define SATP_MODE_39 _AC(0x8000000000000000, UL) #define SATP_MODE_48 _AC(0x9000000000000000, UL) +#define SATP_MODE_57 _AC(0xa000000000000000, UL) #define SATP_ASID_BITS 16 #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 160e3a1e8f8b..618779409203 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -41,6 +41,7 @@ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so * define the PAGE_OFFSET value for SV39. */ +#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index ad0a7e9f828b..3e7e35d50e69 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -79,7 +79,9 @@ static void print_mmu(struct seq_file *f) #if defined(CONFIG_32BIT) strncpy(sv_type, "sv32", 5); #elif defined(CONFIG_64BIT) - if (pgtable_l4_enabled) + if (pgtable_l5_enabled) + strncpy(sv_type, "sv57", 5); + else if (pgtable_l4_enabled) strncpy(sv_type, "sv48", 5); else strncpy(sv_type, "sv39", 5); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 425377c97c40..df8ddde2e750 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -38,14 +38,14 @@ EXPORT_SYMBOL(kernel_map); #endif #ifdef CONFIG_64BIT -u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39; +u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39; #else u64 satp_mode = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); -bool pgtable_l5_enabled = false; +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); @@ -659,6 +659,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif /* CONFIG_STRICT_KERNEL_RWX */ #ifdef CONFIG_64BIT +static void __init disable_pgtable_l5(void) +{ + pgtable_l5_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L4; + satp_mode = SATP_MODE_48; +} + static void __init disable_pgtable_l4(void) { pgtable_l4_enabled = false; @@ -675,12 +682,12 @@ static void __init disable_pgtable_l4(void) static __init void set_satp_mode(void) { u64 identity_satp, hw_satp; - uintptr_t set_satp_mode_pmd; + uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; + bool check_l4 = false; - set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; - create_pgd_mapping(early_pg_dir, - set_satp_mode_pmd, (uintptr_t)early_pud, - PGDIR_SIZE, PAGE_TABLE); + create_p4d_mapping(early_p4d, + set_satp_mode_pmd, (uintptr_t)early_pud, + P4D_SIZE, PAGE_TABLE); create_pud_mapping(early_pud, set_satp_mode_pmd, (uintptr_t)early_pmd, PUD_SIZE, PAGE_TABLE); @@ -692,6 +699,11 @@ static __init void set_satp_mode(void) set_satp_mode_pmd + PMD_SIZE, set_satp_mode_pmd + PMD_SIZE, PMD_SIZE, PAGE_KERNEL_EXEC); +retry: + create_pgd_mapping(early_pg_dir, + set_satp_mode_pmd, + check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d, + PGDIR_SIZE, PAGE_TABLE); identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; @@ -700,10 +712,17 @@ static __init void set_satp_mode(void) hw_satp = csr_swap(CSR_SATP, 0ULL); local_flush_tlb_all(); - if (hw_satp != identity_satp) + if (hw_satp != identity_satp) { + if (!check_l4) { + disable_pgtable_l5(); + check_l4 = true; + goto retry; + } disable_pgtable_l4(); + } memset(early_pg_dir, 0, PAGE_SIZE); + memset(early_p4d, 0, PAGE_SIZE); memset(early_pud, 0, PAGE_SIZE); memset(early_pmd, 0, PAGE_SIZE); } From 8fbdccd2b17335e1881a23865e98c63fcc345938 Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Thu, 27 Jan 2022 10:48:44 +0800 Subject: [PATCH 107/380] riscv: mm: Support kasan for sv57 This patchset add kasan_populate and kasan_shallow_populate for sv57, and is tested on both qemu and unmatched with CONFIG_KASAN and CONFIG_KASAN_VMALLOC on. Signed-off-by: Qinglin Pan Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 155 +++++++++++++++++++++++++++++++++---- 1 file changed, 141 insertions(+), 14 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index f61f7ca6fe0f..3c99b24c9adf 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -111,6 +111,8 @@ static void __init kasan_populate_pud(pgd_t *pgd, * pt_ops facility. */ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else if (pgd_none(*pgd)) { + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); } else { base_pud = (pud_t *)pgd_page_vaddr(*pgd); if (base_pud == lm_alias(kasan_early_shadow_pud)) @@ -149,13 +151,72 @@ static void __init kasan_populate_pud(pgd_t *pgd, set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); } -#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \ +static void __init kasan_populate_p4d(pgd_t *pgd, + unsigned long vaddr, unsigned long end, + bool early) +{ + phys_addr_t phys_addr; + p4d_t *p4dp, *base_p4d; + unsigned long next; + + if (early) { + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. + */ + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else { + base_p4d = (p4d_t *)pgd_page_vaddr(*pgd); + if (base_p4d == lm_alias(kasan_early_shadow_p4d)) + base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE); + } + + p4dp = base_p4d + p4d_index(vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { + if (early) { + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud)); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } else { + phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); + if (phys_addr) { + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + } + + kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early); + } while (p4dp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole P4D to be populated before setting the P4D in + * the page table, otherwise, if we did set the P4D before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. + */ + if (!early) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE)); +} + +#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)kasan_early_shadow_p4d : \ + (pgtable_l4_enabled ? \ (uintptr_t)kasan_early_shadow_pud : \ - (uintptr_t)kasan_early_shadow_pmd) + (uintptr_t)kasan_early_shadow_pmd)) #define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ + (pgtable_l5_enabled ? \ + kasan_populate_p4d(pgdp, vaddr, next, early) : \ (pgtable_l4_enabled ? \ kasan_populate_pud(pgdp, vaddr, next, early) : \ - kasan_populate_pmd((pud_t *)pgdp, vaddr, next)) + kasan_populate_pmd((pud_t *)pgdp, vaddr, next))) static void __init kasan_populate_pgd(pgd_t *pgdp, unsigned long vaddr, unsigned long end, @@ -219,6 +280,14 @@ asmlinkage void __init kasan_early_init(void) PAGE_TABLE)); } + if (pgtable_l5_enabled) { + for (i = 0; i < PTRS_PER_P4D; ++i) + set_p4d(kasan_early_shadow_p4d + i, + pfn_p4d(PFN_DOWN + (__pa(((uintptr_t)kasan_early_shadow_pud))), + PAGE_TABLE)); + } + kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), KASAN_SHADOW_START, KASAN_SHADOW_END, true); @@ -244,9 +313,27 @@ static void __init kasan_populate(void *start, void *end) memset(start, KASAN_SHADOW_INIT, end - start); } +static void __init kasan_shallow_populate_pmd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + pmd_t *pmdp, *base_pmd; + bool is_kasan_pte; + + base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp); + pmdp = base_pmd + pmd_index(vaddr); + + do { + next = pmd_addr_end(vaddr, end); + is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte)); + + if (is_kasan_pte) + pmd_clear(pmdp); + } while (pmdp++, vaddr = next, vaddr != end); +} + static void __init kasan_shallow_populate_pud(pgd_t *pgdp, - unsigned long vaddr, unsigned long end, - bool kasan_populate) + unsigned long vaddr, unsigned long end) { unsigned long next; pud_t *pudp, *base_pud; @@ -256,21 +343,60 @@ static void __init kasan_shallow_populate_pud(pgd_t *pgdp, base_pud = (pud_t *)pgd_page_vaddr(*pgdp); pudp = base_pud + pud_index(vaddr); - if (kasan_populate) - memcpy(base_pud, (void *)kasan_early_shadow_pgd_next, - sizeof(pud_t) * PTRS_PER_PUD); - do { next = pud_addr_end(vaddr, end); is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); - if (is_kasan_pmd) { - base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); - } + if (!is_kasan_pmd) + continue; + + base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); + + if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) + continue; + + memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE); + kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next); } while (pudp++, vaddr = next, vaddr != end); } +static void __init kasan_shallow_populate_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp, *base_p4d; + pud_t *base_pud; + bool is_kasan_pud; + + base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp); + p4dp = base_p4d + p4d_index(vaddr); + + do { + next = p4d_addr_end(vaddr, end); + is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud)); + + if (!is_kasan_pud) + continue; + + base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); + + if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) + continue; + + memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE); + kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \ + (pgtable_l5_enabled ? \ + kasan_shallow_populate_p4d(pgdp, vaddr, next) : \ + (pgtable_l4_enabled ? \ + kasan_shallow_populate_pud(pgdp, vaddr, next) : \ + kasan_shallow_populate_pmd(pgdp, vaddr, next))) + static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { unsigned long next; @@ -291,7 +417,8 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) continue; - kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next); + memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE); + kasan_shallow_populate_pgd_next(pgd_k, vaddr, next); } while (pgd_k++, vaddr = next, vaddr != end); } From cb7356986db020c96f37532042fdae6706e81df7 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Mon, 14 Feb 2022 07:55:43 +0100 Subject: [PATCH 108/380] powerpc/boot: Add `otheros-too-big.bld` to .gitignore Currently, `git status` lists the file as untracked by git, so tell git to ignore it. Fixes: aa3bc365ee73 ("powerpc/ps3: Add check for otheros image size") Signed-off-by: Paul Menzel Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220214065543.198992-1-pmenzel@molgen.mpg.de --- arch/powerpc/boot/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1eee61b82341..a4716d138cfc 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -16,6 +16,7 @@ kernel-vmlinux.strip.c kernel-vmlinux.strip.gz mktree otheros.bld +otheros-too-big.bld uImage cuImage.* dtbImage.* From 8ddf4eff71e12e4295b54ac8b02439ad22271fd2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Feb 2022 20:47:58 +0200 Subject: [PATCH 109/380] perf/smmuv3: Don't cast parameter in bit operations While in this particular case it would not be a (critical) issue, the pattern itself is bad and error prone in case somebody blindly copies to their code. Don't cast parameter to unsigned long pointer in the bit operations. Instead copy to a local variable on stack of a proper type and use. Note, new compilers might warn on this line for potential outbound access. Signed-off-by: Andy Shevchenko Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220209184758.56578-1-andriy.shevchenko@linux.intel.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index c49108a72865..00d4c45a8017 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -654,6 +654,7 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) { struct smmu_pmu *smmu_pmu = data; + DECLARE_BITMAP(ovs, BITS_PER_TYPE(u64)); u64 ovsr; unsigned int idx; @@ -663,7 +664,8 @@ static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); - for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) { + bitmap_from_u64(ovs, ovsr); + for_each_set_bit(idx, ovs, smmu_pmu->num_counters) { struct perf_event *event = smmu_pmu->events[idx]; struct hw_perf_event *hwc; From 34d8dac807f0ee3dc42ab45bdb284a3caf2b5ed1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 19:31:22 +0100 Subject: [PATCH 110/380] powerpc/ftrace: Also save r1 in ftrace_caller() Also save r1 in ftrace_caller() r1 is needed during unwinding when the function_graph tracer is active. Fixes: 830213786c49 ("powerpc/ftrace: directly call of function graph tracer by ftrace caller") Reported-by: Naveen N. Rao Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ff535e86d3a69376a6d89168511d4e403835f18b.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 56da60e98327..8443902d5a05 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -173,6 +173,10 @@ _GLOBAL(ftrace_caller) beq ftrace_no_trace #endif + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + PPC_STL r8, GPR1(r1) + /* Get the _mcount() call site out of LR */ mflr r7 PPC_STL r7, _NIP(r1) From df45a55788286c541449d82ee09fef3ac5ff77a1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 19:31:23 +0100 Subject: [PATCH 111/380] powerpc/ftrace: Add recursion protection in prepare_ftrace_return() The function_graph_enter() does not provide any recursion protection. Add a protection in prepare_ftrace_return() in case function_graph_enter() calls something that gets function graph traced. Fixes: 830213786c49 ("powerpc/ftrace: directly call of function graph tracer by ftrace caller") Reported-by: Naveen N. Rao Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/74edf2ff0a60e66b0d9225a137100a86a0557032.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 74a176e394ef..f21b8fbd418e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -944,6 +944,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) { unsigned long return_hooker; + int bit; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -951,10 +952,16 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; + bit = ftrace_test_recursion_trylock(ip, parent); + if (bit < 0) + goto out; + return_hooker = ppc_function_entry(return_to_handler); if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) parent = return_hooker; + + ftrace_test_recursion_unlock(bit); out: return parent; } From fc75f87337983229b7355d6b77f30fb6e7f359ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 19:31:24 +0100 Subject: [PATCH 112/380] powerpc/ftrace: Have arch_ftrace_get_regs() return NULL unless FL_SAVE_REGS is set When FL_SAVE_REGS is not set we get here via ftrace_caller() which doesn't save all registers. ftrace_caller() explicitely clears regs.msr, so we can rely on it to know where we come from. We don't expect MSR register to be 0 at all when involving ftrace. Fixes: 40b035efe288 ("powerpc/ftrace: Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS") Reported-by: Naveen N. Rao Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2f9a7e898c93cc7438ef5ccd47cb9c3a9c5b53ef.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 3 ++- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 70b457097098..ff034ae4e472 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -30,7 +30,8 @@ struct ftrace_regs { static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + /* We clear regs.msr in ftrace_call */ + return fregs->regs.msr ? &fregs->regs : NULL; } static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 8443902d5a05..eb077270ec2f 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -205,6 +205,10 @@ _GLOBAL(ftrace_caller) PPC_STL r0, _LINK(r1) mr r4, r0 + /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ + li r8, 0 + PPC_STL r8, _MSR(r1) + /* Load &pt_regs in r6 for call below */ addi r6, r1 ,STACK_FRAME_OVERHEAD From 76b372814b088aeb76f0f753d968c8aa6d297f2a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 19:31:25 +0100 Subject: [PATCH 113/380] powerpc/ftrace: Style cleanup in ftrace_mprofile.S Add some line breaks to better match the file's style, add some space after comma and fix a couple of misplaced blanks. Suggested-by: Naveen N. Rao Signed-off-by: Christophe Leroy Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/973506292d0c7b05c06530c8e11803ce38e5eda2.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index eb077270ec2f..89639e64acd1 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -87,8 +87,9 @@ _GLOBAL(ftrace_regs_caller) #endif #ifdef CONFIG_LIVEPATCH_64 - mr r14,r7 /* remember old NIP */ + mr r14, r7 /* remember old NIP */ #endif + /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -102,7 +103,7 @@ _GLOBAL(ftrace_regs_caller) PPC_STL r11, _CCR(r1) /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD + addi r6, r1, STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_regs_call @@ -113,6 +114,7 @@ ftrace_regs_call: /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 + #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif @@ -196,7 +198,7 @@ _GLOBAL(ftrace_caller) #ifdef CONFIG_LIVEPATCH_64 SAVE_GPR(14, r1) - mr r14,r7 /* remember old NIP */ + mr r14, r7 /* remember old NIP */ #endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -210,7 +212,7 @@ _GLOBAL(ftrace_caller) PPC_STL r8, _MSR(r1) /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD + addi r6, r1, STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_call @@ -220,6 +222,7 @@ ftrace_call: PPC_LL r3, _NIP(r1) mtctr r3 + #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ REST_GPR(14, r1) @@ -244,6 +247,7 @@ ftrace_call: /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif + bctr /* jump after _mcount site */ #ifdef CONFIG_LIVEPATCH_64 From bbbca72352bb9484bc057c91a408332b35ee8f4c Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 25 Jan 2022 01:52:04 +0530 Subject: [PATCH 114/380] powerpc/papr_scm: Implement initial support for injecting smart errors Presently PAPR doesn't support injecting smart errors on an NVDIMM. This makes testing the NVDIMM health reporting functionality difficult as simulating NVDIMM health related events need a hacked up qemu version. To solve this problem this patch proposes simulating certain set of NVDIMM health related events in papr_scm. Specifically 'fatal' health state and 'dirty' shutdown state. These error can be injected via the user-space 'ndctl-inject-smart(1)' command. With the proposed patch and corresponding ndctl patches following command flow is expected: $ sudo ndctl list -DH -d nmem0 ... "health_state":"ok", "shutdown_state":"clean", ... # inject unsafe shutdown and fatal health error $ sudo ndctl inject-smart nmem0 -Uf ... "health_state":"fatal", "shutdown_state":"dirty", ... # uninject all errors $ sudo ndctl inject-smart nmem0 -N ... "health_state":"ok", "shutdown_state":"clean", ... The patch adds a new member 'health_bitmap_inject_mask' inside struct papr_scm_priv which is then bitwise ANDed to the health bitmap fetched from the hypervisor. The value for 'health_bitmap_inject_mask' is accessible from sysfs at nmemX/papr/health_bitmap_inject. A new PDSM named 'SMART_INJECT' is proposed that accepts newly introduced 'struct nd_papr_pdsm_smart_inject' as payload thats exchanged between libndctl and papr_scm to indicate the requested smart-error states. When the processing the PDSM 'SMART_INJECT', papr_pdsm_smart_inject() constructs a pair or 'inject_mask' and 'clear_mask' bitmaps from the payload and bit-blt it to the 'health_bitmap_inject_mask'. This ensures the after being fetched from the hypervisor, the health_bitmap reflects requested smart-error states. Signed-off-by: Vaibhav Jain Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124202204.1488346-1-vaibhav@linux.ibm.com --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 12 +++ arch/powerpc/include/uapi/asm/papr_pdsm.h | 18 ++++ arch/powerpc/platforms/pseries/papr_scm.c | 90 ++++++++++++++++++- 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 95254cec92bf..4ac0673901e7 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -61,3 +61,15 @@ Description: * "CchRHCnt" : Cache Read Hit Count * "CchWHCnt" : Cache Write Hit Count * "FastWCnt" : Fast Write Count + +What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject +Date: Jan, 2022 +KernelVersion: v5.17 +Contact: linuxppc-dev , nvdimm@lists.linux.dev, +Description: + (RO) Reports the health bitmap inject bitmap that is applied to + bitmap received from PowerVM via the H_SCM_HEALTH. This is used + to forcibly set specific bits returned from Hcall. These is then + used to simulate various health or shutdown states for an nvdimm + and are set by user-space tools like ndctl by issuing a PAPR DSM. + diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 82488b1e7276..17439925045c 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -116,6 +116,22 @@ struct nd_papr_pdsm_health { }; }; +/* Flags for injecting specific smart errors */ +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) + +struct nd_papr_pdsm_smart_inject { + union { + struct { + /* One or more of PDSM_SMART_INJECT_ */ + __u32 flags; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct @@ -123,12 +139,14 @@ struct nd_papr_pdsm_health { enum papr_pdsm { PAPR_PDSM_MIN = 0x0, PAPR_PDSM_HEALTH, + PAPR_PDSM_SMART_INJECT, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { struct nd_papr_pdsm_health health; + struct nd_papr_pdsm_smart_inject smart_inject; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f48e87ac89c9..20aafd387840 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -120,6 +120,10 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* The bits which needs to be overridden */ + u64 health_bitmap_inject_mask; + }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -347,19 +351,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; long rc; /* issue the hcall */ rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); - if (rc != H_SUCCESS) { + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); + else { + dev_err(&p->pdev->dev, "Failed to query health information, Err:%ld\n", rc); return -ENXIO; } p->lasthealth_jiffies = jiffies; - p->health_bitmap = ret[0] & ret[1]; - + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); dev_dbg(&p->pdev->dev, "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", ret[0], ret[1]); @@ -669,6 +683,56 @@ out: return rc; } +/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + + mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -702,6 +766,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { .size_out = sizeof(struct nd_papr_pdsm_health), .service = papr_pdsm_health, }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, @@ -838,6 +908,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -952,6 +1035,7 @@ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, NULL, }; From 81df21de8fb45d3a55d41da9c7f5724797d51ce6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:40:56 +0100 Subject: [PATCH 115/380] powerpc: Fix 'sparse' checking on PPC64le 'sparse' is architecture agnostic and knows nothing about ELF ABI version. Just like it gets arch and powerpc type and endian from Makefile, it also need to get _CALL_ELF from there, otherwise it won't set PPC64_ELF_ABI_v2 macro for PPC64le and won't check the correct code. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ac1312f2451aa558bb2a8806b4d0aa2020f0c176.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..e499f6023783 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -213,7 +213,7 @@ CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN CHECKFLAGS += -D__BIG_ENDIAN__ else -CHECKFLAGS += -D__LITTLE_ENDIAN__ +CHECKFLAGS += -D__LITTLE_ENDIAN__ -D_CALL_ELF=2 endif ifdef CONFIG_476FPE_ERR46 From 5b23cb8cc6b0aab0535253cc2aa362572bab7072 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:40:57 +0100 Subject: [PATCH 116/380] powerpc: Move and rename func_descr_t There are three architectures with function descriptors, try to have common names for the address they contain in order to refactor some functions into generic functions later. powerpc has 'entry' ia64 has 'ip' parisc has 'addr' Vote for 'addr' and update 'func_descr_t' accordingly. Move it in asm/elf.h to have it at the same place on all three architectures, remove the typedef which hides its real type, and change it to a smoother name 'struct func_desc'. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/529b2ba1d001e8f628ef0d30e8044c9b3d0a4921.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 2 +- arch/powerpc/include/asm/elf.h | 6 ++++++ arch/powerpc/include/asm/types.h | 6 ------ arch/powerpc/kernel/signal_64.c | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index e26080539c31..409483b2d0ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -118,7 +118,7 @@ static inline unsigned long ppc_function_entry(void *func) * function's descriptor. The first entry in the descriptor is the * address of the function text. */ - return ((func_descr_t *)func)->entry; + return ((struct func_desc *)func)->addr; #else return (unsigned long)func; #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index b8425e3cfd81..971589a21bc0 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -176,4 +176,10 @@ do { \ /* Relocate the kernel image to @final_address */ void relocate(unsigned long final_address); +struct func_desc { + unsigned long addr; + unsigned long toc; + unsigned long env; +}; + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index f1630c553efe..97da77bc48c9 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -23,12 +23,6 @@ typedef __vector128 vector128; -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_TYPES_H */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d1e1fc0acbea..73d483b07ff3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -936,11 +936,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, * descriptor is the entry address of signal and the second * entry is the TOC value we need to use. */ - func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ksig->ka.sa.sa_handler; + struct func_desc __user *ptr = + (struct func_desc __user *)ksig->ka.sa.sa_handler; - err |= get_user(regs->ctr, &funct_desc_ptr->entry); - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + err |= get_user(regs->ctr, &ptr->addr); + err |= get_user(regs->gpr[2], &ptr->toc); } /* enter the signal handler in native-endian mode */ From d3e32b997a4ca2e7be71cb770bcb2c000ee20b36 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:40:58 +0100 Subject: [PATCH 117/380] powerpc: Use 'struct func_desc' instead of 'struct ppc64_opd_entry' 'struct ppc64_opd_entry' is somehow redundant with 'struct func_desc', the later is more correct/complete as it includes the third field which is unused. So use 'struct func_desc' instead of 'struct ppc64_opd_entry' Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Daniel Axtens Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/34e76bac6cbe95a63ecd37df69fb7feb93b0ea7c.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/sections.h | 4 ++-- arch/powerpc/kernel/module_64.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 38f79e42bf3c..baca39f4c6d3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -61,10 +61,10 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { - struct ppc64_opd_entry *desc = ptr; + struct func_desc *desc = ptr; void *p; - if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..d2082f236bc1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -64,19 +64,19 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) #else /* An address is address of the OPD entry, which contains address of fn. */ -typedef struct ppc64_opd_entry func_desc_t; +typedef struct func_desc func_desc_t; static func_desc_t func_desc(unsigned long addr) { - return *(struct ppc64_opd_entry *)addr; + return *(struct func_desc *)addr; } static unsigned long func_addr(unsigned long addr) { - return func_desc(addr).funcaddr; + return func_desc(addr).addr; } static unsigned long stub_func_addr(func_desc_t func) { - return func.funcaddr; + return func.addr; } static unsigned int local_entry_offset(const Elf64_Sym *sym) { @@ -187,7 +187,7 @@ static int relacmp(const void *_x, const void *_y) static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) { - /* One extra reloc so it's always 0-funcaddr terminated */ + /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; unsigned i; From 0a9c5ae279c963149df9a84588281d3d607f7a1f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:40:59 +0100 Subject: [PATCH 118/380] powerpc: Remove 'struct ppc64_opd_entry' 'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h It was initially in module_64.c and commit 2d291e902791 ("Fix compile failure with non modular builds") moved it into asm/elf.h But it was by mistake added outside of __KERNEL__ section, therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate arch/powerpc/include/asm") moved it to uapi/asm/elf.h Now that it is not used anymore by the kernel, remove it. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c309ccee65ec2e3802df7a7fe761d0a298584809.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/uapi/asm/elf.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 860c59291bfc..308857123a08 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; /* Keep this the last entry. */ #define R_PPC64_NUM 253 -/* There's actually a third entry here, but it's unused */ -struct ppc64_opd_entry -{ - unsigned long funcaddr; - unsigned long r2; -}; - - #endif /* _UAPI_ASM_POWERPC_ELF_H */ From 2fd986377d546bedaf27e36554dc9090d272f15d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:00 +0100 Subject: [PATCH 119/380] powerpc: Prepare func_desc_t for refactorisation In preparation of making func_desc_t generic, change the ELFv2 version to a struct containing 'addr' element. This allows using single helpers common to ELFv1 and ELFv2 and reduces the amount of #ifdef's Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5c36105e08b27b98450535bff48d71b690c19739.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/module_64.c | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index d2082f236bc1..f81bab3eb8e9 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -33,19 +33,17 @@ #ifdef PPC64_ELF_ABI_v2 /* An address is simply the address of the function. */ -typedef unsigned long func_desc_t; +typedef struct { + unsigned long addr; +} func_desc_t; static func_desc_t func_desc(unsigned long addr) { - return addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func; + func_desc_t desc = { + .addr = addr, + }; + + return desc; } /* PowerPC64 specific values for the Elf64_Sym st_other field. */ @@ -70,14 +68,6 @@ static func_desc_t func_desc(unsigned long addr) { return *(struct func_desc *)addr; } -static unsigned long func_addr(unsigned long addr) -{ - return func_desc(addr).addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func.addr; -} static unsigned int local_entry_offset(const Elf64_Sym *sym) { return 0; @@ -93,6 +83,16 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) } #endif +static unsigned long func_addr(unsigned long addr) +{ + return func_desc(addr).addr; +} + +static unsigned long stub_func_addr(func_desc_t func) +{ + return func.addr; +} + #define STUB_MAGIC 0x73747562 /* stub */ /* Like PPC32, we need little trampolines to do > 24-bit jumps (into From 41a88b45479da873bfc5d29ba1a545a780c5329a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:01 +0100 Subject: [PATCH 120/380] ia64: Rename 'ip' to 'addr' in 'struct fdesc' There are three architectures with function descriptors, try to have common names for the address they contain in order to refactor some functions into generic functions later. powerpc has 'entry' ia64 has 'ip' parisc has 'addr' Vote for 'addr' and update 'struct fdesc' accordingly. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/65b73ac614e4c002c5819d40b42f6f426d2ee52b.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/elf.h | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/ia64/kernel/module.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 6629301a2620..2ef5f9966ad1 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -226,7 +226,7 @@ struct got_entry { * Layout of the Function Descriptor */ struct fdesc { - uint64_t ip; + uint64_t addr; uint64_t gp; }; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3a033d2008b3..35f24e52149a 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, (void *)&desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 360f36b0eb3f..8f62cf97f691 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -602,15 +602,15 @@ get_fdesc (struct module *mod, uint64_t value, int *okp) return value; /* Look for existing function descriptor. */ - while (fdesc->ip) { - if (fdesc->ip == value) + while (fdesc->addr) { + if (fdesc->addr == value) return (uint64_t)fdesc; if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) BUG(); } /* Create new one */ - fdesc->ip = value; + fdesc->addr = value; fdesc->gp = mod->arch.gp; return (uint64_t) fdesc; } From a257cacc38718c83cee003487e03197f237f5c3f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:02 +0100 Subject: [PATCH 121/380] asm-generic: Define CONFIG_HAVE_FUNCTION_DESCRIPTORS Replace HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR by a config option named CONFIG_HAVE_FUNCTION_DESCRIPTORS and use it instead of 'dereference_function_descriptor' macro to know whether an arch has function descriptors. To limit churn in one of the following patches, use an #ifdef/#else construct with empty first part instead of an #ifndef in asm-generic/sections.h On powerpc, make sure the config option matches the ABI used by the compiler with a BUILD_BUG_ON() and add missing _CALL_ELF=2 when calling 'sparse' so that sparse sees the same piece of code as GCC. And include a helper to check whether an arch has function descriptors or not : have_function_descriptors() Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Nicholas Piggin Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a0f11fb0ea74a3197bc44dd7ba25e53a24fd03d.1644928018.git.christophe.leroy@csgroup.eu --- arch/Kconfig | 3 +++ arch/ia64/Kconfig | 1 + arch/ia64/include/asm/sections.h | 2 -- arch/parisc/Kconfig | 1 + arch/parisc/include/asm/sections.h | 2 -- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/sections.h | 2 -- arch/powerpc/kernel/ptrace/ptrace.c | 6 ++++++ include/asm-generic/sections.h | 8 +++++++- include/linux/kallsyms.h | 2 +- 10 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b21..fe24174cb63c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -205,6 +205,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config HAVE_FUNCTION_DESCRIPTORS + bool + config TRACE_IRQFLAGS_SUPPORT bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..da85c3b23b16 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -35,6 +35,7 @@ config IA64 select HAVE_SETUP_PER_CPU_AREA select TTY select HAVE_ARCH_TRACEHOOK + select HAVE_FUNCTION_DESCRIPTORS select HAVE_VIRT_CPU_ACCOUNTING select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE select VIRT_TO_BUS diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 35f24e52149a..2460d365a057 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -27,8 +27,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 43c1c880def6..82e7ab1a9764 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -69,6 +69,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select TRACE_IRQFLAGS_SUPPORT + select HAVE_FUNCTION_DESCRIPTORS if 64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index bb52aea0cb21..c8092e4d94de 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -9,8 +9,6 @@ extern char __alt_instructions[], __alt_instructions_end[]; #ifdef CONFIG_64BIT -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor void *dereference_function_descriptor(void *); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e1..a0c9cd0bbc85 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,6 +202,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_DESCRIPTORS if PPC64 && !CPU_LITTLE_ENDIAN select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index baca39f4c6d3..7728a7a146c3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -56,8 +56,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) #ifdef PPC64_ELF_ABI_v1 -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c43f77e2ac31..1212a812a7ab 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -445,4 +445,10 @@ void __init pt_regs_check(void) * real registers. */ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); + +#ifdef PPC64_ELF_ABI_v1 + BUILD_BUG_ON(!IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#else + BUILD_BUG_ON(IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#endif } diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764e1..3ef83e1aebee 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,11 +59,17 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif +static inline bool have_function_descriptors(void) +{ + return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); +} + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); From 0dc690e4ef5b901e9d4b53520854fbd5c749e09d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:03 +0100 Subject: [PATCH 122/380] asm-generic: Define 'func_desc_t' to commonly describe function descriptors We have three architectures using function descriptors, each with its own type and name. Add a common typedef that can be used in generic code. Also add a stub typedef for architecture without function descriptors, to avoid a forest of #ifdefs. It replaces the similar 'func_desc_t' previously defined in arch/powerpc/kernel/module_64.c Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Acked-by: Arnd Bergmann Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f1f91b142b3c1082bdc1586ce71c9bac1e75213c.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/sections.h | 3 +++ arch/parisc/include/asm/sections.h | 5 +++++ arch/powerpc/include/asm/sections.h | 4 ++++ arch/powerpc/kernel/module_64.c | 8 -------- include/asm-generic/sections.h | 5 +++++ 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 2460d365a057..3abe0562b01a 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -9,6 +9,9 @@ #include #include + +typedef struct fdesc func_desc_t; + #include extern char __phys_per_cpu_start[]; diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index c8092e4d94de..ace1d4047a0b 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -2,6 +2,11 @@ #ifndef _PARISC_SECTIONS_H #define _PARISC_SECTIONS_H +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#include +typedef Elf64_Fdesc func_desc_t; +#endif + /* nothing to see, move along */ #include diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 7728a7a146c3..fddfb3937868 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,6 +6,10 @@ #include #include +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +typedef struct func_desc func_desc_t; +#endif + #include extern char __head_end[]; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f81bab3eb8e9..0337b46424bc 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -32,11 +32,6 @@ #ifdef PPC64_ELF_ABI_v2 -/* An address is simply the address of the function. */ -typedef struct { - unsigned long addr; -} func_desc_t; - static func_desc_t func_desc(unsigned long addr) { func_desc_t desc = { @@ -61,9 +56,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #else -/* An address is address of the OPD entry, which contains address of fn. */ -typedef struct func_desc func_desc_t; - static func_desc_t func_desc(unsigned long addr) { return *(struct func_desc *)addr; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 3ef83e1aebee..bbf97502470c 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -63,6 +63,11 @@ extern __visible const void __nosave_begin, __nosave_end; #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) + +/* An address is simply the address of the function. */ +typedef struct { + unsigned long addr; +} func_desc_t; #endif static inline bool have_function_descriptors(void) From e1478d8eaf27704db17a44dee4c53696ed01fc9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:04 +0100 Subject: [PATCH 123/380] asm-generic: Refactor dereference_[kernel]_function_descriptor() dereference_function_descriptor() and dereference_kernel_function_descriptor() are identical on the three architectures implementing them. Make them common and put them out-of-line in kernel/extable.c which is one of the users and has similar type of functions. Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Reviewed-by: Arnd Bergmann Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/449db09b2eba57f4ab05f80102a67d8675bc8bcd.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/sections.h | 19 ------------------- arch/parisc/include/asm/sections.h | 9 --------- arch/parisc/kernel/process.c | 21 --------------------- arch/powerpc/include/asm/sections.h | 23 ----------------------- include/asm-generic/sections.h | 2 ++ kernel/extable.c | 23 ++++++++++++++++++++++- 6 files changed, 24 insertions(+), 73 deletions(-) diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3abe0562b01a..8e0875cf6071 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -30,23 +30,4 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - return dereference_function_descriptor(ptr); -} - #endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index ace1d4047a0b..33df42b5cc6d 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -12,13 +12,4 @@ typedef Elf64_Fdesc func_desc_t; extern char __alt_instructions[], __alt_instructions_end[]; -#ifdef CONFIG_64BIT - -#undef dereference_function_descriptor -void *dereference_function_descriptor(void *); - -#undef dereference_kernel_function_descriptor -void *dereference_kernel_function_descriptor(void *); -#endif - #endif diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea3d83b6fb62..2030c77592d3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,27 +263,6 @@ __get_wchan(struct task_struct *p) return 0; } -#ifdef CONFIG_64BIT -void *dereference_function_descriptor(void *ptr) -{ - Elf64_Fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || - ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif - static inline unsigned long brk_rnd(void) { return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index fddfb3937868..8be2c491c733 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -58,29 +58,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } -#ifdef PPC64_ELF_ABI_v1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct func_desc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif /* PPC64_ELF_ABI_v1 */ - #endif #endif /* __KERNEL__ */ diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index bbf97502470c..d0f7bdd2fdf2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -60,6 +60,8 @@ extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr); +void *dereference_kernel_function_descriptor(void *ptr); #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) diff --git a/kernel/extable.c b/kernel/extable.c index b6f330f0fe74..394c39b86e38 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -3,6 +3,7 @@ Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. */ +#include #include #include #include @@ -132,12 +133,32 @@ out: } /* - * On some architectures (PPC64, IA64) function pointers + * On some architectures (PPC64, IA64, PARISC) function pointers * are actually only tokens to some data that then holds the * real function address. As a result, to find if a function * pointer is part of the kernel text, we need to do some * special dereferencing first. */ +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr) +{ + func_desc_t *desc = ptr; + void *p; + + if (!get_kernel_nofault(p, (void *)&desc->addr)) + ptr = p; + return ptr; +} + +void *dereference_kernel_function_descriptor(void *ptr) +{ + if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} +#endif + int func_ptr_is_kernel_text(void *ptr) { unsigned long addr; From 69b420ed8fd3917ac7073256b4929aa246b6fe31 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:05 +0100 Subject: [PATCH 124/380] lkdtm: Force do_nothing() out of line LKDTM tests display that the run do_nothing() at a given address, but in reality do_nothing() is inlined into the caller. Force it out of line so that it really runs text at the displayed address. Signed-off-by: Christophe Leroy Acked-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a5dcf4d2088e6aca47ab3b4c6d5c0f7fa064e25a.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 2dede2ef658f..60b3b2fe929d 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -21,7 +21,7 @@ /* This is non-const, so it will end up in the .data section. */ static u8 data_area[EXEC_SIZE]; -/* This is cost, so it will end up in the .rodata section. */ +/* This is const, so it will end up in the .rodata section. */ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ @@ -31,7 +31,7 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; * This just returns to the caller. It is designed to be copied into * non-executable memory regions. */ -static void do_nothing(void) +static noinline void do_nothing(void) { return; } From b64913394f123e819bffabc79a0e48f98e78dc5d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:06 +0100 Subject: [PATCH 125/380] lkdtm: Really write into kernel text in WRITE_KERN WRITE_KERN is supposed to overwrite some kernel text, namely do_overwritten() function. But at the time being it overwrites do_overwritten() function descriptor, not function text. Fix it by dereferencing the function descriptor to obtain function text pointer. Export dereference_function_descriptor() for when LKDTM is built as a module. And make do_overwritten() noinline so that it is really do_overwritten() which is called by lkdtm_WRITE_KERN(). Signed-off-by: Christophe Leroy Acked-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/31e58eaffb5bc51c07d8d4891d1982100ade8cfc.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 8 +++++--- kernel/extable.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 60b3b2fe929d..035fcca441f0 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -10,6 +10,7 @@ #include #include #include +#include /* Whether or not to fill the target memory area with do_nothing(). */ #define CODE_WRITE true @@ -37,7 +38,7 @@ static noinline void do_nothing(void) } /* Must immediately follow do_nothing for size calculuations to work out. */ -static void do_overwritten(void) +static noinline void do_overwritten(void) { pr_info("do_overwritten wasn't overwritten!\n"); return; @@ -113,8 +114,9 @@ void lkdtm_WRITE_KERN(void) size_t size; volatile unsigned char *ptr; - size = (unsigned long)do_overwritten - (unsigned long)do_nothing; - ptr = (unsigned char *)do_overwritten; + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy((void *)ptr, (unsigned char *)do_nothing, size); diff --git a/kernel/extable.c b/kernel/extable.c index 394c39b86e38..bda5e9761541 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -149,6 +149,7 @@ void *dereference_function_descriptor(void *ptr) ptr = p; return ptr; } +EXPORT_SYMBOL_GPL(dereference_function_descriptor); void *dereference_kernel_function_descriptor(void *ptr) { From 72a86433049dcfe918886645ac3d19c1eaaa67ab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:07 +0100 Subject: [PATCH 126/380] lkdtm: Fix execute_[user]_location() execute_location() and execute_user_location() intent to copy do_nothing() text and execute it at a new location. However, at the time being it doesn't copy do_nothing() function but do_nothing() function descriptor which still points to the original text. So at the end it still executes do_nothing() at its original location allthough using a copied function descriptor. So, fix that by really copying do_nothing() text and build a new function descriptor by copying do_nothing() function descriptor and updating the target address with the new location. Also fix the displayed addresses by dereferencing do_nothing() function descriptor. Signed-off-by: Christophe Leroy Acked-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4055839683d8d643cd99be121f4767c7c611b970.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 035fcca441f0..1cf24c4a79e9 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -44,19 +44,34 @@ static noinline void do_overwritten(void) return; } +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + static noinline void execute_location(void *dst, bool write) { - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing, EXEC_SIZE); + memcpy(dst, do_nothing_text, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -66,16 +81,19 @@ static void execute_user_location(void *dst) int copied; /* Intentionally crossing kernel/user memory boundary. */ - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); - copied = access_process_vm(current, (unsigned long)dst, do_nothing, + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -153,7 +171,8 @@ void lkdtm_EXEC_VMALLOC(void) void lkdtm_EXEC_RODATA(void) { - execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); } void lkdtm_EXEC_USERSPACE(void) From 5e5a6c5441654d1b9e576ce4ca8a1759e701079e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Feb 2022 13:41:08 +0100 Subject: [PATCH 127/380] lkdtm: Add a test for function descriptors protection Add WRITE_OPD to check that you can't modify function descriptors. Gives the following result when function descriptors are not protected: lkdtm: Performing direct entry WRITE_OPD lkdtm: attempting bad 16 bytes write at c00000000269b358 lkdtm: FAIL: survived bad write lkdtm: do_nothing was hijacked! Looks like a standard compiler barrier() is not enough to force GCC to use the modified function descriptor. Had to add a fake empty inline assembly to force GCC to reload the function descriptor. Signed-off-by: Christophe Leroy Acked-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7eeba50d16a35e9d799820e43304150225f20197.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/core.c | 1 + drivers/misc/lkdtm/lkdtm.h | 1 + drivers/misc/lkdtm/perms.c | 22 ++++++++++++++++++++++ tools/testing/selftests/lkdtm/tests.txt | 1 + 4 files changed, 25 insertions(+) diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index f69b964b9952..e2228b6fc09b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -149,6 +149,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), CRASHTYPE(REFCOUNT_INC_OVERFLOW), CRASHTYPE(REFCOUNT_ADD_OVERFLOW), CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d6137c70ebbe..305fc2ec3f25 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -106,6 +106,7 @@ void __init lkdtm_perms_init(void); void lkdtm_WRITE_RO(void); void lkdtm_WRITE_RO_AFTER_INIT(void); void lkdtm_WRITE_KERN(void); +void lkdtm_WRITE_OPD(void); void lkdtm_EXEC_DATA(void); void lkdtm_EXEC_STACK(void); void lkdtm_EXEC_KMALLOC(void); diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 1cf24c4a79e9..2c6aba3ff32b 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -44,6 +44,11 @@ static noinline void do_overwritten(void) return; } +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) { if (!have_function_descriptors()) @@ -144,6 +149,23 @@ void lkdtm_WRITE_KERN(void) do_overwritten(); } +void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + void lkdtm_EXEC_DATA(void) { execute_location(data_area, CODE_WRITE); diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 6b36b7f5dcf9..243c781f0780 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -44,6 +44,7 @@ ACCESS_NULL WRITE_RO WRITE_RO_AFTER_INIT WRITE_KERN +WRITE_OPD REFCOUNT_INC_OVERFLOW REFCOUNT_ADD_OVERFLOW REFCOUNT_INC_NOT_ZERO_OVERFLOW From b56d1cafefaa4550888bace142be404db36b96a1 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 18 Feb 2022 11:04:39 +0100 Subject: [PATCH 128/380] MIPS: remove asm/war.h The major part for workaround handling has already moved to config options. This change replaces the remaining defines by already available config options and gets rid of war.h Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 38 ++++++++++++++++ arch/mips/include/asm/futex.h | 1 - arch/mips/include/asm/mipsmtregs.h | 1 - arch/mips/include/asm/mipsregs.h | 1 - arch/mips/include/asm/war.h | 73 ------------------------------ arch/mips/kernel/entry.S | 1 - arch/mips/kernel/genex.S | 1 - arch/mips/kernel/r4k-bugs64.c | 9 ++-- arch/mips/kernel/scall32-o32.S | 1 - arch/mips/kernel/scall64-n64.S | 1 - arch/mips/kernel/signal.c | 1 - arch/mips/kernel/signal_n32.c | 1 - arch/mips/lib/delay.c | 1 - arch/mips/mm/c-octeon.c | 1 - arch/mips/mm/c-r4k.c | 1 - arch/mips/mm/page.c | 5 +- arch/mips/mm/tlbex.c | 1 - 17 files changed, 47 insertions(+), 91 deletions(-) delete mode 100644 arch/mips/include/asm/war.h diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c6672547ac5e..f8f11542d044 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2521,13 +2521,51 @@ config CPU_HAS_SYNC # # CPU non-features # + +# Work around the "daddi" and "daddiu" CPU errata: +# +# - The `daddi' instruction fails to trap on overflow. +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #23 +# +# - The `daddiu' instruction can produce an incorrect result. +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #41 +# "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum +# #15 +# "MIPS R4400PC/SC Errata, Processor Revision 1.0", erratum #7 +# "MIPS R4400MC Errata, Processor Revision 1.0", erratum #5 config CPU_DADDI_WORKAROUNDS bool +# Work around certain R4000 CPU errata (as implemented by GCC): +# +# - A double-word or a variable shift may give an incorrect result +# if executed immediately after starting an integer division: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #28 +# "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum +# #19 +# +# - A double-word or a variable shift may give an incorrect result +# if executed while an integer multiplication is in progress: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# errata #16 & #28 +# +# - An integer division may give an incorrect result if started in +# a delay slot of a taken branch or a jump: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #52 config CPU_R4000_WORKAROUNDS bool select CPU_R4400_WORKAROUNDS +# Work around certain R4400 CPU errata (as implemented by GCC): +# +# - A double-word or a variable shift may give an incorrect result +# if executed immediately after starting an integer division: +# "MIPS R4400MC Errata, Processor Revision 1.0", erratum #10 +# "MIPS R4400MC Errata, Processor Revision 2.0 & 3.0", erratum #4 config CPU_R4400_WORKAROUNDS bool diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index 8612a7e42d78..05832eb240fa 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -17,7 +17,6 @@ #include #include #include -#include #define arch_futex_atomic_op_inuser arch_futex_atomic_op_inuser #define futex_atomic_cmpxchg_inatomic futex_atomic_cmpxchg_inatomic diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index be4cf9d477be..a8d67c2f4f7b 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -9,7 +9,6 @@ #define _ASM_MIPSMTREGS_H #include -#include #ifndef __ASSEMBLY__ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 2616353b940c..305651af15b3 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -17,7 +17,6 @@ #include #include #include -#include /* * The following macros are especially useful for __asm__ diff --git a/arch/mips/include/asm/war.h b/arch/mips/include/asm/war.h deleted file mode 100644 index 21443f096238..000000000000 --- a/arch/mips/include/asm/war.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002, 2004, 2007 by Ralf Baechle - * Copyright (C) 2007 Maciej W. Rozycki - */ -#ifndef _ASM_WAR_H -#define _ASM_WAR_H - -/* - * Work around certain R4000 CPU errata (as implemented by GCC): - * - * - A double-word or a variable shift may give an incorrect result - * if executed immediately after starting an integer division: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #28 - * "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum - * #19 - * - * - A double-word or a variable shift may give an incorrect result - * if executed while an integer multiplication is in progress: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * errata #16 & #28 - * - * - An integer division may give an incorrect result if started in - * a delay slot of a taken branch or a jump: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #52 - */ -#ifdef CONFIG_CPU_R4000_WORKAROUNDS -#define R4000_WAR 1 -#else -#define R4000_WAR 0 -#endif - -/* - * Work around certain R4400 CPU errata (as implemented by GCC): - * - * - A double-word or a variable shift may give an incorrect result - * if executed immediately after starting an integer division: - * "MIPS R4400MC Errata, Processor Revision 1.0", erratum #10 - * "MIPS R4400MC Errata, Processor Revision 2.0 & 3.0", erratum #4 - */ -#ifdef CONFIG_CPU_R4400_WORKAROUNDS -#define R4400_WAR 1 -#else -#define R4400_WAR 0 -#endif - -/* - * Work around the "daddi" and "daddiu" CPU errata: - * - * - The `daddi' instruction fails to trap on overflow. - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #23 - * - * - The `daddiu' instruction can produce an incorrect result. - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #41 - * "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum - * #15 - * "MIPS R4400PC/SC Errata, Processor Revision 1.0", erratum #7 - * "MIPS R4400MC Errata, Processor Revision 1.0", erratum #5 - */ -#ifdef CONFIG_CPU_DADDI_WORKAROUNDS -#define DADDI_WAR 1 -#else -#define DADDI_WAR 0 -#endif - -#endif /* _ASM_WAR_H */ diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 4b896f5023ff..d8ca173680f9 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -17,7 +17,6 @@ #include #include #include -#include #ifndef CONFIG_PREEMPTION #define resume_kernel restore_all diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 743d75927b71..fc53ea2cf850 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -19,7 +19,6 @@ #include #include #include -#include #include __INIT diff --git a/arch/mips/kernel/r4k-bugs64.c b/arch/mips/kernel/r4k-bugs64.c index 35729c9e6cfa..6ffefb2c6971 100644 --- a/arch/mips/kernel/r4k-bugs64.c +++ b/arch/mips/kernel/r4k-bugs64.c @@ -163,7 +163,8 @@ static __always_inline __init void check_mult_sh(void) } pr_cont("no.\n"); - panic(bug64hit, !R4000_WAR ? r4kwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS) ? nowar : r4kwar); } static volatile int daddi_ov; @@ -239,7 +240,8 @@ static __init void check_daddi(void) } pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) ? nowar : daddiwar); } int daddiu_bug = -1; @@ -307,7 +309,8 @@ static __init void check_daddiu(void) } pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) ? nowar : daddiwar); } void __init check_bugs64_early(void) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 9bfce5f75f60..18dc9b345056 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -19,7 +19,6 @@ #include #include #include -#include #include .align 5 diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5f6ed4b4c399..e6264aa62e45 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -18,7 +18,6 @@ #include #include #include -#include #ifndef CONFIG_MIPS32_COMPAT /* Neither O32 nor N32, so define handle_sys here */ diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5bce782e694c..71e309be86a2 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 7bd00fad61af..cfc77b69420a 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "signal-common.h" diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c index 2e8dfc1d59c8..ccdb1fc1e4bf 100644 --- a/arch/mips/lib/delay.c +++ b/arch/mips/lib/delay.c @@ -16,7 +16,6 @@ #include #include -#include #ifndef CONFIG_CPU_DADDI_WORKAROUNDS #define GCC_DADDI_IMM_ASM() "I" diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 737870d8fd94..c7ed589de882 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -23,7 +23,6 @@ #include #include #include -#include #include diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 50261fd8eb21..ccb9e47322b0 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -33,7 +33,6 @@ #include #include #include -#include #include /* for run_uncached() */ #include #include diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 504bc4047c4c..d3b4459d0fe8 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -25,7 +25,6 @@ #include #include #include -#include #ifdef CONFIG_SIBYTE_DMA_PAGEOPS #include @@ -103,7 +102,9 @@ static int cache_line_size; static inline void pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) { - if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { + if (cpu_has_64bit_gp_regs && + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) && + r4k_daddiu_bug()) { if (off > 0x7fff) { uasm_i_lui(buf, T9, uasm_rel_hi(off)); uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index b131e6a77383..d9df2c43b15c 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include From 10242464e506bc8038a7d91943a7b36a6afbcee3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 17 Feb 2022 15:21:17 +0100 Subject: [PATCH 129/380] MIPS: sibyte: Add missing __user annotations in sb_tbprof.c Add missing __user annotations to fix sparse errors. Signed-off-by: Thomas Bogendoerfer --- arch/mips/sibyte/common/sb_tbprof.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c index f80d7a710333..bc47681e825a 100644 --- a/arch/mips/sibyte/common/sb_tbprof.c +++ b/arch/mips/sibyte/common/sb_tbprof.c @@ -437,13 +437,13 @@ static int sbprof_tb_release(struct inode *inode, struct file *filp) return 0; } -static ssize_t sbprof_tb_read(struct file *filp, char *buf, +static ssize_t sbprof_tb_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { int cur_sample, sample_off, cur_count, sample_left; char *src; int count = 0; - char *dest = buf; + char __user *dest = buf; long cur_off = *offp; if (!access_ok(buf, size)) @@ -512,7 +512,7 @@ static long sbprof_tb_ioctl(struct file *filp, if (err) break; - err = put_user(TB_FULL, (int *) arg); + err = put_user(TB_FULL, (int __user *) arg); break; } From b7c8c2c6b1b1329893675b76722144fee7242d29 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 7 Feb 2022 22:18:14 +0100 Subject: [PATCH 130/380] MIPS: OCTEON: Constify static irq_domain_ops The only usage of these structs is to pass their address to either irq_domain_add_tree() or irq_domain_create_linear(), both which takes pointers to const struct irq_domain_ops. Make them const to allow the compiler to put them in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/octeon-irq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 844f882096e6..07d7ff5a981d 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1274,13 +1274,13 @@ static int octeon_irq_gpio_map(struct irq_domain *d, return r; } -static struct irq_domain_ops octeon_irq_domain_ciu_ops = { +static const struct irq_domain_ops octeon_irq_domain_ciu_ops = { .map = octeon_irq_ciu_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu_xlat, }; -static struct irq_domain_ops octeon_irq_domain_gpio_ops = { +static const struct irq_domain_ops octeon_irq_domain_gpio_ops = { .map = octeon_irq_gpio_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_gpio_xlat, @@ -1974,7 +1974,7 @@ static int octeon_irq_ciu2_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops octeon_irq_domain_ciu2_ops = { +static const struct irq_domain_ops octeon_irq_domain_ciu2_ops = { .map = octeon_irq_ciu2_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu2_xlat, @@ -2226,7 +2226,7 @@ static int octeon_irq_cib_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops octeon_irq_domain_cib_ops = { +static const struct irq_domain_ops octeon_irq_domain_cib_ops = { .map = octeon_irq_cib_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_cib_xlat, @@ -2578,7 +2578,7 @@ static int octeon_irq_ciu3_map(struct irq_domain *d, return octeon_irq_ciu3_mapx(d, virq, hw, &octeon_irq_chip_ciu3); } -static struct irq_domain_ops octeon_dflt_domain_ciu3_ops = { +static const struct irq_domain_ops octeon_dflt_domain_ciu3_ops = { .map = octeon_irq_ciu3_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu3_xlat, From 0ccd7890461f391e57b4abce808e31435c779b90 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 7 Feb 2022 22:18:15 +0100 Subject: [PATCH 131/380] MIPS: ath25: Constify static irq_domain_ops The only usage of these structs is to pass their address to either irq_domain_add_tree() or irq_domain_create_linear(), both which takes pointers to const struct irq_domain_ops. Make them const to allow the compiler to put them in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Thomas Bogendoerfer --- arch/mips/ath25/ar2315.c | 2 +- arch/mips/ath25/ar5312.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 76e43a73ba1b..8ccf167c167e 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -112,7 +112,7 @@ static int ar2315_misc_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar2315_misc_irq_domain_ops = { +static const struct irq_domain_ops ar2315_misc_irq_domain_ops = { .map = ar2315_misc_irq_map, }; diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index 822b639dbd1e..cfa103518113 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -116,7 +116,7 @@ static int ar5312_misc_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar5312_misc_irq_domain_ops = { +static const struct irq_domain_ops ar5312_misc_irq_domain_ops = { .map = ar5312_misc_irq_map, }; From ce510accafdbf13e7d5863191934178a56a941f9 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 7 Feb 2022 22:18:16 +0100 Subject: [PATCH 132/380] MIPS: pci-ar2315: Constify static irq_domain_ops The only usage of ar2315_pci_irq_domain_ops is to pass its address to irq_domain_add_linear() which takes a pointer to const struct irq_domain_ops. Make it const to allow the compiler to put it in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/pci-ar2315.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 9a4bfb4e63e3..30e0922f4cea 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -384,7 +384,7 @@ static int ar2315_pci_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar2315_pci_irq_domain_ops = { +static const struct irq_domain_ops ar2315_pci_irq_domain_ops = { .map = ar2315_pci_irq_map, }; From d1ca45f93c3f95d1590d60b012cf8fcf6db633ee Mon Sep 17 00:00:00 2001 From: Nemanja Rakovic Date: Mon, 21 Feb 2022 13:12:25 +0100 Subject: [PATCH 133/380] mips: Enable KCSAN Replaces KASAN_SANITIZE with KCSAN_SANITIZE in boot/compressed/Makefile. Fixes: e0a8b93efa23 mips: Enable KCSAN Signed-off-by: Nemanja Rakovic Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index a35f78212ea9..6cc28173bee8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -38,7 +38,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n -KASAN_SANITIZE := n +KCSAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o From d56201d9440de302c8cb017761946e585f6cb74f Mon Sep 17 00:00:00 2001 From: Mayuresh Chitale Date: Fri, 4 Feb 2022 20:53:47 +0530 Subject: [PATCH 134/380] riscv: defconfig: enable hugetlbfs option Enabling hugetlbfs in the defconfigs to allow tuning KVM guest performance using huge pages. Signed-off-by: Mayuresh Chitale Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + arch/riscv/configs/rv32_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index f120fcc43d0a..7cd10ded7bf8 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -108,6 +108,7 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 8b56a7f1eb06..e0e5c7c09ab8 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -100,6 +100,7 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y From 406a8c1d8fa59ae6a6462a6fb6ff892f6a4f7499 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Feb 2022 15:05:30 +0100 Subject: [PATCH 135/380] powerpc: Remove remaining stab codes Following commit 12318163737c ("powerpc/32: Remove remaining .stabs annotations"), stabs code are not used anymore. Remove them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d8b33342d7454f6ca4f368f5206896558dfa06f4.1645538722.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3c06a33b5da4..4dea2d963738 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -692,12 +692,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define evr30 30 #define evr31 31 -/* some stab codes */ -#define N_FUN 36 -#define N_RSYM 64 -#define N_SLINE 68 -#define N_SO 100 - #define RFSCV .long 0x4c0000a4 /* From fbb1d4b381b058ed60b39f1598532f559b441762 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Sep 2021 12:42:08 -0700 Subject: [PATCH 136/380] MIPS: Modernize READ_IMPLIES_EXEC I'm doing some thread necromancy of https://lore.kernel.org/lkml/202007081624.82FA0CC1EA@keescook/ x86, arm64, and arm32 adjusted their READ_IMPLIES_EXEC logic to better align with the safer defaults and the interactions with other mappings, which I illustrated with this comment on x86: /* * An executable for which elf_read_implies_exec() returns TRUE will * have the READ_IMPLIES_EXEC personality flag set automatically. * * The decision process for determining the results are: * * CPU: | lacks NX* | has NX, ia32 | has NX, x86_64 | * ELF: | | | | * ---------------------|------------|------------------|----------------| * missing PT_GNU_STACK | exec-all | exec-all | exec-none | * PT_GNU_STACK == RWX | exec-stack | exec-stack | exec-stack | * PT_GNU_STACK == RW | exec-none | exec-none | exec-none | * * exec-all : all PROT_READ user mappings are executable, except when * backed by files on a noexec-filesystem. * exec-none : only PROT_EXEC user mappings are executable. * exec-stack: only the stack and PROT_EXEC user mappings are * executable. * * *this column has no architectural effect: NX markings are ignored by * hardware, but may have behavioral effects when "wants X" collides with * "cannot be X" constraints in memory permission flags, as in * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com * */ For MIPS, the "lacks NX" above is the "!cpu_has_rixi" check. On x86, we decided that the READ_IMPLIES_EXEC flag needed to reflect the expectations, not the architectural behavior due to bad interactions as noted above, as always returning "1" on non-NX hardware breaks some mappings. The other part of the issue is "what does the MIPS toolchain do for PT_GNU_STACK?" The answer seems to be "by default, include PT_GNU_STACK, but mark it executable" (likely due to concerns over non-NX hardware): $ mipsel-linux-gnu-gcc -o hello_world hello_world.c $ llvm-readelf -lW hellow_world | grep GNU_STACK GNU_STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RWE 0x10 Given that older hardware doesn't support non-executable memory, it seems safe to make the "PT_GNU_STACK is absent" logic mean "assume non-executable", but this might break very old software running on modern MIPS. This situation matches the ia32-on-x86_64 logic x86 uses (which assumes needing READ_IMPLIES_EXEC in that situation). But modern toolchains on modern MIPS hardware should follow a safer default (assume NX stack). A follow-up to this change would be to switch the MIPS toolchain to emit a non-executable PT_GNU_STACK, as this seems to be unneeded. Cc: Thomas Bogendoerfer Cc: Tiezhu Yang Cc: Xuefeng Li Cc: Juxin Gao Cc: linux-mips@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/elf.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 7b045d2a0b51..5582a4ca1e9e 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -328,16 +328,10 @@ void mips_set_personality_nan(struct arch_elf_state *state) int mips_elf_read_implies_exec(void *elf_ex, int exstack) { - if (exstack != EXSTACK_DISABLE_X) { - /* The binary doesn't request a non-executable stack */ - return 1; - } - - if (!cpu_has_rixi) { - /* The CPU doesn't support non-executable memory */ - return 1; - } - - return 0; + /* + * Set READ_IMPLIES_EXEC only on non-NX systems that + * do not request a specific state via PT_GNU_STACK. + */ + return (!cpu_has_rixi && exstack == EXSTACK_DEFAULT); } EXPORT_SYMBOL(mips_elf_read_implies_exec); From 8b91cee5eadd2021f55e6775f2d50bd56d00c217 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 4 Feb 2022 13:53:48 +1000 Subject: [PATCH 137/380] powerpc/64s/hash: Make hash faults work in NMI context Hash faults are not resoved in NMI context, instead causing the access to fail. This is done because perf interrupts can get backtraces including walking the user stack, and taking a hash fault on those could deadlock on the HPTE lock if the perf interrupt hits while the same HPTE lock is being held by the hash fault code. The user-access for the stack walking will notice the access failed and deal with that in the perf code. The reason to allow perf interrupts in is to better profile hash faults. The problem with this is any hash fault on a kernel access that happens in NMI context will crash, because kernel accesses must not fail. Hard lockups, system reset, machine checks that access vmalloc space including modules and including stack backtracing and symbol lookup in modules, per-cpu data, etc could all run into this problem. Fix this by disallowing perf interrupts in the hash fault code (the direct hash fault is covered by MSR[EE]=0 so the PMI disable just needs to extend to the preload case). This simplifies the tricky logic in hash faults and perf, at the cost of reduced profiling of hash faults. perf can still latch addresses when interrupts are disabled, it just won't get the stack trace at that point, so it would still find hot spots, just sometimes with confusing stack chains. An alternative could be to allow perf interrupts here but always do the slowpath stack walk if we are in nmi context, but that slows down all perf interrupt stack walking on hash though and it does not remove as much tricky code. Reported-by: Laurent Dufour Signed-off-by: Nicholas Piggin Tested-by: Laurent Dufour Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220204035348.545435-1-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 54 ++++----------------------- arch/powerpc/perf/callchain.h | 9 +---- arch/powerpc/perf/callchain_64.c | 27 -------------- 4 files changed, 10 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index fc28f46d2f9d..5404f7abbcf8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -612,7 +612,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ -DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); +DECLARE_INTERRUPT_HANDLER(do_hash_fault); /* fault.c */ DECLARE_INTERRUPT_HANDLER(do_page_fault); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7abf82a698d3..985cabdd7f67 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1621,8 +1621,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER(__do_hash_fault) +DEFINE_INTERRUPT_HANDLER(do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1681,35 +1680,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault) } } -/* - * The _RAW interrupt entry checks for the in_nmi() case before - * running the full handler. - */ -DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) -{ - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - do_bad_page_fault_segv(regs); - return 0; - } - - __do_hash_fault(regs); - - return 0; -} - #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { @@ -1776,26 +1746,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* - * __hash_page_* must run with interrupts off, as it sets the - * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any - * time and may take a hash fault reading the user stack, see - * read_user_stack_slow() in the powerpc/perf code. + * __hash_page_* must run with interrupts off, including PMI interrupts + * off, as it sets the H_PAGE_BUSY bit. * - * If that takes a hash fault on the same page as we lock here, it - * will bail out when seeing H_PAGE_BUSY set, and retry the access - * leading to an infinite loop. - * - * Disabling interrupts here does not prevent perf interrupts, but it - * will prevent them taking hash faults (see the NMI test in - * do_hash_page), then read_user_stack's copy_from_user_nofault will - * fail and perf will fall back to read_user_stack_slow(), which - * walks the Linux page tables. + * It's otherwise possible for perf interrupts to hit at any time and + * may take a hash fault reading the user stack, which could take a + * hash miss and deadlock on the same H_PAGE_BUSY bit. * * Interrupts must also be off for the duration of the * mm_is_thread_local test and update, to prevent preempt running the * mm on another CPU (XXX: this may be racy vs kthread_use_mm). */ - local_irq_save(flags); + powerpc_local_irq_pmu_save(flags); /* Is that local to this CPU ? */ if (mm_is_thread_local(mm)) @@ -1820,7 +1782,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, mm_ctx_user_psize(&mm->context), pte_val(*ptep)); - local_irq_restore(flags); + powerpc_local_irq_pmu_restore(flags); } /* diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f..19a8d051ddf1 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,6 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, size_t size) { unsigned long addr = (unsigned long)ptr; - int rc; if (addr > TASK_SIZE - size || (addr & (size - 1))) return -EFAULT; - rc = copy_from_user_nofault(ret, ptr, size); - - if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) - return read_user_stack_slow(ptr, ret, size); - - return rc; + return copy_from_user_nofault(ret, ptr, size); } #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 8d0df4226328..488e8a21a11e 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -18,33 +18,6 @@ #include "callchain.h" -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. Radix - * has no need for this so it doesn't use read_user_stack_slow. - */ -int read_user_stack_slow(const void __user *ptr, void *buf, int nb) -{ - - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - struct page *page; - void *kaddr; - - if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { - kaddr = page_address(page); - - /* align address to page boundary */ - offset = addr & ~PAGE_MASK; - - memcpy(buf, kaddr + offset, nb); - put_page(page); - return 0; - } - return -EFAULT; -} - static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { return __read_user_stack(ptr, ret, sizeof(*ret)); From 8a0edc72bec25fa62450bfef1a150483558e1289 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Wed, 23 Feb 2022 15:54:23 +0800 Subject: [PATCH 138/380] powerpc/module_64: fix array_size.cocci warning Fix following coccicheck warning: ./arch/powerpc/kernel/module_64.c:432:40-41: WARNING: Use ARRAY_SIZE. ARRAY_SIZE(arr) is a macro provided by the kernel. It makes sure that arr is an array, so it's safer than sizeof(arr) / sizeof(arr[0]) and more standard. Signed-off-by: Guo Zhengkui Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220223075426.20939-1-guozhengkui@vivo.com --- arch/powerpc/kernel/module_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6a45e6ddbe58..94d14cf99bca 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -429,7 +430,7 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; From f961e20f15ed35e9ca154a099897d600b78b0311 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:49:53 +0530 Subject: [PATCH 139/380] selftests/powerpc/pmu: Include mmap_buffer field as part of struct event To enable the capturing of samples as part of perf event, add a new field "mmap_buffer" to "struct event". This field is a place-holder for sample collection Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-2-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/event.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 302eaab51706..23d20340a160 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -22,6 +22,11 @@ struct event { u64 running; u64 enabled; } result; + /* + * mmap buffer used while recording sample. + * Accessed as "struct perf_event_mmap_page" + */ + void *mmap_buffer; }; void event_init(struct event *e, u64 config); From 989192ac6ad54acccd5dd1c058055dacd6b94b30 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:41 +0800 Subject: [PATCH 140/380] iommu/vt-d: Remove guest pasid related callbacks The guest pasid related callbacks are not called in the tree. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 208 ----------------------------------- drivers/iommu/intel/pasid.c | 161 --------------------------- drivers/iommu/intel/pasid.h | 4 - drivers/iommu/intel/svm.c | 209 ------------------------------------ include/linux/intel-iommu.h | 10 -- include/linux/intel-svm.h | 12 --- 6 files changed, 604 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 92fea3fbbb11..4f9d95067c8f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4825,13 +4825,6 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; - if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && - !ecap_nest(iommu->ecap)) { - dev_err(dev, "%s: iommu not support nested translation\n", - iommu->name); - return -EINVAL; - } - /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -4919,185 +4912,6 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } -#ifdef CONFIG_INTEL_IOMMU_SVM -/* - * 2D array for converting and sanitizing IOMMU generic TLB granularity to - * VT-d granularity. Invalidation is typically included in the unmap operation - * as a result of DMA or VFIO unmap. However, for assigned devices guest - * owns the first level page tables. Invalidations of translation caches in the - * guest are trapped and passed down to the host. - * - * vIOMMU in the guest will only expose first level page tables, therefore - * we do not support IOTLB granularity for request without PASID (second level). - * - * For example, to find the VT-d granularity encoding for IOTLB - * type and page selective granularity within PASID: - * X: indexed by iommu cache type - * Y: indexed by enum iommu_inv_granularity - * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] - */ - -static const int -inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { - /* - * PASID based IOTLB invalidation: PASID selective (per PASID), - * page selective (address granularity) - */ - {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, - /* PASID based dev TLBs */ - {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, - /* PASID cache */ - {-EINVAL, -EINVAL, -EINVAL} -}; - -static inline int to_vtd_granularity(int type, int granu) -{ - return inv_type_granu_table[type][granu]; -} - -static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) -{ - u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; - - /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. - * IOMMU cache invalidate API passes granu_size in bytes, and number of - * granu size in contiguous memory. - */ - return order_base_2(nr_pages); -} - -static int -intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - int cache_type; - u8 bus, devfn; - u16 did, sid; - int ret = 0; - u64 size = 0; - - if (!inv_info || !dmar_domain) - return -EINVAL; - - if (!dev || !dev_is_pci(dev)) - return -ENODEV; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) - return -EINVAL; - - spin_lock_irqsave(&device_domain_lock, flags); - spin_lock(&iommu->lock); - info = get_domain_info(dev); - if (!info) { - ret = -EINVAL; - goto out_unlock; - } - did = dmar_domain->iommu_did[iommu->seq_id]; - sid = PCI_DEVID(bus, devfn); - - /* Size is only valid in address selective invalidation */ - if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) - size = to_vtd_size(inv_info->granu.addr_info.granule_size, - inv_info->granu.addr_info.nb_granules); - - for_each_set_bit(cache_type, - (unsigned long *)&inv_info->cache, - IOMMU_CACHE_INV_TYPE_NR) { - int granu = 0; - u64 pasid = 0; - u64 addr = 0; - - granu = to_vtd_granularity(cache_type, inv_info->granularity); - if (granu == -EINVAL) { - pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", - cache_type, inv_info->granularity); - break; - } - - /* - * PASID is stored in different locations based on the - * granularity. - */ - if (inv_info->granularity == IOMMU_INV_GRANU_PASID && - (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) - pasid = inv_info->granu.pasid_info.pasid; - else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && - (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) - pasid = inv_info->granu.addr_info.pasid; - - switch (BIT(cache_type)) { - case IOMMU_CACHE_INV_TYPE_IOTLB: - /* HW will ignore LSB bits based on address mask */ - if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && - size && - (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { - pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n", - inv_info->granu.addr_info.addr, size); - } - - /* - * If granu is PASID-selective, address is ignored. - * We use npages = -1 to indicate that. - */ - qi_flush_piotlb(iommu, did, pasid, - mm_to_dma_pfn(inv_info->granu.addr_info.addr), - (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, - inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); - - if (!info->ats_enabled) - break; - /* - * Always flush device IOTLB if ATS is enabled. vIOMMU - * in the guest may assume IOTLB flush is inclusive, - * which is more efficient. - */ - fallthrough; - case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: - /* - * PASID based device TLB invalidation does not support - * IOMMU_INV_GRANU_PASID granularity but only supports - * IOMMU_INV_GRANU_ADDR. - * The equivalent of that is we set the size to be the - * entire range of 64 bit. User only provides PASID info - * without address info. So we set addr to 0. - */ - if (inv_info->granularity == IOMMU_INV_GRANU_PASID) { - size = 64 - VTD_PAGE_SHIFT; - addr = 0; - } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) { - addr = inv_info->granu.addr_info.addr; - } - - if (info->ats_enabled) - qi_flush_dev_iotlb_pasid(iommu, sid, - info->pfsid, pasid, - info->ats_qdep, addr, - size); - else - pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); - break; - default: - dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", - cache_type); - ret = -EINVAL; - } - } -out_unlock: - spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); - - return ret; -} -#endif - static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5545,24 +5359,6 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, return attach_deferred(dev); } -static int -intel_iommu_enable_nesting(struct iommu_domain *domain) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long flags; - int ret = -ENODEV; - - spin_lock_irqsave(&device_domain_lock, flags); - if (list_empty(&dmar_domain->devices)) { - dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; - dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; - ret = 0; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - return ret; -} - /* * Check that the device does not live on an external facing PCI port that is * marked as untrusted. Such devices should not be able to apply quirks and @@ -5599,7 +5395,6 @@ const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, .domain_free = intel_iommu_domain_free, - .enable_nesting = intel_iommu_enable_nesting, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, @@ -5624,9 +5419,6 @@ const struct iommu_ops intel_iommu_ops = { .def_domain_type = device_def_domain_type, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM - .cache_invalidate = intel_iommu_sva_invalidate, - .sva_bind_gpasid = intel_svm_bind_gpasid, - .sva_unbind_gpasid = intel_svm_unbind_gpasid, .sva_bind = intel_svm_bind, .sva_unbind = intel_svm_unbind, .sva_get_pasid = intel_svm_get_pasid, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 07c390aed1fe..10fb82ea467d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -762,164 +762,3 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } - -static int -intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, - struct iommu_gpasid_bind_data_vtd *pasid_data) -{ - /* - * Not all guest PASID table entry fields are passed down during bind, - * here we only set up the ones that are dependent on guest settings. - * Execution related bits such as NXE, SMEP are not supported. - * Other fields, such as snoop related, are set based on host needs - * regardless of guest settings. - */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) { - if (!ecap_srs(iommu->ecap)) { - pr_err_ratelimited("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } - pasid_set_sre(pte); - /* Enable write protect WP if guest requested */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_WPE) - pasid_set_wpe(pte); - } - - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { - if (!ecap_eafs(iommu->ecap)) { - pr_err_ratelimited("No extended access flag support on %s\n", - iommu->name); - return -EINVAL; - } - pasid_set_eafe(pte); - } - - /* - * Memory type is only applicable to devices inside processor coherent - * domain. Will add MTS support once coherent devices are available. - */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) { - pr_warn_ratelimited("No memory type support %s\n", - iommu->name); - return -EINVAL; - } - - return 0; -} - -/** - * intel_pasid_setup_nested() - Set up PASID entry for nested translation. - * This could be used for guest shared virtual address. In this case, the - * first level page tables are used for GVA-GPA translation in the guest, - * second level page tables are used for GPA-HPA translation. - * - * @iommu: IOMMU which the device belong to - * @dev: Device to be set up for translation - * @gpgd: FLPTPTR: First Level Page translation pointer in GPA - * @pasid: PASID to be programmed in the device PASID table - * @pasid_data: Additional PASID info from the guest bind request - * @domain: Domain info for setting up second level page tables - * @addr_width: Address width of the first level (guest) - */ -int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, - pgd_t *gpgd, u32 pasid, - struct iommu_gpasid_bind_data_vtd *pasid_data, - struct dmar_domain *domain, int addr_width) -{ - struct pasid_entry *pte; - struct dma_pte *pgd; - int ret = 0; - u64 pgd_val; - int agaw; - u16 did; - - if (!ecap_nest(iommu->ecap)) { - pr_err_ratelimited("IOMMU: %s: No nested translation support\n", - iommu->name); - return -EINVAL; - } - - if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) { - pr_err_ratelimited("Domain is not in nesting mode, %x\n", - domain->flags); - return -EINVAL; - } - - pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte)) - return -EINVAL; - - /* - * Caller must ensure PASID entry is not in use, i.e. not bind the - * same PASID to the same device twice. - */ - if (pasid_pte_is_present(pte)) - return -EBUSY; - - pasid_clear_entry(pte); - - /* Sanity checking performed by caller to make sure address - * width matching in two dimensions: - * 1. CPU vs. IOMMU - * 2. Guest vs. Host. - */ - switch (addr_width) { -#ifdef CONFIG_X86 - case ADDR_WIDTH_5LEVEL: - if (!cpu_feature_enabled(X86_FEATURE_LA57) || - !cap_5lp_support(iommu->cap)) { - dev_err_ratelimited(dev, - "5-level paging not supported\n"); - return -EINVAL; - } - - pasid_set_flpm(pte, 1); - break; -#endif - case ADDR_WIDTH_4LEVEL: - pasid_set_flpm(pte, 0); - break; - default: - dev_err_ratelimited(dev, "Invalid guest address width %d\n", - addr_width); - return -EINVAL; - } - - /* First level PGD is in GPA, must be supported by the second level */ - if ((uintptr_t)gpgd > domain->max_addr) { - dev_err_ratelimited(dev, - "Guest PGD %lx not supported, max %llx\n", - (uintptr_t)gpgd, domain->max_addr); - return -EINVAL; - } - pasid_set_flptr(pte, (uintptr_t)gpgd); - - ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data); - if (ret) - return ret; - - /* Setup the second level based on the given domain */ - pgd = domain->pgd; - - agaw = iommu_skip_agaw(domain, iommu, &pgd); - if (agaw < 0) { - dev_err_ratelimited(dev, "Invalid domain page table\n"); - return -EINVAL; - } - pgd_val = virt_to_phys(pgd); - pasid_set_slptr(pte, pgd_val); - pasid_set_fault_enable(pte); - - did = domain->iommu_did[iommu->seq_id]; - pasid_set_domain_id(pte, did); - - pasid_set_address_width(pte, agaw); - pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - - pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); - pasid_set_present(pte); - pasid_flush_caches(iommu, pte, pasid, did); - - return ret; -} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d5552e2c160d..ab4408c824a5 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -118,10 +118,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); -int intel_pasid_setup_nested(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, u32 pasid, - struct iommu_gpasid_bind_data_vtd *pasid_data, - struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5b5d69b04fcc..d04c83dd3a58 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -318,193 +318,6 @@ out: return 0; } -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm_dev *sdev = NULL; - struct dmar_domain *dmar_domain; - struct device_domain_info *info; - struct intel_svm *svm = NULL; - unsigned long iflags; - int ret = 0; - - if (WARN_ON(!iommu) || !data) - return -EINVAL; - - if (data->format != IOMMU_PASID_FORMAT_INTEL_VTD) - return -EINVAL; - - /* IOMMU core ensures argsz is more than the start of the union */ - if (data->argsz < offsetofend(struct iommu_gpasid_bind_data, vendor.vtd)) - return -EINVAL; - - /* Make sure no undefined flags are used in vendor data */ - if (data->vendor.vtd.flags & ~(IOMMU_SVA_VTD_GPASID_LAST - 1)) - return -EINVAL; - - if (!dev_is_pci(dev)) - return -ENOTSUPP; - - /* VT-d supports devices with full 20 bit PASIDs only */ - if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) - return -EINVAL; - - /* - * We only check host PASID range, we have no knowledge to check - * guest PASID range. - */ - if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) - return -EINVAL; - - info = get_domain_info(dev); - if (!info) - return -EINVAL; - - dmar_domain = to_dmar_domain(domain); - - mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev); - if (ret) - goto out; - - if (sdev) { - /* - * Do not allow multiple bindings of the same device-PASID since - * there is only one SL page tables per PASID. We may revisit - * once sharing PGD across domains are supported. - */ - dev_warn_ratelimited(dev, "Already bound with PASID %u\n", - svm->pasid); - ret = -EBUSY; - goto out; - } - - if (!svm) { - /* We come here when PASID has never been bond to a device. */ - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto out; - } - /* REVISIT: upper layer/VFIO can track host process that bind - * the PASID. ioasid_set = mm might be sufficient for vfio to - * check pasid VMM ownership. We can drop the following line - * once VFIO and IOASID set check is in place. - */ - svm->mm = get_task_mm(current); - svm->pasid = data->hpasid; - if (data->flags & IOMMU_SVA_GPASID_VAL) { - svm->gpasid = data->gpasid; - svm->flags |= SVM_FLAG_GUEST_PASID; - } - pasid_private_add(data->hpasid, svm); - INIT_LIST_HEAD_RCU(&svm->devs); - mmput(svm->mm); - } - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!sdev) { - ret = -ENOMEM; - goto out; - } - sdev->dev = dev; - sdev->sid = PCI_DEVID(info->bus, info->devfn); - sdev->iommu = iommu; - - /* Only count users if device has aux domains */ - if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) - sdev->users = 1; - - /* Set up device context entry for PASID if not enabled already */ - ret = intel_iommu_enable_pasid(iommu, sdev->dev); - if (ret) { - dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); - kfree(sdev); - goto out; - } - - /* - * PASID table is per device for better security. Therefore, for - * each bind of a new device even with an existing PASID, we need to - * call the nested mode setup function here. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_nested(iommu, dev, - (pgd_t *)(uintptr_t)data->gpgd, - data->hpasid, &data->vendor.vtd, dmar_domain, - data->addr_width); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", - data->hpasid, ret); - /* - * PASID entry should be in cleared state if nested mode - * set up failed. So we only need to clear IOASID tracking - * data such that free call will succeed. - */ - kfree(sdev); - goto out; - } - - svm->flags |= SVM_FLAG_GUEST_MODE; - - init_rcu_head(&sdev->rcu); - list_add_rcu(&sdev->list, &svm->devs); - out: - if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - pasid_private_remove(data->hpasid); - kfree(svm); - } - - mutex_unlock(&pasid_mutex); - return ret; -} - -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm_dev *sdev; - struct intel_svm *svm; - int ret; - - if (WARN_ON(!iommu)) - return -EINVAL; - - mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); - if (ret) - goto out; - - if (sdev) { - if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) - sdev->users--; - if (!sdev->users) { - list_del_rcu(&sdev->list); - intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); - kfree_rcu(sdev, rcu); - - if (list_empty(&svm->devs)) { - /* - * We do not free the IOASID here in that - * IOMMU driver did not allocate it. - * Unlike native SVM, IOASID for guest use was - * allocated prior to the bind call. - * In any case, if the free call comes before - * the unbind, IOMMU driver will get notified - * and perform cleanup. - */ - pasid_private_remove(pasid); - kfree(svm); - } - } - } -out: - mutex_unlock(&pasid_mutex); - return ret; -} - static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, unsigned int flags) { @@ -1125,28 +938,6 @@ int intel_svm_page_response(struct device *dev, goto out; } - /* - * For responses from userspace, need to make sure that the - * pasid has been bound to its mm. - */ - if (svm->flags & SVM_FLAG_GUEST_MODE) { - struct mm_struct *mm; - - mm = get_task_mm(current); - if (!mm) { - ret = -EINVAL; - goto out; - } - - if (mm != svm->mm) { - ret = -ENODEV; - mmput(mm); - goto out; - } - - mmput(mm); - } - /* * Per VT-d spec. v3.0 ch7.7, system software must respond * with page group response if private data is present (PDP) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 69230fd695ea..beaacb589abd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -525,12 +525,6 @@ struct context_entry { */ #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(2) - struct dmar_domain { int nid; /* node id */ @@ -765,9 +759,6 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); @@ -795,7 +786,6 @@ struct intel_svm { unsigned int flags; u32 pasid; - int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; }; #else diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 1b73bab7eeff..b3b125b332aa 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -25,17 +25,5 @@ * do such IOTLB flushes automatically. */ #define SVM_FLAG_SUPERVISOR_MODE BIT(0) -/* - * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest - * processes. Compared to the host bind, the primary differences are: - * 1. mm life cycle management - * 2. fault reporting - */ -#define SVM_FLAG_GUEST_MODE BIT(1) -/* - * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, - * which requires guest and host PASID translation at both directions. - */ -#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ From 0c9f178778919bb500443f340647b44227778fb2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:42 +0800 Subject: [PATCH 141/380] iommu: Remove guest pasid related interfaces and definitions The guest pasid related uapi interfaces and definitions are not referenced anywhere in the tree. We've also reached a consensus to replace them with a new iommufd design. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 210 ------------------------------------- include/linux/iommu.h | 44 -------- include/uapi/linux/iommu.h | 181 -------------------------------- 3 files changed, 435 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 107dcf5938d6..3cbf4781e5bd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2031,216 +2031,6 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) return 0; } -/* - * Check flags and other user provided data for valid combinations. We also - * make sure no reserved fields or unused flags are set. This is to ensure - * not breaking userspace in the future when these fields or flags are used. - */ -static int iommu_check_cache_invl_data(struct iommu_cache_invalidate_info *info) -{ - u32 mask; - int i; - - if (info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) - return -EINVAL; - - mask = (1 << IOMMU_CACHE_INV_TYPE_NR) - 1; - if (info->cache & ~mask) - return -EINVAL; - - if (info->granularity >= IOMMU_INV_GRANU_NR) - return -EINVAL; - - switch (info->granularity) { - case IOMMU_INV_GRANU_ADDR: - if (info->cache & IOMMU_CACHE_INV_TYPE_PASID) - return -EINVAL; - - mask = IOMMU_INV_ADDR_FLAGS_PASID | - IOMMU_INV_ADDR_FLAGS_ARCHID | - IOMMU_INV_ADDR_FLAGS_LEAF; - - if (info->granu.addr_info.flags & ~mask) - return -EINVAL; - break; - case IOMMU_INV_GRANU_PASID: - mask = IOMMU_INV_PASID_FLAGS_PASID | - IOMMU_INV_PASID_FLAGS_ARCHID; - if (info->granu.pasid_info.flags & ~mask) - return -EINVAL; - - break; - case IOMMU_INV_GRANU_DOMAIN: - if (info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) - return -EINVAL; - break; - default: - return -EINVAL; - } - - /* Check reserved padding fields */ - for (i = 0; i < sizeof(info->padding); i++) { - if (info->padding[i]) - return -EINVAL; - } - - return 0; -} - -int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev, - void __user *uinfo) -{ - struct iommu_cache_invalidate_info inv_info = { 0 }; - u32 minsz; - int ret; - - if (unlikely(!domain->ops->cache_invalidate)) - return -ENODEV; - - /* - * No new spaces can be added before the variable sized union, the - * minimum size is the offset to the union. - */ - minsz = offsetof(struct iommu_cache_invalidate_info, granu); - - /* Copy minsz from user to get flags and argsz */ - if (copy_from_user(&inv_info, uinfo, minsz)) - return -EFAULT; - - /* Fields before the variable size union are mandatory */ - if (inv_info.argsz < minsz) - return -EINVAL; - - /* PASID and address granu require additional info beyond minsz */ - if (inv_info.granularity == IOMMU_INV_GRANU_PASID && - inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.pasid_info)) - return -EINVAL; - - if (inv_info.granularity == IOMMU_INV_GRANU_ADDR && - inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.addr_info)) - return -EINVAL; - - /* - * User might be using a newer UAPI header which has a larger data - * size, we shall support the existing flags within the current - * size. Copy the remaining user data _after_ minsz but not more - * than the current kernel supported size. - */ - if (copy_from_user((void *)&inv_info + minsz, uinfo + minsz, - min_t(u32, inv_info.argsz, sizeof(inv_info)) - minsz)) - return -EFAULT; - - /* Now the argsz is validated, check the content */ - ret = iommu_check_cache_invl_data(&inv_info); - if (ret) - return ret; - - return domain->ops->cache_invalidate(domain, dev, &inv_info); -} -EXPORT_SYMBOL_GPL(iommu_uapi_cache_invalidate); - -static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data) -{ - u64 mask; - int i; - - if (data->version != IOMMU_GPASID_BIND_VERSION_1) - return -EINVAL; - - /* Check the range of supported formats */ - if (data->format >= IOMMU_PASID_FORMAT_LAST) - return -EINVAL; - - /* Check all flags */ - mask = IOMMU_SVA_GPASID_VAL; - if (data->flags & ~mask) - return -EINVAL; - - /* Check reserved padding fields */ - for (i = 0; i < sizeof(data->padding); i++) { - if (data->padding[i]) - return -EINVAL; - } - - return 0; -} - -static int iommu_sva_prepare_bind_data(void __user *udata, - struct iommu_gpasid_bind_data *data) -{ - u32 minsz; - - /* - * No new spaces can be added before the variable sized union, the - * minimum size is the offset to the union. - */ - minsz = offsetof(struct iommu_gpasid_bind_data, vendor); - - /* Copy minsz from user to get flags and argsz */ - if (copy_from_user(data, udata, minsz)) - return -EFAULT; - - /* Fields before the variable size union are mandatory */ - if (data->argsz < minsz) - return -EINVAL; - /* - * User might be using a newer UAPI header, we shall let IOMMU vendor - * driver decide on what size it needs. Since the guest PASID bind data - * can be vendor specific, larger argsz could be the result of extension - * for one vendor but it should not affect another vendor. - * Copy the remaining user data _after_ minsz - */ - if (copy_from_user((void *)data + minsz, udata + minsz, - min_t(u32, data->argsz, sizeof(*data)) - minsz)) - return -EFAULT; - - return iommu_check_bind_data(data); -} - -int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, - void __user *udata) -{ - struct iommu_gpasid_bind_data data = { 0 }; - int ret; - - if (unlikely(!domain->ops->sva_bind_gpasid)) - return -ENODEV; - - ret = iommu_sva_prepare_bind_data(udata, &data); - if (ret) - return ret; - - return domain->ops->sva_bind_gpasid(domain, dev, &data); -} -EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid); - -int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid) -{ - if (unlikely(!domain->ops->sva_unbind_gpasid)) - return -ENODEV; - - return domain->ops->sva_unbind_gpasid(dev, pasid); -} -EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); - -int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - void __user *udata) -{ - struct iommu_gpasid_bind_data data = { 0 }; - int ret; - - if (unlikely(!domain->ops->sva_bind_gpasid)) - return -ENODEV; - - ret = iommu_sva_prepare_bind_data(udata, &data); - if (ret) - return ret; - - return iommu_sva_unbind_gpasid(domain, dev, data.hpasid); -} -EXPORT_SYMBOL_GPL(iommu_uapi_sva_unbind_gpasid); - static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index de0c57a567c8..cde1d0aad60f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -229,9 +229,6 @@ struct iommu_iotlb_gather { * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response - * @cache_invalidate: invalidate translation caches - * @sva_bind_gpasid: bind guest pasid and mm - * @sva_unbind_gpasid: unbind guest pasid and mm * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain @@ -301,12 +298,6 @@ struct iommu_ops { int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); - int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info); - int (*sva_bind_gpasid)(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); - - int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); int (*def_domain_type)(struct device *dev); @@ -421,14 +412,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - void __user *uinfo); - -extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); -extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); @@ -1051,33 +1034,6 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int -iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t pasid) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 59178fc229ca..65d8b0234f69 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -158,185 +158,4 @@ struct iommu_page_response { __u32 code; }; -/* defines the granularity of the invalidation */ -enum iommu_inv_granularity { - IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ - IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ - IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ - IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -}; - -/** - * struct iommu_inv_addr_info - Address Selective Invalidation Structure - * - * @flags: indicates the granularity of the address-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If ARCHID bit is set, @archid is populated and the invalidation relates - * to cache entries tagged with this architecture specific ID and matching - * the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - If neither PASID or ARCHID is set, global addr invalidation applies. - * - The LEAF flag indicates whether only the leaf PTE caching needs to be - * invalidated and other paging structure caches can be preserved. - * @pasid: process address space ID - * @archid: architecture-specific ID - * @addr: first stage/level input address - * @granule_size: page/block size of the mapping in bytes - * @nb_granules: number of contiguous granules to be invalidated - */ -struct iommu_inv_addr_info { -#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) - __u32 flags; - __u32 archid; - __u64 pasid; - __u64 addr; - __u64 granule_size; - __u64 nb_granules; -}; - -/** - * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure - * - * @flags: indicates the granularity of the PASID-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If the ARCHID bit is set, the @archid is populated and the invalidation - * relates to cache entries tagged with this architecture specific ID and - * matching the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - At least one of PASID or ARCHID must be set. - * @pasid: process address space ID - * @archid: architecture-specific ID - */ -struct iommu_inv_pasid_info { -#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) - __u32 flags; - __u32 archid; - __u64 pasid; -}; - -/** - * struct iommu_cache_invalidate_info - First level/stage invalidation - * information - * @argsz: User filled size of this data - * @version: API version of this structure - * @cache: bitfield that allows to select which caches to invalidate - * @granularity: defines the lowest granularity used for the invalidation: - * domain > PASID > addr - * @padding: reserved for future use (should be zero) - * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID - * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR - * - * Not all the combinations of cache/granularity are valid: - * - * +--------------+---------------+---------------+---------------+ - * | type / | DEV_IOTLB | IOTLB | PASID | - * | granularity | | | cache | - * +==============+===============+===============+===============+ - * | DOMAIN | N/A | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | PASID | Y | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | ADDR | Y | Y | N/A | - * +--------------+---------------+---------------+---------------+ - * - * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than - * @version and @cache. - * - * If multiple cache types are invalidated simultaneously, they all - * must support the used granularity. - */ -struct iommu_cache_invalidate_info { - __u32 argsz; -#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; -/* IOMMU paging structure cache */ -#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -#define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; -}; - -/** - * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest - * SVA binding. - * - * @flags: VT-d PASID table entry attributes - * @pat: Page attribute table data to compute effective memory type - * @emt: Extended memory type - * - * Only guest vIOMMU selectable and effective options are passed down to - * the host IOMMU. - */ -struct iommu_gpasid_bind_data_vtd { -#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) - __u64 flags; - __u32 pat; - __u32 emt; -}; - -#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ - IOMMU_SVA_VTD_GPASID_EMTE | \ - IOMMU_SVA_VTD_GPASID_PCD | \ - IOMMU_SVA_VTD_GPASID_PWT) - -/** - * struct iommu_gpasid_bind_data - Information about device and guest PASID binding - * @argsz: User filled size of this data - * @version: Version of this data structure - * @format: PASID table entry format - * @flags: Additional information on guest bind request - * @gpgd: Guest page directory base of the guest mm to bind - * @hpasid: Process address space ID used for the guest mm in host IOMMU - * @gpasid: Process address space ID used for the guest mm in guest IOMMU - * @addr_width: Guest virtual address width - * @padding: Reserved for future use (should be zero) - * @vtd: Intel VT-d specific data - * - * Guest to host PASID mapping can be an identity or non-identity, where guest - * has its own PASID space. For non-identify mapping, guest to host PASID lookup - * is needed when VM programs guest PASID into an assigned device. VMM may - * trap such PASID programming then request host IOMMU driver to convert guest - * PASID to host PASID based on this bind data. - */ -struct iommu_gpasid_bind_data { - __u32 argsz; -#define IOMMU_GPASID_BIND_VERSION_1 1 - __u32 version; -#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -#define IOMMU_PASID_FORMAT_LAST 2 - __u32 format; - __u32 addr_width; -#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ - __u64 flags; - __u64 gpgd; - __u64 hpasid; - __u64 gpasid; - __u8 padding[8]; - /* Vendor specific data */ - union { - struct iommu_gpasid_bind_data_vtd vtd; - } vendor; -}; - #endif /* _UAPI_IOMMU_H */ From 241469685d8d54075f12a6f3eb0628f85e13ff37 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:43 +0800 Subject: [PATCH 142/380] iommu/vt-d: Remove aux-domain related callbacks The aux-domain related callbacks are not called in the tree. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/debugfs.c | 3 +- drivers/iommu/intel/iommu.c | 309 +--------------------------------- include/linux/intel-iommu.h | 17 -- 3 files changed, 3 insertions(+), 326 deletions(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 62e23ff3c987..db7a0ca73626 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -351,8 +351,7 @@ static int show_device_domain_translation(struct device *dev, void *data) if (!domain) return 0; - seq_printf(m, "Device %s with pasid %d @0x%llx\n", - dev_name(dev), domain->default_pasid, + seq_printf(m, "Device %s @0x%llx\n", dev_name(dev), (u64)virt_to_phys(domain->pgd)); seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4f9d95067c8f..2b5f4e57a8bb 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1573,18 +1573,6 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } - if (!has_iotlb_device) { - struct subdev_domain_info *sinfo; - - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - info = get_domain_info(sinfo->pdev); - if (info && info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - } - domain->has_iotlb_device = has_iotlb_device; } @@ -1682,7 +1670,6 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, { unsigned long flags; struct device_domain_info *info; - struct subdev_domain_info *sinfo; if (!domain->has_iotlb_device) return; @@ -1691,27 +1678,9 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - info = get_domain_info(sinfo->pdev); - __iommu_flush_dev_iotlb(info, addr, mask); - } spin_unlock_irqrestore(&device_domain_lock, flags); } -static void domain_flush_piotlb(struct intel_iommu *iommu, - struct dmar_domain *domain, - u64 addr, unsigned long npages, bool ih) -{ - u16 did = domain->iommu_did[iommu->seq_id]; - - if (domain->default_pasid) - qi_flush_piotlb(iommu, did, domain->default_pasid, - addr, npages, ih); - - if (!list_empty(&domain->devices)) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih); -} - static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1727,7 +1696,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain_use_first_level(domain)) { - domain_flush_piotlb(iommu, domain, addr, pages, ih); + qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { /* * Fallback to domain selective flush if no PSI support or @@ -1776,7 +1745,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = dmar_domain->iommu_did[iommu->seq_id]; if (domain_use_first_level(dmar_domain)) - domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0); + qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1983,7 +1952,6 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); - INIT_LIST_HEAD(&domain->subdevices); return domain; } @@ -2676,8 +2644,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->domain = domain; info->iommu = iommu; info->pasid_table = NULL; - info->auxd_enabled = 0; - INIT_LIST_HEAD(&info->subdevices); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -4637,183 +4603,6 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) domain_exit(to_dmar_domain(domain)); } -/* - * Check whether a @domain could be attached to the @dev through the - * aux-domain attach/detach APIs. - */ -static inline bool -is_aux_domain(struct device *dev, struct iommu_domain *domain) -{ - struct device_domain_info *info = get_domain_info(dev); - - return info && info->auxd_enabled && - domain->type == IOMMU_DOMAIN_UNMANAGED; -} - -static inline struct subdev_domain_info * -lookup_subdev_info(struct dmar_domain *domain, struct device *dev) -{ - struct subdev_domain_info *sinfo; - - if (!list_empty(&domain->subdevices)) { - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - if (sinfo->pdev == dev) - return sinfo; - } - } - - return NULL; -} - -static int auxiliary_link_device(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info = get_domain_info(dev); - struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); - - assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info)) - return -EINVAL; - - if (!sinfo) { - sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); - if (!sinfo) - return -ENOMEM; - sinfo->domain = domain; - sinfo->pdev = dev; - list_add(&sinfo->link_phys, &info->subdevices); - list_add(&sinfo->link_domain, &domain->subdevices); - } - - return ++sinfo->users; -} - -static int auxiliary_unlink_device(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info = get_domain_info(dev); - struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); - int ret; - - assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) - return -EINVAL; - - ret = --sinfo->users; - if (!ret) { - list_del(&sinfo->link_phys); - list_del(&sinfo->link_domain); - kfree(sinfo); - } - - return ret; -} - -static int aux_domain_add_dev(struct dmar_domain *domain, - struct device *dev) -{ - int ret; - unsigned long flags; - struct intel_iommu *iommu; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return -ENODEV; - - if (domain->default_pasid <= 0) { - u32 pasid; - - /* No private data needed for the default pasid */ - pasid = ioasid_alloc(NULL, PASID_MIN, - pci_max_pasids(to_pci_dev(dev)) - 1, - NULL); - if (pasid == INVALID_IOASID) { - pr_err("Can't allocate default pasid\n"); - return -ENODEV; - } - domain->default_pasid = pasid; - } - - spin_lock_irqsave(&device_domain_lock, flags); - ret = auxiliary_link_device(domain, dev); - if (ret <= 0) - goto link_failed; - - /* - * Subdevices from the same physical device can be attached to the - * same domain. For such cases, only the first subdevice attachment - * needs to go through the full steps in this function. So if ret > - * 1, just goto out. - */ - if (ret > 1) - goto out; - - /* - * iommu->lock must be held to attach domain to iommu and setup the - * pasid entry for second level translation. - */ - spin_lock(&iommu->lock); - ret = domain_attach_iommu(domain, iommu); - if (ret) - goto attach_failed; - - /* Setup the PASID entry for mediated devices: */ - if (domain_use_first_level(domain)) - ret = domain_setup_first_level(iommu, domain, dev, - domain->default_pasid); - else - ret = intel_pasid_setup_second_level(iommu, domain, dev, - domain->default_pasid); - if (ret) - goto table_failed; - - spin_unlock(&iommu->lock); -out: - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; - -table_failed: - domain_detach_iommu(domain, iommu); -attach_failed: - spin_unlock(&iommu->lock); - auxiliary_unlink_device(domain, dev); -link_failed: - spin_unlock_irqrestore(&device_domain_lock, flags); - if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); - - return ret; -} - -static void aux_domain_remove_dev(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - - if (!is_aux_domain(dev, &domain->domain)) - return; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - iommu = info->iommu; - - if (!auxiliary_unlink_device(domain, dev)) { - spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, - domain->default_pasid, false); - domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); - } - - spin_unlock_irqrestore(&device_domain_lock, flags); - - if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); -} - static int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -4866,9 +4655,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } - if (is_aux_domain(dev, domain)) - return -EPERM; - /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; @@ -4885,33 +4671,12 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } -static int intel_iommu_aux_attach_device(struct iommu_domain *domain, - struct device *dev) -{ - int ret; - - if (!is_aux_domain(dev, domain)) - return -EPERM; - - ret = prepare_domain_attach_device(domain, dev); - if (ret) - return ret; - - return aux_domain_add_dev(to_dmar_domain(domain), dev); -} - static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { dmar_remove_one_dev_info(dev); } -static void intel_iommu_aux_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - aux_domain_remove_dev(to_dmar_domain(domain), dev); -} - static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5205,46 +4970,6 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -static int intel_iommu_enable_auxd(struct device *dev) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - int ret; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu || dmar_disabled) - return -EINVAL; - - if (!sm_supported(iommu) || !pasid_supported(iommu)) - return -EINVAL; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) - return -ENODEV; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - info->auxd_enabled = 1; - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - -static int intel_iommu_disable_auxd(struct device *dev) -{ - struct device_domain_info *info; - unsigned long flags; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - if (!WARN_ON(!info)) - info->auxd_enabled = 0; - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - static int intel_iommu_enable_sva(struct device *dev) { struct device_domain_info *info = get_domain_info(dev); @@ -5301,9 +5026,6 @@ static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { - case IOMMU_DEV_FEAT_AUX: - return intel_iommu_enable_auxd(dev); - case IOMMU_DEV_FEAT_IOPF: return intel_iommu_enable_iopf(dev); @@ -5319,9 +5041,6 @@ static int intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { - case IOMMU_DEV_FEAT_AUX: - return intel_iommu_disable_auxd(dev); - case IOMMU_DEV_FEAT_IOPF: return 0; @@ -5333,26 +5052,6 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) } } -static bool -intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) -{ - struct device_domain_info *info = get_domain_info(dev); - - if (feat == IOMMU_DEV_FEAT_AUX) - return scalable_mode_support() && info && info->auxd_enabled; - - return false; -} - -static int -intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - - return dmar_domain->default_pasid > 0 ? - dmar_domain->default_pasid : -EINVAL; -} - static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { @@ -5397,9 +5096,6 @@ const struct iommu_ops intel_iommu_ops = { .domain_free = intel_iommu_domain_free, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, - .aux_attach_dev = intel_iommu_aux_attach_device, - .aux_detach_dev = intel_iommu_aux_detach_device, - .aux_get_pasid = intel_iommu_aux_get_pasid, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, @@ -5412,7 +5108,6 @@ const struct iommu_ops intel_iommu_ops = { .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .device_group = intel_iommu_device_group, - .dev_feat_enabled = intel_iommu_dev_feat_enabled, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index beaacb589abd..5cfda90b2cca 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -542,7 +542,6 @@ struct dmar_domain { u8 iommu_snooping: 1; /* indicate snooping control feature */ struct list_head devices; /* all devices' list */ - struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -557,11 +556,6 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ - u32 default_pasid; /* - * The default pasid used for non-SVM - * traffic on mediated devices. - */ - struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -614,21 +608,11 @@ struct intel_iommu { void *perf_statistic; }; -/* Per subdevice private data */ -struct subdev_domain_info { - struct list_head link_phys; /* link to phys device siblings */ - struct list_head link_domain; /* link to domain siblings */ - struct device *pdev; /* physical device derived from */ - struct dmar_domain *domain; /* aux-domain */ - int users; /* user count */ -}; - /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -639,7 +623,6 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; - u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ From 8652d875939b6a1fe6d5c93cf4fb91df61cda5a7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:44 +0800 Subject: [PATCH 143/380] iommu: Remove aux-domain related interfaces and iommu_ops The aux-domain related interfaces and iommu_ops are not referenced anywhere in the tree. We've also reached a consensus to redesign it based the new iommufd framework. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 46 ------------------------------------------- include/linux/iommu.h | 29 --------------------------- 2 files changed, 75 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3cbf4781e5bd..0ebaf561a70e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2749,8 +2749,6 @@ EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); /* * The device drivers should do the necessary cleanups before calling this. - * For example, before disabling the aux-domain feature, the device driver - * should detach all aux-domains. Otherwise, this will return -EBUSY. */ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { @@ -2778,50 +2776,6 @@ bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); -/* - * Aux-domain specific attach/detach. - * - * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns - * true. Also, as long as domains are attached to a device through this - * interface, any tries to call iommu_attach_device() should fail - * (iommu_detach_device() can't fail, so we fail when trying to re-attach). - * This should make us safe against a device being attached to a guest as a - * whole while there are still pasid users on it (aux and sva). - */ -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - int ret = -ENODEV; - - if (domain->ops->aux_attach_dev) - ret = domain->ops->aux_attach_dev(domain, dev); - - if (!ret) - trace_attach_device_to_domain(dev); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_aux_attach_device); - -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ - if (domain->ops->aux_detach_dev) { - domain->ops->aux_detach_dev(domain, dev); - trace_detach_device_from_domain(dev); - } -} -EXPORT_SYMBOL_GPL(iommu_aux_detach_device); - -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - int ret = -ENODEV; - - if (domain->ops->aux_get_pasid) - ret = domain->ops->aux_get_pasid(domain, dev); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); - /** * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device diff --git a/include/linux/iommu.h b/include/linux/iommu.h index cde1d0aad60f..9983a01373b2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -144,7 +144,6 @@ struct iommu_resv_region { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally * enabling %IOMMU_DEV_FEAT_SVA requires @@ -157,7 +156,6 @@ struct iommu_resv_region { * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). */ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; @@ -223,8 +221,6 @@ struct iommu_iotlb_gather { * @dev_has/enable/disable_feat: per device entries to check/enable/disable * iommu specific features. * @dev_feat_enabled: check enabled feature - * @aux_attach/detach_dev: aux-domain specific attach/detach entries. - * @aux_get_pasid: get the pasid given an aux-domain * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle @@ -285,11 +281,6 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - /* Aux-domain specific attach/detach entries */ - int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, void *drvdata); void (*sva_unbind)(struct iommu_sva *handle); @@ -655,9 +646,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -1002,23 +990,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline int -iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - -static inline void -iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ -} - -static inline int -iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { From 71fe30698dc3be6fd277c49c0c7b220c7ae92ffd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:45 +0800 Subject: [PATCH 144/380] iommu: Remove apply_resv_region The apply_resv_region callback in iommu_ops was introduced to reserve an IOVA range in the given DMA domain when the IOMMU driver manages the IOVA by itself. As all drivers converted to use dma-iommu in the core, there's no driver using this anymore. Remove it to avoid dead code. Suggested-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 --- include/linux/iommu.h | 4 ---- 2 files changed, 7 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0ebaf561a70e..7cf073c56036 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -790,9 +790,6 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group, dma_addr_t start, end, addr; size_t map_size = 0; - if (domain->ops->apply_resv_region) - domain->ops->apply_resv_region(dev, domain, entry); - start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9983a01373b2..9ffa2e88f3d0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -214,7 +214,6 @@ struct iommu_iotlb_gather { * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device - * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -268,9 +267,6 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); void (*put_resv_regions)(struct device *dev, struct list_head *list); - void (*apply_resv_region)(struct device *dev, - struct iommu_domain *domain, - struct iommu_resv_region *region); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); From 7eef7f670086f06b3a461f1b4d1e84f793ed4861 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:46 +0800 Subject: [PATCH 145/380] drm/nouveau/device: Get right pgsize_bitmap of iommu_domain The supported page sizes of an iommu_domain are saved in the pgsize_bitmap field. Retrieve the value from the right place. Signed-off-by: Lu Baolu Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20211218074546.1772553-1-baolu.lu@linux.intel.com Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index d0d52c1d4aee..992cc285f2fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -133,7 +133,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) * or equal to the system's PAGE_SIZE, with a preference if * both are equal. */ - pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; + pgsize_bitmap = tdev->iommu.domain->pgsize_bitmap; if (pgsize_bitmap & PAGE_SIZE) { tdev->iommu.pgshift = PAGE_SHIFT; } else { From 3f6634d997dba8140b3a7cba01776b9638d70dff Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:47 +0800 Subject: [PATCH 146/380] iommu: Use right way to retrieve iommu_ops The common iommu_ops is hooked to both device and domain. When a helper has both device and domain pointer, the way to get the iommu_ops looks messy in iommu core. This sorts out the way to get iommu_ops. The device related helpers go through device pointer, while the domain related ones go through domain pointer. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 50 +++++++++++++++++++++---------------------- include/linux/iommu.h | 11 ++++++++++ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7cf073c56036..7af0ee670deb 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -323,13 +323,14 @@ err_out: void iommu_release_device(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops; if (!dev->iommu) return; iommu_device_unlink(dev->iommu->iommu_dev, dev); + ops = dev_iommu_ops(dev); ops->release_device(dev); iommu_group_remove_device(dev); @@ -833,8 +834,10 @@ out: static bool iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { - if (domain->ops->is_attach_deferred) - return domain->ops->is_attach_deferred(domain, dev); + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (ops->is_attach_deferred) + return ops->is_attach_deferred(domain, dev); return false; } @@ -1252,10 +1255,10 @@ int iommu_page_response(struct device *dev, struct iommu_fault_event *evt; struct iommu_fault_page_request *prm; struct dev_iommu *param = dev->iommu; + const struct iommu_ops *ops = dev_iommu_ops(dev); bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - if (!domain || !domain->ops->page_response) + if (!ops->page_response) return -ENODEV; if (!param || !param->fault_param) @@ -1296,7 +1299,7 @@ int iommu_page_response(struct device *dev, msg->pasid = 0; } - ret = domain->ops->page_response(dev, evt, msg); + ret = ops->page_response(dev, evt, msg); list_del(&evt->list); kfree(evt); break; @@ -1521,7 +1524,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group); static int iommu_get_def_domain_type(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) return IOMMU_DOMAIN_DMA; @@ -1580,7 +1583,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group, */ static struct iommu_group *iommu_group_get_for_dev(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_group *group; int ret; @@ -1588,9 +1591,6 @@ static struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!ops) - return ERR_PTR(-EINVAL); - group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) return ERR_PTR(-EINVAL); @@ -1759,10 +1759,10 @@ static int __iommu_group_dma_attach(struct iommu_group *group) static int iommu_group_do_probe_finalize(struct device *dev, void *data) { - struct iommu_domain *domain = data; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (domain->ops->probe_finalize) - domain->ops->probe_finalize(dev); + if (ops->probe_finalize) + ops->probe_finalize(dev); return 0; } @@ -2020,7 +2020,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) return __iommu_attach_device(domain, dev); @@ -2579,17 +2579,17 @@ EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); void iommu_get_resv_regions(struct device *dev, struct list_head *list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (ops && ops->get_resv_regions) + if (ops->get_resv_regions) ops->get_resv_regions(dev, list); } void iommu_put_resv_regions(struct device *dev, struct list_head *list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (ops && ops->put_resv_regions) + if (ops->put_resv_regions) ops->put_resv_regions(dev, list); } @@ -2794,9 +2794,9 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { struct iommu_group *group; struct iommu_sva *handle = ERR_PTR(-EINVAL); - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (!ops || !ops->sva_bind) + if (!ops->sva_bind) return ERR_PTR(-ENODEV); group = iommu_group_get(dev); @@ -2837,9 +2837,9 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) { struct iommu_group *group; struct device *dev = handle->dev; - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (!ops || !ops->sva_unbind) + if (!ops->sva_unbind) return; group = iommu_group_get(dev); @@ -2856,9 +2856,9 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); u32 iommu_sva_get_pasid(struct iommu_sva *handle) { - const struct iommu_ops *ops = handle->dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(handle->dev); - if (!ops || !ops->sva_get_pasid) + if (!ops->sva_get_pasid) return IOMMU_PASID_INVALID; return ops->sva_get_pasid(handle); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9ffa2e88f3d0..90f1b5e3809d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -381,6 +381,17 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } +static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) +{ + /* + * Assume that valid ops must be installed if iommu_probe_device() + * has succeeded. The device ops are essentially for internal use + * within the IOMMU subsystem itself, so we should be able to trust + * ourselves not to misuse the helper. + */ + return dev->iommu->iommu_dev->ops; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ From 41bb23e70b50b89b6137cbecd37009d782454860 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:48 +0800 Subject: [PATCH 147/380] iommu: Remove unused argument in is_attach_deferred The is_attach_deferred iommu_ops callback is a device op. The domain argument is unnecessary and never used. Remove it to make code clean. Suggested-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 3 +-- drivers/iommu/amd/iommu.c | 3 +-- drivers/iommu/amd/iommu_v2.c | 2 +- drivers/iommu/intel/iommu.c | 3 +-- drivers/iommu/iommu.c | 15 ++++++--------- include/linux/iommu.h | 2 +- 6 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 416815a525d6..3b2f06b7aca6 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -116,8 +116,7 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) extern bool translation_pre_enabled(struct amd_iommu *iommu); -extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev); +extern bool amd_iommu_is_attach_deferred(struct device *dev); extern int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 461f1844ed1f..37f2fbb4b129 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2215,8 +2215,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, head); } -bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +bool amd_iommu_is_attach_deferred(struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 58da08cc3d01..7c94ec05d289 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -537,7 +537,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) ret = NOTIFY_DONE; /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) { + if (amd_iommu_is_attach_deferred(&pdev->dev)) { amd_iommu_complete_ppr(pdev, iommu_fault->pasid, PPR_INVALID, tag); goto out; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2b5f4e57a8bb..80f1294be634 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5052,8 +5052,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) } } -static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +static bool intel_iommu_is_attach_deferred(struct device *dev) { return attach_deferred(dev); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7af0ee670deb..27276421d253 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -831,13 +831,12 @@ out: return ret; } -static bool iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +static bool iommu_is_attach_deferred(struct device *dev) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->is_attach_deferred) - return ops->is_attach_deferred(domain, dev); + return ops->is_attach_deferred(dev); return false; } @@ -894,7 +893,7 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain && !iommu_is_attach_deferred(group->domain, dev)) + if (group->domain && !iommu_is_attach_deferred(dev)) ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); if (ret) @@ -1745,7 +1744,7 @@ static int iommu_group_do_dma_attach(struct device *dev, void *data) struct iommu_domain *domain = data; int ret = 0; - if (!iommu_is_attach_deferred(domain, dev)) + if (!iommu_is_attach_deferred(dev)) ret = __iommu_attach_device(domain, dev); return ret; @@ -2020,9 +2019,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(dev)) return __iommu_attach_device(domain, dev); return 0; @@ -2031,7 +2028,7 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if (iommu_is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(dev)) return; if (unlikely(domain->ops->detach_dev == NULL)) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 90f1b5e3809d..1ded6076a75c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -269,7 +269,7 @@ struct iommu_ops { void (*put_resv_regions)(struct device *dev, struct list_head *list); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); - bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); + bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); From 9a630a4b41a2639b65d024a5d2d88ed3ecca130a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:49 +0800 Subject: [PATCH 148/380] iommu: Split struct iommu_ops Move the domain specific operations out of struct iommu_ops into a new structure that only has domain specific operations. This solves the problem of needing to know if the method vector for a given operation needs to be retrieved from the device or the domain. Logically the domain ops are the ones that make sense for external subsystems and endpoint drivers to use, while device ops, with the sole exception of domain_alloc, are IOMMU API internals. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 20 +++-- drivers/iommu/apple-dart.c | 20 +++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++-- drivers/iommu/arm/arm-smmu/arm-smmu.c | 20 +++-- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 18 ++-- drivers/iommu/exynos-iommu.c | 14 ++-- drivers/iommu/fsl_pamu_domain.c | 10 ++- drivers/iommu/intel/iommu.c | 20 +++-- drivers/iommu/iommu.c | 19 +++-- drivers/iommu/ipmmu-vmsa.c | 18 ++-- drivers/iommu/msm_iommu.c | 30 +++---- drivers/iommu/mtk_iommu.c | 20 +++-- drivers/iommu/mtk_iommu_v1.c | 14 ++-- drivers/iommu/omap-iommu.c | 14 ++-- drivers/iommu/rockchip-iommu.c | 14 ++-- drivers/iommu/s390-iommu.c | 14 ++-- drivers/iommu/sprd-iommu.c | 18 ++-- drivers/iommu/sun50i-iommu.c | 18 ++-- drivers/iommu/tegra-gart.c | 18 ++-- drivers/iommu/tegra-smmu.c | 14 ++-- drivers/iommu/virtio-iommu.c | 14 ++-- include/linux/iommu.h | 91 ++++++++++++--------- 22 files changed, 256 insertions(+), 200 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 37f2fbb4b129..2bed113c4e6d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2268,13 +2268,6 @@ static int amd_iommu_def_domain_type(struct device *dev) const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, - .domain_free = amd_iommu_domain_free, - .attach_dev = amd_iommu_attach_device, - .detach_dev = amd_iommu_detach_device, - .map = amd_iommu_map, - .iotlb_sync_map = amd_iommu_iotlb_sync_map, - .unmap = amd_iommu_unmap, - .iova_to_phys = amd_iommu_iova_to_phys, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, @@ -2283,9 +2276,18 @@ const struct iommu_ops amd_iommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, - .flush_iotlb_all = amd_iommu_flush_iotlb_all, - .iotlb_sync = amd_iommu_iotlb_sync, .def_domain_type = amd_iommu_def_domain_type, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map, + .unmap = amd_iommu_unmap, + .iotlb_sync_map = amd_iommu_iotlb_sync_map, + .iova_to_phys = amd_iommu_iova_to_phys, + .flush_iotlb_all = amd_iommu_flush_iotlb_all, + .iotlb_sync = amd_iommu_iotlb_sync, + .free = amd_iommu_domain_free, + } }; /***************************************************************************** diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 565ef5598811..decafb07ad08 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -765,15 +765,6 @@ static void apple_dart_get_resv_regions(struct device *dev, static const struct iommu_ops apple_dart_iommu_ops = { .domain_alloc = apple_dart_domain_alloc, - .domain_free = apple_dart_domain_free, - .attach_dev = apple_dart_attach_dev, - .detach_dev = apple_dart_detach_dev, - .map_pages = apple_dart_map_pages, - .unmap_pages = apple_dart_unmap_pages, - .flush_iotlb_all = apple_dart_flush_iotlb_all, - .iotlb_sync = apple_dart_iotlb_sync, - .iotlb_sync_map = apple_dart_iotlb_sync_map, - .iova_to_phys = apple_dart_iova_to_phys, .probe_device = apple_dart_probe_device, .release_device = apple_dart_release_device, .device_group = apple_dart_device_group, @@ -782,6 +773,17 @@ static const struct iommu_ops apple_dart_iommu_ops = { .get_resv_regions = apple_dart_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during dart probe */ + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = apple_dart_attach_dev, + .detach_dev = apple_dart_detach_dev, + .map_pages = apple_dart_map_pages, + .unmap_pages = apple_dart_unmap_pages, + .flush_iotlb_all = apple_dart_flush_iotlb_all, + .iotlb_sync = apple_dart_iotlb_sync, + .iotlb_sync_map = apple_dart_iotlb_sync_map, + .iova_to_phys = apple_dart_iova_to_phys, + .free = apple_dart_domain_free, + } }; static irqreturn_t apple_dart_irq(int irq, void *dev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6dc6d8b6b368..fd49282c03a3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2841,17 +2841,9 @@ static int arm_smmu_dev_disable_feature(struct device *dev, static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, - .domain_free = arm_smmu_domain_free, - .attach_dev = arm_smmu_attach_dev, - .map_pages = arm_smmu_map_pages, - .unmap_pages = arm_smmu_unmap_pages, - .flush_iotlb_all = arm_smmu_flush_iotlb_all, - .iotlb_sync = arm_smmu_iotlb_sync, - .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, - .enable_nesting = arm_smmu_enable_nesting, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, @@ -2865,6 +2857,16 @@ static struct iommu_ops arm_smmu_ops = { .page_response = arm_smmu_page_response, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = arm_smmu_attach_dev, + .map_pages = arm_smmu_map_pages, + .unmap_pages = arm_smmu_unmap_pages, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, + .iotlb_sync = arm_smmu_iotlb_sync, + .iova_to_phys = arm_smmu_iova_to_phys, + .enable_nesting = arm_smmu_enable_nesting, + .free = arm_smmu_domain_free, + } }; /* Probing and initialisation functions */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4bc75c4ce402..41321fb47152 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1583,25 +1583,27 @@ static int arm_smmu_def_domain_type(struct device *dev) static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, - .domain_free = arm_smmu_domain_free, - .attach_dev = arm_smmu_attach_dev, - .map_pages = arm_smmu_map_pages, - .unmap_pages = arm_smmu_unmap_pages, - .flush_iotlb_all = arm_smmu_flush_iotlb_all, - .iotlb_sync = arm_smmu_iotlb_sync, - .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize, .device_group = arm_smmu_device_group, - .enable_nesting = arm_smmu_enable_nesting, - .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = arm_smmu_attach_dev, + .map_pages = arm_smmu_map_pages, + .unmap_pages = arm_smmu_unmap_pages, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, + .iotlb_sync = arm_smmu_iotlb_sync, + .iova_to_phys = arm_smmu_iova_to_phys, + .enable_nesting = arm_smmu_enable_nesting, + .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, + .free = arm_smmu_domain_free, + } }; static void arm_smmu_device_reset(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index b91874cb6cf3..ed659de20661 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -590,19 +590,21 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops qcom_iommu_ops = { .capable = qcom_iommu_capable, .domain_alloc = qcom_iommu_domain_alloc, - .domain_free = qcom_iommu_domain_free, - .attach_dev = qcom_iommu_attach_dev, - .detach_dev = qcom_iommu_detach_dev, - .map = qcom_iommu_map, - .unmap = qcom_iommu_unmap, - .flush_iotlb_all = qcom_iommu_flush_iotlb_all, - .iotlb_sync = qcom_iommu_iotlb_sync, - .iova_to_phys = qcom_iommu_iova_to_phys, .probe_device = qcom_iommu_probe_device, .release_device = qcom_iommu_release_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = qcom_iommu_attach_dev, + .detach_dev = qcom_iommu_detach_dev, + .map = qcom_iommu_map, + .unmap = qcom_iommu_unmap, + .flush_iotlb_all = qcom_iommu_flush_iotlb_all, + .iotlb_sync = qcom_iommu_iotlb_sync, + .iova_to_phys = qcom_iommu_iova_to_phys, + .free = qcom_iommu_domain_free, + } }; static int qcom_iommu_sec_ptbl_init(struct device *dev) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 939ffa768986..71f2018e23fe 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1309,17 +1309,19 @@ static int exynos_iommu_of_xlate(struct device *dev, static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, - .domain_free = exynos_iommu_domain_free, - .attach_dev = exynos_iommu_attach_device, - .detach_dev = exynos_iommu_detach_device, - .map = exynos_iommu_map, - .unmap = exynos_iommu_unmap, - .iova_to_phys = exynos_iommu_iova_to_phys, .device_group = generic_device_group, .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = exynos_iommu_attach_device, + .detach_dev = exynos_iommu_detach_device, + .map = exynos_iommu_map, + .unmap = exynos_iommu_unmap, + .iova_to_phys = exynos_iommu_iova_to_phys, + .free = exynos_iommu_domain_free, + } }; static int __init exynos_iommu_init(void) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index a47f47307109..69a4a62dc3b9 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -453,13 +453,15 @@ static void fsl_pamu_release_device(struct device *dev) static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, - .domain_free = fsl_pamu_domain_free, - .attach_dev = fsl_pamu_attach_device, - .detach_dev = fsl_pamu_detach_device, - .iova_to_phys = fsl_pamu_iova_to_phys, .probe_device = fsl_pamu_probe_device, .release_device = fsl_pamu_release_device, .device_group = fsl_pamu_device_group, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .iova_to_phys = fsl_pamu_iova_to_phys, + .free = fsl_pamu_domain_free, + } }; int __init pamu_domain_init(void) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 80f1294be634..b549172e88ef 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5092,15 +5092,6 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, - .domain_free = intel_iommu_domain_free, - .attach_dev = intel_iommu_attach_device, - .detach_dev = intel_iommu_detach_device, - .map_pages = intel_iommu_map_pages, - .unmap_pages = intel_iommu_unmap_pages, - .iotlb_sync_map = intel_iommu_iotlb_sync_map, - .flush_iotlb_all = intel_flush_iotlb_all, - .iotlb_sync = intel_iommu_tlb_sync, - .iova_to_phys = intel_iommu_iova_to_phys, .probe_device = intel_iommu_probe_device, .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, @@ -5118,6 +5109,17 @@ const struct iommu_ops intel_iommu_ops = { .sva_get_pasid = intel_svm_get_pasid, .page_response = intel_svm_page_response, #endif + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map_pages = intel_iommu_map_pages, + .unmap_pages = intel_iommu_unmap_pages, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, + .flush_iotlb_all = intel_flush_iotlb_all, + .iotlb_sync = intel_iommu_tlb_sync, + .iova_to_phys = intel_iommu_iova_to_phys, + .free = intel_iommu_domain_free, + } }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 27276421d253..f2c45b85b9fc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1950,10 +1950,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, if (!domain) return NULL; - domain->ops = bus->iommu_ops; domain->type = type; /* Assume all sizes by default; the driver may override this later */ - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + if (!domain->ops) + domain->ops = bus->iommu_ops->default_domain_ops; if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); @@ -1971,7 +1972,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { iommu_put_dma_cookie(domain); - domain->ops->domain_free(domain); + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); @@ -2242,7 +2243,7 @@ static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp, size_t *mapped) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t pgsize, count; int ret; @@ -2265,7 +2266,7 @@ static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; @@ -2325,7 +2326,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, static int _iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; int ret; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); @@ -2354,7 +2355,7 @@ static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t pgsize, count; pgsize = iommu_pgsize(domain, iova, iova, size, &count); @@ -2367,7 +2368,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; unsigned long orig_iova = iova; unsigned int min_pagesz; @@ -2443,7 +2444,7 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ca752bdc710f..00d5f7a33499 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -868,14 +868,6 @@ static struct iommu_group *ipmmu_find_group(struct device *dev) static const struct iommu_ops ipmmu_ops = { .domain_alloc = ipmmu_domain_alloc, - .domain_free = ipmmu_domain_free, - .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, - .flush_iotlb_all = ipmmu_flush_iotlb_all, - .iotlb_sync = ipmmu_iotlb_sync, - .iova_to_phys = ipmmu_iova_to_phys, .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, @@ -883,6 +875,16 @@ static const struct iommu_ops ipmmu_ops = { ? generic_device_group : ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .flush_iotlb_all = ipmmu_flush_iotlb_all, + .iotlb_sync = ipmmu_iotlb_sync, + .iova_to_phys = ipmmu_iova_to_phys, + .free = ipmmu_domain_free, + } }; /* ----------------------------------------------------------------------------- diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a2e511edb85..4f441f1b750d 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -668,25 +668,27 @@ fail: static struct iommu_ops msm_iommu_ops = { .domain_alloc = msm_iommu_domain_alloc, - .domain_free = msm_iommu_domain_free, - .attach_dev = msm_iommu_attach_dev, - .detach_dev = msm_iommu_detach_dev, - .map = msm_iommu_map, - .unmap = msm_iommu_unmap, - /* - * Nothing is needed here, the barrier to guarantee - * completion of the tlb sync operation is implicitly - * taken care when the iommu client does a writel before - * kick starting the other master. - */ - .iotlb_sync = NULL, - .iotlb_sync_map = msm_iommu_sync_map, - .iova_to_phys = msm_iommu_iova_to_phys, .probe_device = msm_iommu_probe_device, .release_device = msm_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = msm_iommu_attach_dev, + .detach_dev = msm_iommu_detach_dev, + .map = msm_iommu_map, + .unmap = msm_iommu_unmap, + /* + * Nothing is needed here, the barrier to guarantee + * completion of the tlb sync operation is implicitly + * taken care when the iommu client does a writel before + * kick starting the other master. + */ + .iotlb_sync = NULL, + .iotlb_sync_map = msm_iommu_sync_map, + .iova_to_phys = msm_iommu_iova_to_phys, + .free = msm_iommu_domain_free, + } }; static int msm_iommu_probe(struct platform_device *pdev) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 25b834104790..884c288a8f63 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -658,15 +658,6 @@ static void mtk_iommu_get_resv_regions(struct device *dev, static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, - .domain_free = mtk_iommu_domain_free, - .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, - .flush_iotlb_all = mtk_iommu_flush_iotlb_all, - .iotlb_sync = mtk_iommu_iotlb_sync, - .iotlb_sync_map = mtk_iommu_sync_map, - .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, @@ -675,6 +666,17 @@ static const struct iommu_ops mtk_iommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = mtk_iommu_unmap, + .flush_iotlb_all = mtk_iommu_flush_iotlb_all, + .iotlb_sync = mtk_iommu_iotlb_sync, + .iotlb_sync_map = mtk_iommu_sync_map, + .iova_to_phys = mtk_iommu_iova_to_phys, + .free = mtk_iommu_domain_free, + } }; static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index be22fcf988ce..293ef7e1a861 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -514,12 +514,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, - .domain_free = mtk_iommu_domain_free, - .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, - .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .probe_finalize = mtk_iommu_probe_finalize, .release_device = mtk_iommu_release_device, @@ -527,6 +521,14 @@ static const struct iommu_ops mtk_iommu_ops = { .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = mtk_iommu_unmap, + .iova_to_phys = mtk_iommu_iova_to_phys, + .free = mtk_iommu_domain_free, + } }; static const struct of_device_id mtk_iommu_of_ids[] = { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 980e4af3f06b..4aab631ef517 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1734,16 +1734,18 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev) static const struct iommu_ops omap_iommu_ops = { .domain_alloc = omap_iommu_domain_alloc, - .domain_free = omap_iommu_domain_free, - .attach_dev = omap_iommu_attach_dev, - .detach_dev = omap_iommu_detach_dev, - .map = omap_iommu_map, - .unmap = omap_iommu_unmap, - .iova_to_phys = omap_iommu_iova_to_phys, .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = omap_iommu_attach_dev, + .detach_dev = omap_iommu_detach_dev, + .map = omap_iommu_map, + .unmap = omap_iommu_unmap, + .iova_to_phys = omap_iommu_iova_to_phys, + .free = omap_iommu_domain_free, + } }; static int __init omap_iommu_init(void) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7f23ad61c094..2ae97a84d41b 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1187,17 +1187,19 @@ static int rk_iommu_of_xlate(struct device *dev, static const struct iommu_ops rk_iommu_ops = { .domain_alloc = rk_iommu_domain_alloc, - .domain_free = rk_iommu_domain_free, - .attach_dev = rk_iommu_attach_device, - .detach_dev = rk_iommu_detach_device, - .map = rk_iommu_map, - .unmap = rk_iommu_unmap, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, - .iova_to_phys = rk_iommu_iova_to_phys, .device_group = rk_iommu_device_group, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = rk_iommu_attach_device, + .detach_dev = rk_iommu_detach_device, + .map = rk_iommu_map, + .unmap = rk_iommu_unmap, + .iova_to_phys = rk_iommu_iova_to_phys, + .free = rk_iommu_domain_free, + } }; static int rk_iommu_probe(struct platform_device *pdev) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 50860ebdd087..3833e86c6e7b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,16 +363,18 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, - .domain_free = s390_domain_free, - .attach_dev = s390_iommu_attach_device, - .detach_dev = s390_iommu_detach_device, - .map = s390_iommu_map, - .unmap = s390_iommu_unmap, - .iova_to_phys = s390_iommu_iova_to_phys, .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = s390_iommu_attach_device, + .detach_dev = s390_iommu_detach_device, + .map = s390_iommu_map, + .unmap = s390_iommu_unmap, + .iova_to_phys = s390_iommu_iova_to_phys, + .free = s390_domain_free, + } }; static int __init s390_iommu_init(void) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 27ac818b0354..bd409bab6286 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -416,20 +416,22 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { .domain_alloc = sprd_iommu_domain_alloc, - .domain_free = sprd_iommu_domain_free, - .attach_dev = sprd_iommu_attach_device, - .detach_dev = sprd_iommu_detach_device, - .map = sprd_iommu_map, - .unmap = sprd_iommu_unmap, - .iotlb_sync_map = sprd_iommu_sync_map, - .iotlb_sync = sprd_iommu_sync, - .iova_to_phys = sprd_iommu_iova_to_phys, .probe_device = sprd_iommu_probe_device, .release_device = sprd_iommu_release_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = sprd_iommu_attach_device, + .detach_dev = sprd_iommu_detach_device, + .map = sprd_iommu_map, + .unmap = sprd_iommu_unmap, + .iotlb_sync_map = sprd_iommu_sync_map, + .iotlb_sync = sprd_iommu_sync, + .iova_to_phys = sprd_iommu_iova_to_phys, + .free = sprd_iommu_domain_free, + } }; static const struct of_device_id sprd_iommu_of_match[] = { diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 92997021e188..c54ab477b8fd 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -760,19 +760,21 @@ static int sun50i_iommu_of_xlate(struct device *dev, static const struct iommu_ops sun50i_iommu_ops = { .pgsize_bitmap = SZ_4K, - .attach_dev = sun50i_iommu_attach_device, - .detach_dev = sun50i_iommu_detach_device, .device_group = sun50i_iommu_device_group, .domain_alloc = sun50i_iommu_domain_alloc, - .domain_free = sun50i_iommu_domain_free, - .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, - .iotlb_sync = sun50i_iommu_iotlb_sync, - .iova_to_phys = sun50i_iommu_iova_to_phys, - .map = sun50i_iommu_map, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, .release_device = sun50i_iommu_release_device, - .unmap = sun50i_iommu_unmap, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = sun50i_iommu_attach_device, + .detach_dev = sun50i_iommu_detach_device, + .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync = sun50i_iommu_iotlb_sync, + .iova_to_phys = sun50i_iommu_iova_to_phys, + .map = sun50i_iommu_map, + .unmap = sun50i_iommu_unmap, + .free = sun50i_iommu_domain_free, + } }; static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index bbd287d19324..a6700a40a6f8 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -272,19 +272,21 @@ static void gart_iommu_sync(struct iommu_domain *domain, static const struct iommu_ops gart_iommu_ops = { .domain_alloc = gart_iommu_domain_alloc, - .domain_free = gart_iommu_domain_free, - .attach_dev = gart_iommu_attach_dev, - .detach_dev = gart_iommu_detach_dev, .probe_device = gart_iommu_probe_device, .release_device = gart_iommu_release_device, .device_group = generic_device_group, - .map = gart_iommu_map, - .unmap = gart_iommu_unmap, - .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, - .iotlb_sync_map = gart_iommu_sync_map, - .iotlb_sync = gart_iommu_sync, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = gart_iommu_attach_dev, + .detach_dev = gart_iommu_detach_dev, + .map = gart_iommu_map, + .unmap = gart_iommu_unmap, + .iova_to_phys = gart_iommu_iova_to_phys, + .iotlb_sync_map = gart_iommu_sync_map, + .iotlb_sync = gart_iommu_sync, + .free = gart_iommu_domain_free, + } }; int tegra_gart_suspend(struct gart_device *gart) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 43df44f918a1..44c3716e16ee 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -963,17 +963,19 @@ static int tegra_smmu_of_xlate(struct device *dev, static const struct iommu_ops tegra_smmu_ops = { .domain_alloc = tegra_smmu_domain_alloc, - .domain_free = tegra_smmu_domain_free, - .attach_dev = tegra_smmu_attach_dev, - .detach_dev = tegra_smmu_detach_dev, .probe_device = tegra_smmu_probe_device, .release_device = tegra_smmu_release_device, .device_group = tegra_smmu_device_group, - .map = tegra_smmu_map, - .unmap = tegra_smmu_unmap, - .iova_to_phys = tegra_smmu_iova_to_phys, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = tegra_smmu_attach_dev, + .detach_dev = tegra_smmu_detach_dev, + .map = tegra_smmu_map, + .unmap = tegra_smmu_unmap, + .iova_to_phys = tegra_smmu_iova_to_phys, + .free = tegra_smmu_domain_free, + } }; static void tegra_smmu_ahb_enable(void) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index f2aa34f57454..25be4b822aa0 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1008,12 +1008,6 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) static struct iommu_ops viommu_ops = { .domain_alloc = viommu_domain_alloc, - .domain_free = viommu_domain_free, - .attach_dev = viommu_attach_dev, - .map = viommu_map, - .unmap = viommu_unmap, - .iova_to_phys = viommu_iova_to_phys, - .iotlb_sync = viommu_iotlb_sync, .probe_device = viommu_probe_device, .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, @@ -1022,6 +1016,14 @@ static struct iommu_ops viommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .of_xlate = viommu_of_xlate, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = viommu_attach_dev, + .map = viommu_map, + .unmap = viommu_unmap, + .iova_to_phys = viommu_iova_to_phys, + .iotlb_sync = viommu_iotlb_sync, + .free = viommu_domain_free, + } }; static int viommu_init_vqs(struct viommu_dev *viommu) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ded6076a75c..9208eca4b0d1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -37,6 +37,7 @@ struct iommu_group; struct bus_type; struct device; struct iommu_domain; +struct iommu_domain_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -88,7 +89,7 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; - const struct iommu_ops *ops; + const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ iommu_fault_handler_t handler; void *handler_token; @@ -192,26 +193,11 @@ struct iommu_iotlb_gather { * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @domain_alloc: allocate iommu domain - * @domain_free: free iommu domain - * @attach_dev: attach device to an iommu domain - * @detach_dev: detach device from an iommu domain - * @map: map a physically contiguous memory region to an iommu domain - * @map_pages: map a physically contiguous set of pages of the same size to - * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain - * @unmap_pages: unmap a number of pages of the same size from an iommu domain - * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain - * @iotlb_sync_map: Sync mappings created recently using @map to the hardware - * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush - * queue - * @iova_to_phys: translate iova to physical address * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain * @device_group: find iommu group for a particular device - * @enable_nesting: Enable nesting - * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -228,6 +214,7 @@ struct iommu_iotlb_gather { * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting + * @default_domain_ops: the default ops for domains * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -236,33 +223,11 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - void (*domain_free)(struct iommu_domain *); - int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); - int (*map_pages)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t pgsize, size_t pgcount, - int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); - size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, - size_t pgsize, size_t pgcount, - struct iommu_iotlb_gather *iotlb_gather); - void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); - void (*iotlb_sync)(struct iommu_domain *domain, - struct iommu_iotlb_gather *iotlb_gather); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); - int (*enable_nesting)(struct iommu_domain *domain); - int (*set_pgtable_quirks)(struct iommu_domain *domain, - unsigned long quirks); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -288,10 +253,60 @@ struct iommu_ops { int (*def_domain_type)(struct device *dev); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; }; +/** + * struct iommu_domain_ops - domain specific operations + * @attach_dev: attach an iommu domain to a device + * @detach_dev: detach an iommu domain from a device + * @map: map a physically contiguous memory region to an iommu domain + * @map_pages: map a physically contiguous set of pages of the same size to + * an iommu domain. + * @unmap: unmap a physically contiguous memory region from an iommu domain + * @unmap_pages: unmap a number of pages of the same size from an iommu domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain + * @iotlb_sync_map: Sync mappings created recently using @map to the hardware + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * queue + * @iova_to_phys: translate iova to physical address + * @enable_nesting: Enable nesting + * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) + * @free: Release the domain after use. + */ +struct iommu_domain_ops { + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); + size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather); + + void (*flush_iotlb_all)(struct iommu_domain *domain); + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); + void (*iotlb_sync)(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather); + + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + dma_addr_t iova); + + int (*enable_nesting)(struct iommu_domain *domain); + int (*set_pgtable_quirks)(struct iommu_domain *domain, + unsigned long quirks); + + void (*free)(struct iommu_domain *domain); +}; + /** * struct iommu_device - IOMMU core representation of one IOMMU hardware * instance From 455481fc9a807798eca05f6fb0918ab88109d845 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Feb 2022 10:04:28 +0100 Subject: [PATCH 149/380] MIPS: Remove TX39XX support No (active) developer owns this hardware, so let's remove Linux support. Signed-off-by: Thomas Bogendoerfer Acked-by: Guenter Roeck Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Atsushi Nemoto --- arch/mips/Kbuild.platforms | 1 - arch/mips/Kconfig | 24 +- arch/mips/Makefile | 1 - arch/mips/configs/jmr3927_defconfig | 50 --- arch/mips/include/asm/cpu-features.h | 3 - arch/mips/include/asm/cpu-type.h | 6 - arch/mips/include/asm/cpu.h | 6 - arch/mips/include/asm/isadep.h | 2 +- .../asm/mach-ath25/cpu-feature-overrides.h | 1 - .../asm/mach-ath79/cpu-feature-overrides.h | 1 - .../asm/mach-au1x00/cpu-feature-overrides.h | 1 - .../asm/mach-bcm47xx/cpu-feature-overrides.h | 1 - .../cpu-feature-overrides.h | 1 - .../asm/mach-cobalt/cpu-feature-overrides.h | 1 - .../asm/mach-dec/cpu-feature-overrides.h | 1 - .../asm/mach-ingenic/cpu-feature-overrides.h | 1 - .../asm/mach-ip27/cpu-feature-overrides.h | 1 - .../asm/mach-ip30/cpu-feature-overrides.h | 1 - .../falcon/cpu-feature-overrides.h | 1 - .../mach-loongson2ef/cpu-feature-overrides.h | 1 - .../mach-loongson64/cpu-feature-overrides.h | 1 - .../mt7620/cpu-feature-overrides.h | 1 - .../mt7621/cpu-feature-overrides.h | 1 - .../rt288x/cpu-feature-overrides.h | 1 - .../rt305x/cpu-feature-overrides.h | 1 - .../rt3883/cpu-feature-overrides.h | 1 - .../asm/mach-rc32434/cpu-feature-overrides.h | 1 - arch/mips/include/asm/mach-tx39xx/ioremap.h | 25 -- .../include/asm/mach-tx39xx/mangle-port.h | 24 - arch/mips/include/asm/mach-tx39xx/spaces.h | 17 - arch/mips/include/asm/stackframe.h | 6 +- arch/mips/include/asm/txx9/boards.h | 3 - arch/mips/include/asm/txx9/jmr3927.h | 179 -------- arch/mips/include/asm/txx9/tx3927.h | 341 --------------- arch/mips/include/asm/txx9irq.h | 4 - arch/mips/include/asm/txx9tmr.h | 4 - arch/mips/include/asm/vermagic.h | 2 - arch/mips/kernel/Makefile | 1 - arch/mips/kernel/cpu-probe.c | 23 - arch/mips/kernel/cpu-r3k-probe.c | 22 - arch/mips/kernel/entry.S | 2 +- arch/mips/kernel/genex.S | 4 +- arch/mips/kernel/idle.c | 10 - arch/mips/kernel/irq_txx9.c | 13 - arch/mips/kernel/proc.c | 2 - arch/mips/kernel/process.c | 2 +- arch/mips/lib/Makefile | 1 - arch/mips/lib/r3k_dump_tlb.c | 4 - arch/mips/mm/Makefile | 1 - arch/mips/mm/c-tx39.c | 414 ------------------ arch/mips/mm/cache.c | 5 - arch/mips/mm/tlb-r3k.c | 40 +- arch/mips/pci/Makefile | 2 - arch/mips/pci/fixup-jmr3927.c | 79 ---- arch/mips/pci/ops-tx3927.c | 231 ---------- arch/mips/txx9/Kconfig | 18 - arch/mips/txx9/Makefile | 6 - arch/mips/txx9/Platform | 3 - arch/mips/txx9/generic/Makefile | 1 - arch/mips/txx9/generic/irq_tx3927.c | 25 -- arch/mips/txx9/generic/setup.c | 55 --- arch/mips/txx9/generic/setup_tx3927.c | 136 ------ arch/mips/txx9/jmr3927/Makefile | 6 - arch/mips/txx9/jmr3927/irq.c | 128 ------ arch/mips/txx9/jmr3927/prom.c | 52 --- arch/mips/txx9/jmr3927/setup.c | 223 ---------- drivers/dma/Kconfig | 2 +- drivers/watchdog/Kconfig | 2 +- 68 files changed, 18 insertions(+), 2212 deletions(-) delete mode 100644 arch/mips/configs/jmr3927_defconfig delete mode 100644 arch/mips/include/asm/mach-tx39xx/ioremap.h delete mode 100644 arch/mips/include/asm/mach-tx39xx/mangle-port.h delete mode 100644 arch/mips/include/asm/mach-tx39xx/spaces.h delete mode 100644 arch/mips/include/asm/txx9/jmr3927.h delete mode 100644 arch/mips/include/asm/txx9/tx3927.h delete mode 100644 arch/mips/mm/c-tx39.c delete mode 100644 arch/mips/pci/fixup-jmr3927.c delete mode 100644 arch/mips/pci/ops-tx3927.c delete mode 100644 arch/mips/txx9/generic/irq_tx3927.c delete mode 100644 arch/mips/txx9/generic/setup_tx3927.c delete mode 100644 arch/mips/txx9/jmr3927/Makefile delete mode 100644 arch/mips/txx9/jmr3927/irq.c delete mode 100644 arch/mips/txx9/jmr3927/prom.c delete mode 100644 arch/mips/txx9/jmr3927/setup.c diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 30193bcf9caa..1bc4282af064 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -32,7 +32,6 @@ platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ platform-$(CONFIG_SNI_RM) += sni/ -platform-$(CONFIG_MACH_TX39XX) += txx9/ platform-$(CONFIG_MACH_TX49XX) += txx9/ platform-$(CONFIG_MACH_VR41XX) += vr41xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f8f11542d044..59dabfd5a129 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -927,9 +927,6 @@ config SNI_RM Technology and now in turn merged with Fujitsu. Say Y here to support this machine type. -config MACH_TX39XX - bool "Toshiba TX39 series based machines" - config MACH_TX49XX bool "Toshiba TX49 series based machines" select WAR_TX49XX_ICACHE_INDEX_INV @@ -1584,12 +1581,6 @@ config CPU_R3000 might be a safe bet. If the resulting kernel does not work, try to recompile with R3000. -config CPU_TX39XX - bool "R39XX" - depends on SYS_HAS_CPU_TX39XX - select CPU_SUPPORTS_32BIT_KERNEL - select CPU_R3K_TLB - config CPU_VR41XX bool "R41xx" depends on SYS_HAS_CPU_VR41XX @@ -1916,9 +1907,6 @@ config SYS_HAS_CPU_P5600 config SYS_HAS_CPU_R3000 bool -config SYS_HAS_CPU_TX39XX - bool - config SYS_HAS_CPU_VR41XX bool @@ -2149,7 +2137,7 @@ config PAGE_SIZE_8KB config PAGE_SIZE_16KB bool "16kB" - depends on !CPU_R3000 && !CPU_TX39XX + depends on !CPU_R3000 help Using 16kB page size will result in higher performance kernel at the price of higher memory consumption. This option is available on @@ -2168,7 +2156,7 @@ config PAGE_SIZE_32KB config PAGE_SIZE_64KB bool "64kB" - depends on !CPU_R3000 && !CPU_TX39XX + depends on !CPU_R3000 help Using 64kB page size will result in higher performance kernel at the price of higher memory consumption. This option is available on @@ -2236,7 +2224,7 @@ config CPU_HAS_PREFETCH config CPU_GENERIC_DUMP_TLB bool - default y if !(CPU_R3000 || CPU_TX39XX) + default y if !CPU_R3000 config MIPS_FP_SUPPORT bool "Floating Point support" if EXPERT @@ -2256,7 +2244,7 @@ config MIPS_FP_SUPPORT config CPU_R2300_FPU bool depends on MIPS_FP_SUPPORT - default y if CPU_R3000 || CPU_TX39XX + default y if CPU_R3000 config CPU_R3K_TLB bool @@ -2575,13 +2563,13 @@ config CPU_R4X00_BUGS64 config MIPS_ASID_SHIFT int - default 6 if CPU_R3000 || CPU_TX39XX + default 6 if CPU_R3000 default 0 config MIPS_ASID_BITS int default 0 if MIPS_ASID_BITS_VARIABLE - default 6 if CPU_R3000 || CPU_TX39XX + default 6 if CPU_R3000 default 8 config MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e036fc025ccc..47ca5488434a 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -158,7 +158,6 @@ cflags-y += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) # CPU-dependent compiler/assembler options for optimization. # cflags-$(CONFIG_CPU_R3000) += -march=r3000 -cflags-$(CONFIG_CPU_TX39XX) += -march=r3900 cflags-$(CONFIG_CPU_R4300) += -march=r4300 -Wa,--trap cflags-$(CONFIG_CPU_VR41XX) += -march=r4100 -Wa,--trap cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig deleted file mode 100644 index 24b96faf9b4e..000000000000 --- a/arch/mips/configs/jmr3927_defconfig +++ /dev/null @@ -1,50 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_MACH_TX39XX=y -CONFIG_TOSHIBA_JMR3927=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CFI=y -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP=y -CONFIG_NETDEVICES=y -CONFIG_TC35815=y -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_UNIX98_PTYS is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_TXX9_CONSOLE=y -CONFIG_SERIAL_TXX9_STDSERIAL=y -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_TXX9_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS1742=y -CONFIG_PROC_KCORE=y -# CONFIG_MISC_FILESYSTEMS is not set -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 3d71081afc55..de8cb2ccb781 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -120,9 +120,6 @@ #ifndef cpu_has_4k_cache #define cpu_has_4k_cache __isa_ge_or_opt(1, MIPS_CPU_4K_CACHE) #endif -#ifndef cpu_has_tx39_cache -#define cpu_has_tx39_cache __opt(MIPS_CPU_TX39_CACHE) -#endif #ifndef cpu_has_octeon_cache #define cpu_has_octeon_cache 0 #endif diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 5efe8c8b854e..5582ff0c247e 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -105,12 +105,6 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_R3081E: #endif -#ifdef CONFIG_SYS_HAS_CPU_TX39XX - case CPU_TX3912: - case CPU_TX3922: - case CPU_TX3927: -#endif - #ifdef CONFIG_SYS_HAS_CPU_VR41XX case CPU_VR41XX: case CPU_VR4111: diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 5c2f8d9cb7cf..00a3fc7d778d 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -309,11 +309,6 @@ enum cpu_type_enum { CPU_VR4122, CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000, CPU_SR71000, CPU_TX49XX, - /* - * TX3900 class processors - */ - CPU_TX3912, CPU_TX3922, CPU_TX3927, - /* * MIPS32 class processors */ @@ -367,7 +362,6 @@ enum cpu_type_enum { #define MIPS_CPU_4KEX BIT_ULL( 1) /* "R4K" exception model */ #define MIPS_CPU_3K_CACHE BIT_ULL( 2) /* R3000-style caches */ #define MIPS_CPU_4K_CACHE BIT_ULL( 3) /* R4000-style caches */ -#define MIPS_CPU_TX39_CACHE BIT_ULL( 4) /* TX3900-style caches */ #define MIPS_CPU_FPU BIT_ULL( 5) /* CPU has FPU */ #define MIPS_CPU_32FPR BIT_ULL( 6) /* 32 dbl. prec. FP registers */ #define MIPS_CPU_COUNTER BIT_ULL( 7) /* Cycle count/compare */ diff --git a/arch/mips/include/asm/isadep.h b/arch/mips/include/asm/isadep.h index d1683202399b..8fc1e3ae8d0c 100644 --- a/arch/mips/include/asm/isadep.h +++ b/arch/mips/include/asm/isadep.h @@ -10,7 +10,7 @@ #ifndef __ASM_ISADEP_H #define __ASM_ISADEP_H -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) /* * R2000 or R3000 */ diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h index a54f20d956a2..ec3604c44ef2 100644 --- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h @@ -18,7 +18,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index 79ab3ad9fee8..44fd44a5fc42 100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h index e6e527224a15..3c200303ae55 100644 --- a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h @@ -21,7 +21,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h index b23ff47ea475..69899c1e122d 100644 --- a/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h @@ -6,7 +6,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 513270c8adb9..9151dcd9d0d5 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -21,7 +21,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 0 -#define cpu_has_tx39_cache 0 #define cpu_has_counter 1 #define cpu_has_watch 1 #define cpu_has_divec 1 diff --git a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h index 291fe90aafa5..03192458471d 100644 --- a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h @@ -13,7 +13,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 #define cpu_has_watch 0 diff --git a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h index 1896e88f6000..3ddc4b4dca26 100644 --- a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h @@ -17,7 +17,6 @@ #define cpu_has_rixiex 0 #define cpu_has_maar 0 #define cpu_has_rw_llb 0 -#define cpu_has_tx39_cache 0 #define cpu_has_divec 0 #define cpu_has_prefetch 0 #define cpu_has_mcheck 0 diff --git a/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h index 7c5e576f9d96..7ace50127f5a 100644 --- a/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h @@ -11,7 +11,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_counter 0 #define cpu_has_watch 1 #define cpu_has_divec 1 diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 58f829c9b6c7..c8385c4e8664 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -25,7 +25,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h index 49a93e82c252..8ad0c424a9af 100644 --- a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -28,7 +28,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h index 10226976f7b7..22607e61e57b 100644 --- a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h @@ -15,7 +15,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h index b2ee859ca0b7..eb0d1cfb9f3b 100644 --- a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h @@ -34,7 +34,6 @@ #define cpu_has_mipsmt 0 #define cpu_has_smartmips 0 #define cpu_has_tlb 1 -#define cpu_has_tx39_cache 0 #define cpu_has_vce 0 #define cpu_has_veic 0 #define cpu_has_vint 0 diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index eb181224eb4c..ebace9e4bdc1 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -36,7 +36,6 @@ #define cpu_has_mipsmt 0 #define cpu_has_smartmips 0 #define cpu_has_tlb 1 -#define cpu_has_tx39_cache 0 #define cpu_has_vce 0 #define cpu_has_veic 0 #define cpu_has_vint 0 diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h index c4579f1705c2..85a62c99a52a 100644 --- a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h index 168359a0a58d..3c19a94f5432 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h @@ -17,7 +17,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h index fdaf8c9182bc..a850c1e46134 100644 --- a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h index 7a385fe784a6..2d75264a9166 100644 --- a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h index 0a61910f6521..accf2a325343 100644 --- a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h @@ -15,7 +15,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h index 8539ccfb69b7..36d45c9cf09c 100644 --- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h @@ -18,7 +18,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-tx39xx/ioremap.h b/arch/mips/include/asm/mach-tx39xx/ioremap.h deleted file mode 100644 index 157a7292397e..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/ioremap.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-mips/mach-tx39xx/ioremap.h - */ -#ifndef __ASM_MACH_TX39XX_IOREMAP_H -#define __ASM_MACH_TX39XX_IOREMAP_H - -#include - -static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ -#define TXX9_DIRECTMAP_BASE 0xff000000ul - if (offset >= TXX9_DIRECTMAP_BASE && - offset < TXX9_DIRECTMAP_BASE + 0xff0000) - return (void __iomem *)offset; - return NULL; -} - -static inline int plat_iounmap(const volatile void __iomem *addr) -{ - return (unsigned long)addr >= TXX9_DIRECTMAP_BASE; -} - -#endif /* __ASM_MACH_TX39XX_IOREMAP_H */ diff --git a/arch/mips/include/asm/mach-tx39xx/mangle-port.h b/arch/mips/include/asm/mach-tx39xx/mangle-port.h deleted file mode 100644 index 95be459950f7..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/mangle-port.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_TX39XX_MANGLE_PORT_H -#define __ASM_MACH_TX39XX_MANGLE_PORT_H - -#if defined(CONFIG_TOSHIBA_JMR3927) -extern unsigned long (*__swizzle_addr_b)(unsigned long port); -#define NEEDS_TXX9_SWIZZLE_ADDR_B -#else -#define __swizzle_addr_b(port) (port) -#endif -#define __swizzle_addr_w(port) (port) -#define __swizzle_addr_l(port) (port) -#define __swizzle_addr_q(port) (port) - -#define ioswabb(a, x) (x) -#define __mem_ioswabb(a, x) (x) -#define ioswabw(a, x) le16_to_cpu((__force __le16)(x)) -#define __mem_ioswabw(a, x) (x) -#define ioswabl(a, x) le32_to_cpu((__force __le32)(x)) -#define __mem_ioswabl(a, x) (x) -#define ioswabq(a, x) le64_to_cpu((__force __le64)(x)) -#define __mem_ioswabq(a, x) (x) - -#endif /* __ASM_MACH_TX39XX_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-tx39xx/spaces.h b/arch/mips/include/asm/mach-tx39xx/spaces.h deleted file mode 100644 index 151fe7a1cf1d..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/spaces.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. - */ -#ifndef _ASM_TX39XX_SPACES_H -#define _ASM_TX39XX_SPACES_H - -#define FIXADDR_TOP ((unsigned long)(long)(int)0xfefe0000) - -#include - -#endif /* __ASM_TX39XX_SPACES_H */ diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index aa430a6c68b2..a8705aef47e1 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -42,7 +42,7 @@ cfi_restore \reg \offset \docfi .endm -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) #define STATMASK 0x3f #else #define STATMASK 0x1f @@ -349,7 +349,7 @@ cfi_ld sp, PT_R29, \docfi .endm -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) .macro RESTORE_SOME docfi=0 .set push @@ -478,7 +478,7 @@ .macro KMODE mfc0 t0, CP0_STATUS li t1, ST0_KERNEL_CUMASK | (STATMASK & ~1) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) andi t2, t0, ST0_IEP srl t2, 2 or t0, t2 diff --git a/arch/mips/include/asm/txx9/boards.h b/arch/mips/include/asm/txx9/boards.h index 70284e90dc53..6897ca4366d5 100644 --- a/arch/mips/include/asm/txx9/boards.h +++ b/arch/mips/include/asm/txx9/boards.h @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_TOSHIBA_JMR3927 -BOARD_VEC(jmr3927_vec) -#endif #ifdef CONFIG_TOSHIBA_RBTX4927 BOARD_VEC(rbtx4927_vec) BOARD_VEC(rbtx4937_vec) diff --git a/arch/mips/include/asm/txx9/jmr3927.h b/arch/mips/include/asm/txx9/jmr3927.h deleted file mode 100644 index aab959dc30ba..000000000000 --- a/arch/mips/include/asm/txx9/jmr3927.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Defines for the TJSYS JMR-TX3927 - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000-2001 Toshiba Corporation - */ -#ifndef __ASM_TXX9_JMR3927_H -#define __ASM_TXX9_JMR3927_H - -#include -#include -#include - -/* CS */ -#define JMR3927_ROMCE0 0x1fc00000 /* 4M */ -#define JMR3927_ROMCE1 0x1e000000 /* 4M */ -#define JMR3927_ROMCE2 0x14000000 /* 16M */ -#define JMR3927_ROMCE3 0x10000000 /* 64M */ -#define JMR3927_ROMCE5 0x1d000000 /* 4M */ -#define JMR3927_SDCS0 0x00000000 /* 32M */ -#define JMR3927_SDCS1 0x02000000 /* 32M */ -/* PCI Direct Mappings */ - -#define JMR3927_PCIMEM 0x08000000 -#define JMR3927_PCIMEM_SIZE 0x08000000 /* 128M */ -#define JMR3927_PCIIO 0x15000000 -#define JMR3927_PCIIO_SIZE 0x01000000 /* 16M */ - -#define JMR3927_SDRAM_SIZE 0x02000000 /* 32M */ -#define JMR3927_PORT_BASE KSEG1 - -/* Address map (virtual address) */ -#define JMR3927_ROM0_BASE (KSEG1 + JMR3927_ROMCE0) -#define JMR3927_ROM1_BASE (KSEG1 + JMR3927_ROMCE1) -#define JMR3927_IOC_BASE (KSEG1 + JMR3927_ROMCE2) -#define JMR3927_PCIMEM_BASE (KSEG1 + JMR3927_PCIMEM) -#define JMR3927_PCIIO_BASE (KSEG1 + JMR3927_PCIIO) - -#define JMR3927_IOC_REV_ADDR (JMR3927_IOC_BASE + 0x00000000) -#define JMR3927_IOC_NVRAMB_ADDR (JMR3927_IOC_BASE + 0x00010000) -#define JMR3927_IOC_LED_ADDR (JMR3927_IOC_BASE + 0x00020000) -#define JMR3927_IOC_DIPSW_ADDR (JMR3927_IOC_BASE + 0x00030000) -#define JMR3927_IOC_BREV_ADDR (JMR3927_IOC_BASE + 0x00040000) -#define JMR3927_IOC_DTR_ADDR (JMR3927_IOC_BASE + 0x00050000) -#define JMR3927_IOC_INTS1_ADDR (JMR3927_IOC_BASE + 0x00080000) -#define JMR3927_IOC_INTS2_ADDR (JMR3927_IOC_BASE + 0x00090000) -#define JMR3927_IOC_INTM_ADDR (JMR3927_IOC_BASE + 0x000a0000) -#define JMR3927_IOC_INTP_ADDR (JMR3927_IOC_BASE + 0x000b0000) -#define JMR3927_IOC_RESET_ADDR (JMR3927_IOC_BASE + 0x000f0000) - -/* Flash ROM */ -#define JMR3927_FLASH_BASE (JMR3927_ROM0_BASE) -#define JMR3927_FLASH_SIZE 0x00400000 - -/* bits for IOC_REV/IOC_BREV (high byte) */ -#define JMR3927_IDT_MASK 0xfc -#define JMR3927_REV_MASK 0x03 -#define JMR3927_IOC_IDT 0xe0 - -/* bits for IOC_INTS1/IOC_INTS2/IOC_INTM/IOC_INTP (high byte) */ -#define JMR3927_IOC_INTB_PCIA 0 -#define JMR3927_IOC_INTB_PCIB 1 -#define JMR3927_IOC_INTB_PCIC 2 -#define JMR3927_IOC_INTB_PCID 3 -#define JMR3927_IOC_INTB_MODEM 4 -#define JMR3927_IOC_INTB_INT6 5 -#define JMR3927_IOC_INTB_INT7 6 -#define JMR3927_IOC_INTB_SOFT 7 -#define JMR3927_IOC_INTF_PCIA (1 << JMR3927_IOC_INTF_PCIA) -#define JMR3927_IOC_INTF_PCIB (1 << JMR3927_IOC_INTB_PCIB) -#define JMR3927_IOC_INTF_PCIC (1 << JMR3927_IOC_INTB_PCIC) -#define JMR3927_IOC_INTF_PCID (1 << JMR3927_IOC_INTB_PCID) -#define JMR3927_IOC_INTF_MODEM (1 << JMR3927_IOC_INTB_MODEM) -#define JMR3927_IOC_INTF_INT6 (1 << JMR3927_IOC_INTB_INT6) -#define JMR3927_IOC_INTF_INT7 (1 << JMR3927_IOC_INTB_INT7) -#define JMR3927_IOC_INTF_SOFT (1 << JMR3927_IOC_INTB_SOFT) - -/* bits for IOC_RESET (high byte) */ -#define JMR3927_IOC_RESET_CPU 1 -#define JMR3927_IOC_RESET_PCI 2 - -#if defined(__BIG_ENDIAN) -#define jmr3927_ioc_reg_out(d, a) ((*(volatile unsigned char *)(a)) = (d)) -#define jmr3927_ioc_reg_in(a) (*(volatile unsigned char *)(a)) -#elif defined(__LITTLE_ENDIAN) -#define jmr3927_ioc_reg_out(d, a) ((*(volatile unsigned char *)((a)^1)) = (d)) -#define jmr3927_ioc_reg_in(a) (*(volatile unsigned char *)((a)^1)) -#else -#error "No Endian" -#endif - -/* LED macro */ -#define jmr3927_led_set(n/*0-16*/) jmr3927_ioc_reg_out(~(n), JMR3927_IOC_LED_ADDR) - -#define jmr3927_led_and_set(n/*0-16*/) jmr3927_ioc_reg_out((~(n)) & jmr3927_ioc_reg_in(JMR3927_IOC_LED_ADDR), JMR3927_IOC_LED_ADDR) - -/* DIPSW4 macro */ -#define jmr3927_dipsw1() (gpio_get_value(11) == 0) -#define jmr3927_dipsw2() (gpio_get_value(10) == 0) -#define jmr3927_dipsw3() ((jmr3927_ioc_reg_in(JMR3927_IOC_DIPSW_ADDR) & 2) == 0) -#define jmr3927_dipsw4() ((jmr3927_ioc_reg_in(JMR3927_IOC_DIPSW_ADDR) & 1) == 0) - -/* - * IRQ mappings - */ - -/* These are the virtual IRQ numbers, we divide all IRQ's into - * 'spaces', the 'space' determines where and how to enable/disable - * that particular IRQ on an JMR machine. Add new 'spaces' as new - * IRQ hardware is supported. - */ -#define JMR3927_NR_IRQ_IRC 16 /* On-Chip IRC */ -#define JMR3927_NR_IRQ_IOC 8 /* PCI/MODEM/INT[6:7] */ - -#define JMR3927_IRQ_IRC TXX9_IRQ_BASE -#define JMR3927_IRQ_IOC (JMR3927_IRQ_IRC + JMR3927_NR_IRQ_IRC) -#define JMR3927_IRQ_END (JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC) - -#define JMR3927_IRQ_IRC_INT0 (JMR3927_IRQ_IRC + TX3927_IR_INT0) -#define JMR3927_IRQ_IRC_INT1 (JMR3927_IRQ_IRC + TX3927_IR_INT1) -#define JMR3927_IRQ_IRC_INT2 (JMR3927_IRQ_IRC + TX3927_IR_INT2) -#define JMR3927_IRQ_IRC_INT3 (JMR3927_IRQ_IRC + TX3927_IR_INT3) -#define JMR3927_IRQ_IRC_INT4 (JMR3927_IRQ_IRC + TX3927_IR_INT4) -#define JMR3927_IRQ_IRC_INT5 (JMR3927_IRQ_IRC + TX3927_IR_INT5) -#define JMR3927_IRQ_IRC_SIO0 (JMR3927_IRQ_IRC + TX3927_IR_SIO0) -#define JMR3927_IRQ_IRC_SIO1 (JMR3927_IRQ_IRC + TX3927_IR_SIO1) -#define JMR3927_IRQ_IRC_SIO(ch) (JMR3927_IRQ_IRC + TX3927_IR_SIO(ch)) -#define JMR3927_IRQ_IRC_DMA (JMR3927_IRQ_IRC + TX3927_IR_DMA) -#define JMR3927_IRQ_IRC_PIO (JMR3927_IRQ_IRC + TX3927_IR_PIO) -#define JMR3927_IRQ_IRC_PCI (JMR3927_IRQ_IRC + TX3927_IR_PCI) -#define JMR3927_IRQ_IRC_TMR(ch) (JMR3927_IRQ_IRC + TX3927_IR_TMR(ch)) -#define JMR3927_IRQ_IOC_PCIA (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIA) -#define JMR3927_IRQ_IOC_PCIB (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIB) -#define JMR3927_IRQ_IOC_PCIC (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIC) -#define JMR3927_IRQ_IOC_PCID (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCID) -#define JMR3927_IRQ_IOC_MODEM (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_MODEM) -#define JMR3927_IRQ_IOC_INT6 (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_INT6) -#define JMR3927_IRQ_IOC_INT7 (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_INT7) -#define JMR3927_IRQ_IOC_SOFT (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_SOFT) - -/* IOC (PCI, MODEM) */ -#define JMR3927_IRQ_IOCINT JMR3927_IRQ_IRC_INT1 -/* TC35815 100M Ether (JMR-TX3912:JPW4:2-3 Short) */ -#define JMR3927_IRQ_ETHER0 JMR3927_IRQ_IRC_INT3 - -/* Clocks */ -#define JMR3927_CORECLK 132710400 /* 132.7MHz */ - -/* - * TX3927 Pin Configuration: - * - * PCFG bits Avail Dead - * SELSIO[1:0]:11 RXD[1:0], TXD[1:0] PIO[6:3] - * SELSIOC[0]:1 CTS[0], RTS[0] INT[5:4] - * SELSIOC[1]:0,SELDSF:0, GSDAO[0],GPCST[3] CTS[1], RTS[1],DSF, - * GDBGE* PIO[2:1] - * SELDMA[2]:1 DMAREQ[2],DMAACK[2] PIO[13:12] - * SELTMR[2:0]:000 TIMER[1:0] - * SELCS:0,SELDMA[1]:0 PIO[11;10] SDCS_CE[7:6], - * DMAREQ[1],DMAACK[1] - * SELDMA[0]:1 DMAREQ[0],DMAACK[0] PIO[9:8] - * SELDMA[3]:1 DMAREQ[3],DMAACK[3] PIO[15:14] - * SELDONE:1 DMADONE PIO[7] - * - * Usable pins are: - * RXD[1;0],TXD[1:0],CTS[0],RTS[0], - * DMAREQ[0,2,3],DMAACK[0,2,3],DMADONE,PIO[0,10,11] - * INT[3:0] - */ - -void jmr3927_prom_init(void); -void jmr3927_irq_setup(void); -struct pci_dev; -int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); - -#endif /* __ASM_TXX9_JMR3927_H */ diff --git a/arch/mips/include/asm/txx9/tx3927.h b/arch/mips/include/asm/txx9/tx3927.h deleted file mode 100644 index 149fab4f8327..000000000000 --- a/arch/mips/include/asm/txx9/tx3927.h +++ /dev/null @@ -1,341 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Toshiba Corporation - */ -#ifndef __ASM_TXX9_TX3927_H -#define __ASM_TXX9_TX3927_H - -#define TX3927_REG_BASE 0xfffe0000UL -#define TX3927_REG_SIZE 0x00010000 -#define TX3927_SDRAMC_REG (TX3927_REG_BASE + 0x8000) -#define TX3927_ROMC_REG (TX3927_REG_BASE + 0x9000) -#define TX3927_DMA_REG (TX3927_REG_BASE + 0xb000) -#define TX3927_IRC_REG (TX3927_REG_BASE + 0xc000) -#define TX3927_PCIC_REG (TX3927_REG_BASE + 0xd000) -#define TX3927_CCFG_REG (TX3927_REG_BASE + 0xe000) -#define TX3927_NR_TMR 3 -#define TX3927_TMR_REG(ch) (TX3927_REG_BASE + 0xf000 + (ch) * 0x100) -#define TX3927_NR_SIO 2 -#define TX3927_SIO_REG(ch) (TX3927_REG_BASE + 0xf300 + (ch) * 0x100) -#define TX3927_PIO_REG (TX3927_REG_BASE + 0xf500) - -struct tx3927_sdramc_reg { - volatile unsigned long cr[8]; - volatile unsigned long tr[3]; - volatile unsigned long cmd; - volatile unsigned long smrs[2]; -}; - -struct tx3927_romc_reg { - volatile unsigned long cr[8]; -}; - -struct tx3927_dma_reg { - struct tx3927_dma_ch_reg { - volatile unsigned long cha; - volatile unsigned long sar; - volatile unsigned long dar; - volatile unsigned long cntr; - volatile unsigned long sair; - volatile unsigned long dair; - volatile unsigned long ccr; - volatile unsigned long csr; - } ch[4]; - volatile unsigned long dbr[8]; - volatile unsigned long tdhr; - volatile unsigned long mcr; - volatile unsigned long unused0; -}; - -#include - -#ifdef __BIG_ENDIAN -#define endian_def_s2(e1, e2) \ - volatile unsigned short e1, e2 -#define endian_def_sb2(e1, e2, e3) \ - volatile unsigned short e1;volatile unsigned char e2, e3 -#define endian_def_b2s(e1, e2, e3) \ - volatile unsigned char e1, e2;volatile unsigned short e3 -#define endian_def_b4(e1, e2, e3, e4) \ - volatile unsigned char e1, e2, e3, e4 -#else -#define endian_def_s2(e1, e2) \ - volatile unsigned short e2, e1 -#define endian_def_sb2(e1, e2, e3) \ - volatile unsigned char e3, e2;volatile unsigned short e1 -#define endian_def_b2s(e1, e2, e3) \ - volatile unsigned short e3;volatile unsigned char e2, e1 -#define endian_def_b4(e1, e2, e3, e4) \ - volatile unsigned char e4, e3, e2, e1 -#endif - -struct tx3927_pcic_reg { - endian_def_s2(did, vid); - endian_def_s2(pcistat, pcicmd); - endian_def_b4(cc, scc, rpli, rid); - endian_def_b4(unused0, ht, mlt, cls); - volatile unsigned long ioba; /* +10 */ - volatile unsigned long mba; - volatile unsigned long unused1[5]; - endian_def_s2(svid, ssvid); - volatile unsigned long unused2; /* +30 */ - endian_def_sb2(unused3, unused4, capptr); - volatile unsigned long unused5; - endian_def_b4(ml, mg, ip, il); - volatile unsigned long unused6; /* +40 */ - volatile unsigned long istat; - volatile unsigned long iim; - volatile unsigned long rrt; - volatile unsigned long unused7[3]; /* +50 */ - volatile unsigned long ipbmma; - volatile unsigned long ipbioma; /* +60 */ - volatile unsigned long ilbmma; - volatile unsigned long ilbioma; - volatile unsigned long unused8[9]; - volatile unsigned long tc; /* +90 */ - volatile unsigned long tstat; - volatile unsigned long tim; - volatile unsigned long tccmd; - volatile unsigned long pcirrt; /* +a0 */ - volatile unsigned long pcirrt_cmd; - volatile unsigned long pcirrdt; - volatile unsigned long unused9[3]; - volatile unsigned long tlboap; - volatile unsigned long tlbiap; - volatile unsigned long tlbmma; /* +c0 */ - volatile unsigned long tlbioma; - volatile unsigned long sc_msg; - volatile unsigned long sc_be; - volatile unsigned long tbl; /* +d0 */ - volatile unsigned long unused10[3]; - volatile unsigned long pwmng; /* +e0 */ - volatile unsigned long pwmngs; - volatile unsigned long unused11[6]; - volatile unsigned long req_trace; /* +100 */ - volatile unsigned long pbapmc; - volatile unsigned long pbapms; - volatile unsigned long pbapmim; - volatile unsigned long bm; /* +110 */ - volatile unsigned long cpcibrs; - volatile unsigned long cpcibgs; - volatile unsigned long pbacs; - volatile unsigned long iobas; /* +120 */ - volatile unsigned long mbas; - volatile unsigned long lbc; - volatile unsigned long lbstat; - volatile unsigned long lbim; /* +130 */ - volatile unsigned long pcistatim; - volatile unsigned long ica; - volatile unsigned long icd; - volatile unsigned long iiadp; /* +140 */ - volatile unsigned long iscdp; - volatile unsigned long mmas; - volatile unsigned long iomas; - volatile unsigned long ipciaddr; /* +150 */ - volatile unsigned long ipcidata; - volatile unsigned long ipcibe; -}; - -struct tx3927_ccfg_reg { - volatile unsigned long ccfg; - volatile unsigned long crir; - volatile unsigned long pcfg; - volatile unsigned long tear; - volatile unsigned long pdcr; -}; - -/* - * SDRAMC - */ - -/* - * ROMC - */ - -/* - * DMA - */ -/* bits for MCR */ -#define TX3927_DMA_MCR_EIS(ch) (0x10000000<<(ch)) -#define TX3927_DMA_MCR_DIS(ch) (0x01000000<<(ch)) -#define TX3927_DMA_MCR_RSFIF 0x00000080 -#define TX3927_DMA_MCR_FIFUM(ch) (0x00000008<<(ch)) -#define TX3927_DMA_MCR_LE 0x00000004 -#define TX3927_DMA_MCR_RPRT 0x00000002 -#define TX3927_DMA_MCR_MSTEN 0x00000001 - -/* bits for CCRn */ -#define TX3927_DMA_CCR_DBINH 0x04000000 -#define TX3927_DMA_CCR_SBINH 0x02000000 -#define TX3927_DMA_CCR_CHRST 0x01000000 -#define TX3927_DMA_CCR_RVBYTE 0x00800000 -#define TX3927_DMA_CCR_ACKPOL 0x00400000 -#define TX3927_DMA_CCR_REQPL 0x00200000 -#define TX3927_DMA_CCR_EGREQ 0x00100000 -#define TX3927_DMA_CCR_CHDN 0x00080000 -#define TX3927_DMA_CCR_DNCTL 0x00060000 -#define TX3927_DMA_CCR_EXTRQ 0x00010000 -#define TX3927_DMA_CCR_INTRQD 0x0000e000 -#define TX3927_DMA_CCR_INTENE 0x00001000 -#define TX3927_DMA_CCR_INTENC 0x00000800 -#define TX3927_DMA_CCR_INTENT 0x00000400 -#define TX3927_DMA_CCR_CHNEN 0x00000200 -#define TX3927_DMA_CCR_XFACT 0x00000100 -#define TX3927_DMA_CCR_SNOP 0x00000080 -#define TX3927_DMA_CCR_DSTINC 0x00000040 -#define TX3927_DMA_CCR_SRCINC 0x00000020 -#define TX3927_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c) -#define TX3927_DMA_CCR_XFSZ_1W TX3927_DMA_CCR_XFSZ(2) -#define TX3927_DMA_CCR_XFSZ_4W TX3927_DMA_CCR_XFSZ(4) -#define TX3927_DMA_CCR_XFSZ_8W TX3927_DMA_CCR_XFSZ(5) -#define TX3927_DMA_CCR_XFSZ_16W TX3927_DMA_CCR_XFSZ(6) -#define TX3927_DMA_CCR_XFSZ_32W TX3927_DMA_CCR_XFSZ(7) -#define TX3927_DMA_CCR_MEMIO 0x00000002 -#define TX3927_DMA_CCR_ONEAD 0x00000001 - -/* bits for CSRn */ -#define TX3927_DMA_CSR_CHNACT 0x00000100 -#define TX3927_DMA_CSR_ABCHC 0x00000080 -#define TX3927_DMA_CSR_NCHNC 0x00000040 -#define TX3927_DMA_CSR_NTRNFC 0x00000020 -#define TX3927_DMA_CSR_EXTDN 0x00000010 -#define TX3927_DMA_CSR_CFERR 0x00000008 -#define TX3927_DMA_CSR_CHERR 0x00000004 -#define TX3927_DMA_CSR_DESERR 0x00000002 -#define TX3927_DMA_CSR_SORERR 0x00000001 - -/* - * IRC - */ -#define TX3927_IR_INT0 0 -#define TX3927_IR_INT1 1 -#define TX3927_IR_INT2 2 -#define TX3927_IR_INT3 3 -#define TX3927_IR_INT4 4 -#define TX3927_IR_INT5 5 -#define TX3927_IR_SIO0 6 -#define TX3927_IR_SIO1 7 -#define TX3927_IR_SIO(ch) (6 + (ch)) -#define TX3927_IR_DMA 8 -#define TX3927_IR_PIO 9 -#define TX3927_IR_PCI 10 -#define TX3927_IR_TMR(ch) (13 + (ch)) -#define TX3927_NUM_IR 16 - -/* - * PCIC - */ -/* bits for PCICMD */ -/* see PCI_COMMAND_XXX in linux/pci.h */ - -/* bits for PCISTAT */ -/* see PCI_STATUS_XXX in linux/pci.h */ -#define PCI_STATUS_NEW_CAP 0x0010 - -/* bits for ISTAT/IIM */ -#define TX3927_PCIC_IIM_ALL 0x00001600 - -/* bits for TC */ -#define TX3927_PCIC_TC_OF16E 0x00000020 -#define TX3927_PCIC_TC_IF8E 0x00000010 -#define TX3927_PCIC_TC_OF8E 0x00000008 - -/* bits for TSTAT/TIM */ -#define TX3927_PCIC_TIM_ALL 0x0003ffff - -/* bits for IOBA/MBA */ -/* see PCI_BASE_ADDRESS_XXX in linux/pci.h */ - -/* bits for PBAPMC */ -#define TX3927_PCIC_PBAPMC_RPBA 0x00000004 -#define TX3927_PCIC_PBAPMC_PBAEN 0x00000002 -#define TX3927_PCIC_PBAPMC_BMCEN 0x00000001 - -/* bits for LBSTAT/LBIM */ -#define TX3927_PCIC_LBIM_ALL 0x0000003e - -/* bits for PCISTATIM (see also PCI_STATUS_XXX in linux/pci.h */ -#define TX3927_PCIC_PCISTATIM_ALL 0x0000f900 - -/* bits for LBC */ -#define TX3927_PCIC_LBC_IBSE 0x00004000 -#define TX3927_PCIC_LBC_TIBSE 0x00002000 -#define TX3927_PCIC_LBC_TMFBSE 0x00001000 -#define TX3927_PCIC_LBC_HRST 0x00000800 -#define TX3927_PCIC_LBC_SRST 0x00000400 -#define TX3927_PCIC_LBC_EPCAD 0x00000200 -#define TX3927_PCIC_LBC_MSDSE 0x00000100 -#define TX3927_PCIC_LBC_CRR 0x00000080 -#define TX3927_PCIC_LBC_ILMDE 0x00000040 -#define TX3927_PCIC_LBC_ILIDE 0x00000020 - -#define TX3927_PCIC_IDSEL_AD_TO_SLOT(ad) ((ad) - 11) -#define TX3927_PCIC_MAX_DEVNU TX3927_PCIC_IDSEL_AD_TO_SLOT(32) - -/* - * CCFG - */ -/* CCFG : Chip Configuration */ -#define TX3927_CCFG_TLBOFF 0x00020000 -#define TX3927_CCFG_BEOW 0x00010000 -#define TX3927_CCFG_WR 0x00008000 -#define TX3927_CCFG_TOE 0x00004000 -#define TX3927_CCFG_PCIXARB 0x00002000 -#define TX3927_CCFG_PCI3 0x00001000 -#define TX3927_CCFG_PSNP 0x00000800 -#define TX3927_CCFG_PPRI 0x00000400 -#define TX3927_CCFG_PLLM 0x00000030 -#define TX3927_CCFG_ENDIAN 0x00000004 -#define TX3927_CCFG_HALT 0x00000002 -#define TX3927_CCFG_ACEHOLD 0x00000001 - -/* PCFG : Pin Configuration */ -#define TX3927_PCFG_SYSCLKEN 0x08000000 -#define TX3927_PCFG_SDRCLKEN_ALL 0x07c00000 -#define TX3927_PCFG_SDRCLKEN(ch) (0x00400000<<(ch)) -#define TX3927_PCFG_PCICLKEN_ALL 0x003c0000 -#define TX3927_PCFG_PCICLKEN(ch) (0x00040000<<(ch)) -#define TX3927_PCFG_SELALL 0x0003ffff -#define TX3927_PCFG_SELCS 0x00020000 -#define TX3927_PCFG_SELDSF 0x00010000 -#define TX3927_PCFG_SELSIOC_ALL 0x0000c000 -#define TX3927_PCFG_SELSIOC(ch) (0x00004000<<(ch)) -#define TX3927_PCFG_SELSIO_ALL 0x00003000 -#define TX3927_PCFG_SELSIO(ch) (0x00001000<<(ch)) -#define TX3927_PCFG_SELTMR_ALL 0x00000e00 -#define TX3927_PCFG_SELTMR(ch) (0x00000200<<(ch)) -#define TX3927_PCFG_SELDONE 0x00000100 -#define TX3927_PCFG_INTDMA_ALL 0x000000f0 -#define TX3927_PCFG_INTDMA(ch) (0x00000010<<(ch)) -#define TX3927_PCFG_SELDMA_ALL 0x0000000f -#define TX3927_PCFG_SELDMA(ch) (0x00000001<<(ch)) - -#define tx3927_sdramcptr ((struct tx3927_sdramc_reg *)TX3927_SDRAMC_REG) -#define tx3927_romcptr ((struct tx3927_romc_reg *)TX3927_ROMC_REG) -#define tx3927_dmaptr ((struct tx3927_dma_reg *)TX3927_DMA_REG) -#define tx3927_pcicptr ((struct tx3927_pcic_reg *)TX3927_PCIC_REG) -#define tx3927_ccfgptr ((struct tx3927_ccfg_reg *)TX3927_CCFG_REG) -#define tx3927_sioptr(ch) ((struct txx927_sio_reg *)TX3927_SIO_REG(ch)) -#define tx3927_pioptr ((struct txx9_pio_reg __iomem *)TX3927_PIO_REG) - -#define TX3927_REV_PCODE() (tx3927_ccfgptr->crir >> 16) -#define TX3927_ROMC_BA(ch) (tx3927_romcptr->cr[(ch)] & 0xfff00000) -#define TX3927_ROMC_SIZE(ch) \ - (0x00100000 << ((tx3927_romcptr->cr[(ch)] >> 8) & 0xf)) -#define TX3927_ROMC_WIDTH(ch) (32 >> ((tx3927_romcptr->cr[(ch)] >> 7) & 0x1)) - -void tx3927_wdt_init(void); -void tx3927_setup(void); -void tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr); -void tx3927_sio_init(unsigned int sclk, unsigned int cts_mask); -struct pci_controller; -void tx3927_pcic_setup(struct pci_controller *channel, - unsigned long sdram_size, int extarb); -void tx3927_setup_pcierr_irq(void); -void tx3927_irq_init(void); -void tx3927_mtd_init(int ch); - -#endif /* __ASM_TXX9_TX3927_H */ diff --git a/arch/mips/include/asm/txx9irq.h b/arch/mips/include/asm/txx9irq.h index 68a6650a4025..3875243bb56b 100644 --- a/arch/mips/include/asm/txx9irq.h +++ b/arch/mips/include/asm/txx9irq.h @@ -21,11 +21,7 @@ #endif #endif -#ifdef CONFIG_CPU_TX39XX -#define TXx9_MAX_IR 16 -#else #define TXx9_MAX_IR 32 -#endif void txx9_irq_init(unsigned long baseaddr); int txx9_irq(void); diff --git a/arch/mips/include/asm/txx9tmr.h b/arch/mips/include/asm/txx9tmr.h index 466a3def3866..a051b411368e 100644 --- a/arch/mips/include/asm/txx9tmr.h +++ b/arch/mips/include/asm/txx9tmr.h @@ -58,10 +58,6 @@ void txx9_clockevent_init(unsigned long baseaddr, int irq, unsigned int imbusclk); void txx9_tmr_init(unsigned long baseaddr); -#ifdef CONFIG_CPU_TX39XX -#define TXX9_TIMER_BITS 24 -#else #define TXX9_TIMER_BITS 32 -#endif #endif /* __ASM_TXX9TMR_H */ diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h index 0904de0b5e09..1c33922eb945 100644 --- a/arch/mips/include/asm/vermagic.h +++ b/arch/mips/include/asm/vermagic.h @@ -22,8 +22,6 @@ #define MODULE_PROC_FAMILY "MIPS64_R6 " #elif defined CONFIG_CPU_R3000 #define MODULE_PROC_FAMILY "R3000 " -#elif defined CONFIG_CPU_TX39XX -#define MODULE_PROC_FAMILY "TX39XX " #elif defined CONFIG_CPU_VR41XX #define MODULE_PROC_FAMILY "VR41XX " #elif defined CONFIG_CPU_R4300 diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 814b3da30501..7c96282bff2e 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o sw-y := r4k_switch.o sw-$(CONFIG_CPU_R3000) := r2300_switch.o -sw-$(CONFIG_CPU_TX39XX) := r2300_switch.o sw-$(CONFIG_CPU_CAVIUM_OCTEON) := octeon_switch.o obj-y += $(sw-y) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 24a529c6c4be..f0ea92937546 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1189,29 +1189,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 48; break; #endif - case PRID_IMP_TX39: - c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS; - c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE; - - if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) { - c->cputype = CPU_TX3927; - __cpu_name[cpu] = "TX3927"; - c->tlbsize = 64; - } else { - switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_TX3912: - c->cputype = CPU_TX3912; - __cpu_name[cpu] = "TX3912"; - c->tlbsize = 32; - break; - case PRID_REV_TX3922: - c->cputype = CPU_TX3922; - __cpu_name[cpu] = "TX3922"; - c->tlbsize = 64; - break; - } - } - break; case PRID_IMP_R4700: c->cputype = CPU_R4700; __cpu_name[cpu] = "R4700"; diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index af654771918c..be93469c0e0e 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c @@ -118,28 +118,6 @@ void cpu_probe(void) c->options |= MIPS_CPU_FPU; c->tlbsize = 64; break; - case PRID_COMP_LEGACY | PRID_IMP_TX39: - c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE; - - if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) { - c->cputype = CPU_TX3927; - __cpu_name[cpu] = "TX3927"; - c->tlbsize = 64; - } else { - switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_TX3912: - c->cputype = CPU_TX3912; - __cpu_name[cpu] = "TX3912"; - c->tlbsize = 32; - break; - case PRID_REV_TX3922: - c->cputype = CPU_TX3922; - __cpu_name[cpu] = "TX3922"; - c->tlbsize = 64; - break; - } - } - break; } BUG_ON(!__cpu_name[cpu]); diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index d8ca173680f9..891393626dc6 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -100,7 +100,7 @@ restore_partial: # restore partial frame SAVE_AT SAVE_TEMP LONG_L v0, PT_STATUS(sp) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) and v0, ST0_IEP #else and v0, ST0_IE diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index fc53ea2cf850..3425df6019c0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -162,7 +162,7 @@ NESTED(handle_int, PT_SIZE, sp) .set push .set noat mfc0 k0, CP0_STATUS -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) and k0, ST0_IEP bnez k0, 1f @@ -644,7 +644,7 @@ isrdhwr: get_saved_sp /* k1 := current_thread_info */ .set noreorder MFC0 k0, CP0_EPC -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) ori k1, _THREAD_MASK xori k1, _THREAD_MASK LONG_L v1, TI_TP_VALUE(k1) diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index c81b3a039470..146d9fa77f75 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -36,13 +36,6 @@ static void __cpuidle r3081_wait(void) raw_local_irq_enable(); } -static void __cpuidle r39xx_wait(void) -{ - if (!need_resched()) - write_c0_conf(read_c0_conf() | TX39_CONF_HALT); - raw_local_irq_enable(); -} - void __cpuidle r4k_wait(void) { raw_local_irq_enable(); @@ -147,9 +140,6 @@ void __init check_wait(void) case CPU_R3081E: cpu_wait = r3081_wait; break; - case CPU_TX3927: - cpu_wait = r39xx_wait; - break; case CPU_R4200: /* case CPU_R4300: */ case CPU_R4600: diff --git a/arch/mips/kernel/irq_txx9.c b/arch/mips/kernel/irq_txx9.c index ab00e490482f..af3ef4c9f7de 100644 --- a/arch/mips/kernel/irq_txx9.c +++ b/arch/mips/kernel/irq_txx9.c @@ -72,11 +72,6 @@ static void txx9_irq_unmask(struct irq_data *d) __raw_writel((__raw_readl(ilrp) & ~(0xff << ofs)) | (txx9irq[irq_nr].level << ofs), ilrp); -#ifdef CONFIG_CPU_TX39XX - /* update IRCSR */ - __raw_writel(0, &txx9_ircptr->imr); - __raw_writel(irc_elevel, &txx9_ircptr->imr); -#endif } static inline void txx9_irq_mask(struct irq_data *d) @@ -88,15 +83,7 @@ static inline void txx9_irq_mask(struct irq_data *d) __raw_writel((__raw_readl(ilrp) & ~(0xff << ofs)) | (irc_dlevel << ofs), ilrp); -#ifdef CONFIG_CPU_TX39XX - /* update IRCSR */ - __raw_writel(0, &txx9_ircptr->imr); - __raw_writel(irc_elevel, &txx9_ircptr->imr); - /* flush write buffer */ - __raw_readl(&txx9_ircptr->ssr); -#else mmiowb(); -#endif } static void txx9_irq_mask_ack(struct irq_data *d) diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 9f47a889b047..bb43bf850314 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -181,8 +181,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, " 3k_cache"); if (cpu_has_4k_cache) seq_puts(m, " 4k_cache"); - if (cpu_has_tx39_cache) - seq_puts(m, " tx39_cache"); if (cpu_has_octeon_cache) seq_puts(m, " octeon_cache"); if (raw_cpu_has_fpu) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index cbff1b974f88..c2d5f4bfe1f3 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -128,7 +128,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg17 = kthread_arg; p->thread.reg29 = childksp; p->thread.reg31 = (unsigned long) ret_from_kernel_thread; -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) | ((status & (ST0_KUC | ST0_IEC)) << 2); #else diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 479f50559c83..5d5b993cbc2b 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -13,7 +13,6 @@ lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o -obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o # libgcc-style stuff needed in the kernel obj-y += bswapsi.o bswapdi.o multi3.o diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 10b4bf7f70a3..fcf594af0002 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -14,15 +14,11 @@ #include #include -extern int r3k_have_wired_reg; - void dump_tlb_regs(void) { pr_info("Index : %0x\n", read_c0_index()); pr_info("EntryHi : %0lx\n", read_c0_entryhi()); pr_info("EntryLo : %0lx\n", read_c0_entrylo0()); - if (r3k_have_wired_reg) - pr_info("Wired : %0x\n", read_c0_wired()); } static void dump_tlb(int first, int last) diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4acc4f3d31f8..304692391519 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -36,7 +36,6 @@ obj-$(CONFIG_CPU_R3K_TLB) += tlb-r3k.o obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o obj-$(CONFIG_CPU_R3000) += c-r3k.o obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o -obj-$(CONFIG_CPU_TX39XX) += c-tx39.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c deleted file mode 100644 index 03dfbb40ec73..000000000000 --- a/arch/mips/mm/c-tx39.c +++ /dev/null @@ -1,414 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * r2300.c: R2000 and R3000 specific mmu/cache code. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * - * with a lot of changes to make this thing work for R3000s - * Tx39XX R4k style caches added. HK - * Copyright (C) 1998, 1999, 2000 Harald Koerfgen - * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* For R3000 cores with R4000 style caches */ -static unsigned long icache_size, dcache_size; /* Size in bytes */ - -#include - -/* This sequence is required to ensure icache is disabled immediately */ -#define TX39_STOP_STREAMING() \ -__asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - "b 1f\n\t" \ - "nop\n\t" \ - "1:\n\t" \ - ".set pop" \ - ) - -/* TX39H-style cache flush routines. */ -static void tx39h_flush_icache_all(void) -{ - unsigned long flags, config; - - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - /* Catch bad driver code */ - BUG_ON(size == 0); - - iob(); - blast_inv_dcache_range(addr, addr + size); -} - - -/* TX39H2,TX39H3 */ -static inline void tx39_blast_dcache_page(unsigned long addr) -{ - if (current_cpu_type() != CPU_TX3912) - blast_dcache16_page(addr); -} - -static inline void tx39_blast_dcache_page_indexed(unsigned long addr) -{ - blast_dcache16_page_indexed(addr); -} - -static inline void tx39_blast_dcache(void) -{ - blast_dcache16(); -} - -static inline void tx39_blast_icache_page(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache_page_indexed(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page_indexed(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache(void) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39__flush_cache_vmap(void) -{ - tx39_blast_dcache(); -} - -static void tx39__flush_cache_vunmap(void) -{ - tx39_blast_dcache(); -} - -static inline void tx39_flush_cache_all(void) -{ - if (!cpu_has_dc_aliases) - return; - - tx39_blast_dcache(); -} - -static inline void tx39___flush_cache_all(void) -{ - tx39_blast_dcache(); - tx39_blast_icache(); -} - -static void tx39_flush_cache_mm(struct mm_struct *mm) -{ - if (!cpu_has_dc_aliases) - return; - - if (cpu_context(smp_processor_id(), mm) != 0) - tx39_blast_dcache(); -} - -static void tx39_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (!cpu_has_dc_aliases) - return; - if (!(cpu_context(smp_processor_id(), vma->vm_mm))) - return; - - tx39_blast_dcache(); -} - -static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) -{ - int exec = vma->vm_flags & VM_EXEC; - struct mm_struct *mm = vma->vm_mm; - pmd_t *pmdp; - pte_t *ptep; - - /* - * If ownes no valid ASID yet, cannot possibly have gotten - * this page into the cache. - */ - if (cpu_context(smp_processor_id(), mm) == 0) - return; - - page &= PAGE_MASK; - pmdp = pmd_off(mm, page); - ptep = pte_offset_kernel(pmdp, page); - - /* - * If the page isn't marked valid, the page cannot possibly be - * in the cache. - */ - if (!(pte_val(*ptep) & _PAGE_PRESENT)) - return; - - /* - * Doing flushes for another ASID than the current one is - * too difficult since stupid R4k caches do a TLB translation - * for every cache flush operation. So we do indexed flushes - * in that case, which doesn't overly flush the cache too much. - */ - if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page(page); - if (exec) - tx39_blast_icache_page(page); - - return; - } - - /* - * Do indexed flush, too much work to get the (possible) TLB refills - * to work correctly. - */ - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page_indexed(page); - if (exec) - tx39_blast_icache_page_indexed(page); -} - -static void local_tx39_flush_data_cache_page(void * addr) -{ - tx39_blast_dcache_page((unsigned long)addr); -} - -static void tx39_flush_data_cache_page(unsigned long addr) -{ - tx39_blast_dcache_page(addr); -} - -static void tx39_flush_icache_range(unsigned long start, unsigned long end) -{ - if (end - start > dcache_size) - tx39_blast_dcache(); - else - protected_blast_dcache_range(start, end); - - if (end - start > icache_size) - tx39_blast_icache(); - else { - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - protected_blast_icache_range(start, end); - write_c0_conf(config); - local_irq_restore(flags); - } -} - -static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size) -{ - BUG(); -} - -static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_dcache_range(addr, addr + size); - } -} - -static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_inv_dcache_range(addr, addr + size); - } -} - -static __init void tx39_probe_cache(void) -{ - unsigned long config; - - config = read_c0_conf(); - - icache_size = 1 << (10 + ((config & TX39_CONF_ICS_MASK) >> - TX39_CONF_ICS_SHIFT)); - dcache_size = 1 << (10 + ((config & TX39_CONF_DCS_MASK) >> - TX39_CONF_DCS_SHIFT)); - - current_cpu_data.icache.linesz = 16; - switch (current_cpu_type()) { - case CPU_TX3912: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 4; - break; - - case CPU_TX3927: - current_cpu_data.icache.ways = 2; - current_cpu_data.dcache.ways = 2; - current_cpu_data.dcache.linesz = 16; - break; - - case CPU_TX3922: - default: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 16; - break; - } -} - -void tx39_cache_init(void) -{ - extern void build_clear_page(void); - extern void build_copy_page(void); - unsigned long config; - - config = read_c0_conf(); - config &= ~TX39_CONF_WBON; - write_c0_conf(config); - - tx39_probe_cache(); - - switch (current_cpu_type()) { - case CPU_TX3912: - /* TX39/H core (writethru direct-map cache) */ - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - flush_cache_all = tx39h_flush_icache_all; - __flush_cache_all = tx39h_flush_icache_all; - flush_cache_mm = (void *) tx39h_flush_icache_all; - flush_cache_range = (void *) tx39h_flush_icache_all; - flush_cache_page = (void *) tx39h_flush_icache_all; - flush_icache_range = (void *) tx39h_flush_icache_all; - local_flush_icache_range = (void *) tx39h_flush_icache_all; - - local_flush_data_cache_page = (void *) tx39h_flush_icache_all; - flush_data_cache_page = (void *) tx39h_flush_icache_all; - - _dma_cache_wback_inv = tx39h_dma_cache_wback_inv; - - shm_align_mask = PAGE_SIZE - 1; - - break; - - case CPU_TX3922: - case CPU_TX3927: - default: - /* TX39/H2,H3 core (writeback 2way-set-associative cache) */ - /* board-dependent init code may set WBON */ - - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - - flush_cache_all = tx39_flush_cache_all; - __flush_cache_all = tx39___flush_cache_all; - flush_cache_mm = tx39_flush_cache_mm; - flush_cache_range = tx39_flush_cache_range; - flush_cache_page = tx39_flush_cache_page; - flush_icache_range = tx39_flush_icache_range; - local_flush_icache_range = tx39_flush_icache_range; - - __flush_kernel_vmap_range = tx39_flush_kernel_vmap_range; - - local_flush_data_cache_page = local_tx39_flush_data_cache_page; - flush_data_cache_page = tx39_flush_data_cache_page; - - _dma_cache_wback_inv = tx39_dma_cache_wback_inv; - _dma_cache_wback = tx39_dma_cache_wback_inv; - _dma_cache_inv = tx39_dma_cache_inv; - - shm_align_mask = max_t(unsigned long, - (dcache_size / current_cpu_data.dcache.ways) - 1, - PAGE_SIZE - 1); - - break; - } - - __flush_icache_user_range = flush_icache_range; - __local_flush_icache_user_range = local_flush_icache_range; - - current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways; - current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways; - - current_cpu_data.icache.sets = - current_cpu_data.icache.waysize / current_cpu_data.icache.linesz; - current_cpu_data.dcache.sets = - current_cpu_data.dcache.waysize / current_cpu_data.dcache.linesz; - - if (current_cpu_data.dcache.waysize > PAGE_SIZE) - current_cpu_data.dcache.flags |= MIPS_CACHE_ALIASES; - - current_cpu_data.icache.waybit = 0; - current_cpu_data.dcache.waybit = 0; - - pr_info("Primary instruction cache %ldkB, linesize %d bytes\n", - icache_size >> 10, current_cpu_data.icache.linesz); - pr_info("Primary data cache %ldkB, linesize %d bytes\n", - dcache_size >> 10, current_cpu_data.dcache.linesz); - - build_clear_page(); - build_copy_page(); - tx39h_flush_icache_all(); -} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 830ab91e574f..7be7240f7703 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -195,11 +195,6 @@ void cpu_cache_init(void) r4k_cache_init(); } - if (cpu_has_tx39_cache) { - extern void __weak tx39_cache_init(void); - - tx39_cache_init(); - } if (cpu_has_octeon_cache) { extern void __weak octeon_cache_init(void); diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index a36622ebea55..53dfa2b9316b 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -36,8 +36,6 @@ extern void build_tlb_refill_handler(void); "nop\n\t" \ ".set pop\n\t") -int r3k_have_wired_reg; /* Should be in cpu_data? */ - /* TLB operations. */ static void local_flush_tlb_from(int entry) { @@ -62,7 +60,7 @@ void local_flush_tlb_all(void) printk("[tlball]"); #endif local_irq_save(flags); - local_flush_tlb_from(r3k_have_wired_reg ? read_c0_wired() : 8); + local_flush_tlb_from(8); local_irq_restore(flags); } @@ -224,34 +222,7 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long old_ctx; static unsigned long wired = 0; - if (r3k_have_wired_reg) { /* TX39XX */ - unsigned long old_pagemask; - unsigned long w; - -#ifdef DEBUG_TLB - printk("[tlbwired]\n", - entrylo0, entryhi, pagemask); -#endif - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi() & asid_mask; - old_pagemask = read_c0_pagemask(); - w = read_c0_wired(); - write_c0_wired(w + 1); - write_c0_index(w << 8); - write_c0_pagemask(pagemask); - write_c0_entryhi(entryhi); - write_c0_entrylo0(entrylo0); - BARRIER; - tlb_write_indexed(); - - write_c0_entryhi(old_ctx); - write_c0_pagemask(old_pagemask); - local_flush_tlb_all(); - local_irq_restore(flags); - - } else if (wired < 8) { + if (wired < 8) { #ifdef DEBUG_TLB printk("[tlbwired]\n", entrylo0, entryhi); @@ -272,13 +243,6 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, void tlb_init(void) { - switch (current_cpu_type()) { - case CPU_TX3922: - case CPU_TX3927: - r3k_have_wired_reg = 1; - write_c0_wired(0); /* Set to 8 on reset... */ - break; - } local_flush_tlb_from(0); build_tlb_refill_handler(); } diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 9a6bc702608c..ed0388485a15 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o obj-$(CONFIG_PCI_GT64XXX_PCI0) += ops-gt64xxx_pci0.o obj-$(CONFIG_MIPS_MSC) += ops-msc.o -obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o @@ -46,7 +45,6 @@ obj-$(CONFIG_SOC_RT3883) += pci-rt3883.o obj-$(CONFIG_TANBAC_TB0219) += fixup-tb0219.o obj-$(CONFIG_TANBAC_TB0226) += fixup-tb0226.o obj-$(CONFIG_TANBAC_TB0287) += fixup-tb0287.o -obj-$(CONFIG_TOSHIBA_JMR3927) += fixup-jmr3927.o obj-$(CONFIG_SOC_TX4927) += pci-tx4927.o obj-$(CONFIG_SOC_TX4938) += pci-tx4938.o obj-$(CONFIG_TOSHIBA_RBTX4927) += fixup-rbtx4927.o diff --git a/arch/mips/pci/fixup-jmr3927.c b/arch/mips/pci/fixup-jmr3927.c deleted file mode 100644 index d3102eeea898..000000000000 --- a/arch/mips/pci/fixup-jmr3927.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * Board specific pci fixups. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include - -int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - unsigned char irq = pin; - - /* IRQ rotation (PICMG) */ - irq--; /* 0-3 */ - if (slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(23)) { - /* PCI CardSlot (IDSEL=A23, DevNu=12) */ - /* PCIA => PCIC (IDSEL=A23) */ - /* NOTE: JMR3927 JP1 must be set to OPEN */ - irq = (irq + 2) % 4; - } else if (slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(22)) { - /* PCI CardSlot (IDSEL=A22, DevNu=11) */ - /* PCIA => PCIA (IDSEL=A22) */ - /* NOTE: JMR3927 JP1 must be set to OPEN */ - irq = (irq + 0) % 4; - } else { - /* PCI Backplane */ - if (txx9_pci_option & TXX9_PCI_OPT_PICMG) - irq = (irq + 33 - slot) % 4; - else - irq = (irq + 3 + slot) % 4; - } - irq++; /* 1-4 */ - - switch (irq) { - case 1: - irq = JMR3927_IRQ_IOC_PCIA; - break; - case 2: - irq = JMR3927_IRQ_IOC_PCIB; - break; - case 3: - irq = JMR3927_IRQ_IOC_PCIC; - break; - case 4: - irq = JMR3927_IRQ_IOC_PCID; - break; - } - - /* Check OnBoard Ethernet (IDSEL=A24, DevNu=13) */ - if (dev->bus->parent == NULL && - slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(24)) - irq = JMR3927_IRQ_ETHER0; - return irq; -} diff --git a/arch/mips/pci/ops-tx3927.c b/arch/mips/pci/ops-tx3927.c deleted file mode 100644 index d35dc9c9ab9d..000000000000 --- a/arch/mips/pci/ops-tx3927.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2004 by Ralf Baechle (ralf@linux-mips.org) - * - * Based on arch/mips/ddb5xxx/ddb5477/pci_ops.c - * - * Define the pci_ops for TX3927. - * - * Much of the code is derived from the original DDB5074 port by - * Geert Uytterhoeven - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static int mkaddr(struct pci_bus *bus, unsigned char devfn, unsigned char where) -{ - if (bus->parent == NULL && - devfn >= PCI_DEVFN(TX3927_PCIC_MAX_DEVNU, 0)) - return -1; - tx3927_pcicptr->ica = - ((bus->number & 0xff) << 0x10) | - ((devfn & 0xff) << 0x08) | - (where & 0xfc) | (bus->parent ? 1 : 0); - - /* clear M_ABORT and Disable M_ABORT Int. */ - tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; - tx3927_pcicptr->pcistatim &= ~PCI_STATUS_REC_MASTER_ABORT; - return 0; -} - -static inline int check_abort(void) -{ - if (tx3927_pcicptr->pcistat & PCI_STATUS_REC_MASTER_ABORT) { - tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; - tx3927_pcicptr->pcistatim |= PCI_STATUS_REC_MASTER_ABORT; - /* flush write buffer */ - iob(); - return PCIBIOS_DEVICE_NOT_FOUND; - } - return PCIBIOS_SUCCESSFUL; -} - -static int tx3927_pci_read_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 * val) -{ - if (mkaddr(bus, devfn, where)) { - *val = 0xffffffff; - return PCIBIOS_DEVICE_NOT_FOUND; - } - - switch (size) { - case 1: - *val = *(volatile u8 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3)); - break; - - case 2: - *val = le16_to_cpu(*(volatile u16 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3))); - break; - - case 4: - *val = le32_to_cpu(tx3927_pcicptr->icd); - break; - } - - return check_abort(); -} - -static int tx3927_pci_write_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - if (mkaddr(bus, devfn, where)) - return PCIBIOS_DEVICE_NOT_FOUND; - - switch (size) { - case 1: - *(volatile u8 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3)) = val; - break; - - case 2: - *(volatile u16 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 2)) = - cpu_to_le16(val); - break; - - case 4: - tx3927_pcicptr->icd = cpu_to_le32(val); - } - - return check_abort(); -} - -static struct pci_ops tx3927_pci_ops = { - .read = tx3927_pci_read_config, - .write = tx3927_pci_write_config, -}; - -void __init tx3927_pcic_setup(struct pci_controller *channel, - unsigned long sdram_size, int extarb) -{ - unsigned long flags; - unsigned long io_base = - channel->io_resource->start + mips_io_port_base - IO_BASE; - unsigned long io_size = - channel->io_resource->end - channel->io_resource->start; - unsigned long io_pciaddr = - channel->io_resource->start - channel->io_offset; - unsigned long mem_base = - channel->mem_resource->start; - unsigned long mem_size = - channel->mem_resource->end - channel->mem_resource->start; - unsigned long mem_pciaddr = - channel->mem_resource->start - channel->mem_offset; - - printk(KERN_INFO "TX3927 PCIC -- DID:%04x VID:%04x RID:%02x Arbiter:%s", - tx3927_pcicptr->did, tx3927_pcicptr->vid, - tx3927_pcicptr->rid, - extarb ? "External" : "Internal"); - channel->pci_ops = &tx3927_pci_ops; - - local_irq_save(flags); - /* Disable External PCI Config. Access */ - tx3927_pcicptr->lbc = TX3927_PCIC_LBC_EPCAD; -#ifdef __BIG_ENDIAN - tx3927_pcicptr->lbc |= TX3927_PCIC_LBC_IBSE | - TX3927_PCIC_LBC_TIBSE | - TX3927_PCIC_LBC_TMFBSE | TX3927_PCIC_LBC_MSDSE; -#endif - /* LB->PCI mappings */ - tx3927_pcicptr->iomas = ~(io_size - 1); - tx3927_pcicptr->ilbioma = io_base; - tx3927_pcicptr->ipbioma = io_pciaddr; - tx3927_pcicptr->mmas = ~(mem_size - 1); - tx3927_pcicptr->ilbmma = mem_base; - tx3927_pcicptr->ipbmma = mem_pciaddr; - /* PCI->LB mappings */ - tx3927_pcicptr->iobas = 0xffffffff; - tx3927_pcicptr->ioba = 0; - tx3927_pcicptr->tlbioma = 0; - tx3927_pcicptr->mbas = ~(sdram_size - 1); - tx3927_pcicptr->mba = 0; - tx3927_pcicptr->tlbmma = 0; - /* Enable Direct mapping Address Space Decoder */ - tx3927_pcicptr->lbc |= TX3927_PCIC_LBC_ILMDE | TX3927_PCIC_LBC_ILIDE; - - /* Clear All Local Bus Status */ - tx3927_pcicptr->lbstat = TX3927_PCIC_LBIM_ALL; - /* Enable All Local Bus Interrupts */ - tx3927_pcicptr->lbim = TX3927_PCIC_LBIM_ALL; - /* Clear All PCI Status Error */ - tx3927_pcicptr->pcistat = TX3927_PCIC_PCISTATIM_ALL; - /* Enable All PCI Status Error Interrupts */ - tx3927_pcicptr->pcistatim = TX3927_PCIC_PCISTATIM_ALL; - - /* PCIC Int => IRC IRQ10 */ - tx3927_pcicptr->il = TX3927_IR_PCI; - /* Target Control (per errata) */ - tx3927_pcicptr->tc = TX3927_PCIC_TC_OF8E | TX3927_PCIC_TC_IF8E; - - /* Enable Bus Arbiter */ - if (!extarb) - tx3927_pcicptr->pbapmc = TX3927_PCIC_PBAPMC_PBAEN; - - tx3927_pcicptr->pcicmd = PCI_COMMAND_MASTER | - PCI_COMMAND_MEMORY | - PCI_COMMAND_IO | - PCI_COMMAND_PARITY | PCI_COMMAND_SERR; - local_irq_restore(flags); -} - -static irqreturn_t tx3927_pcierr_interrupt(int irq, void *dev_id) -{ - struct pt_regs *regs = get_irq_regs(); - - if (txx9_pci_err_action != TXX9_PCI_ERR_IGNORE) { - printk(KERN_WARNING "PCI error interrupt at 0x%08lx.\n", - regs->cp0_epc); - printk(KERN_WARNING "pcistat:%02x, lbstat:%04lx\n", - tx3927_pcicptr->pcistat, tx3927_pcicptr->lbstat); - } - if (txx9_pci_err_action != TXX9_PCI_ERR_PANIC) { - /* clear all pci errors */ - tx3927_pcicptr->pcistat |= TX3927_PCIC_PCISTATIM_ALL; - tx3927_pcicptr->istat = TX3927_PCIC_IIM_ALL; - tx3927_pcicptr->tstat = TX3927_PCIC_TIM_ALL; - tx3927_pcicptr->lbstat = TX3927_PCIC_LBIM_ALL; - return IRQ_HANDLED; - } - console_verbose(); - panic("PCI error."); -} - -void __init tx3927_setup_pcierr_irq(void) -{ - if (request_irq(TXX9_IRQ_BASE + TX3927_IR_PCI, - tx3927_pcierr_interrupt, - 0, "PCI error", - (void *)TX3927_PCIC_REG)) - printk(KERN_WARNING "Failed to request irq for PCIERR\n"); -} diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 6c61feee6dd3..7335efa4d528 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -config MACH_TX39XX - bool - select MACH_TXX9 - select SYS_HAS_CPU_TX39XX - config MACH_TX49XX bool select BOOT_ELF32 @@ -24,11 +19,6 @@ config MACH_TXX9 select SYS_SUPPORTS_BIG_ENDIAN select COMMON_CLK -config TOSHIBA_JMR3927 - bool "Toshiba JMR-TX3927 board" - depends on MACH_TX39XX - select SOC_TX3927 - config TOSHIBA_RBTX4927 bool "Toshiba RBTX49[23]7 board" depends on MACH_TX49XX @@ -39,14 +29,6 @@ config TOSHIBA_RBTX4927 This Toshiba board is based on the TX4927 processor. Say Y here to support this machine type -config SOC_TX3927 - bool - select CEVT_TXX9 - imply HAS_TXX9_SERIAL - select HAVE_PCI - select IRQ_TXX9 - select GPIO_TXX9 - config SOC_TX4927 bool select CEVT_TXX9 diff --git a/arch/mips/txx9/Makefile b/arch/mips/txx9/Makefile index 53269910a48b..14c91f2678a3 100644 --- a/arch/mips/txx9/Makefile +++ b/arch/mips/txx9/Makefile @@ -2,14 +2,8 @@ # # Common TXx9 # -obj-$(CONFIG_MACH_TX39XX) += generic/ obj-$(CONFIG_MACH_TX49XX) += generic/ -# -# Toshiba JMR-TX3927 board -# -obj-$(CONFIG_TOSHIBA_JMR3927) += jmr3927/ - # # Toshiba RBTX49XX boards # diff --git a/arch/mips/txx9/Platform b/arch/mips/txx9/Platform index 7f4429ba22eb..e5a295068b3e 100644 --- a/arch/mips/txx9/Platform +++ b/arch/mips/txx9/Platform @@ -1,7 +1,4 @@ -cflags-$(CONFIG_MACH_TX39XX) += \ - -I$(srctree)/arch/mips/include/asm/mach-tx39xx cflags-$(CONFIG_MACH_TX49XX) += \ -I$(srctree)/arch/mips/include/asm/mach-tx49xx -load-$(CONFIG_MACH_TX39XX) += 0xffffffff80050000 load-$(CONFIG_MACH_TX49XX) += 0xffffffff80100000 diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index be5af9fe7c11..3c155c7e2be8 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -5,7 +5,6 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_SOC_TX3927) += setup_tx3927.o irq_tx3927.o obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o setup_tx4927.o irq_tx4927.o obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o setup_tx4938.o irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o diff --git a/arch/mips/txx9/generic/irq_tx3927.c b/arch/mips/txx9/generic/irq_tx3927.c deleted file mode 100644 index c683f593eda2..000000000000 --- a/arch/mips/txx9/generic/irq_tx3927.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Common tx3927 irq handler - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright 2001 MontaVista Software Inc. - * Copyright (C) 2000-2001 Toshiba Corporation - */ -#include -#include -#include - -void __init tx3927_irq_init(void) -{ - int i; - - txx9_irq_init(TX3927_IRC_REG); - /* raise priority for timers, sio */ - for (i = 0; i < TX3927_NR_TMR; i++) - txx9_irq_set_pri(TX3927_IR_TMR(i), 6); - for (i = 0; i < TX3927_NR_SIO; i++) - txx9_irq_set_pri(TX3927_IR_SIO(i), 7); -} diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 39cd1edf9d80..b098a3c76ae9 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -78,12 +78,7 @@ unsigned int txx9_master_clock; unsigned int txx9_cpu_clock; unsigned int txx9_gbus_clock; -#ifdef CONFIG_CPU_TX39XX -/* don't enable by default - see errata */ -int txx9_ccfg_toeon __initdata; -#else int txx9_ccfg_toeon __initdata = 1; -#endif #define BOARD_VEC(board) extern struct txx9_board_vec board; #include @@ -194,53 +189,6 @@ static void __init txx9_cache_fixup(void) if (conf & TX49_CONF_DC) pr_info("TX49XX D-Cache disabled.\n"); } -#elif defined(CONFIG_CPU_TX39XX) -/* flush all cache on very early stage (before tx39_cache_init) */ -static void __init early_flush_dcache(void) -{ - unsigned int conf = read_c0_config(); - unsigned int dc_size = 1 << (10 + ((conf & TX39_CONF_DCS_MASK) >> - TX39_CONF_DCS_SHIFT)); - unsigned int linesz = 16; - unsigned long addr, end; - - end = INDEX_BASE + dc_size / 2; - /* 2way, waybit=0 */ - for (addr = INDEX_BASE; addr < end; addr += linesz) { - cache_op(Index_Writeback_Inv_D, addr | 0); - cache_op(Index_Writeback_Inv_D, addr | 1); - } -} - -static void __init txx9_cache_fixup(void) -{ - unsigned int conf; - - conf = read_c0_config(); - /* flush and disable */ - if (txx9_ic_disable) { - conf &= ~TX39_CONF_ICE; - write_c0_config(conf); - } - if (txx9_dc_disable) { - early_flush_dcache(); - conf &= ~TX39_CONF_DCE; - write_c0_config(conf); - } - - /* enable cache */ - conf = read_c0_config(); - if (!txx9_ic_disable) - conf |= TX39_CONF_ICE; - if (!txx9_dc_disable) - conf |= TX39_CONF_DCE; - write_c0_config(conf); - - if (!(conf & TX39_CONF_ICE)) - pr_info("TX39XX I-Cache disabled.\n"); - if (!(conf & TX39_CONF_DCE)) - pr_info("TX39XX D-Cache disabled.\n"); -} #else static inline void txx9_cache_fixup(void) { @@ -302,9 +250,6 @@ static void __init select_board(void) } /* select "default" board */ -#ifdef CONFIG_TOSHIBA_JMR3927 - txx9_board_vec = &jmr3927_vec; -#endif #ifdef CONFIG_CPU_TX49XX switch (TX4938_REV_PCODE()) { #ifdef CONFIG_TOSHIBA_RBTX4927 diff --git a/arch/mips/txx9/generic/setup_tx3927.c b/arch/mips/txx9/generic/setup_tx3927.c deleted file mode 100644 index 33f7a7253963..000000000000 --- a/arch/mips/txx9/generic/setup_tx3927.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * TX3927 setup routines - * Based on linux/arch/mips/txx9/jmr3927/setup.c - * - * Copyright 2001 MontaVista Software Inc. - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -void __init tx3927_wdt_init(void) -{ - txx9_wdt_init(TX3927_TMR_REG(2)); -} - -void __init tx3927_setup(void) -{ - int i; - unsigned int conf; - - txx9_reg_res_init(TX3927_REV_PCODE(), TX3927_REG_BASE, - TX3927_REG_SIZE); - - /* SDRAMC,ROMC are configured by PROM */ - for (i = 0; i < 8; i++) { - if (!(tx3927_romcptr->cr[i] & 0x8)) - continue; /* disabled */ - txx9_ce_res[i].start = (unsigned long)TX3927_ROMC_BA(i); - txx9_ce_res[i].end = - txx9_ce_res[i].start + TX3927_ROMC_SIZE(i) - 1; - request_resource(&iomem_resource, &txx9_ce_res[i]); - } - - /* clocks */ - txx9_gbus_clock = txx9_cpu_clock / 2; - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - /* CCFG */ - /* enable Timeout BusError */ - if (txx9_ccfg_toeon) - tx3927_ccfgptr->ccfg |= TX3927_CCFG_TOE; - - /* clear BusErrorOnWrite flag */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_BEOW; - if (read_c0_conf() & TX39_CONF_WBON) - /* Disable PCI snoop */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_PSNP; - else - /* Enable PCI SNOOP - with write through only */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_PSNP; - /* do reset on watchdog */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_WR; - - pr_info("TX3927 -- CRIR:%08lx CCFG:%08lx PCFG:%08lx\n", - tx3927_ccfgptr->crir, tx3927_ccfgptr->ccfg, - tx3927_ccfgptr->pcfg); - - /* TMR */ - for (i = 0; i < TX3927_NR_TMR; i++) - txx9_tmr_init(TX3927_TMR_REG(i)); - - /* DMA */ - tx3927_dmaptr->mcr = 0; - for (i = 0; i < ARRAY_SIZE(tx3927_dmaptr->ch); i++) { - /* reset channel */ - tx3927_dmaptr->ch[i].ccr = TX3927_DMA_CCR_CHRST; - tx3927_dmaptr->ch[i].ccr = 0; - } - /* enable DMA */ -#ifdef __BIG_ENDIAN - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN; -#else - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN | TX3927_DMA_MCR_LE; -#endif - - /* PIO */ - __raw_writel(0, &tx3927_pioptr->maskcpu); - __raw_writel(0, &tx3927_pioptr->maskext); - - conf = read_c0_conf(); - if (conf & TX39_CONF_DCE) { - if (!(conf & TX39_CONF_WBON)) - pr_info("TX3927 D-Cache WriteThrough.\n"); - else if (!(conf & TX39_CONF_CWFON)) - pr_info("TX3927 D-Cache WriteBack.\n"); - else - pr_info("TX3927 D-Cache WriteBack (CWF) .\n"); - } -} - -void __init tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr) -{ - txx9_clockevent_init(TX3927_TMR_REG(evt_tmrnr), - TXX9_IRQ_BASE + TX3927_IR_TMR(evt_tmrnr), - TXX9_IMCLK); - txx9_clocksource_init(TX3927_TMR_REG(src_tmrnr), TXX9_IMCLK); -} - -void __init tx3927_sio_init(unsigned int sclk, unsigned int cts_mask) -{ - int i; - - for (i = 0; i < 2; i++) - txx9_sio_init(TX3927_SIO_REG(i), - TXX9_IRQ_BASE + TX3927_IR_SIO(i), - i, sclk, (1 << i) & cts_mask); -} - -void __init tx3927_mtd_init(int ch) -{ - struct physmap_flash_data pdata = { - .width = TX3927_ROMC_WIDTH(ch) / 8, - }; - unsigned long start = txx9_ce_res[ch].start; - unsigned long size = txx9_ce_res[ch].end - start + 1; - - if (!(tx3927_romcptr->cr[ch] & 0x8)) - return; /* disabled */ - txx9_physmap_flash_init(ch, start, size, &pdata); -} diff --git a/arch/mips/txx9/jmr3927/Makefile b/arch/mips/txx9/jmr3927/Makefile deleted file mode 100644 index 4bda0615d27e..000000000000 --- a/arch/mips/txx9/jmr3927/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for TOSHIBA JMR-TX3927 board -# - -obj-y += prom.o irq.o setup.o diff --git a/arch/mips/txx9/jmr3927/irq.c b/arch/mips/txx9/jmr3927/irq.c deleted file mode 100644 index c22c859a2c49..000000000000 --- a/arch/mips/txx9/jmr3927/irq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000-2001 Toshiba Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include - -#include -#include -#include -#include - -#if JMR3927_IRQ_END > NR_IRQS -#error JMR3927_IRQ_END > NR_IRQS -#endif - -/* - * CP0_STATUS is a thread's resource (saved/restored on context switch). - * So disable_irq/enable_irq MUST handle IOC/IRC registers. - */ -static void mask_irq_ioc(struct irq_data *d) -{ - /* 0: mask */ - unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC; - unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR); - unsigned int bit = 1 << irq_nr; - jmr3927_ioc_reg_out(imask & ~bit, JMR3927_IOC_INTM_ADDR); - /* flush write buffer */ - (void)jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR); -} -static void unmask_irq_ioc(struct irq_data *d) -{ - /* 0: mask */ - unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC; - unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR); - unsigned int bit = 1 << irq_nr; - jmr3927_ioc_reg_out(imask | bit, JMR3927_IOC_INTM_ADDR); - /* flush write buffer */ - (void)jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR); -} - -static int jmr3927_ioc_irqroute(void) -{ - unsigned char istat = jmr3927_ioc_reg_in(JMR3927_IOC_INTS2_ADDR); - int i; - - for (i = 0; i < JMR3927_NR_IRQ_IOC; i++) { - if (istat & (1 << i)) - return JMR3927_IRQ_IOC + i; - } - return -1; -} - -static int jmr3927_irq_dispatch(int pending) -{ - int irq; - - if ((pending & CAUSEF_IP7) == 0) - return -1; - irq = (pending >> CAUSEB_IP2) & 0x0f; - irq += JMR3927_IRQ_IRC; - if (irq == JMR3927_IRQ_IOCINT) - irq = jmr3927_ioc_irqroute(); - return irq; -} - -static struct irq_chip jmr3927_irq_ioc = { - .name = "jmr3927_ioc", - .irq_mask = mask_irq_ioc, - .irq_unmask = unmask_irq_ioc, -}; - -void __init jmr3927_irq_setup(void) -{ - int i; - - txx9_irq_dispatch = jmr3927_irq_dispatch; - /* Now, interrupt control disabled, */ - /* all IRC interrupts are masked, */ - /* all IRC interrupt mode are Low Active. */ - - /* mask all IOC interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_INTM_ADDR); - /* setup IOC interrupt mode (SOFT:High Active, Others:Low Active) */ - jmr3927_ioc_reg_out(JMR3927_IOC_INTF_SOFT, JMR3927_IOC_INTP_ADDR); - - /* clear PCI Soft interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_INTS1_ADDR); - /* clear PCI Reset interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - - tx3927_irq_init(); - for (i = JMR3927_IRQ_IOC; i < JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC; i++) - irq_set_chip_and_handler(i, &jmr3927_irq_ioc, - handle_level_irq); - - /* setup IOC interrupt 1 (PCI, MODEM) */ - irq_set_chained_handler(JMR3927_IRQ_IOCINT, handle_simple_irq); -} diff --git a/arch/mips/txx9/jmr3927/prom.c b/arch/mips/txx9/jmr3927/prom.c deleted file mode 100644 index 53c68de54d30..000000000000 --- a/arch/mips/txx9/jmr3927/prom.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * PROM library initialisation code, assuming a version of - * pmon is the boot code. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Based on arch/mips/au1000/common/prom.c - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/xx files. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include - -void __init jmr3927_prom_init(void) -{ - /* CCFG */ - if ((tx3927_ccfgptr->ccfg & TX3927_CCFG_TLBOFF) == 0) - pr_err("TX3927 TLB off\n"); - - memblock_add(0, JMR3927_SDRAM_SIZE); - txx9_sio_putchar_init(TX3927_SIO_REG(1)); -} diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c deleted file mode 100644 index 613943886e34..000000000000 --- a/arch/mips/txx9/jmr3927/setup.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void jmr3927_machine_restart(char *command) -{ - local_irq_disable(); -#if 1 /* Resetting PCI bus */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_PCI, JMR3927_IOC_RESET_ADDR); - (void)jmr3927_ioc_reg_in(JMR3927_IOC_RESET_ADDR); /* flush WB */ - mdelay(1); - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); -#endif - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_CPU, JMR3927_IOC_RESET_ADDR); - /* fallback */ - (*_machine_halt)(); -} - -static void __init jmr3927_time_init(void) -{ - tx3927_time_init(0, 1); -} - -#define DO_WRITE_THROUGH - -static void jmr3927_board_init(void); - -static void __init jmr3927_mem_setup(void) -{ - set_io_port_base(JMR3927_PORT_BASE + JMR3927_PCIIO); - - _machine_restart = jmr3927_machine_restart; - - /* cache setup */ - { - unsigned int conf; -#ifdef DO_WRITE_THROUGH - int mips_config_cwfon = 0; - int mips_config_wbon = 0; -#else - int mips_config_cwfon = 1; - int mips_config_wbon = 1; -#endif - - conf = read_c0_conf(); - conf &= ~(TX39_CONF_WBON | TX39_CONF_CWFON); - conf |= mips_config_wbon ? TX39_CONF_WBON : 0; - conf |= mips_config_cwfon ? TX39_CONF_CWFON : 0; - - write_c0_conf(conf); - write_c0_cache(0); - } - - /* initialize board */ - jmr3927_board_init(); - - tx3927_sio_init(0, 1 << 1); /* ch1: noCTS */ -} - -static void __init jmr3927_pci_setup(void) -{ -#ifdef CONFIG_PCI - int extarb = !(tx3927_ccfgptr->ccfg & TX3927_CCFG_PCIXARB); - struct pci_controller *c; - - c = txx9_alloc_pci_controller(&txx9_primary_pcic, - JMR3927_PCIMEM, JMR3927_PCIMEM_SIZE, - JMR3927_PCIIO, JMR3927_PCIIO_SIZE); - register_pci_controller(c); - if (!extarb) { - /* Reset PCI Bus */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - udelay(100); - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_PCI, - JMR3927_IOC_RESET_ADDR); - udelay(100); - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - } - tx3927_pcic_setup(c, JMR3927_SDRAM_SIZE, extarb); - tx3927_setup_pcierr_irq(); -#endif /* CONFIG_PCI */ -} - -static void __init jmr3927_board_init(void) -{ - txx9_cpu_clock = JMR3927_CORECLK; - /* SDRAMC are configured by PROM */ - - /* ROMC */ - tx3927_romcptr->cr[1] = JMR3927_ROMCE1 | 0x00030048; - tx3927_romcptr->cr[2] = JMR3927_ROMCE2 | 0x000064c8; - tx3927_romcptr->cr[3] = JMR3927_ROMCE3 | 0x0003f698; - tx3927_romcptr->cr[5] = JMR3927_ROMCE5 | 0x0000f218; - - /* Pin selection */ - tx3927_ccfgptr->pcfg &= ~TX3927_PCFG_SELALL; - tx3927_ccfgptr->pcfg |= - TX3927_PCFG_SELSIOC(0) | TX3927_PCFG_SELSIO_ALL | - (TX3927_PCFG_SELDMA_ALL & ~TX3927_PCFG_SELDMA(1)); - - tx3927_setup(); - - /* PIO[15:12] connected to LEDs */ - __raw_writel(0x0000f000, &tx3927_pioptr->dir); - - jmr3927_pci_setup(); - - /* SIO0 DTR on */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_DTR_ADDR); - - jmr3927_led_set(0); - - pr_info("JMR-TX3927 (Rev %d) --- IOC(Rev %d) DIPSW:%d,%d,%d,%d\n", - jmr3927_ioc_reg_in(JMR3927_IOC_BREV_ADDR) & JMR3927_REV_MASK, - jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR) & JMR3927_REV_MASK, - jmr3927_dipsw1(), jmr3927_dipsw2(), - jmr3927_dipsw3(), jmr3927_dipsw4()); -} - -/* This trick makes rtc-ds1742 driver usable as is. */ -static unsigned long jmr3927_swizzle_addr_b(unsigned long port) -{ - if ((port & 0xffff0000) != JMR3927_IOC_NVRAMB_ADDR) - return port; - port = (port & 0xffff0000) | (port & 0x7fff << 1); -#ifdef __BIG_ENDIAN - return port; -#else - return port | 1; -#endif -} - -static void __init jmr3927_rtc_init(void) -{ - static struct resource __initdata res = { - .start = JMR3927_IOC_NVRAMB_ADDR - IO_BASE, - .end = JMR3927_IOC_NVRAMB_ADDR - IO_BASE + 0x800 - 1, - .flags = IORESOURCE_MEM, - }; - platform_device_register_simple("rtc-ds1742", -1, &res, 1); -} - -static void __init jmr3927_mtd_init(void) -{ - int i; - - for (i = 0; i < 2; i++) - tx3927_mtd_init(i); -} - -static void __init jmr3927_device_init(void) -{ - unsigned long iocled_base = JMR3927_IOC_LED_ADDR - IO_BASE; -#ifdef __LITTLE_ENDIAN - iocled_base |= 1; -#endif - __swizzle_addr_b = jmr3927_swizzle_addr_b; - jmr3927_rtc_init(); - tx3927_wdt_init(); - jmr3927_mtd_init(); - txx9_iocled_init(iocled_base, -1, 8, 1, "green", NULL); -} - -static void __init jmr3927_arch_init(void) -{ - txx9_gpio_init(TX3927_PIO_REG, 0, 16); - - gpio_request(11, "dipsw1"); - gpio_request(10, "dipsw2"); -} - -struct txx9_board_vec jmr3927_vec __initdata = { - .system = "Toshiba JMR_TX3927", - .prom_init = jmr3927_prom_init, - .mem_setup = jmr3927_mem_setup, - .irq_setup = jmr3927_irq_setup, - .time_init = jmr3927_time_init, - .device_init = jmr3927_device_init, - .arch_init = jmr3927_arch_init, -#ifdef CONFIG_PCI - .pci_map_irq = jmr3927_pci_map_irq, -#endif -}; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6bcdb4e6a0d1..d5de3f77d3aa 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -623,7 +623,7 @@ config S3C24XX_DMAC config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" - depends on MACH_TX49XX || MACH_TX39XX + depends on MACH_TX49XX select DMA_ENGINE help Support the TXx9 SoC internal DMA controller. This can be diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c8fa79da23b3..aa42f0686591 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1718,7 +1718,7 @@ config AR7_WDT config TXX9_WDT tristate "Toshiba TXx9 Watchdog Timer" - depends on CPU_TX39XX || CPU_TX49XX || (MIPS && COMPILE_TEST) + depends on CPU_TX49XX || (MIPS && COMPILE_TEST) select WATCHDOG_CORE help Hardware driver for the built-in watchdog timer on TXx9 MIPS SoCs. From c315669e2fbd71bb9387066f60f0d91b0ceb28f3 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:49:54 +0530 Subject: [PATCH 150/380] selftests/powerpc/pmu: Add support for perf sampling tests Add support functions for enabling perf sampling test in a new folder "sampling_tests" under "selftests/powerpc/pmu". This includes support functions for allocating and processing the mmap buffer. These functions are added/defined in "sampling_tests/misc.*" files. Also updates the corresponding Makefiles in "selftests/powerpc" and "sampling_tests" folder. Signed-off-by: Athira Rajeev [mpe: Drop unneeded bits from the Makefile] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-3-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/Makefile | 11 +- .../powerpc/pmu/sampling_tests/Makefile | 7 ++ .../powerpc/pmu/sampling_tests/misc.c | 105 ++++++++++++++++++ .../powerpc/pmu/sampling_tests/misc.h | 9 ++ 4 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 904672fb78dd..edbd96d3b2ab 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb +all: $(TEST_GEN_PROGS) ebb sampling_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -26,25 +26,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb +sampling_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile new file mode 100644 index 000000000000..ac3d03ffc428 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +top_srcdir = ../../../../../.. +include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c new file mode 100644 index 000000000000..4779b107f43b --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "misc.h" + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +/* + * Allocate mmap buffer of "mmap_pages" number of + * pages. + */ +void *event_sample_buf_mmap(int fd, int mmap_pages) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + size_t mmap_size; + void *buff; + + if (mmap_pages <= 0) + return NULL; + + if (fd <= 0) + return NULL; + + mmap_size = page_size * (1 + mmap_pages); + buff = mmap(NULL, mmap_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (buff == MAP_FAILED) { + perror("mmap() failed."); + return NULL; + } + return buff; +} + +/* + * Post process the mmap buffer. + * - If sample_count != NULL then return count of total + * number of samples present in the mmap buffer. + * - If sample_count == NULL then return the address + * of first sample from the mmap buffer + */ +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + struct perf_event_header *header = sample_buff + page_size; + struct perf_event_mmap_page *metadata_page = sample_buff; + unsigned long data_head, data_tail; + + /* + * PERF_RECORD_SAMPLE: + * struct { + * struct perf_event_header hdr; + * u64 data[]; + * }; + */ + + data_head = metadata_page->data_head; + /* sync memory before reading sample */ + mb(); + data_tail = metadata_page->data_tail; + + /* Check for sample_count */ + if (sample_count) + *sample_count = 0; + + while (1) { + /* + * Reads the mmap data buffer by moving + * the data_tail to know the last read data. + * data_head points to head in data buffer. + * refer "struct perf_event_mmap_page" in + * "include/uapi/linux/perf_event.h". + */ + if (data_head - data_tail < sizeof(header)) + return NULL; + + data_tail += sizeof(header); + if (header->type == PERF_RECORD_SAMPLE) { + *size = (header->size - sizeof(header)); + if (!sample_count) + return sample_buff + page_size + data_tail; + data_tail += *size; + *sample_count += 1; + } else { + *size = (header->size - sizeof(header)); + if ((metadata_page->data_tail + *size) > metadata_page->data_head) + data_tail = metadata_page->data_head; + else + data_tail += *size; + } + header = (struct perf_event_header *)((void *)header + header->size); + } + return NULL; +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h new file mode 100644 index 000000000000..291f9adba817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include "../event.h" + +void *event_sample_buf_mmap(int fd, int mmap_pages); +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); From 6523dce86222451e5ca2df8a46597a217084bfdf Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 27 Jan 2022 12:49:55 +0530 Subject: [PATCH 151/380] selftests/powerpc/pmu: Add macros to parse event codes Each platform has raw event encoding format which specifies the bit positions for different fields. The fields from event code gets translated into performance monitoring mode control register (MMCRx) settings. Patch add macros to extract individual fields from the event code. Add functions for sanity checks, since testcases currently are only supported in power9 and power10. Signed-off-by: Madhavan Srinivasan [mpe: Read PVR directly rather than using /proc/cpuinfo] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-4-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/include/reg.h | 4 + .../powerpc/pmu/sampling_tests/misc.c | 132 ++++++++++++++++++ .../powerpc/pmu/sampling_tests/misc.h | 36 +++++ .../selftests/powerpc/security/spectre_v2.c | 2 - 4 files changed, 172 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c0f2742a3a59..c422be8a42b2 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -52,6 +52,9 @@ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TAR 0x32f /* Target Address Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) +#define SPRN_PVR 0x11F + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ @@ -84,6 +87,7 @@ #define TEXASR_ROT 0x0000000002000000 /* MSR register bits */ +#define MSR_HV (1ul << 60) /* Hypervisor state */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ #define MSR_TS_T_LG 34 /* Trans Mem state: Active */ diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index 4779b107f43b..a86d7d125955 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. */ #include @@ -16,6 +17,137 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) +/* Storage for platform version */ +int pvr; +u64 platform_extended_mask; + +/* Mask and Shift for Event code fields */ +int ev_mask_pmcxsel, ev_shift_pmcxsel; //pmcxsel field +int ev_mask_marked, ev_shift_marked; //marked filed +int ev_mask_comb, ev_shift_comb; //combine field +int ev_mask_unit, ev_shift_unit; //unit field +int ev_mask_pmc, ev_shift_pmc; //pmc field +int ev_mask_cache, ev_shift_cache; //Cache sel field +int ev_mask_sample, ev_shift_sample; //Random sampling field +int ev_mask_thd_sel, ev_shift_thd_sel; //thresh_sel field +int ev_mask_thd_start, ev_shift_thd_start; //thresh_start field +int ev_mask_thd_stop, ev_shift_thd_stop; //thresh_stop field +int ev_mask_thd_cmp, ev_shift_thd_cmp; //thresh cmp field +int ev_mask_sm, ev_shift_sm; //SDAR mode field +int ev_mask_rsq, ev_shift_rsq; //radix scope qual field +int ev_mask_l2l3, ev_shift_l2l3; //l2l3 sel field +int ev_mask_mmcr3_src, ev_shift_mmcr3_src; //mmcr3 field + +static void init_ev_encodes(void) +{ + ev_mask_pmcxsel = 0xff; + ev_shift_pmcxsel = 0; + ev_mask_marked = 1; + ev_shift_marked = 8; + ev_mask_unit = 0xf; + ev_shift_unit = 12; + ev_mask_pmc = 0xf; + ev_shift_pmc = 16; + ev_mask_sample = 0x1f; + ev_shift_sample = 24; + ev_mask_thd_sel = 0x7; + ev_shift_thd_sel = 29; + ev_mask_thd_start = 0xf; + ev_shift_thd_start = 36; + ev_mask_thd_stop = 0xf; + ev_shift_thd_stop = 32; + + switch (pvr) { + case POWER10: + ev_mask_rsq = 1; + ev_shift_rsq = 9; + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 3; + ev_shift_cache = 20; + ev_mask_sm = 0x3; + ev_shift_sm = 22; + ev_mask_l2l3 = 0x1f; + ev_shift_l2l3 = 40; + ev_mask_mmcr3_src = 0x7fff; + ev_shift_mmcr3_src = 45; + break; + case POWER9: + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 0xf; + ev_shift_cache = 20; + ev_mask_thd_cmp = 0x3ff; + ev_shift_thd_cmp = 40; + ev_mask_sm = 0x3; + ev_shift_sm = 50; + break; + default: + FAIL_IF_EXIT(1); + } +} + +/* Return the extended regs mask value */ +static u64 perf_get_platform_reg_mask(void) +{ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return PERF_POWER10_MASK; + if (have_hwcap2(PPC_FEATURE2_ARCH_3_00)) + return PERF_POWER9_MASK; + + return -1; +} + +int check_extended_regs_support(void) +{ + int fd; + struct event event; + + event_init(&event, 0x1001e); + + event.attr.type = 4; + event.attr.sample_period = 1; + event.attr.disabled = 1; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.sample_regs_intr = platform_extended_mask; + + fd = event_open(&event); + if (fd != -1) + return 0; + + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + pvr = PVR_VER(mfspr(SPRN_PVR)); + + platform_extended_mask = perf_get_platform_reg_mask(); + + /* + * Check for supported platforms + * for sampling test + */ + if ((pvr != POWER10) && (pvr != POWER9)) + goto out; + + /* + * Check PMU driver registered by looking for + * PPC_FEATURE2_EBB bit in AT_HWCAP2 + */ + if (!have_hwcap2(PPC_FEATURE2_EBB)) + goto out; + + /* check if platform supports extended regs */ + if (check_extended_regs_support()) + goto out; + + init_ev_encodes(); + return 0; +out: + printf("%s: Sampling tests un-supported\n", __func__); + return -1; +} /* * Allocate mmap buffer of "mmap_pages" number of * pages. diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 291f9adba817..c8f64e8e749c 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -1,9 +1,45 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. */ #include "../event.h" +#define POWER10 0x80 +#define POWER9 0x4e +#define PERF_POWER9_MASK 0x7f8ffffffffffff +#define PERF_POWER10_MASK 0x7ffffffffffffff + +extern int ev_mask_pmcxsel, ev_shift_pmcxsel; +extern int ev_mask_marked, ev_shift_marked; +extern int ev_mask_comb, ev_shift_comb; +extern int ev_mask_unit, ev_shift_unit; +extern int ev_mask_pmc, ev_shift_pmc; +extern int ev_mask_cache, ev_shift_cache; +extern int ev_mask_sample, ev_shift_sample; +extern int ev_mask_thd_sel, ev_shift_thd_sel; +extern int ev_mask_thd_start, ev_shift_thd_start; +extern int ev_mask_thd_stop, ev_shift_thd_stop; +extern int ev_mask_thd_cmp, ev_shift_thd_cmp; +extern int ev_mask_sm, ev_shift_sm; +extern int ev_mask_rsq, ev_shift_rsq; +extern int ev_mask_l2l3, ev_shift_l2l3; +extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; +extern int pvr; +extern u64 platform_extended_mask; +extern int check_pvr_for_sampling_tests(void); + +/* + * Event code field extraction macro. + * Raw event code is combination of multiple + * fields. Macro to extract individual fields + * + * x - Raw event code value + * y - Field to extract + */ +#define EV_CODE_EXTRACT(x, y) \ + ((x >> ev_shift_##y) & ev_mask_##y) + void *event_sample_buf_mmap(int fd, int mmap_pages); void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index 83647b8277e7..d42ca8c676c3 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void) #define PM_BR_PRED_PCACHE 0x048a0 // P9 only #define PM_BR_MPRED_PCACHE 0x048b0 // P9 only -#define SPRN_PVR 287 - int spectre_v2_test(void) { enum spectre_v2_state state; From 5f6c3061af7ca3b0f9f8a20ec7a445671f7e6b5a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 27 Jan 2022 12:49:56 +0530 Subject: [PATCH 152/380] selftests/powerpc/pmu: Add utility functions to post process the mmap buffer Add couple of basic utility functions to post process the mmap buffer. It includes function to read the total number of samples present in the mmap buffer and function to get the address of the first sample. Add function "get_intr_regs" which will return pointer to interrupt registers present in the sample, incase sample type PERF_SAMPLE_REGS_INTR is set. Add functions "get_reg_value" which can be used to read any interrupt register value from a given sample. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-5-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.c | 175 ++++++++++++++++++ .../powerpc/pmu/sampling_tests/misc.h | 4 + 2 files changed, 179 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index a86d7d125955..fca054bbc094 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -2,6 +2,7 @@ /* * Copyright 2022, Athira Rajeev, IBM Corp. * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. */ #include @@ -235,3 +236,177 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count) } return NULL; } + +int collect_samples(void *sample_buff) +{ + u64 sample_count; + size_t size = 0; + + __event_read_samples(sample_buff, &size, &sample_count); + return sample_count; +} + +static void *perf_read_first_sample(void *sample_buff, size_t *size) +{ + return __event_read_samples(sample_buff, size, NULL); +} + +u64 *get_intr_regs(struct event *event, void *sample_buff) +{ + u64 type = event->attr.sample_type; + u64 *intr_regs; + size_t size = 0; + + if ((type ^ PERF_SAMPLE_REGS_INTR)) + return NULL; + + intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); + if (!intr_regs) + return NULL; + + /* + * First entry in the sample buffer used to specify + * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access + * interrupt registers. + */ + ++intr_regs; + + return intr_regs; +} + +static const unsigned int __perf_reg_mask(const char *register_name) +{ + if (!strcmp(register_name, "R0")) + return 0; + else if (!strcmp(register_name, "R1")) + return 1; + else if (!strcmp(register_name, "R2")) + return 2; + else if (!strcmp(register_name, "R3")) + return 3; + else if (!strcmp(register_name, "R4")) + return 4; + else if (!strcmp(register_name, "R5")) + return 5; + else if (!strcmp(register_name, "R6")) + return 6; + else if (!strcmp(register_name, "R7")) + return 7; + else if (!strcmp(register_name, "R8")) + return 8; + else if (!strcmp(register_name, "R9")) + return 9; + else if (!strcmp(register_name, "R10")) + return 10; + else if (!strcmp(register_name, "R11")) + return 11; + else if (!strcmp(register_name, "R12")) + return 12; + else if (!strcmp(register_name, "R13")) + return 13; + else if (!strcmp(register_name, "R14")) + return 14; + else if (!strcmp(register_name, "R15")) + return 15; + else if (!strcmp(register_name, "R16")) + return 16; + else if (!strcmp(register_name, "R17")) + return 17; + else if (!strcmp(register_name, "R18")) + return 18; + else if (!strcmp(register_name, "R19")) + return 19; + else if (!strcmp(register_name, "R20")) + return 20; + else if (!strcmp(register_name, "R21")) + return 21; + else if (!strcmp(register_name, "R22")) + return 22; + else if (!strcmp(register_name, "R23")) + return 23; + else if (!strcmp(register_name, "R24")) + return 24; + else if (!strcmp(register_name, "R25")) + return 25; + else if (!strcmp(register_name, "R26")) + return 26; + else if (!strcmp(register_name, "R27")) + return 27; + else if (!strcmp(register_name, "R28")) + return 28; + else if (!strcmp(register_name, "R29")) + return 29; + else if (!strcmp(register_name, "R30")) + return 30; + else if (!strcmp(register_name, "R31")) + return 31; + else if (!strcmp(register_name, "NIP")) + return 32; + else if (!strcmp(register_name, "MSR")) + return 33; + else if (!strcmp(register_name, "ORIG_R3")) + return 34; + else if (!strcmp(register_name, "CTR")) + return 35; + else if (!strcmp(register_name, "LINK")) + return 36; + else if (!strcmp(register_name, "XER")) + return 37; + else if (!strcmp(register_name, "CCR")) + return 38; + else if (!strcmp(register_name, "SOFTE")) + return 39; + else if (!strcmp(register_name, "TRAP")) + return 40; + else if (!strcmp(register_name, "DAR")) + return 41; + else if (!strcmp(register_name, "DSISR")) + return 42; + else if (!strcmp(register_name, "SIER")) + return 43; + else if (!strcmp(register_name, "MMCRA")) + return 44; + else if (!strcmp(register_name, "MMCR0")) + return 45; + else if (!strcmp(register_name, "MMCR1")) + return 46; + else if (!strcmp(register_name, "MMCR2")) + return 47; + else if (!strcmp(register_name, "MMCR3")) + return 48; + else if (!strcmp(register_name, "SIER2")) + return 49; + else if (!strcmp(register_name, "SIER3")) + return 50; + else if (!strcmp(register_name, "PMC1")) + return 51; + else if (!strcmp(register_name, "PMC2")) + return 52; + else if (!strcmp(register_name, "PMC3")) + return 53; + else if (!strcmp(register_name, "PMC4")) + return 54; + else if (!strcmp(register_name, "PMC5")) + return 55; + else if (!strcmp(register_name, "PMC6")) + return 56; + else if (!strcmp(register_name, "SDAR")) + return 57; + else if (!strcmp(register_name, "SIAR")) + return 58; + else + return -1; +} + +u64 get_reg_value(u64 *intr_regs, char *register_name) +{ + int register_bit_position; + + register_bit_position = __perf_reg_mask(register_name); + + if (register_bit_position < 0 || (!((platform_extended_mask >> + (register_bit_position - 1)) & 1))) + return -1; + + return *(intr_regs + register_bit_position); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index c8f64e8e749c..a8d67fcad9ae 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -2,6 +2,7 @@ /* * Copyright 2022, Athira Rajeev, IBM Corp. * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. */ #include "../event.h" @@ -43,3 +44,6 @@ extern int check_pvr_for_sampling_tests(void); void *event_sample_buf_mmap(int fd, int mmap_pages); void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); +int collect_samples(void *sample_buff); +u64 *get_intr_regs(struct event *event, void *sample_buff); +u64 get_reg_value(u64 *intr_regs, char *register_name); From 54d4ba7f22d1ed5bfbc915771cf2e3e147cf03b2 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 27 Jan 2022 12:49:57 +0530 Subject: [PATCH 153/380] selftests/powerpc/pmu: Add event_init_sampling function Extended event_init_opts() to include initialization of sampling testcases. Patch adds an event_init_sampling() wrapper to initialize event attribute fields for sampling events. This includes initializing sample period, sample type and event type. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-6-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/event.c | 19 ++++++++++++++++++- tools/testing/selftests/powerpc/pmu/event.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c index 48e3a413b15d..0c1c1bdba081 100644 --- a/tools/testing/selftests/powerpc/pmu/event.c +++ b/tools/testing/selftests/powerpc/pmu/event.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "event.h" @@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, group_fd, flags); } -void event_init_opts(struct event *e, u64 config, int type, char *name) +static void __event_init_opts(struct event *e, u64 config, + int type, char *name, bool sampling) { memset(e, 0, sizeof(*e)); @@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name) /* This has to match the structure layout in the header */ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \ PERF_FORMAT_TOTAL_TIME_RUNNING; + if (sampling) { + e->attr.sample_period = 1000; + e->attr.sample_type = PERF_SAMPLE_REGS_INTR; + e->attr.disabled = 1; + } +} + +void event_init_opts(struct event *e, u64 config, int type, char *name) +{ + __event_init_opts(e, config, type, name, false); } void event_init_named(struct event *e, u64 config, char *name) @@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config) event_init_opts(e, config, PERF_TYPE_RAW, "event"); } +void event_init_sampling(struct event *e, u64 config) +{ + __event_init_opts(e, config, PERF_TYPE_RAW, "event", true); +} + #define PERF_CURRENT_PID 0 #define PERF_NO_PID -1 #define PERF_NO_CPU -1 diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 23d20340a160..51aad0b6d9ad 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -32,6 +32,7 @@ struct event { void event_init(struct event *e, u64 config); void event_init_named(struct event *e, u64 config, char *name); void event_init_opts(struct event *e, u64 config, int type, char *name); +void event_init_sampling(struct event *e, u64 config); int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd); int event_open_with_group(struct event *e, int group_fd); int event_open_with_pid(struct event *e, pid_t pid); From 79c4e6aba8dfc9206acc68884498080f451121f7 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 27 Jan 2022 12:49:58 +0530 Subject: [PATCH 154/380] selftests/powerpc/pmu: Add macros to extract mmcr fields Along with it, Add macros and utility functions to fetch individual fields from Monitor Mode Control Register 2(MMCR2) register. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-7-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index a8d67fcad9ae..bac447e92cf6 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -47,3 +47,55 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); + +static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcp(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcpc(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fch(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcti(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcta(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) +{ + if (pvr == POWER10) + return ((mmcr2 & 0xf8) >> 3); + return 0; +} From 2b49e641063e7569493371ef433b9c4ce8c8dd8b Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:49:59 +0530 Subject: [PATCH 155/380] selftests/powerpc/pmu: Add macro to extract mmcr0/mmcr1 fields Add macro and utility functions to fetch individual fields from Monitor Mode Control Register 0(MMCR0) and Monitor Mode Control Register 1(MMCR1) PMU register. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-8-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index bac447e92cf6..77690ca1dfc1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -12,6 +12,10 @@ #define PERF_POWER9_MASK 0x7f8ffffffffffff #define PERF_POWER10_MASK 0x7ffffffffffffff +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ +#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ +#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ + extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; @@ -48,6 +52,66 @@ int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); +static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_FC56); +} + +static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_PMCCEXT); +} + +static inline int get_mmcr0_pmao(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 7) & 0x1); +} + +static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 8) & 0x1); +} + +static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 14) & 0x1); +} + +static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 15) & 0x1); +} + +static inline int get_mmcr0_pmae(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 27) & 0x1); +} + +static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff)); +} + +static inline int get_mmcr1_unit(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf); +} + +static inline int get_mmcr1_comb(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3); +} + +static inline int get_mmcr1_cache(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> 46) & 0x3); +} + +static inline int get_mmcr1_rsq(u64 mmcr1, int pmc) +{ + return mmcr1 & MMCR1_RSQ; +} + static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) { return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); From 13307f9584ea9408d9959302074dc4e8308b6cab Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 27 Jan 2022 12:50:00 +0530 Subject: [PATCH 156/380] selftests/powerpc/pmu: Add macro to extract mmcr3 and mmcra fields Add macro and utility functions to fetch individual fields from Monitor Mode Control Register 3(MMCR3)and Monitor Mode Control Register A(MMCRA) PMU registers Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-9-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 77690ca1dfc1..7675f3177725 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -15,6 +15,7 @@ #define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ #define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ +#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; @@ -163,3 +164,64 @@ static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) return ((mmcr2 & 0xf8) >> 3); return 0; } + +static inline int get_mmcr3_src(u64 mmcr3, int pmc) +{ + if (pvr != POWER10) + return 0; + return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff); +} + +static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return ((mmcra >> 45) & 0x7ff); + return ((mmcra >> 45) & 0x3ff); +} + +static inline int get_mmcra_sm(u64 mmcra, int pmc) +{ + return ((mmcra >> 42) & 0x3); +} + +static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return mmcra & BHRB_DISABLE; + return 0; +} + +static inline int get_mmcra_ifm(u64 mmcra, int pmc) +{ + return ((mmcra >> 30) & 0x3); +} + +static inline int get_mmcra_thd_sel(u64 mmcra, int pmc) +{ + return ((mmcra >> 16) & 0x7); +} + +static inline int get_mmcra_thd_start(u64 mmcra, int pmc) +{ + return ((mmcra >> 12) & 0xf); +} + +static inline int get_mmcra_thd_stop(u64 mmcra, int pmc) +{ + return ((mmcra >> 8) & 0xf); +} + +static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc) +{ + return ((mmcra >> 4) & 0x7); +} + +static inline int get_mmcra_sample_mode(u64 mmcra, int pmc) +{ + return ((mmcra >> 1) & 0x3); +} + +static inline int get_mmcra_marked(u64 mmcra, int pmc) +{ + return mmcra & 0x1; +} From eb7aa044df18c6f7a88bc17fc4c9f4524652a290 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:01 +0530 Subject: [PATCH 157/380] selftests/powerpc/pmu/: Add interface test for mmcr0 exception bits The testcase uses "instructions" event to verify two bits(PMAE and PMAO) in Monitor Mode Control Register 0 (MMCR0). At the time of interrupt, pmae bit ( which enables performance monitor exception ) is expected to be cleared and pmao (which indicates performance monitor alert) bit is expected to be set in MMCR0. And testcases handles these checks. Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-10-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 4 +- .../sampling_tests/mmcr0_exceptionbits_test.c | 59 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore new file mode 100644 index 000000000000..067b9f3a7f84 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -0,0 +1 @@ +mmcr0_exceptionbits_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index ac3d03ffc428..c81db8b553f6 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 +TEST_GEN_PROGS := mmcr0_exceptionbits_test + top_srcdir = ../../../../../.. include ../../../lib.mk -$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c new file mode 100644 index 000000000000..982aa56d2171 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields : pmae, pmao. + */ +static int mmcr0_exceptionbits(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmae is cleared and pmao is set in MMCR0 */ + FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5)); + FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits"); +} From a7c0ab2e61484c0844eae2f208a06cc940338d83 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:02 +0530 Subject: [PATCH 158/380] selftests/powerpc/pmu/: Add interface test for mmcr0_cc56run field The testcase uses event code 0x500fa ("instructions") to check the CC56RUN bit setting in Monitor Mode Control Register 0(MMCR0). In ISA v3.1 platform, this bit is expected to be set in MMCR0 when using Performance Monitor Counter 5 and 6 (PMC5 and PMC6). Verify this is done correctly by perf interface. CC56RUN bit makes PMC5 and PMC6 count regardless of the run latch state. This bit is set in power10 since PMC5 and PMC6 is used in power10 for counting instructions and cycles. Hence added a check to skip this test in other platforms Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-11-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr0_cc56run_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 067b9f3a7f84..641634e3bace 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1 +1,2 @@ mmcr0_exceptionbits_test +mmcr0_cc56run_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index c81db8b553f6..fc0b39957522 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c new file mode 100644 index 000000000000..ae4172f83817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: cc56run. + */ +static int mmcr0_cc56run(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that cc56run bit is set in MMCR0 */ + FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_cc56run, "mmcr0_cc56run"); +} From b24142b9d2401468bcd8df157013306d5b4f6626 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:03 +0530 Subject: [PATCH 159/380] selftests/powerpc/pmu/: Add interface test for mmcr0_pmccext bit The testcase uses cycles event to check the PMCCEXT bit setting in Monitor Mode Control Register 0 (MMCR0). Check if perf interface sets this control bit in MMCR0 on ISA v3.1 platform. Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-12-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr0_pmccext_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 641634e3bace..991ed33eda20 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1,2 +1,3 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test +mmcr0_pmccext_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index fc0b39957522..7feaf5d387c2 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c new file mode 100644 index 000000000000..dfd186cd8eec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmccext + */ +static int mmcr0_pmccext(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x4001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmccext field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmccext, "mmcr0_pmccext"); +} From 9ac7c6d5e4b570f416d849b263a6f67a617b4fa5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:04 +0530 Subject: [PATCH 160/380] selftests/powerpc/pmu/: Add interface test for mmcr0_pmcjce field The testcase uses event code 0x500fa ("instructions") to verify the PMCjCE bit setting in Monitor Mode Control Register 0 (MMCR0). This bit is expected to be set in MMCR0 when using Performance Monitor Counter 5 (PMC5). Checks if perf interface sets this bit correctly. Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-13-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/mmcr0_pmcjce_test.c | 58 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 991ed33eda20..0bc68fed074d 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1,3 +1,4 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test +mmcr0_pmcjce_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 7feaf5d387c2..28029c0f1399 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ + mmcr0_pmcjce_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c new file mode 100644 index 000000000000..fdd8ed9bf725 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmcjce + */ +static int mmcr0_pmcjce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmcjce field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce"); +} From d5172f2585cd0fc9788aa9b25d3dce6483321792 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:05 +0530 Subject: [PATCH 161/380] selftests/powerpc/pmu/: Add interface test for mmcr0_fc56 field using pmc1 The testcase uses event code 0x1001e to verify two bit settings (FC5-6 and PMC1CE) in Monitor Mode Control Register 0 (MMCR0). Check if FC5-6 bit to be set in MMCR0 when not using Performance Monitor Counter 5 and 6 (PMC5 and PMC6). And also PMC1CE is expected to be set when using PMC1. Test if these fields are programmed correctly via perf interface. Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-14-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../sampling_tests/mmcr0_fc56_pmc1ce_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 0bc68fed074d..3229f088f542 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -2,3 +2,4 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test +mmcr0_fc56_pmc1ce_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 28029c0f1399..b6bc066e5047 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ - mmcr0_pmcjce_test + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c new file mode 100644 index 000000000000..1c1813c182c0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56, pmc1ce. + */ +static int mmcr0_fc56_pmc1ce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56, pmc1ce fields are set in MMCR0 */ + FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1)); + FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce"); +} From 6e11374b08723b2c43ae83bd5d48000d695f13a0 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:06 +0530 Subject: [PATCH 162/380] selftests/powerpc/pmu/: Add interface test for mmcr0_pmc56 using pmc5 The testcase uses event code 0x500fa to verify the FC5-6 bit setting in Monitor Mode Control Register 0 (MMCR0). Check if FC5-6 bit is not set in MMCR0 when using Performance Monitor Counter 5 and 6 (PMC5 and PMC6). Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-15-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../sampling_tests/mmcr0_fc56_pmc56_test.c | 58 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 3229f088f542..1a3b7323acd1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -3,3 +3,4 @@ mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test +mmcr0_fc56_pmc56_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index b6bc066e5047..790a7ff21a90 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ - mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c new file mode 100644 index 000000000000..332d24b5ab9c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56_pmc56 + */ +static int mmcr0_fc56_pmc56(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56 is not set in MMCR0 when using PMC5 */ + FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56"); +} From 2becea3b6acf308642d6c0e9bd41caf7820753f5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 27 Jan 2022 12:50:07 +0530 Subject: [PATCH 163/380] selftests/powerpc/pmu/: Add interface test for mmcr1_comb field The testcase uses event code "0x26880" to verify the settings for different fields in Monitor Mode Control Register 1 (MMCR1). The field include PMCxCOMB. Checks if this field are translated correctly via perf interface to MMCR1 Add selftest for mmcr1 comb field. Signed-off-by: Athira Rajeev [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-16-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/mmcr1_comb_test.c | 66 +++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 1a3b7323acd1..f0ad66d78ee0 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -4,3 +4,4 @@ mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test +mmcr1_comb_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 790a7ff21a90..da87ffddc568 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -2,7 +2,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ - mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ + mmcr1_comb_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c new file mode 100644 index 000000000000..5aea6499ee9a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode 0x46880 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* + * A perf sampling test for mmcr1 + * fields : comb. + */ +static int mmcr1_comb(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(10000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that comb field match with + * corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) != + get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr1_comb, "mmcr1_comb"); +} From ac575b2606bf99a0d01a054196e24e1ad82c194d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 27 Jan 2022 12:50:09 +0530 Subject: [PATCH 164/380] selftests/powerpc/pmu/: Add interface test for mmcr2_l2l3 field The testcases uses event code 0x010000046080 to verify the l2l3 bit setting for Monitor Mode Control Register 2 (MMCR2). check if this bit is set correctly via perf interface in ISA v3.1 platform. Signed-off-by: Madhavan Srinivasan [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-18-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr2_l2l3_test.c | 74 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index f0ad66d78ee0..84bfc4d0e51c 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -5,3 +5,4 @@ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test mmcr1_comb_test +mmcr2_l2l3_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index da87ffddc568..ec4c758e91a9 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test + mmcr1_comb_test mmcr2_l2l3_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c new file mode 100644 index 000000000000..ceca597016b2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode 0x010000046080 + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */ + +/* + * A perf sampling test for mmcr2 + * fields : l2l3 + */ +static int mmcr2_l2l3(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that l2l3 field of MMCR2 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) != + get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4)); + + event_close(&event); + free(p); + + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_l2l3, "mmcr2_l2l3"); +} From 9ee241f1b1447c7e8ca90902ab1888aa9e7a3c00 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 27 Jan 2022 12:50:10 +0530 Subject: [PATCH 165/380] selftests/powerpc/pmu/: Add interface test for mmcr2_fcs_fch fields The testcases uses cycles event to verify the freeze counter settings in Monitor Mode Control Register 2 (MMCR2). Event modifier (exclude_kernel) setting is used for the event attribute to check the FCxS and FCxH ( Freeze counter in privileged and hypervisor state ) settings via perf interface. Signed-off-by: Madhavan Srinivasan [mpe: Add error checking, check MSR for MSR_HV, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-19-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr2_fcs_fch_test.c | 85 +++++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 84bfc4d0e51c..58d7551f4c2a 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -6,3 +6,4 @@ mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test mmcr1_comb_test mmcr2_l2l3_test +mmcr2_fcs_fch_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index ec4c758e91a9..b4271509fe70 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test mmcr2_l2l3_test + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c new file mode 100644 index 000000000000..4e242fd61b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + */ + +#include +#include +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +static bool is_hv; + +static void sig_usr2_handler(int signum, siginfo_t *info, void *data) +{ + ucontext_t *uctx = data; + + is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV); +} + +/* + * A perf sampling test for mmcr2 + * fields : fcs, fch. + */ +static int mmcr2_fcs_fch(void) +{ + struct sigaction sigact = { + .sa_sigaction = sig_usr2_handler, + .sa_flags = SA_SIGINFO + }; + struct event event; + u64 *intr_regs; + + FAIL_IF(sigaction(SIGUSR2, &sigact, NULL)); + FAIL_IF(kill(getpid(), SIGUSR2)); + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that fcs and fch field of MMCR2 match + * with corresponding modifier fields. + */ + if (is_hv) + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1)); + else + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch"); +} From 02f02feb6b50c67171fd56bc3fd0fd96118c5c12 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 27 Jan 2022 12:50:11 +0530 Subject: [PATCH 166/380] selftests/powerpc/pmu/: Add interface test for mmcr3_src fields The testcase uses event code 0x1340000001c040 to verify the settings for different src fields in Monitor Mode Control Register 3 (MMCR3). Checks if these fields are translated correctly via perf interface to MMCR3 on ISA v3.1 platform. Signed-off-by: Kajol Jain [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-20-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/mmcr3_src_test.c | 67 +++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 58d7551f4c2a..2969a9e9ba72 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -7,3 +7,4 @@ mmcr0_fc56_pmc56_test mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test +mmcr3_src_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index b4271509fe70..cd2704b173b3 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ + mmcr3_src_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c new file mode 100644 index 000000000000..e154e2a4cc3a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x1340000001c040 + +/* + * A perf sampling test for mmcr3 + * fields. + */ +static int mmcr3_src(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that src field of MMCR3 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) != + get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr3_src, "mmcr3_src"); +} From 29cf373c5766e6bd1b97056d2d678a41777669aa Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 27 Jan 2022 12:50:12 +0530 Subject: [PATCH 167/380] selftests/powerpc/pmu: Add interface test for mmcra register fields The testcase uses event code 0x35340401e0 to verify the settings for different fields in Monitor Mode Control Register A (MMCRA). The fields include thresh_start, thresh_stop thresh_select, sdar mode, sample and marked bit. Checks if these fields are translated correctly via perf interface to MMCRA. Signed-off-by: Kajol Jain [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220127072012.662451-21-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../mmcra_thresh_marked_sample_test.c | 80 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 2969a9e9ba72..0fce5a694684 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -8,3 +8,4 @@ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test mmcr3_src_test +mmcra_thresh_marked_sample_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index cd2704b173b3..a785c6a173b9 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test + mmcr3_src_test mmcra_thresh_marked_sample_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c new file mode 100644 index 000000000000..022cc1655eb5 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_marked_sample(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that thresh sel/start/stop, marked, random sample + * eligibility, sdar mode and sample mode fields match with + * the corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) != + get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) != + get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) != + get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) != + get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample >> 2) != + get_mmcra_rand_samp_elig(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample & 0x3) != + get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) != + get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample"); +} From 607451ce0aa9bdff590db4d087173edba6d7a29d Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 1 Feb 2022 16:23:05 +0530 Subject: [PATCH 168/380] powerpc/fadump: register for fadump as early as possible Crash recovery (fadump) is setup in the userspace by some service. This service rebuilds initrd with dump capture capability, if it is not already dump capture capable before proceeding to register for firmware assisted dump (echo 1 > /sys/kernel/fadump/registered). But arming the kernel with crash recovery support does not have to wait for userspace configuration. So, register for fadump while setting it up itself. This can at worst lead to a scenario, where /proc/vmcore is ready afer crash but the initrd does not know how/where to offload it, which is always better than not having a /proc/vmcore at all due to incomplete configuration in the userspace at the time of crash. Commit 0823c68b054b ("powerpc/fadump: re-register firmware-assisted dump if already registered") ensures this change does not break userspace. Signed-off-by: Hari Bathini [mpe: Reword comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220201105305.155511-1-hbathini@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d03e488cfe9c..cf92f3fb17d2 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1637,9 +1637,11 @@ int __init setup_fadump(void) if (fw_dump.ops->fadump_process(&fw_dump) < 0) fadump_invalidate_release_mem(); } - /* Initialize the kernel dump memory structure for FAD registration. */ - else if (fw_dump.reserve_dump_area_size) + /* Initialize the kernel dump memory structure and register with f/w */ + else if (fw_dump.reserve_dump_area_size) { fw_dump.ops->fadump_init_mem_struct(&fw_dump); + register_fadump(); + } /* * In case of panic, fadump is triggered via ppc_panic_event() @@ -1651,7 +1653,12 @@ int __init setup_fadump(void) return 1; } -subsys_initcall(setup_fadump); +/* + * Use subsys_initcall_sync() here because there is dependency with + * crash_save_vmcoreinfo_init(), which mush run first to ensure vmcoreinfo initialization + * is done before regisering with f/w. + */ +subsys_initcall_sync(setup_fadump); #else /* !CONFIG_PRESERVE_FA_DUMP */ /* Scan the Firmware Assisted dump configuration details. */ From 973e2e6462405d85d3e8bb02d516d5fe6d1193ed Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Feb 2022 17:36:22 +0100 Subject: [PATCH 169/380] powerpc/interrupt: Remove struct interrupt_state Since commit ceff77efa4f8 ("powerpc/64e/interrupt: Use new interrupt context tracking scheme") struct interrupt_state has been empty and unused. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1d862ce3eab3da6ca7ac47d4a78a18f154462511.1645806970.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 32 +++++++++++----------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 5404f7abbcf8..f3b2c93a5db1 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -123,9 +123,6 @@ static inline void nap_adjust_return(struct pt_regs *regs) #endif } -struct interrupt_state { -}; - static inline void booke_restore_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -138,7 +135,7 @@ static inline void booke_restore_dbcr0(void) #endif } -static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) @@ -228,17 +225,17 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * However interrupt_nmi_exit_prepare does return directly to regs, because * NMIs do not do "exit work" or replay soft-masked interrupts. */ -static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_exit_prepare(struct pt_regs *regs) { } -static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC64 /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; #endif - interrupt_enter_prepare(regs, state); + interrupt_enter_prepare(regs); #ifdef CONFIG_PPC_BOOK3S_64 /* * RI=1 is set by interrupt_enter_prepare, so this thread flags access @@ -251,7 +248,7 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in irq_enter(); } -static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_exit_prepare(struct pt_regs *regs) { /* * Adjust at exit so the main handler sees the true NIA. This must @@ -262,7 +259,7 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int nap_adjust_return(regs); irq_exit(); - interrupt_exit_prepare(regs, state); + interrupt_exit_prepare(regs); } struct interrupt_nmi_state { @@ -447,13 +444,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -482,14 +477,13 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ interrupt_handler long func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ long ret; \ \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ret = ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ \ return ret; \ } \ @@ -518,13 +512,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_async_enter_prepare(regs, &state); \ + interrupt_async_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_async_exit_prepare(regs, &state); \ + interrupt_async_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ From 749ed4a20657bcea66a6e082ca3dc0d228cbec80 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 24 Feb 2022 15:23:12 -0300 Subject: [PATCH 170/380] powerpc/mm/numa: skip NUMA_NO_NODE onlining in parse_numa_properties() Executing node_set_online() when nid = NUMA_NO_NODE results in an undefined behavior. node_set_online() will call node_set_state(), into __node_set(), into set_bit(), and since NUMA_NO_NODE is -1 we'll end up doing a negative shift operation inside arch/powerpc/include/asm/bitops.h. This potential UB was detected running a kernel with CONFIG_UBSAN. The behavior was introduced by commit 10f78fd0dabb ("powerpc/numa: Fix a regression on memoryless node 0"), where the check for nid > 0 was removed to fix a problem that was happening with nid = 0, but the result is that now we're trying to online NUMA_NO_NODE nids as well. Checking for nid >= 0 will allow node 0 to be onlined while avoiding this UB with NUMA_NO_NODE. Fixes: 10f78fd0dabb ("powerpc/numa: Fix a regression on memoryless node 0") Reported-by: Ping Fang Signed-off-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220224182312.1012527-1-danielhb413@gmail.com --- arch/powerpc/mm/numa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c20..b9b7fefbb64b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); From 2863dd2db23e0407f6c50b8ba5c0e55abef894f1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 15 Feb 2022 22:28:58 +1100 Subject: [PATCH 171/380] powerpc/Makefile: Don't pass -mcpu=powerpc64 when building 32-bit When CONFIG_GENERIC_CPU=y (true for all our defconfigs) we pass -mcpu=powerpc64 to the compiler, even when we're building a 32-bit kernel. This happens because we have an ifdef CONFIG_PPC_BOOK3S_64/else block in the Makefile that was written before 32-bit supported GENERIC_CPU. Prior to that the else block only applied to 64-bit Book3E. The GCC man page says -mcpu=powerpc64 "[specifies] a pure ... 64-bit big endian PowerPC ... architecture machine [type], with an appropriate, generic processor model assumed for scheduling purposes." It's unclear how that interacts with -m32, which we are also passing, although obviously -m32 is taking precedence in some sense, as the 32-bit kernel only contains 32-bit instructions. This was noticed by inspection, not via any bug reports, but it does affect code generation. Comparing before/after code generation, there are some changes to instruction scheduling, and the after case (with -mcpu=powerpc64 removed) the compiler seems more keen to use r8. Fix it by making the else case only apply to Book3E 64, which excludes 32-bit. Fixes: 0e00a8c9fd92 ("powerpc: Allow CPU selection also on PPC32") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220215112858.304779-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ddc5a706760a..1e1ef4352f62 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif From a633cb1edddaa643fadc70abc88f89a408fa834a Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 24 Feb 2022 17:22:13 +0100 Subject: [PATCH 172/380] powerpc/lib/sstep: Fix 'sthcx' instruction Looks like there been a copy paste mistake when added the instruction 'stbcx' twice and one was probably meant to be 'sthcx'. Changing to 'sthcx' from 'stbcx'. Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220224162215.3406642-1-anders.roxell@linaro.org --- arch/powerpc/lib/sstep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ca38d026fd88..31de2c5b586e 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3373,7 +3373,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: From 8667d0d64dd1f84fd41b5897fd87fa9113ae05e3 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 24 Feb 2022 17:22:14 +0100 Subject: [PATCH 173/380] powerpc: Fix build errors with newer binutils Building tinyconfig with gcc (Debian 11.2.0-16) and assembler (Debian 2.37.90.20220207) the following build error shows up: {standard input}: Assembler messages: {standard input}:1190: Error: unrecognized opcode: `stbcix' {standard input}:1433: Error: unrecognized opcode: `lwzcix' {standard input}:1453: Error: unrecognized opcode: `stbcix' {standard input}:1460: Error: unrecognized opcode: `stwcix' {standard input}:1596: Error: unrecognized opcode: `stbcix' ... Rework to add assembler directives [1] around the instruction. Going through them one by one shows that the changes should be safe. Like __get_user_atomic_128_aligned() is only called in p9_hmi_special_emu(), which according to the name is specific to power9. And __raw_rm_read*() are only called in things that are powernv or book3s_hv specific. [1] https://sourceware.org/binutils/docs/as/PowerPC_002dPseudo.html#PowerPC_002dPseudo Cc: stable@vger.kernel.org Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Reviewed-by: Segher Boessenkool [mpe: Make commit subject more descriptive] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220224162215.3406642-2-anders.roxell@linaro.org --- arch/powerpc/include/asm/io.h | 40 ++++++++++++++++++++++------ arch/powerpc/include/asm/uaccess.h | 3 +++ arch/powerpc/platforms/powernv/rng.c | 6 ++++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff9..fee979d3a1aa 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 63316100080c..4a35423f766d 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -125,8 +125,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a..e3d44b36ae98 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; From 8219d31effa7be5dbc7ff915d7970672e028c701 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 24 Feb 2022 17:22:15 +0100 Subject: [PATCH 174/380] powerpc/lib/sstep: Fix build errors with newer binutils Building tinyconfig with gcc (Debian 11.2.0-16) and assembler (Debian 2.37.90.20220207) the following build error shows up: {standard input}: Assembler messages: {standard input}:10576: Error: unrecognized opcode: `stbcx.' {standard input}:10680: Error: unrecognized opcode: `lharx' {standard input}:10694: Error: unrecognized opcode: `lbarx' Rework to add assembler directives [1] around the instruction. The problem with this might be that we can trick a power6 into single-stepping through an stbcx. for instance, and it will execute that in kernel mode. [1] https://sourceware.org/binutils/docs/as/PowerPC_002dPseudo.html#PowerPC_002dPseudo Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Cc: stable@vger.kernel.org # v4.14+ Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220224162215.3406642-3-anders.roxell@linaro.org --- arch/powerpc/lib/sstep.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 31de2c5b586e..ef6ea8e156ed 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1089,7 +1089,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1102,7 +1105,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ From 200ed341b8646ea893d5466974d6e107c46237ba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Feb 2022 22:04:32 -0800 Subject: [PATCH 175/380] mips: Implement "current_stack_pointer" To follow the existing per-arch conventions replace open-coded uses of asm "sp" as "current_stack_pointer". This will let it be used in non-arch places (like HARDENED_USERCOPY). Cc: Thomas Bogendoerfer Cc: Marc Zyngier Cc: Guenter Roeck Cc: Mark Rutland Cc: Yanteng Si Cc: linux-mips@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 1 + arch/mips/include/asm/thread_info.h | 2 ++ arch/mips/kernel/irq.c | 3 +-- arch/mips/lib/uncached.c | 4 +--- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 59dabfd5a129..451ee7abd22a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if !64BIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 0b17aaa9e012..4463348d2372 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -69,6 +69,8 @@ static inline struct thread_info *current_thread_info(void) return __current_thread_info; } +register unsigned long current_stack_pointer __asm__("sp"); + #endif /* !__ASSEMBLY__ */ /* thread information allocation */ diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 5e11582fe308..fc313c49a417 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -75,9 +75,8 @@ void __init init_IRQ(void) #ifdef CONFIG_DEBUG_STACKOVERFLOW static inline void check_stack_overflow(void) { - unsigned long sp; + unsigned long sp = current_stack_pointer; - __asm__ __volatile__("move %0, $sp" : "=r" (sp)); sp &= THREAD_MASK; /* diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index f80a67c092b6..f8d4ca046c3e 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -40,9 +40,7 @@ unsigned long run_uncached(void *func) register long ret __asm__("$2"); long lfunc = (long)func, ufunc; long usp; - long sp; - - __asm__("move %0, $sp" : "=r" (sp)); + long sp = current_stack_pointer; if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) usp = CKSEG1ADDR(sp); From 4a0a1436053b17e50b7c88858fb0824326641793 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Mon, 28 Feb 2022 15:35:37 +0800 Subject: [PATCH 176/380] mips: ralink: fix a refcount leak in ill_acc_of_setup() of_node_put(np) needs to be called when pdev == NULL. Signed-off-by: Hangyu Hua Signed-off-by: Thomas Bogendoerfer --- arch/mips/ralink/ill_acc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 115a69fc20ca..f395ae218470 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } From 42b01a553a56d9bc7c75b700fd274f1ec4a3763f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 29 Jan 2022 00:34:13 +0100 Subject: [PATCH 177/380] s390: always use the packed stack layout -mpacked-stack option has been supported by both minimum gcc and clang versions for a while. With commit e2bc3e91d91e ("scripts/min-tool-version.sh: Raise minimum clang version to 13.0.0 for s390") minimum clang version now also supports a combination of flags -mpacked-stack -mbackchain -pg -mfentry and fulfills all requirements to always enable the packed stack layout. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 16 +--------------- arch/s390/Makefile | 9 ++------- arch/s390/include/asm/stacktrace.h | 10 +--------- arch/s390/kernel/mcount.S | 6 +----- arch/s390/net/bpf_jit_comp.c | 1 - 5 files changed, 5 insertions(+), 37 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index be9f39fd06df..a492376d6e3f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -155,7 +155,7 @@ config S390 select HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES + select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FENTRY @@ -656,20 +656,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config PACK_STACK - def_bool y - prompt "Pack kernel stack" - help - This option enables the compiler option -mkernel-backchain if it - is available. If the option is available the compiler supports - the new stack layout which dramatically reduces the minimum stack - frame size. With an old compiler a non-leaf function needs a - minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With - -mkernel-backchain the minimum size drops to 16 byte on 31 bit - and 24 byte on 64 bit. - - Say Y if you are unsure. - config CHECK_STACK def_bool y depends on !VMAP_STACK diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 609e3697324b..2edf25b44c4b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -21,7 +21,7 @@ endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables @@ -68,11 +68,6 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),) -cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) @@ -111,7 +106,7 @@ endif # Test CFI features of binutils cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1) -KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) +KBUILD_CFLAGS += -mpacked-stack -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index dd00d98804ec..275f4258fbd5 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -36,22 +36,14 @@ static inline bool on_stack(struct stack_info *info, /* * Stack layout of a C stack frame. + * Kernel uses the packed stack layout (-mpacked-stack). */ -#ifndef __PACK_STACK -struct stack_frame { - unsigned long back_chain; - unsigned long empty1[5]; - unsigned long gprs[10]; - unsigned int empty2[8]; -}; -#else struct stack_frame { unsigned long empty1[5]; unsigned int empty2[8]; unsigned long gprs[10]; unsigned long back_chain; }; -#endif /* * Unlike current_stack_pointer() which simply returns current value of %r15 diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9d90b8bc6692..b88205224f3c 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,12 +26,8 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) #define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) #define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) -#ifdef __PACK_STACK -/* allocate just enough for r14, r15 and backchain */ +/* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 -#else -#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD -#endif .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9ff2bd83aad7..df5d4da06643 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -7,7 +7,6 @@ * - HAVE_MARCH_Z196_FEATURES: laal, laalg * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf - * - PACK_STACK * - 64BIT * * Copyright IBM Corp. 2012,2015 From 81eac9079663bba7020b58c896baca839a6af8f0 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 1 Feb 2022 19:54:22 +0100 Subject: [PATCH 178/380] s390/test_unwind: show tests as skipped if unsupported Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bc7973359ae2..653bf170ee50 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -136,7 +136,6 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } -#ifdef CONFIG_KPROBES static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -145,7 +144,6 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) (u->flags & UWM_SP) ? u->sp : 0); return 0; } -#endif /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) @@ -157,11 +155,13 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; -#ifdef CONFIG_KPROBES } else if (u->flags & UWM_PGM) { struct kprobe kp; int ret; + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + unwindme = u; memset(&kp, 0, sizeof(kp)); kp.symbol_name = "do_report_trap"; @@ -185,7 +185,6 @@ static noinline int unwindme_func4(struct unwindme *u) unregister_kprobe(&kp); unwindme = NULL; return u->ret; -#endif } else { struct pt_regs regs; @@ -327,7 +326,6 @@ static const struct test_params param_list[] = { .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - #ifdef CONFIG_KPROBES {.flags = UWM_PGM, .name = "UWM_PGM"}, {.flags = UWM_PGM | UWM_SP, .name = "UWM_PGM | UWM_SP"}, @@ -335,7 +333,6 @@ static const struct test_params param_list[] = { .name = "UWM_PGM | UWM_REGS"}, {.flags = UWM_PGM | UWM_SP | UWM_REGS, .name = "UWM_PGM | UWM_SP | UWM_REGS"}, - #endif }; /* From 93bd3232448f699f47d06590aeb56edfebab4495 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 31 Jan 2022 19:00:56 +0100 Subject: [PATCH 179/380] s390/test_unwind: minor cleanup - make current_test static - use current_test consistently - add TEST_WITH_FLAGS macro to contract parametrized tests definition Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 72 +++++++++++++++---------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 653bf170ee50..744b47692399 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -16,7 +16,7 @@ #include #include -struct kunit *current_test; +static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) @@ -254,7 +254,7 @@ static int test_unwind_irq(struct unwindme *u) } /* Spawns a task and passes it to test_unwind(). */ -static int test_unwind_task(struct kunit *test, struct unwindme *u) +static int test_unwind_task(struct unwindme *u) { struct task_struct *task; int ret; @@ -269,7 +269,7 @@ static int test_unwind_task(struct kunit *test, struct unwindme *u) */ task = kthread_run(unwindme_func1, u, "%s", __func__); if (IS_ERR(task)) { - kunit_err(test, "kthread_run() failed\n"); + kunit_err(current_test, "kthread_run() failed\n"); return PTR_ERR(task); } /* @@ -292,47 +292,31 @@ struct test_params { /* * Create required parameter list for tests */ +#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f } static const struct test_params param_list[] = { - {.flags = UWM_DEFAULT, .name = "UWM_DEFAULT"}, - {.flags = UWM_SP, .name = "UWM_SP"}, - {.flags = UWM_REGS, .name = "UWM_REGS"}, - {.flags = UWM_SWITCH_STACK, - .name = "UWM_SWITCH_STACK"}, - {.flags = UWM_SP | UWM_REGS, - .name = "UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP, - .name = "WM_CALLER | UWM_SP"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = "UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - {.flags = UWM_THREAD, .name = "UWM_THREAD"}, - {.flags = UWM_THREAD | UWM_SP, - .name = "UWM_THREAD | UWM_SP"}, - {.flags = UWM_THREAD | UWM_CALLER | UWM_SP, - .name = "UWM_THREAD | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ, .name = "UWM_IRQ"}, - {.flags = UWM_IRQ | UWM_SWITCH_STACK, - .name = "UWM_IRQ | UWM_SWITCH_STACK"}, - {.flags = UWM_IRQ | UWM_SP, - .name = "UWM_IRQ | UWM_SP"}, - {.flags = UWM_IRQ | UWM_REGS, - .name = "UWM_IRQ | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - {.flags = UWM_PGM, .name = "UWM_PGM"}, - {.flags = UWM_PGM | UWM_SP, - .name = "UWM_PGM | UWM_SP"}, - {.flags = UWM_PGM | UWM_REGS, - .name = "UWM_PGM | UWM_REGS"}, - {.flags = UWM_PGM | UWM_SP | UWM_REGS, - .name = "UWM_PGM | UWM_SP | UWM_REGS"}, + TEST_WITH_FLAGS(UWM_DEFAULT), + TEST_WITH_FLAGS(UWM_SP), + TEST_WITH_FLAGS(UWM_REGS), + TEST_WITH_FLAGS(UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_THREAD), + TEST_WITH_FLAGS(UWM_THREAD | UWM_SP), + TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_PGM), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP), + TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), }; /* @@ -357,7 +341,7 @@ static void test_unwind_flags(struct kunit *test) params = (const struct test_params *)test->param_value; u.flags = params->flags; if (u.flags & UWM_THREAD) - KUNIT_EXPECT_EQ(test, 0, test_unwind_task(test, &u)); + KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u)); else if (u.flags & UWM_IRQ) KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u)); else From 8a0c9705502701cc6a5d87d70bc6b631ec939265 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 1 Feb 2022 21:04:16 +0100 Subject: [PATCH 180/380] s390/test_unwind: add "backtrace" module parameter By default no backtraces are printed when a test succeeds, but sometimes it is useful to spot issues automated test doesn't cover. Add "backtrace" module parameter to force it. Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 744b47692399..8be9ca263127 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -20,6 +20,10 @@ static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) +static bool force_bt; +module_param_named(backtrace, force_bt, bool, 0444); +MODULE_PARM_DESC(backtrace, "print backtraces for all tests"); + /* * To avoid printk line limit split backtrace by lines */ @@ -98,7 +102,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } - if (ret) + if (ret || force_bt) print_backtrace(bt); kfree(bt); return ret; From 829ec7491c401e4a3068955a438578d2c2ae8acc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 31 Jan 2022 19:06:52 +0100 Subject: [PATCH 181/380] s390/test_unwind: add ftrace test Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 8be9ca263127..2224694a0888 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ static struct unwindme *unwindme; #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ #define UWM_PGM 0x40 /* Unwind from program check handler. */ +#define UWM_FTRACE 0x80 /* Unwind from ftrace handler. */ static __always_inline unsigned long get_psw_addr(void) { @@ -149,6 +151,57 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +static void notrace __used test_unwind_ftrace_handler(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *fops, + struct ftrace_regs *fregs) +{ + struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2]; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); +} + +static noinline int test_unwind_ftraced_func(struct unwindme *u) +{ + return READ_ONCE(u)->ret; +} + +static int test_unwind_ftrace(struct unwindme *u) +{ + struct ftrace_ops *fops; + int ret; + +#ifndef CONFIG_DYNAMIC_FTRACE + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); + fops = NULL; /* used */ +#else + fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); + fops->func = test_unwind_ftrace_handler; + fops->flags = FTRACE_OPS_FL_DYNAMIC | + FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_SAVE_REGS | + FTRACE_OPS_FL_PERMANENT; +#endif + + ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); + if (ret) { + kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret); + return -1; + } + + ret = register_ftrace_function(fops); + if (!ret) { + ret = test_unwind_ftraced_func(u); + unregister_ftrace_function(fops); + } else { + kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret); + } + + ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0); + return ret; +} + /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) { @@ -189,6 +242,8 @@ static noinline int unwindme_func4(struct unwindme *u) unregister_kprobe(&kp); unwindme = NULL; return u->ret; + } else if (u->flags & UWM_FTRACE) { + return test_unwind_ftrace(u); } else { struct pt_regs regs; @@ -321,6 +376,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_PGM | UWM_SP), TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), }; /* From 9ba142f472c1e4ec514f3bad1134b6b6ad8f6c13 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 3 Feb 2022 00:49:41 +0100 Subject: [PATCH 182/380] s390/test_unwind: fix and extend kprobes test Running kprobe test on a kernel built with clang 14 didn't actually trigger pgm_pre_handler() and no unwinder code was called. Even though do_report_trap() is a global symbol, clang inlined it in several local functions including illegal_op() handler, so that kprobbing a global symbol didn't have a desired effect. To achieve the same test result (unwinding from a program check handler) introduce a local function and probe an instruction in the middle, so that kprobe doesn't take KPROBE_ON_FTRACE path. While at it, add another test for KPROBE_ON_FTRACE. Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 83 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 2224694a0888..b209014ce426 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -129,8 +129,9 @@ static struct unwindme *unwindme; #define UWM_CALLER 0x8 /* Unwind starting from caller. */ #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ -#define UWM_PGM 0x40 /* Unwind from program check handler. */ -#define UWM_FTRACE 0x80 /* Unwind from ftrace handler. */ +#define UWM_PGM 0x40 /* Unwind from program check handler */ +#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ +#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ static __always_inline unsigned long get_psw_addr(void) { @@ -142,7 +143,7 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } -static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -151,6 +152,46 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +extern const char test_unwind_kprobed_insn[]; + +static noinline void test_unwind_kprobed_func(void) +{ + asm volatile( + " nopr %%r7\n" + "test_unwind_kprobed_insn:\n" + " nopr %%r7\n" + :); +} + +static int test_unwind_kprobe(struct unwindme *u) +{ + struct kprobe kp; + int ret; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE) + kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + memset(&kp, 0, sizeof(kp)); + kp.pre_handler = kprobe_pre_handler; + kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ? + (kprobe_opcode_t *)test_unwind_kprobed_func : + (kprobe_opcode_t *)test_unwind_kprobed_insn; + ret = register_kprobe(&kp); + if (ret < 0) { + kunit_err(current_test, "register_kprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kprobed_func(); + unregister_kprobe(&kp); + unwindme = NULL; + return u->ret; +} + static void notrace __used test_unwind_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *fops, @@ -212,36 +253,8 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; - } else if (u->flags & UWM_PGM) { - struct kprobe kp; - int ret; - - if (!IS_ENABLED(CONFIG_KPROBES)) - kunit_skip(current_test, "requires CONFIG_KPROBES"); - - unwindme = u; - memset(&kp, 0, sizeof(kp)); - kp.symbol_name = "do_report_trap"; - kp.pre_handler = pgm_pre_handler; - ret = register_kprobe(&kp); - if (ret < 0) { - kunit_err(current_test, "register_kprobe failed %d\n", ret); - return -EINVAL; - } - - /* - * Trigger operation exception; use insn notation to bypass - * llvm's integrated assembler sanity checks. - */ - asm volatile( - " .insn e,0x0000\n" /* illegal opcode */ - "0: nopr %%r7\n" - EX_TABLE(0b, 0b) - :); - - unregister_kprobe(&kp); - unwindme = NULL; - return u->ret; + } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { + return test_unwind_kprobe(u); } else if (u->flags & UWM_FTRACE) { return test_unwind_ftrace(u); } else { @@ -376,6 +389,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_PGM | UWM_SP), TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS), TEST_WITH_FLAGS(UWM_FTRACE), TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), From 4f8206b882868b62dc15ddfc0c17bb031876afb5 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 1 Feb 2019 16:21:11 -0500 Subject: [PATCH 183/380] s390/ap: driver callback to indicate resource in use Introduces a new driver callback to prevent a root user from re-assigning the APQN of a queue that is in use by a non-default host device driver to a default host device driver and vice versa. The callback will be invoked whenever a change to the AP bus's sysfs apmask or aqmask attributes would result in one or more APQNs being re-assigned. If the callback responds in the affirmative for any driver queried, the change to the apmask or aqmask will be rejected with a device busy error. For this patch, only non-default drivers will be queried. Currently, there is only one non-default driver, the vfio_ap device driver. The vfio_ap device driver facilitates pass-through of an AP queue to a guest. The idea here is that a guest may be administered by a different sysadmin than the host and we don't want AP resources to unexpectedly disappear from a guest's AP configuration (i.e., adapters and domains assigned to the matrix mdev). This will enforce the proper procedure for removing AP resources intended for guest usage which is to first unassign them from the matrix mdev, then unbind them from the vfio_ap device driver. Signed-off-by: Tony Krowiak Reviewed-by: Harald Freudenberger Reviewed-by: Halil Pasic Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 145 ++++++++++++++++++++++++++++++++--- drivers/s390/crypto/ap_bus.h | 4 + 2 files changed, 139 insertions(+), 10 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 1986243f9cd3..d71d2d2c341f 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ap_bus.h" #include "ap_debug.h" @@ -1067,6 +1068,23 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits) return 0; } +static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits, + unsigned long *newmap) +{ + unsigned long size; + int rc; + + size = BITS_TO_LONGS(bits) * sizeof(unsigned long); + if (*str == '+' || *str == '-') { + memcpy(newmap, bitmap, size); + rc = modify_bitmap(str, newmap, bits); + } else { + memset(newmap, 0, size); + rc = hex2bitmap(str, newmap, bits); + } + return rc; +} + int ap_parse_mask_str(const char *str, unsigned long *bitmap, int bits, struct mutex *lock) @@ -1086,14 +1104,7 @@ int ap_parse_mask_str(const char *str, kfree(newmap); return -ERESTARTSYS; } - - if (*str == '+' || *str == '-') { - memcpy(newmap, bitmap, size); - rc = modify_bitmap(str, newmap, bits); - } else { - memset(newmap, 0, size); - rc = hex2bitmap(str, newmap, bits); - } + rc = ap_parse_bitmap_str(str, bitmap, bits, newmap); if (rc == 0) memcpy(bitmap, newmap, size); mutex_unlock(lock); @@ -1286,12 +1297,69 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_card_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newapm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away when we invoke the callback function. + */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(newapm, ap_perms.aqm); + if (rc) + rc = -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int apmask_commit(unsigned long *newapm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)]; + + /* + * Check if any bits in the apmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_card_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.apm, newapm, APMASKSIZE); + + return 0; +} + static ssize_t apmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newapm, AP_DEVICES); - rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm); + if (rc) + goto done; + + rc = apmask_commit(newapm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; @@ -1317,12 +1385,69 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_queue_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newaqm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away when we invoke the callback function. + */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(ap_perms.apm, newaqm); + if (rc) + return -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int aqmask_commit(unsigned long *newaqm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)]; + + /* + * Check if any bits in the aqmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_queue_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.aqm, newaqm, AQMASKSIZE); + + return 0; +} + static ssize_t aqmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newaqm, AP_DOMAINS); - rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm); + if (rc) + goto done; + + rc = aqmask_commit(newaqm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 714c7583af5e..7dd992bad949 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -143,6 +143,7 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); + int (*in_use)(unsigned long *apm, unsigned long *aqm); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) @@ -290,6 +291,9 @@ void ap_queue_init_state(struct ap_queue *aq); struct ap_card *ap_card_create(int id, int queue_depth, int raw_type, int comp_type, unsigned int functions, int ml); +#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long)) +#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long)) + struct ap_perms { unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)]; unsigned long apm[BITS_TO_LONGS(AP_DEVICES)]; From 283915850a4455e8af40ce5b8d291dc79638cdae Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 1 Oct 2021 13:39:13 -0400 Subject: [PATCH 184/380] s390/ap: notify drivers on config changed and scan complete callbacks This patch introduces an extension to the ap bus to notify device drivers when the host AP configuration changes - i.e., adapters, domains or control domains are added or removed. When an adapter or domain is added to the host's AP configuration, the AP bus will create the associated queue devices in the linux sysfs device model. Each new type 10 (i.e., CEX4) or newer queue device with an APQN that is not reserved for the default device driver will get bound to the vfio_ap device driver. Likewise, whan an adapter or domain is removed from the host's AP configuration, the AP bus will remove the associated queue devices from the sysfs device model. Each of the queues that is bound to the vfio_ap device driver will get unbound. With the introduction of hot plug support, binding or unbinding of a queue device will result in plugging or unplugging one or more queues from a guest that is using the queue. If there are multiple changes to the host's AP configuration, it could result in the probe and remove callbacks getting invoked multiple times. Each time queues are plugged into or unplugged from a guest, the guest's VCPUs must be taken out of SIE. If this occurs multiple times due to changes in the host's AP configuration, that can have an undesirable negative affect on the guest's performance. To alleviate this problem, this patch introduces two new callbacks: one to notify the vfio_ap device driver when the AP bus scan routine detects a change to the host's AP configuration; and, one to notify the driver when the AP bus is done scanning. This will allow the vfio_ap driver to do bulk processing of all affected adapters, domains and control domains for affected guests rather than plugging or unplugging them one at a time when the probe or remove callback is invoked. The two new callbacks are: void (*on_config_changed)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); This callback is invoked at the start of the AP bus scan function when it determines that the host AP configuration information has changed since the previous scan. This is done by storing an old and current QCI info struct and comparing them. If there is any difference, the callback is invoked. void (*on_scan_complete)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); The on_scan_complete callback is invoked after the ap bus scan is completed if the host AP configuration data has changed. Signed-off-by: Tony Krowiak Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 81 +++++++++++++++++++++++++++++++++++- drivers/s390/crypto/ap_bus.h | 12 ++++++ 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d71d2d2c341f..f5fae8b62bdf 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -92,6 +92,7 @@ static atomic64_t ap_bindings_complete_count = ATOMIC64_INIT(0); static DECLARE_COMPLETION(ap_init_apqn_bindings_complete); static struct ap_config_info *ap_qci_info; +static struct ap_config_info *ap_qci_info_old; /* * AP bus related debug feature things. @@ -229,9 +230,14 @@ static void __init ap_init_qci_info(void) ap_qci_info = kzalloc(sizeof(*ap_qci_info), GFP_KERNEL); if (!ap_qci_info) return; + ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); + if (!ap_qci_info_old) + return; if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); + kfree(ap_qci_info_old); ap_qci_info = NULL; + ap_qci_info_old = NULL; return; } AP_DBF_INFO("%s successful fetched initial qci info\n", __func__); @@ -248,6 +254,8 @@ static void __init ap_init_qci_info(void) __func__, ap_max_domain_id); } } + + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); } /* @@ -1630,6 +1638,49 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data; } +/* Helper function for notify_config_changed */ +static int __drv_notify_config_changed(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_config_changed) + ap_drv->on_config_changed(ap_qci_info, ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about an qci config change */ +static inline void notify_config_changed(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_config_changed); +} + +/* Helper function for notify_scan_complete */ +static int __drv_notify_scan_complete(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_scan_complete) + ap_drv->on_scan_complete(ap_qci_info, + ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about bus scan complete */ +static inline void notify_scan_complete(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_scan_complete); +} + /* * Helper function for ap_scan_bus(). * Remove card device and associated queue devices. @@ -1917,6 +1968,25 @@ static inline void ap_scan_adapter(int ap) put_device(&ac->ap_dev.device); } +/** + * ap_get_configuration - get the host AP configuration + * + * Stores the host AP configuration information returned from the previous call + * to Query Configuration Information (QCI), then retrieves and stores the + * current AP configuration returned from QCI. + * + * Return: true if the host AP configuration changed between calls to QCI; + * otherwise, return false. + */ +static bool ap_get_configuration(void) +{ + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); + ap_fetch_qci_info(ap_qci_info); + + return memcmp(ap_qci_info, ap_qci_info_old, + sizeof(struct ap_config_info)) != 0; +} + /** * ap_scan_bus(): Scan the AP bus for new devices * Runs periodically, workqueue timer (ap_config_time) @@ -1924,9 +1994,12 @@ static inline void ap_scan_adapter(int ap) */ static void ap_scan_bus(struct work_struct *unused) { - int ap; + int ap, config_changed = 0; - ap_fetch_qci_info(ap_qci_info); + /* config change notify */ + config_changed = ap_get_configuration(); + if (config_changed) + notify_config_changed(); ap_select_domain(); AP_DBF_DBG("%s running\n", __func__); @@ -1935,6 +2008,10 @@ static void ap_scan_bus(struct work_struct *unused) for (ap = 0; ap <= ap_max_adapter_id; ap++) ap_scan_adapter(ap); + /* scan complete notify */ + if (config_changed) + notify_scan_complete(); + /* check if there is at least one queue available with default domain */ if (ap_domain_index >= 0) { struct device *dev = diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 7dd992bad949..dc6f563e8787 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -144,6 +144,18 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); int (*in_use)(unsigned long *apm, unsigned long *aqm); + /* + * Called at the start of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_config_changed)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); + /* + * Called at the end of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_scan_complete)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) From 8944d05f9bbf910c8b241e29a3de114900e31e42 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 16 Feb 2022 12:30:34 +0100 Subject: [PATCH 185/380] s390/ap: enable sysfs attribute scans to force AP bus rescan This patch switches the sysfs attribute /sys/bus/ap/scans from read-only to read-write. If there is something written to this attribute, an AP bus rescan is forced. If an AP bus scan is triggered this way a debug feature entry line reports this in /sys/kernel/debug/s390dbf/ap/sprintf. Signed-off-by: Harald Freudenberger Reviewed-by: Jakob Naucke Reviewed-by: Juergen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f5fae8b62bdf..555cc3394fe3 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1472,7 +1472,17 @@ static ssize_t scans_show(struct bus_type *bus, char *buf) atomic64_read(&ap_scan_bus_count)); } -static BUS_ATTR_RO(scans); +static ssize_t scans_store(struct bus_type *bus, const char *buf, + size_t count) +{ + AP_DBF_INFO("%s force AP bus rescan\n", __func__); + + ap_bus_force_rescan(); + + return count; +} + +static BUS_ATTR_RW(scans); static ssize_t bindings_show(struct bus_type *bus, char *buf) { From 4851d2262236c27cafbecb0fd2440db1f1e726d3 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 15 Feb 2022 14:10:48 +0100 Subject: [PATCH 186/380] s390/smp: sort out physical vs virtual pointers usage With commit 5789284710aa ("s390/smp: reallocate IPL CPU lowcore") virtual addresses are wrongly passed to memblock_free_late() and SPX instructions on IPL CPU reinitialization. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Fixes: 5789284710aa ("s390/smp: reallocate IPL CPU lowcore") Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4f0e9f412f27..368b58e4c2e7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1252,7 +1252,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc) src.odd = sizeof(S390_lowcore); dst.even = (unsigned long) lc; dst.odd = sizeof(*lc); - pfx = (unsigned long) lc; + pfx = __pa(lc); asm volatile( " mvcl %[dst],%[src]\n" @@ -1292,8 +1292,8 @@ static int __init smp_reinit_ipl_cpu(void) local_irq_restore(flags); free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); - memblock_free_late(lc_ipl->mcck_stack - STACK_INIT_OFFSET, THREAD_SIZE); - memblock_free_late((unsigned long) lc_ipl, sizeof(*lc_ipl)); + memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); return 0; } From 96f6641a6a284102fe9f52c9789e98f0a18ece11 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Feb 2022 15:46:01 +0100 Subject: [PATCH 187/380] s390/ptrace: remove opencoded offsetof Remove opencoded offsetof and use offsetof instead. The generated code is identical before/after this change. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 164 ++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 88 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 0ea3d02b378d..ed3439515bb2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -147,38 +147,36 @@ void ptrace_disable(struct task_struct *task) static inline unsigned long __peek_user_per(struct task_struct *child, addr_t addr) { - struct per_struct_kernel *dummy = NULL; - - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* Control bits of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy->cr10) + else if (addr == offsetof(struct per_struct_kernel, cr10)) /* Start address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy->cr11) + else if (addr == offsetof(struct per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? -1UL : child->thread.per_user.end; - else if (addr == (addr_t) &dummy->bits) + else if (addr == offsetof(struct per_struct_kernel, bits)) /* Single-step bit. */ return test_thread_flag(TIF_SINGLE_STEP) ? (1UL << (BITS_PER_LONG - 1)) : 0; - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return child->thread.per_user.start; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* End address of the user specified per set. */ return child->thread.per_user.end; - else if (addr == (addr_t) &dummy->perc_atmid) + else if (addr == offsetof(struct per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (unsigned long) child->thread.per_event.cause << (BITS_PER_LONG - 16); - else if (addr == (addr_t) &dummy->address) + else if (addr == offsetof(struct per_struct_kernel, address)) /* Address of the last PER trap */ return child->thread.per_event.address; - else if (addr == (addr_t) &dummy->access_id) + else if (addr == offsetof(struct per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (unsigned long) child->thread.per_event.paid << (BITS_PER_LONG - 8); @@ -196,61 +194,60 @@ static inline unsigned long __peek_user_per(struct task_struct *child, */ static unsigned long __peek_user(struct task_struct *child, addr_t addr) { - struct user *dummy = NULL; addr_t offset, tmp; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { /* Return a clean psw mask. */ tmp &= PSW_MASK_USER | PSW_MASK_RI; tmp |= PSW_USER_BITS; } - } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb reading * from acrs[15]. Result is a 64 bit value. Read the * 32 bit acrs[15] value and shift it by 32. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) tmp = ((unsigned long) child->thread.acrs[15]) << 32; else tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; tmp <<= BITS_PER_LONG - 32; - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(addr_t *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -258,11 +255,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) tmp = *(addr_t *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); tmp = __peek_user_per(child, addr); } else @@ -281,8 +278,8 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -294,8 +291,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) static inline void __poke_user_per(struct task_struct *child, addr_t addr, addr_t data) { - struct per_struct_kernel *dummy = NULL; - /* * There are only three fields in the per_info struct that the * debugger user can write to. @@ -308,14 +303,14 @@ static inline void __poke_user_per(struct task_struct *child, * addresses are used only if single stepping is not in effect. * Writes to any other field in per_info are ignored. */ - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. */ child->thread.per_user.end = data; } @@ -328,16 +323,15 @@ static inline void __poke_user_per(struct task_struct *child, */ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t offset; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; @@ -359,36 +353,36 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) regs->int_code = 0x20000 | (data & 0xffff); } *(addr_t *)((addr_t) ®s->psw + addr) = data; - } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb writing * to acrs[15] with a 64 bit value. Ignore the lower * half of the value and write the upper 32 bit to * acrs[15]. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) child->thread.acrs[15] = (unsigned int) (data >> 32); else *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ task_pt_regs(child)->orig_gpr2 = data; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent writes of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -397,12 +391,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) return -EINVAL; child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(addr_t *)((addr_t) child->thread.fpu.vxrs + 2*offset) = data; @@ -410,11 +404,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) *(addr_t *)((addr_t) child->thread.fpu.fprs + offset) = data; - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); __poke_user_per(child, addr, data); } @@ -431,8 +425,8 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell indeed... */ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -540,37 +534,35 @@ long arch_ptrace(struct task_struct *child, long request, static inline __u32 __peek_user_per_compat(struct task_struct *child, addr_t addr) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* Control bits of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy32->cr10) + else if (addr == offsetof(struct compat_per_struct_kernel, cr10)) /* Start address of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->cr11) + else if (addr == offsetof(struct compat_per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PSW32_ADDR_INSN : child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->bits) + else if (addr == offsetof(struct compat_per_struct_kernel, bits)) /* Single-step bit. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0x80000000 : 0; - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return (__u32) child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* End address of the user specified per set. */ return (__u32) child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->perc_atmid) + else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (__u32) child->thread.per_event.cause << 16; - else if (addr == (addr_t) &dummy32->address) + else if (addr == offsetof(struct compat_per_struct_kernel, address)) /* Address of the last PER trap */ return (__u32) child->thread.per_event.address; - else if (addr == (addr_t) &dummy32->access_id) + else if (addr == offsetof(struct compat_per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (__u32) child->thread.per_event.paid << 24; return 0; @@ -581,21 +573,20 @@ static inline __u32 __peek_user_per_compat(struct task_struct *child, */ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { /* Fake a 31 bit psw mask. */ tmp = (__u32)(regs->psw.mask >> 32); tmp &= PSW32_MASK_USER | PSW32_MASK_RI; tmp |= PSW32_USER_BITS; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Fake a 31 bit psw address. */ tmp = (__u32) regs->psw.addr | (__u32)(regs->psw.mask & PSW_MASK_BA); @@ -603,38 +594,38 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) /* gpr 0-15 */ tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(__u32 *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -642,11 +633,11 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) tmp = *(__u32 *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); tmp = __peek_user_per_compat(child, addr); } else @@ -673,16 +664,14 @@ static int peek_user_compat(struct task_struct *child, static inline void __poke_user_per_compat(struct task_struct *child, addr_t addr, __u32 data) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. */ child->thread.per_user.end = data; } @@ -693,16 +682,15 @@ static inline void __poke_user_per_compat(struct task_struct *child, static int __poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - struct compat_user *dummy32 = NULL; __u32 tmp = (__u32) data; addr_t offset; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { __u32 mask = PSW32_MASK_USER; mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; @@ -716,7 +704,7 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Build a 64 bit psw address from 31 bit address. */ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; /* Transfer 31 bit amode bit to psw mask. */ @@ -732,27 +720,27 @@ static int __poke_user_compat(struct task_struct *child, /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent writess of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -760,12 +748,12 @@ static int __poke_user_compat(struct task_struct *child, return -EINVAL; child->thread.fpu.fpc = data; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(__u32 *)((addr_t) child->thread.fpu.vxrs + 2*offset) = tmp; @@ -773,11 +761,11 @@ static int __poke_user_compat(struct task_struct *child, *(__u32 *)((addr_t) child->thread.fpu.fprs + offset) = tmp; - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); __poke_user_per_compat(child, addr, data); } From 1a5e3f262e0310365cf7b5c8b8fc3a6e94a19cb7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 22 Feb 2022 15:27:52 +0100 Subject: [PATCH 188/380] s390/ftrace: make use of epsw to get psw mask Finally use epsw to create a complete psw mask within pt_regs. Without this only some bits are correct, while other bits are (incorrectly) always zero. The epsw instruction is quite heavy weight, however given that this only effects ftrace_regs_caller this seems to be the right thing, so we finally get a complete psw mask for ftrace kprobed functions. Reviewed-by: Sven Schnelle Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/mcount.S | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index b88205224f3c..6ace43d7e8d2 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -33,9 +33,16 @@ ENDPROC(ftrace_stub) stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller .if \allregs == 1 - lghi %r14,0 # save condition code - ipm %r14 # don't put any instructions - sllg %r14,%r14,16 # clobbering CC before this point + # save psw mask + # don't put any instructions clobbering CC before this point +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + epsw %r1,%r14 + risbg %r14,%r1,0,31,32 +#else + epsw %r14,%r1 + sllg %r14,%r14,32 + lr %r14,%r1 +#endif .endif lgr %r1,%r15 @@ -51,7 +58,6 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) - stosm (STACK_PTREGS_PSW)(%r15),0 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS #else From f0003a9e4c18ae71308fb6b24de8248ac6180777 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 09:41:24 +0100 Subject: [PATCH 189/380] s390/entry: remove unused expoline thunk Remove __s390_indirect_jump_r13use_r14 expoline thunk unused since commit fbbdfca5c553 ("s390/entry.S: factor out SIEEXIT macro"). Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dc7347e43ec2..1f6df6d4a914 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -190,7 +190,6 @@ _LPP_OFFSET = __LC_LPP #endif GEN_BR_THUNK %r14 - GEN_BR_THUNK %r14,%r13 .section .kprobes.text, "ax" .Ldummy: From e2aaae2d3677563c1bb6cd15fbddd701381823be Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Feb 2022 16:08:29 +0100 Subject: [PATCH 190/380] s390/mm: add set_pXd()/set_pte() helper functions Add set_pXd()/set_pte() helper functions which must be used to update page table entries. The new helpers use WRITE_ONCE() to make sure that a page table entry is written to only once. Without this the compiler could otherwise generate code which writes several times to a page table entry when updating its contents from invalid to valid, which could lead to surprising results especially for multithreaded processes... Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 008a6c856fa4..2aa8057c7ef8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -881,6 +881,31 @@ static inline pgprot_t pte_pgprot(pte_t pte) * pgd/pmd/pte modification functions */ +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pgd); +} + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + WRITE_ONCE(*p4dp, p4d); +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + static inline void pgd_clear(pgd_t *pgd) { if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) From f29111f117aaafa2887e13923f6e88fc11f5e065 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:18:29 +0100 Subject: [PATCH 191/380] s390/mm: add set_pte_bit()/clear_pte_bit() helper functions Add set_pte_bit()/clear_pte_bit() and set_pXd_bit()/clear_pXd_bit helper functions which are supposed to be used if bits within ptes/pXds are set/cleared. The only point of these helper functions is to get more readable code. This is quite similar to what arm64 has. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2aa8057c7ef8..4bbb6d38365f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -538,6 +538,36 @@ static inline int mm_alloc_pgste(struct mm_struct *mm) return 0; } +static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) +{ + return __pte(pte_val(pte) & ~pgprot_val(prot)); +} + +static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) +{ + return __pte(pte_val(pte) | pgprot_val(prot)); +} + +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) & ~pgprot_val(prot)); +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) | pgprot_val(prot)); +} + +static inline pud_t clear_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) & ~pgprot_val(prot)); +} + +static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) | pgprot_val(prot)); +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages From b8e3b37900a57f9e7b32a92cb16313b32c41c0db Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 20:50:07 +0100 Subject: [PATCH 192/380] s390/mm: use set_pXd()/set_pte() helper functions everywhere Use the new set_pXd()/set_pte() helper functions at all places where page table entries are modified. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/hugetlb.h | 4 ++-- arch/s390/include/asm/pgalloc.h | 8 ++++---- arch/s390/include/asm/pgtable.h | 18 +++++++++--------- arch/s390/mm/gmap.c | 12 ++++++------ arch/s390/mm/hugetlbpage.c | 2 +- arch/s390/mm/kasan_init.c | 8 ++++---- arch/s390/mm/pageattr.c | 17 +++++++++-------- arch/s390/mm/pgtable.c | 22 +++++++++++----------- arch/s390/mm/vmem.c | 10 +++++----- 9 files changed, 51 insertions(+), 50 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 60f9241e5e4a..bea47e7cc6a0 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -45,9 +45,9 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + set_pte(ptep, __pte(_REGION3_ENTRY_EMPTY)); else - pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; + set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY)); } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f14a555eff74..17eb618f1348 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -103,17 +103,17 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { - pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); + set_pgd(pgd, __pgd(_REGION1_ENTRY | __pa(p4d))); } static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); + set_p4d(p4d, __p4d(_REGION2_ENTRY | __pa(pud))); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd))); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -129,7 +129,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); + set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte))); } #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4bbb6d38365f..2ea58513da97 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -939,29 +939,29 @@ static inline void set_pte(pte_t *ptep, pte_t pte) static inline void pgd_clear(pgd_t *pgd) { if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; + set_pgd(pgd, __pgd(_REGION1_ENTRY_EMPTY)); } static inline void p4d_clear(p4d_t *p4d) { if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; + set_p4d(p4d, __p4d(_REGION2_ENTRY_EMPTY)); } static inline void pud_clear(pud_t *pud) { if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pud_val(*pud) = _REGION3_ENTRY_EMPTY; + set_pud(pud, __pud(_REGION3_ENTRY_EMPTY)); } static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); } /* @@ -1169,7 +1169,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (full) { res = *ptep; - *ptep = __pte(_PAGE_INVALID); + set_pte(ptep, __pte(_PAGE_INVALID)); } else { res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } @@ -1257,7 +1257,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else - *ptep = entry; + set_pte(ptep, entry); } /* @@ -1641,7 +1641,7 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, { if (!MACHINE_HAS_NX) pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC; - *pmdp = entry; + set_pmd(pmdp, entry); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -1666,7 +1666,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, { if (full) { pmd_t pmd = *pmdp; - *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); return pmd; } return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index dfee0ebb2fac..8cdc77fee41c 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -985,7 +985,7 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, } if (bits & GMAP_NOTIFY_MPROT) - pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); /* Shadow GMAP protection needs split PMDs */ if (bits & GMAP_NOTIFY_SHADOW) @@ -1151,7 +1151,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; *val = *(unsigned long *) address; - pte_val(*ptep) |= _PAGE_YOUNG; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! */ rc = 0; } @@ -2275,7 +2275,7 @@ EXPORT_SYMBOL_GPL(ptep_notify); static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, unsigned long gaddr) { - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); } @@ -2302,7 +2302,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *pmdp = new; + set_pmd(pmdp, new); } static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, @@ -2324,7 +2324,7 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, _SEGMENT_ENTRY_GMAP_UC)); if (purge) __pmdp_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } spin_unlock(&gmap->guest_table_lock); } @@ -2447,7 +2447,7 @@ static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, return false; /* Clear UC indication and reset protection */ - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC))); gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); return true; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 082793d497ec..082b72d29bb5 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -168,7 +168,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _SEGMENT_ENTRY_LARGE; clear_huge_pte_skeys(mm, rste); - pte_val(*ptep) = rste; + set_pte(ptep, __pte(rste)); } pte_t huge_ptep_get(pte_t *ptep) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 483b9dbe0970..9f988d4582ed 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -175,7 +175,7 @@ static void __init kasan_early_pgtable_populate(unsigned long address, page = kasan_early_alloc_segment(); memset(page, 0, _SEGMENT_SIZE); } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; + set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot)); address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -194,16 +194,16 @@ static void __init kasan_early_pgtable_populate(unsigned long address, switch (mode) { case POPULATE_ONE2ONE: page = (void *)address; - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_MAP: page = kasan_early_alloc_pages(0); memset(page, 0, PAGE_SIZE); - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_ZERO_SHADOW: page = kasan_early_shadow_page; - pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero)); break; case POPULATE_SHALLOW: /* should never happen */ diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 654019181a37..2959bfa0eb54 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -127,7 +127,7 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) prot &= ~_PAGE_NOEXEC; ptep = pt_dir; for (i = 0; i < PTRS_PER_PTE; i++) { - pte_val(*ptep) = pte_addr | prot; + set_pte(ptep, __pte(pte_addr | prot)); pte_addr += PAGE_SIZE; ptep++; } @@ -208,7 +208,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) prot &= ~_SEGMENT_ENTRY_NOEXEC; pmdp = pm_dir; for (i = 0; i < PTRS_PER_PMD; i++) { - pmd_val(*pmdp) = pmd_addr | prot; + set_pmd(pmdp, __pmd(pmd_addr | prot)); pmd_addr += PMD_SIZE; pmdp++; } @@ -347,23 +347,24 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + pte_t *ptep, pte; int nr, i, j; - pte_t *pte; for (i = 0; i < numpages;) { address = (unsigned long)page_to_virt(page + i); - pte = virt_to_kpte(address); - nr = (unsigned long)pte >> ilog2(sizeof(long)); + ptep = virt_to_kpte(address); + nr = (unsigned long)ptep >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); + set_pte(ptep, pte); address += PAGE_SIZE; - pte++; + ptep++; } } else { - ipte_range(pte, address, nr); + ipte_range(ptep, address, nr); } i += nr; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index c16232cd0ec5..26f8a53f0ae1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -115,7 +115,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pte_val(*ptep) |= _PAGE_INVALID; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID))); mm->context.flush_mm = 1; } else ptep_ipte_global(mm, addr, ptep, nodat); @@ -232,7 +232,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) pgste_val(pgste) |= PGSTE_UC_BIT; } #endif - *ptep = entry; + set_pte(ptep, entry); return pgste; } @@ -280,7 +280,7 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm, pgste = pgste_set_pte(ptep, pgste, new); pgste_set_unlock(ptep, pgste); } else { - *ptep = new; + set_pte(ptep, new); } return old; } @@ -352,7 +352,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste = pgste_set_pte(ptep, pgste, pte); pgste_set_unlock(ptep, pgste); } else { - *ptep = pte; + set_pte(ptep, pte); } preempt_enable(); } @@ -417,7 +417,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID))); mm->context.flush_mm = 1; if (mm_has_pgste(mm)) gmap_pmdp_invalidate(mm, addr); @@ -469,7 +469,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -482,7 +482,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -539,7 +539,7 @@ pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pudp_flush_direct(mm, addr, pudp); - *pudp = new; + set_pud(pudp, new); preempt_enable(); return old; } @@ -579,9 +579,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); ptep++; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -776,7 +776,7 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pte_val(pte) |= _PAGE_PROTECT; else pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; + set_pte(ptep, pte); } pgste_set_unlock(ptep, pgste); return dirty; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5410775639c5..72c525ce7221 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -174,9 +174,9 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, if (!new_page) goto out; - pte_val(*pte) = __pa(new_page) | prot; + set_pte(pte, __pte(__pa(new_page) | prot)); } else { - pte_val(*pte) = __pa(addr) | prot; + set_pte(pte, __pte(__pa(addr) | prot)); } } else { continue; @@ -242,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, IS_ALIGNED(next, PMD_SIZE) && MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { - pmd_val(*pmd) = __pa(addr) | prot; + set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; } else if (!direct && MACHINE_HAS_EDAT1) { @@ -257,7 +257,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, */ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); if (new_page) { - pmd_val(*pmd) = __pa(new_page) | prot; + set_pmd(pmd, __pmd(__pa(new_page) | prot)); if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(next, PMD_SIZE)) { vmemmap_use_new_sub_pmd(addr, next); @@ -338,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE) && MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { - pud_val(*pud) = __pa(addr) | prot; + set_pud(pud, __pud(__pa(addr) | prot)); pages++; continue; } From 4a366f519a3c8883a6bc24efa710bc0e75bc3558 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:01 +0100 Subject: [PATCH 193/380] s390/mm,pgtable: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 144 +++++++++++++++----------------- arch/s390/mm/pgtable.c | 22 ++--- 2 files changed, 77 insertions(+), 89 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2ea58513da97..a3f26e3aa8b5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -834,15 +834,13 @@ static inline int pte_soft_dirty(pte_t pte) static inline pte_t pte_mksoft_dirty(pte_t pte) { - pte_val(pte) |= _PAGE_SOFT_DIRTY; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_mksoft_dirty pte_mksoft_dirty static inline pte_t pte_clear_soft_dirty(pte_t pte) { - pte_val(pte) &= ~_PAGE_SOFT_DIRTY; - return pte; + return clear_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_clear_soft_dirty pte_clear_soft_dirty @@ -853,14 +851,12 @@ static inline int pmd_soft_dirty(pmd_t pmd) static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } /* @@ -970,79 +966,74 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte_val(pte) &= _PAGE_CHG_MASK; - pte_val(pte) |= pgprot_val(newprot); + pte = clear_pte_bit(pte, __pgprot(~_PAGE_CHG_MASK)); + pte = set_pte_bit(pte, newprot); /* * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX * has the invalid bit set, clear it again for readable, young pages */ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); /* * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page * protection bit set, clear it again for writable, dirty pages */ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_WRITE; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_WRITE)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_WRITE; + pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE)); if (pte_val(pte) & _PAGE_DIRTY) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_DIRTY; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_DIRTY)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY; + pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY | _PAGE_SOFT_DIRTY)); if (pte_val(pte) & _PAGE_WRITE) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkold(pte_t pte) { - pte_val(pte) &= ~_PAGE_YOUNG; - pte_val(pte) |= _PAGE_INVALID; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG)); + return set_pte_bit(pte, __pgprot(_PAGE_INVALID)); } static inline pte_t pte_mkyoung(pte_t pte) { - pte_val(pte) |= _PAGE_YOUNG; + pte = set_pte_bit(pte, __pgprot(_PAGE_YOUNG)); if (pte_val(pte) & _PAGE_READ) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); return pte; } static inline pte_t pte_mkspecial(pte_t pte) { - pte_val(pte) |= _PAGE_SPECIAL; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SPECIAL)); } #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= _PAGE_LARGE; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_LARGE)); } #endif @@ -1253,7 +1244,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { if (pte_present(entry)) - pte_val(entry) &= ~_PAGE_UNUSED; + entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED)); if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else @@ -1268,9 +1259,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; - pte_val(__pte) = physpage | pgprot_val(pgprot); + __pte = __pte(physpage | pgprot_val(pgprot)); if (!MACHINE_HAS_NX) - pte_val(__pte) &= ~_PAGE_NOEXEC; + __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1410,61 +1401,57 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pmd_t pmd_mkclean(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY)); if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pud_t pud_wrprotect(pud_t pud) { - pud_val(pud) &= ~_REGION3_ENTRY_WRITE; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkwrite(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_WRITE; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); if (pud_val(pud) & _REGION3_ENTRY_DIRTY) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } static inline pud_t pud_mkclean(pud_t pud) { - pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkdirty(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY)); if (pud_val(pud) & _REGION3_ENTRY_WRITE) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } @@ -1488,37 +1475,39 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | - _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; - pmd_val(pmd) |= massage_pgprot_pmd(newprot); + unsigned long mask; + + mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + mask |= _SEGMENT_ENTRY_DIRTY; + mask |= _SEGMENT_ENTRY_YOUNG; + mask |= _SEGMENT_ENTRY_LARGE; + mask |= _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = __pmd(pmd_val(pmd) & mask); + pmd = set_pmd_bit(pmd, __pgprot(massage_pgprot_pmd(newprot))); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return __pmd(physpage + massage_pgprot_pmd(pgprot)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ @@ -1640,16 +1629,15 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { if (!MACHINE_HAS_NX) - pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC; + entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_LARGE)); + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -1745,12 +1733,12 @@ static inline int has_transparent_hugepage(void) static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { - pte_t pte; + unsigned long pteval; - pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; - pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; - pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; - return pte; + pteval = _PAGE_INVALID | _PAGE_PROTECT; + pteval |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pteval |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; + return __pte(pteval); } static inline unsigned long __swp_type(swp_entry_t entry) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 26f8a53f0ae1..697df02362af 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -224,8 +224,8 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; + entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY)); + entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ @@ -275,7 +275,7 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm, pgste = pgste_update_all(old, pgste, mm); if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) - pte_val(old) |= _PAGE_UNUSED; + old = set_pte_bit(old, __pgprot(_PAGE_UNUSED)); } pgste = pgste_set_pte(ptep, pgste, new); pgste_set_unlock(ptep, pgste); @@ -345,7 +345,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm = vma->vm_mm; if (!MACHINE_HAS_NX) - pte_val(pte) &= ~_PAGE_NOEXEC; + pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); @@ -646,12 +646,12 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, if (prot == PROT_NONE && !pte_i) { ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); - pte_val(entry) |= _PAGE_INVALID; + entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID)); } if (prot == PROT_READ && !pte_p) { ptep_flush_direct(mm, addr, ptep, nodat); - pte_val(entry) &= ~_PAGE_INVALID; - pte_val(entry) |= _PAGE_PROTECT; + entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); + entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } pgste_val(pgste) |= bit; pgste = pgste_set_pte(ptep, pgste, entry); @@ -675,8 +675,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, !(pte_val(pte) & _PAGE_PROTECT))) { pgste_val(spgste) |= PGSTE_VSIE_BIT; tpgste = pgste_get_lock(tptep); - pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | - (pte_val(pte) & _PAGE_PROTECT); + tpte = __pte((pte_val(spte) & PAGE_MASK) | + (pte_val(pte) & _PAGE_PROTECT)); /* don't touch the storage key - it belongs to parent pgste */ tpgste = pgste_set_pte(tptep, tpgste, tpte); pgste_set_unlock(tptep, tpgste); @@ -773,9 +773,9 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; + pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else - pte_val(pte) |= _PAGE_INVALID; + pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); set_pte(ptep, pte); } pgste_set_unlock(ptep, pgste); From 869a9dbc10d25c5544169209fd4284f1f570c233 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:35 +0100 Subject: [PATCH 194/380] s390/mm,pageattr: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/pageattr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 2959bfa0eb54..85195c18b2e8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -98,9 +98,9 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); if (flags & SET_MEMORY_NX) - pte_val(new) |= _PAGE_NOEXEC; + new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); else if (flags & SET_MEMORY_X) - pte_val(new) &= ~_PAGE_NOEXEC; + new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; addr += PAGE_SIZE; @@ -131,7 +131,7 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) pte_addr += PAGE_SIZE; ptep++; } - pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); update_page_count(PG_DIRECT_MAP_1M, -1); @@ -148,9 +148,9 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); if (flags & SET_MEMORY_NX) - pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -212,7 +212,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) pmd_addr += PMD_SIZE; pmdp++; } - pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + new = __pud(__pa(pm_dir) | _REGION3_ENTRY); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); update_page_count(PG_DIRECT_MAP_2G, -1); @@ -229,9 +229,9 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); if (flags & SET_MEMORY_NX) - pud_val(new) |= _REGION_ENTRY_NOEXEC; + new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pud_val(new) &= ~_REGION_ENTRY_NOEXEC; + new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } From 933b7253adf61eb4b7be2e7b1ec64217a5d8434c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:51 +0100 Subject: [PATCH 195/380] s390/mm,hugetlb: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/hugetlbpage.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 082b72d29bb5..10e51ef9c79a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -73,8 +73,8 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + unsigned long pteval; int present; - pte_t pte; if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) present = pud_present(__pud(rste)); @@ -102,29 +102,21 @@ static inline pte_t __rste_to_pte(unsigned long rste) * u unused, l large */ if (present) { - pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; - pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, - _PAGE_READ); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, - _PAGE_WRITE); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, - _PAGE_INVALID); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, - _PAGE_PROTECT); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, - _PAGE_DIRTY); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, - _PAGE_YOUNG); + pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; + pteval |= _PAGE_LARGE | _PAGE_PRESENT; + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_SOFT_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, - _PAGE_NOEXEC); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); } else - pte_val(pte) = _PAGE_INVALID; - return pte; + pteval = _PAGE_INVALID; + return __pte(pteval); } static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) From e1fc74ff23463f308709b824eed0d73e0a345f4e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:25:09 +0100 Subject: [PATCH 196/380] s390/mm,gmap: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/gmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 8cdc77fee41c..a57224a4c141 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -974,13 +974,13 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, return -EAGAIN; if (prot == PROT_NONE && !pmd_i) { - pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } if (prot == PROT_READ && !pmd_p) { - pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; - pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } @@ -2294,7 +2294,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, { gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); - pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); From 966ffbd8615a5c1b0bf474314bbd9ea8eb6ef423 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:25:19 +0100 Subject: [PATCH 197/380] s390/mm: convert pte_val()/pXd_val() into functions Disallow constructs like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table. Users are supposed to use the set_pte()/set_pXd() primitives, which guarantee block concurrent (aka atomic) writes. Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/page.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index d98d17a36c7b..5a831a6b8fc9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,11 +90,31 @@ typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) #define pgste_val(x) ((x).pgste) -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define p4d_val(x) ((x).p4d) -#define pgd_val(x) ((x).pgd) + +static inline unsigned long pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} + +static inline unsigned long pud_val(pud_t pud) +{ + return pud.pud; +} + +static inline unsigned long p4d_val(p4d_t p4d) +{ + return p4d.p4d; +} + +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) From e40b38a41ce916d6a3a4751d59a01b6c0c03afd0 Mon Sep 17 00:00:00 2001 From: Nour-eddine Taleb Date: Thu, 3 Mar 2022 15:34:16 +0100 Subject: [PATCH 198/380] KVM: PPC: Book3S HV: remove unnecessary casts Remove unnecessary casts, from "void *" to "struct kvmppc_xics *" Signed-off-by: Nour-eddine Taleb Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220303143416.201851-1-kernel.noureddine@gmail.com --- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/book3s_xive.c | 2 +- arch/powerpc/kvm/book3s_xive_native.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 306c85e70eea..ab6d37d78c62 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1431,7 +1431,7 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) static void kvmppc_xics_init(struct kvm_device *dev) { - struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + struct kvmppc_xics *xics = dev->private; xics_debugfs_init(xics); } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 37a56cbb1701..c0ce5531d9bc 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2362,7 +2362,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive) static void kvmppc_xive_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_debugfs_init(xive); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 3c2b128e5f0f..f81ba6f84e72 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1267,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive) static void kvmppc_xive_native_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_native_debugfs_init(xive); From 06687a03805e29dcf068a8e6436ed2a2bbd8b9e9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 1 Mar 2022 14:26:22 +0530 Subject: [PATCH 199/380] iommu/amd: Improve error handling for amd_iommu_init_pci Add error messages to prevent silent failure. Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220301085626.87680-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index b10fb52ea442..6b5af568f3d5 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1943,9 +1943,11 @@ static int __init amd_iommu_init_pci(void) for_each_iommu(iommu) { ret = iommu_init_pci(iommu); - if (ret) - break; - + if (ret) { + pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", + iommu->index, ret); + goto out; + } /* Need to setup range after PCI init */ iommu_set_cwwb_range(iommu); } @@ -1961,6 +1963,11 @@ static int __init amd_iommu_init_pci(void) * active. */ ret = amd_iommu_init_api(); + if (ret) { + pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n", + ret); + goto out; + } init_device_table_dma(); @@ -1970,6 +1977,7 @@ static int __init amd_iommu_init_pci(void) if (!ret) print_iommu_info(); +out: return ret; } From 5b61343b50590fb04a3f6be2cdc4868091757262 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 3 Mar 2022 14:40:08 +0000 Subject: [PATCH 200/380] iommu/iova: Improve 32-bit free space estimate For various reasons based on the allocator behaviour and typical use-cases at the time, when the max32_alloc_size optimisation was introduced it seemed reasonable to couple the reset of the tracked size to the update of cached32_node upon freeing a relevant IOVA. However, since subsequent optimisations focused on helping genuine 32-bit devices make best use of even more limited address spaces, it is now a lot more likely for cached32_node to be anywhere in a "full" 32-bit address space, and as such more likely for space to become available from IOVAs below that node being freed. At this point, the short-cut in __cached_rbnode_delete_update() really doesn't hold up any more, and we need to fix the logic to reliably provide the expected behaviour. We still want cached32_node to only move upwards, but we should reset the allocation size if *any* 32-bit space has become available. Reported-by: Yunfei Wang Signed-off-by: Robin Murphy Reviewed-by: Miles Chen Link: https://lore.kernel.org/r/033815732d83ca73b13c11485ac39336f15c3b40.1646318408.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 7e9c3a97c040..db77aa675145 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -94,10 +94,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = to_iova(iovad->cached32_node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) From 17224e08af73fbbc5334226ae75feecb1e037cae Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 8 Dec 2021 14:07:40 +0200 Subject: [PATCH 201/380] iommu/mediatek: Remove for_each_m4u in tlb_sync_all The tlb_sync_all is called from these three functions: a) flush_iotlb_all: it will be called for each a iommu HW. b) tlb_flush_range_sync: it already has for_each_m4u. c) in irq: When IOMMU HW translation fault, Only need flush itself. Thus, No need for_each_m4u in this tlb_sync_all. Remove it. Signed-off-by: Yong Wu Reviewed-by: Dafna Hirschfeld Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20211208120744.2415-2-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 25b834104790..1356137f91f9 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -210,17 +210,15 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - for_each_m4u(data) { - if (pm_runtime_get_if_in_use(data->dev) <= 0) - continue; + if (pm_runtime_get_if_in_use(data->dev) <= 0) + return; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + data->plat_data->inv_sel_reg); - writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); - wmb(); /* Make sure the tlb flush all done */ + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + data->plat_data->inv_sel_reg); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ - pm_runtime_put(data->dev); - } + pm_runtime_put(data->dev); } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, From 4ea794452ae7220bb02ad950d77d86adcbb86b10 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 8 Dec 2021 14:07:41 +0200 Subject: [PATCH 202/380] iommu/mediatek: Always check runtime PM status in tlb flush range callback In case of v4l2_reqbufs() it is possible, that a TLB flush is done without runtime PM being enabled. In that case the "Partial TLB flush timed out, falling back to full flush" warning is printed. Commit c0b57581b73b ("iommu/mediatek: Add power-domain operation") introduced has_pm as optimization to avoid checking runtime PM when there is no power domain attached. But without the PM domain there is still the device driver's runtime PM suspend handler, which disables the clock. Thus flushing should also be avoided when there is no PM domain involved. Signed-off-by: Sebastian Reichel Reviewed-by: Dafna Hirschfeld Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Link: https://lore.kernel.org/r/20211208120744.2415-3-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 1356137f91f9..bcf1f5e11fbd 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -225,16 +225,13 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, size_t granule, struct mtk_iommu_data *data) { - bool has_pm = !!data->dev->pm_domain; unsigned long flags; int ret; u32 tmp; for_each_m4u(data) { - if (has_pm) { - if (pm_runtime_get_if_in_use(data->dev) <= 0) - continue; - } + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, @@ -259,8 +256,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, writel_relaxed(0, data->base + REG_MMU_CPE_DONE); spin_unlock_irqrestore(&data->tlb_lock, flags); - if (has_pm) - pm_runtime_put(data->dev); + pm_runtime_put(data->dev); } } From ad5042ecbe944bd34d0bdd23f3b728a7e92c0e12 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 8 Dec 2021 14:07:42 +0200 Subject: [PATCH 203/380] iommu/mediatek: Remove the power status checking in tlb flush all To simplify the code, Remove the power status checking in the tlb_flush_all, remove this: if (pm_runtime_get_if_in_use(data->dev) <= 0) continue; The mtk_iommu_tlb_flush_all is called from a) isr b) tlb flush range fail case c) iommu_create_device_direct_mappings In first two cases, the power and clock are always enabled. In the third case tlb flush is unnecessary because in a later patch in the series a full flush from the pm_runtime_resume callback is added. In addition, writing the tlb control register when the iommu is not resumed is ok and the write is ignored. Signed-off-by: Yong Wu [refactor commit log] Signed-off-by: Dafna Hirschfeld Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20211208120744.2415-4-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bcf1f5e11fbd..7cdf2819f4b7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -210,15 +210,10 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - if (pm_runtime_get_if_in_use(data->dev) <= 0) - return; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ - - pm_runtime_put(data->dev); } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, From 15672b6dc5d03a947e9180db1d6564f43b0c6799 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 8 Dec 2021 14:07:43 +0200 Subject: [PATCH 204/380] iommu/mediatek: Add tlb_lock in tlb_flush_all The tlb_flush_all touches the registers controlling tlb operations. Protect it with the tlb_lock spinlock. This also require the range_sync func to release that spinlock before calling tlb_flush_all. Signed-off-by: Yong Wu [refactor commit log] Signed-off-by: Dafna Hirschfeld Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20211208120744.2415-5-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7cdf2819f4b7..7cbfe3bfaaab 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -210,10 +210,14 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { + unsigned long flags; + + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ + spin_unlock_irqrestore(&data->tlb_lock, flags); } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, @@ -242,14 +246,16 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, /* tlb sync */ ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp, tmp != 0, 10, 1000); + + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); + spin_unlock_irqrestore(&data->tlb_lock, flags); + if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); mtk_iommu_tlb_flush_all(data); } - /* Clear the CPE status */ - writel_relaxed(0, data->base + REG_MMU_CPE_DONE); - spin_unlock_irqrestore(&data->tlb_lock, flags); pm_runtime_put(data->dev); } From 4f23f6d45821701c68b99c2847a075472b53198c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 8 Dec 2021 14:07:44 +0200 Subject: [PATCH 205/380] iommu/mediatek: Always tlb_flush_all when each PM resume Prepare for 2 HWs that sharing pgtable in different power-domains. When there are 2 M4U HWs, it may has problem in the flush_range in which we get the pm_status via the m4u dev, BUT that function don't reflect the real power-domain status of the HW since there may be other HW also use that power-domain. DAM allocation is often done while the allocating device is runtime suspended. In such a case the iommu will also be suspended and partial flushing of the tlb will not be executed. Therefore, we add a tlb_flush_all in the pm_runtime_resume to make sure the tlb is always clean. In other case, the iommu's power should be active via device link with smi. Signed-off-by: Yong Wu [move the call to mtk_iommu_tlb_flush_all to the bottom of resume cb, improve doc/log] Signed-off-by: Dafna Hirschfeld Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20211208120744.2415-6-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7cbfe3bfaaab..eb43e7edb9dc 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -975,6 +975,13 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); + + /* + * Users may allocate dma buffer before they call pm_runtime_get, + * in which case it will lack the necessary tlb flush. + * Thus, make sure to update the tlb after each PM resume. + */ + mtk_iommu_tlb_flush_all(data); return 0; } From 402e6688a7df482cc57be0b0dd5b443d995073fb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:48 +0800 Subject: [PATCH 206/380] iommu/vt-d: Remove intel_iommu::domains The "domains" field of the intel_iommu structure keeps the mapping of domain_id to dmar_domain. This information is not used anywhere. Remove and cleanup it to avoid unnecessary memory consumption. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 68 ++----------------------------------- include/linux/intel-iommu.h | 1 - 2 files changed, 3 insertions(+), 66 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b549172e88ef..e3b04d5d87b0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -455,36 +455,6 @@ __setup("intel_iommu=", intel_iommu_setup); static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; -static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did) -{ - struct dmar_domain **domains; - int idx = did >> 8; - - domains = iommu->domains[idx]; - if (!domains) - return NULL; - - return domains[did & 0xff]; -} - -static void set_iommu_domain(struct intel_iommu *iommu, u16 did, - struct dmar_domain *domain) -{ - struct dmar_domain **domains; - int idx = did >> 8; - - if (!iommu->domains[idx]) { - size_t size = 256 * sizeof(struct dmar_domain *); - iommu->domains[idx] = kzalloc(size, GFP_ATOMIC); - } - - domains = iommu->domains[idx]; - if (WARN_ON(!domains)) - return; - else - domains[did & 0xff] = domain; -} - void *alloc_pgtable_page(int node) { struct page *page; @@ -1751,8 +1721,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) DMA_TLB_DSI_FLUSH); if (!cap_caching_mode(iommu->cap)) - iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), - 0, MAX_AGAW_PFN_WIDTH); + iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } } @@ -1815,7 +1784,6 @@ static void iommu_disable_translation(struct intel_iommu *iommu) static int iommu_init_domains(struct intel_iommu *iommu) { u32 ndomains; - size_t size; ndomains = cap_ndoms(iommu->cap); pr_debug("%s: Number of Domains supported <%d>\n", @@ -1827,24 +1795,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) if (!iommu->domain_ids) return -ENOMEM; - size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **); - iommu->domains = kzalloc(size, GFP_KERNEL); - - if (iommu->domains) { - size = 256 * sizeof(struct dmar_domain *); - iommu->domains[0] = kzalloc(size, GFP_KERNEL); - } - - if (!iommu->domains || !iommu->domains[0]) { - pr_err("%s: Allocating domain array failed\n", - iommu->name); - bitmap_free(iommu->domain_ids); - kfree(iommu->domains); - iommu->domain_ids = NULL; - iommu->domains = NULL; - return -ENOMEM; - } - /* * If Caching mode is set, then invalid translations are tagged * with domain-id 0, hence we need to pre-allocate it. We also @@ -1871,7 +1821,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) struct device_domain_info *info, *tmp; unsigned long flags; - if (!iommu->domains || !iommu->domain_ids) + if (!iommu->domain_ids) return; spin_lock_irqsave(&device_domain_lock, flags); @@ -1892,15 +1842,8 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { - if ((iommu->domains) && (iommu->domain_ids)) { - int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8; - int i; - - for (i = 0; i < elems; i++) - kfree(iommu->domains[i]); - kfree(iommu->domains); + if (iommu->domain_ids) { bitmap_free(iommu->domain_ids); - iommu->domains = NULL; iommu->domain_ids = NULL; } @@ -1978,11 +1921,8 @@ static int domain_attach_iommu(struct dmar_domain *domain, } set_bit(num, iommu->domain_ids); - set_iommu_domain(iommu, num, domain); - domain->iommu_did[iommu->seq_id] = num; domain->nid = iommu->node; - domain_update_iommu_cap(domain); } @@ -2001,8 +1941,6 @@ static void domain_detach_iommu(struct dmar_domain *domain, if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = domain->iommu_did[iommu->seq_id]; clear_bit(num, iommu->domain_ids); - set_iommu_domain(iommu, num, NULL); - domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 5cfda90b2cca..8c7591b5f3e2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -578,7 +578,6 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain ***domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ From c5d27545fb2f3810c1ee192f4f3e0822172fa474 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:49 +0800 Subject: [PATCH 207/380] iommu/vt-d: Remove finding domain in dmar_insert_one_dev_info() The Intel IOMMU driver has already converted to use default domain framework in iommu core. There's no need to find a domain for the device in the domain attaching path. Cleanup that code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e3b04d5d87b0..b3075933864e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2554,7 +2554,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, struct device *dev, struct dmar_domain *domain) { - struct dmar_domain *found = NULL; struct device_domain_info *info; unsigned long flags; int ret; @@ -2605,26 +2604,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, } spin_lock_irqsave(&device_domain_lock, flags); - if (dev) - found = find_domain(dev); - - if (!found) { - struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, - info->devfn); - if (info2) { - found = info2->domain; - info2->dev = dev; - } - } - - if (found) { - spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); - /* Caller must free the original domain */ - return found; - } - spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); spin_unlock(&iommu->lock); From c8850a6e6d71e2f95cfe1eae8c8dd195dd05b950 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:50 +0800 Subject: [PATCH 208/380] iommu/vt-d: Remove iova_cache_get/put() These have been done in drivers/iommu/dma-iommu.c. Remove this duplicate code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b3075933864e..7c2b427bea3b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3381,9 +3381,6 @@ static inline int iommu_devinfo_cache_init(void) static int __init iommu_init_mempool(void) { int ret; - ret = iova_cache_get(); - if (ret) - return ret; ret = iommu_domain_cache_init(); if (ret) @@ -3395,7 +3392,6 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - iova_cache_put(); return -ENOMEM; } @@ -3404,7 +3400,6 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - iova_cache_put(); } static void __init init_no_remapping_devices(void) From ee2653bbe89df777e222ec1aa015c2ed7ea06b6b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:51 +0800 Subject: [PATCH 209/380] iommu/vt-d: Remove domain and devinfo mempool The domain and devinfo memory blocks are only allocated during device probe and released during remove. There's no hot-path context, hence no need for memory pools. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 104 ++---------------------------------- 1 file changed, 5 insertions(+), 99 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2b427bea3b..51db26d8606c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -452,9 +452,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -static struct kmem_cache *iommu_domain_cache; -static struct kmem_cache *iommu_devinfo_cache; - void *alloc_pgtable_page(int node) { struct page *page; @@ -471,26 +468,6 @@ void free_pgtable_page(void *vaddr) free_page((unsigned long)vaddr); } -static inline void *alloc_domain_mem(void) -{ - return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); -} - -static void free_domain_mem(void *vaddr) -{ - kmem_cache_free(iommu_domain_cache, vaddr); -} - -static inline void * alloc_devinfo_mem(void) -{ - return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); -} - -static inline void free_devinfo_mem(void *vaddr) -{ - kmem_cache_free(iommu_devinfo_cache, vaddr); -} - static inline int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; @@ -1885,11 +1862,10 @@ static struct dmar_domain *alloc_domain(unsigned int type) { struct dmar_domain *domain; - domain = alloc_domain_mem(); + domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - memset(domain, 0, sizeof(*domain)); domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; @@ -1973,7 +1949,7 @@ static void domain_exit(struct dmar_domain *domain) put_pages_list(&freelist); } - free_domain_mem(domain); + kfree(domain); } /* @@ -2558,7 +2534,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, unsigned long flags; int ret; - info = alloc_devinfo_mem(); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return NULL; @@ -2574,13 +2550,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->segment = pci_domain_nr(pdev->bus); } - info->ats_supported = info->pasid_supported = info->pri_supported = 0; - info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; - info->ats_qdep = 0; info->dev = dev; info->domain = domain; info->iommu = iommu; - info->pasid_table = NULL; if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -2610,7 +2582,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (ret) { spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); + kfree(info); return NULL; } @@ -3343,65 +3315,6 @@ error: return ret; } -static inline int iommu_domain_cache_init(void) -{ - int ret = 0; - - iommu_domain_cache = kmem_cache_create("iommu_domain", - sizeof(struct dmar_domain), - 0, - SLAB_HWCACHE_ALIGN, - - NULL); - if (!iommu_domain_cache) { - pr_err("Couldn't create iommu_domain cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -static inline int iommu_devinfo_cache_init(void) -{ - int ret = 0; - - iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", - sizeof(struct device_domain_info), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_devinfo_cache) { - pr_err("Couldn't create devinfo cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -static int __init iommu_init_mempool(void) -{ - int ret; - - ret = iommu_domain_cache_init(); - if (ret) - goto domain_error; - - ret = iommu_devinfo_cache_init(); - if (!ret) - return ret; - - kmem_cache_destroy(iommu_domain_cache); -domain_error: - - return -ENOMEM; -} - -static void __init iommu_exit_mempool(void) -{ - kmem_cache_destroy(iommu_devinfo_cache); - kmem_cache_destroy(iommu_domain_cache); -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -4253,12 +4166,6 @@ int __init intel_iommu_init(void) force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || platform_optin_force_iommu(); - if (iommu_init_mempool()) { - if (force_on) - panic("tboot: Failed to initialize iommu memory\n"); - return -ENOMEM; - } - down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) @@ -4379,7 +4286,6 @@ int __init intel_iommu_init(void) out_free_dmar: intel_iommu_free_dmars(); up_write(&dmar_global_lock); - iommu_exit_mempool(); return ret; } @@ -4436,7 +4342,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - free_devinfo_mem(info); + kfree(info); } static void dmar_remove_one_dev_info(struct device *dev) From 586081d3f6b13ec9dfdfdf3d7842a688b376fa5e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:52 +0800 Subject: [PATCH 210/380] iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO Allocate and set the per-device iommu private data during iommu device probe. This makes the per-device iommu private data always available during iommu_probe_device() and iommu_release_device(). With this changed, the dummy DEFER_DEVICE_DOMAIN_INFO pointer could be removed. The wrappers for getting the private data and domain are also cleaned. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/debugfs.c | 3 +- drivers/iommu/intel/iommu.c | 186 +++++++++++++--------------------- drivers/iommu/intel/pasid.c | 12 +-- drivers/iommu/intel/svm.c | 6 +- include/linux/intel-iommu.h | 2 - 5 files changed, 79 insertions(+), 130 deletions(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index db7a0ca73626..ed796eea4581 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -344,7 +344,8 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, static int show_device_domain_translation(struct device *dev, void *data) { - struct dmar_domain *domain = find_domain(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *domain = info->domain; struct seq_file *m = data; u64 path[6] = { 0 }; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 51db26d8606c..d9965e72d9a8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -342,21 +342,6 @@ static int iommu_skip_te_disable; int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); -#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) -struct device_domain_info *get_domain_info(struct device *dev) -{ - struct device_domain_info *info; - - if (!dev) - return NULL; - - info = dev_iommu_priv_get(dev); - if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO)) - return NULL; - - return info; -} - DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -741,11 +726,6 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, return &context[devfn]; } -static bool attach_deferred(struct device *dev) -{ - return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO; -} - /** * is_downstream_to_pci_bridge - test if a device belongs to the PCI * sub-hierarchy of a candidate PCI-PCI bridge @@ -2432,15 +2412,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); } -static inline void unlink_domain_info(struct device_domain_info *info) -{ - assert_spin_locked(&device_domain_lock); - list_del(&info->link); - list_del(&info->global); - if (info->dev) - dev_iommu_priv_set(info->dev, NULL); -} - static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info, *tmp; @@ -2452,24 +2423,6 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); } -struct dmar_domain *find_domain(struct device *dev) -{ - struct device_domain_info *info; - - if (unlikely(!dev || !dev->iommu)) - return NULL; - - if (unlikely(attach_deferred(dev))) - return NULL; - - /* No lock here, assumes no domain exit in normal case */ - info = get_domain_info(dev); - if (likely(info)) - return info->domain; - - return NULL; -} - static inline struct device_domain_info * dmar_search_domain_by_dev_info(int segment, int bus, int devfn) { @@ -2530,66 +2483,20 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, struct device *dev, struct dmar_domain *domain) { - struct device_domain_info *info; + struct device_domain_info *info = dev_iommu_priv_get(dev); unsigned long flags; int ret; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return NULL; - - if (!dev_is_real_dma_subdevice(dev)) { - info->bus = bus; - info->devfn = devfn; - info->segment = iommu->segment; - } else { - struct pci_dev *pdev = to_pci_dev(dev); - - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->segment = pci_domain_nr(pdev->bus); - } - - info->dev = dev; - info->domain = domain; - info->iommu = iommu; - - if (dev && dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(info->dev); - - if (ecap_dev_iotlb_support(iommu->ecap) && - pci_ats_supported(pdev) && - dmar_find_matched_atsr_unit(pdev)) - info->ats_supported = 1; - - if (sm_supported(iommu)) { - if (pasid_supported(iommu)) { - int features = pci_pasid_features(pdev); - if (features >= 0) - info->pasid_supported = features | 1; - } - - if (info->ats_supported && ecap_prs(iommu->ecap) && - pci_pri_supported(pdev)) - info->pri_supported = 1; - } - } - spin_lock_irqsave(&device_domain_lock, flags); + info->domain = domain; spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); spin_unlock(&iommu->lock); - if (ret) { spin_unlock_irqrestore(&device_domain_lock, flags); - kfree(info); return NULL; } - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - if (dev) - dev_iommu_priv_set(dev, info); spin_unlock_irqrestore(&device_domain_lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. */ @@ -4336,13 +4243,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) intel_pasid_free_table(info->dev); } - unlink_domain_info(info); + list_del(&info->link); spin_lock_irqsave(&iommu->lock, flags); domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - - kfree(info); } static void dmar_remove_one_dev_info(struct device *dev) @@ -4351,7 +4256,7 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); @@ -4475,10 +4380,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { - struct dmar_domain *old_domain; + struct device_domain_info *info = dev_iommu_priv_get(dev); - old_domain = find_domain(dev); - if (old_domain) + if (info->domain) dmar_remove_one_dev_info(dev); } @@ -4642,28 +4546,73 @@ static bool intel_iommu_capable(enum iommu_cap cap) static struct iommu_device *intel_iommu_probe_device(struct device *dev) { + struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; + struct device_domain_info *info; struct intel_iommu *iommu; + unsigned long flags; + u8 bus, devfn; - iommu = device_to_iommu(dev, NULL, NULL); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return ERR_PTR(-ENODEV); - if (translation_pre_enabled(iommu)) - dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO); + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + if (dev_is_real_dma_subdevice(dev)) { + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } else { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } + + info->dev = dev; + info->iommu = iommu; + if (dev_is_pci(dev)) { + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && + dmar_find_matched_atsr_unit(pdev)) + info->ats_supported = 1; + + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { + int features = pci_pasid_features(pdev); + + if (features >= 0) + info->pasid_supported = features | 1; + } + + if (info->ats_supported && ecap_prs(iommu->ecap) && + pci_pri_supported(pdev)) + info->pri_supported = 1; + } + } + + spin_lock_irqsave(&device_domain_lock, flags); + list_add(&info->global, &device_domain_list); + dev_iommu_priv_set(dev, info); + spin_unlock_irqrestore(&device_domain_lock, flags); return &iommu->iommu; } static void intel_iommu_release_device(struct device *dev) { - struct intel_iommu *iommu; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return; + struct device_domain_info *info = dev_iommu_priv_get(dev); + unsigned long flags; dmar_remove_one_dev_info(dev); + spin_lock_irqsave(&device_domain_lock, flags); + dev_iommu_priv_set(dev, NULL); + list_del(&info->global); + spin_unlock_irqrestore(&device_domain_lock, flags); + + kfree(info); set_dma_ops(dev, NULL); } @@ -4732,14 +4681,14 @@ static void intel_iommu_get_resv_regions(struct device *device, int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) { - struct device_domain_info *info; + struct device_domain_info *info = dev_iommu_priv_get(dev); struct context_entry *context; struct dmar_domain *domain; unsigned long flags; u64 ctx_lo; int ret; - domain = find_domain(dev); + domain = info->domain; if (!domain) return -EINVAL; @@ -4747,8 +4696,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = get_domain_info(dev); - if (!info || !info->pasid_supported) + if (!info->pasid_supported) goto out; context = iommu_context_addr(iommu, info->bus, info->devfn, 0); @@ -4790,7 +4738,7 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) static int intel_iommu_enable_sva(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; int ret; @@ -4819,7 +4767,7 @@ static int intel_iommu_enable_sva(struct device *dev) static int intel_iommu_disable_sva(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; int ret; @@ -4832,7 +4780,7 @@ static int intel_iommu_disable_sva(struct device *dev) static int intel_iommu_enable_iopf(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); if (info && info->pri_supported) return 0; @@ -4872,7 +4820,9 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) static bool intel_iommu_is_attach_deferred(struct device *dev) { - return attach_deferred(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); + + return translation_pre_enabled(info->iommu) && !info->domain; } /* diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 10fb82ea467d..f8d215d85695 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -150,7 +150,7 @@ int intel_pasid_alloc_table(struct device *dev) int size; might_sleep(); - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; @@ -197,7 +197,7 @@ void intel_pasid_free_table(struct device *dev) struct pasid_entry *table; int i, max_pde; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !dev_is_pci(dev) || !info->pasid_table) return; @@ -223,7 +223,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) { struct device_domain_info *info; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info) return NULL; @@ -234,7 +234,7 @@ static int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !info->pasid_table) return 0; @@ -254,7 +254,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) return NULL; dir = pasid_table->table; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; @@ -487,7 +487,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device_domain_info *info; u16 sid, qdep, pfsid; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !info->ats_enabled) return; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d04c83dd3a58..944e2408b6d2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -200,7 +200,7 @@ static void __flush_svm_range_dev(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { - struct device_domain_info *info = get_domain_info(sdev->dev); + struct device_domain_info *info = dev_iommu_priv_get(sdev->dev); if (WARN_ON(!pages)) return; @@ -337,7 +337,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct mm_struct *mm, unsigned int flags) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); unsigned long iflags, sflags; struct intel_svm_dev *sdev; struct intel_svm *svm; @@ -545,7 +545,7 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid) u16 sid, did; int qdep; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev))) return; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 8c7591b5f3e2..03f1134fc2fe 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -733,8 +733,6 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); -struct dmar_domain *find_domain(struct device *dev); -struct device_domain_info *get_domain_info(struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM From 763e656c693771d2190bc6c6010e71f403e47301 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:53 +0800 Subject: [PATCH 211/380] iommu/vt-d: Remove unnecessary includes Remove unnecessary include files and sort the remaining alphabetically. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d9965e72d9a8..b6a6fea8525d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -13,38 +13,18 @@ #define pr_fmt(fmt) "DMAR: " fmt #define dev_fmt(fmt) pr_fmt(fmt) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include #include #include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "../irq_remapping.h" #include "../iommu-sva-lib.h" From 782861df7dcd67b7fed47f3b6b9bf40851760c6f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:54 +0800 Subject: [PATCH 212/380] iommu/vt-d: Remove unnecessary prototypes Some prototypes in iommu.c are unnecessary. Delete them. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b6a6fea8525d..dfd3698406fa 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -296,14 +296,9 @@ static LIST_HEAD(dmar_satc_units); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; -static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static int intel_iommu_attach_device(struct iommu_domain *domain, - struct device *dev); -static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); From 2187a57ef0c572bcca3ab2497cfcfce664ddbf70 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 1 Mar 2022 10:01:55 +0800 Subject: [PATCH 213/380] iommu/vt-d: Fix indentation of goto labels Remove blanks before goto labels. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220214025704.3184654-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220301020159.633356-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dfd3698406fa..d7fac0a1761d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -827,7 +827,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) } if (pdev && drhd->include_all) { - got_pdev: +got_pdev: if (bus && devfn) { *bus = pdev->bus->number; *devfn = pdev->devfn; @@ -836,7 +836,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) } } iommu = NULL; - out: +out: if (iommu_is_dummy(iommu, dev)) iommu = NULL; From 2852631d96a643e0b21a9b14ad38caf465d7225f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 1 Mar 2022 10:01:56 +0800 Subject: [PATCH 214/380] iommu/vt-d: Move intel_iommu_ops to header file Compiler is not happy about hidden declaration of intel_iommu_ops. .../iommu.c:414:24: warning: symbol 'intel_iommu_ops' was not declared. Should it be static? Move declaration to header file to make compiler happy. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220207141240.8253-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 -- include/linux/intel-iommu.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 6d10be50ec30..4de960834a1b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -66,8 +66,6 @@ static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)]; static int alloc_iommu(struct dmar_drhd_unit *drhd); static void free_iommu(struct intel_iommu *iommu); -extern const struct iommu_ops intel_iommu_ops; - static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { /* diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 03f1134fc2fe..4909d6c9ac21 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -783,6 +783,8 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +extern const struct iommu_ops intel_iommu_ops; + #ifdef CONFIG_INTEL_IOMMU extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); From 45967ffb9e50aa3472cc6c69a769ef0f09cced5d Mon Sep 17 00:00:00 2001 From: Marco Bonelli Date: Tue, 1 Mar 2022 10:01:57 +0800 Subject: [PATCH 215/380] iommu/vt-d: Add missing "__init" for rmrr_sanity_check() rmrr_sanity_check() calls arch_rmrr_sanity_check(), which is marked as "__init". Add the annotation for rmrr_sanity_check() too. This function is currently only called from dmar_parse_one_rmrr() which is also "__init". Signed-off-by: Marco Bonelli Link: https://lore.kernel.org/r/20220210023141.9208-1-marco@mebeim.net Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d7fac0a1761d..2aa7afd90c3e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3364,7 +3364,7 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ -static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) || From b897a1b7ad3f239fa19ee40aec429e96f5162669 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 1 Mar 2022 10:01:58 +0800 Subject: [PATCH 216/380] iommu/vt-d: Remove unused function intel_svm_capable() This is unused since commit 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers"). Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20220216113851.25004-1-yuehaibing@huawei.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 944e2408b6d2..241d095b6dcf 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -168,11 +168,6 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) return 0; } -static inline bool intel_svm_capable(struct intel_iommu *iommu) -{ - return iommu->flags & VTD_FLAG_SVM_CAPABLE; -} - void intel_svm_check(struct intel_iommu *iommu) { if (!pasid_supported(iommu)) From 97f2f2c5317f55ae3440733a090a96a251da222b Mon Sep 17 00:00:00 2001 From: Yian Chen Date: Tue, 1 Mar 2022 10:01:59 +0800 Subject: [PATCH 217/380] iommu/vt-d: Enable ATS for the devices in SATC table Starting from Intel VT-d v3.2, Intel platform BIOS can provide additional SATC table structure. SATC table includes a list of SoC integrated devices that support ATC (Address translation cache). Enabling ATC (via ATS capability) can be a functional requirement for SATC device operation or optional to enhance device performance/functionality. This is determined by the bit of ATC_REQUIRED in SATC table. When IOMMU is working in scalable mode, software chooses to always enable ATS for every device in SATC table because Intel SoC devices in SATC table are trusted to use ATS. On the other hand, if IOMMU is in legacy mode, ATS of SATC capable devices can work transparently to software and be automatically enabled by IOMMU hardware. As the result, there is no need for software to enable ATS on these devices. This also removes dmar_find_matched_atsr_unit() helper as it becomes dead code now. Signed-off-by: Yian Chen Link: https://lore.kernel.org/r/20220222185416.1722611-1-yian.chen@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220301020159.633356-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 40 +++++++++++++++++++++++++++++++++++-- include/linux/intel-iommu.h | 1 - 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2aa7afd90c3e..8662f5c10721 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3693,7 +3693,31 @@ static void intel_iommu_free_dmars(void) } } -int dmar_find_matched_atsr_unit(struct pci_dev *dev) +static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) +{ + struct dmar_satc_unit *satcu; + struct acpi_dmar_satc *satc; + struct device *tmp; + int i; + + dev = pci_physfn(dev); + rcu_read_lock(); + + list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { + satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); + if (satc->segment != pci_domain_nr(dev->bus)) + continue; + for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp) + if (to_pci_dev(tmp) == dev) + goto out; + } + satcu = NULL; +out: + rcu_read_unlock(); + return satcu; +} + +static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) { int i, ret = 1; struct pci_bus *bus; @@ -3701,8 +3725,20 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) struct device *tmp; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; + struct dmar_satc_unit *satcu; dev = pci_physfn(dev); + satcu = dmar_find_matched_satc_unit(dev); + if (satcu) + /* + * This device supports ATS as it is in SATC table. + * When IOMMU is in legacy mode, enabling ATS is done + * automatically by HW for the device that requires + * ATS, hence OS should not enable this device ATS + * to avoid duplicated TLB invalidation. + */ + return !(satcu->atc_required && !sm_supported(iommu)); + for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; /* If it's an integrated device, allow ATS */ @@ -4550,7 +4586,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_find_matched_atsr_unit(pdev)) + dmar_ats_supported(pdev, iommu)) info->ats_supported = 1; if (sm_supported(iommu)) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4909d6c9ac21..2f9891cb3d00 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -693,7 +693,6 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); From b8f9a9aa02bfbf3557bebed066e0523aad482878 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Dec 2021 16:46:02 +0800 Subject: [PATCH 218/380] net: xtensa: use strscpy to copy strings The strlcpy should not be used because it doesn't limit the source length. So that it will lead some potential bugs. But the strscpy doesn't require reading memory from the src string beyond the specified "count" bytes, and since the return value is easier to error-check than strlcpy()'s. In addition, the implementation is robust to the string changing out from underneath it, unlike the current strlcpy() implementation. Thus, replace strlcpy with strscpy. Signed-off-by: Jason Wang Message-Id: <20211220084602.952091-1-wangborong@cdjrlc.com> Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/network.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 962e5e145209..e62e31c88956 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -174,7 +174,7 @@ static int tuntap_open(struct iss_net_private *lp) memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strlcpy(ifr.ifr_name, dev_name, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, dev_name, sizeof(ifr.ifr_name)); err = simc_ioctl(fd, TUNSETIFF, &ifr); if (err < 0) { @@ -249,7 +249,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init) return 0; } - strlcpy(lp->tp.info.tuntap.dev_name, dev_name, + strscpy(lp->tp.info.tuntap.dev_name, dev_name, sizeof(lp->tp.info.tuntap.dev_name)); setup_etheraddr(dev, mac_str); From 9ddef266bcff8fdd138d61c4ada25cfdf062e3c1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Dec 2021 16:47:56 +0800 Subject: [PATCH 219/380] xtensa: use strscpy to copy strings The strlcpy should not be used because it doesn't limit the source length. So that it will lead some potential bugs. But the strscpy doesn't require reading memory from the src string beyond the specified "count" bytes, and since the return value is easier to error-check than strlcpy()'s. In addition, the implementation is robust to the string changing out from underneath it, unlike the current strlcpy() implementation. Thus, replace strlcpy with strscpy. Signed-off-by: Jason Wang Message-Id: <20211220084756.955307-1-wangborong@cdjrlc.com> Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 8db20cfb44ab..3f1842891482 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -140,7 +140,7 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); static int __init parse_tag_cmdline(const bp_tag_t* tag) { - strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); + strscpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); return 0; } @@ -230,7 +230,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(xtensa_dt_io_area, NULL); if (!command_line[0]) - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); } #endif /* CONFIG_USE_OF */ @@ -260,7 +260,7 @@ void __init init_arch(bp_tag_t *bp_start) #ifdef CONFIG_CMDLINE_BOOL if (!command_line[0]) - strlcpy(command_line, default_command_line, COMMAND_LINE_SIZE); + strscpy(command_line, default_command_line, COMMAND_LINE_SIZE); #endif /* Early hook for platforms */ @@ -289,7 +289,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = command_line; platform_setup(cmdline_p); - strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); /* Reserve some memory regions */ From 6496f3a717099526f09bc3c8269d00f9a97c7740 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 21 Jan 2022 15:02:58 -0600 Subject: [PATCH 220/380] xtensa: Remove unused early_read_config_byte() et al declarations early_read_config_byte() and similar are declared but never defined. Remove the unused declarations. Signed-off-by: Bjorn Helgaas Message-Id: <20220121210258.1152803-1-helgaas@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/pci-bridge.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/xtensa/include/asm/pci-bridge.h b/arch/xtensa/include/asm/pci-bridge.h index 405526912d9a..e320aa5bbedb 100644 --- a/arch/xtensa/include/asm/pci-bridge.h +++ b/arch/xtensa/include/asm/pci-bridge.h @@ -73,13 +73,4 @@ static inline void pcibios_init_resource(struct resource *res, res->child = NULL; } - -/* These are used for config access before all the PCI probing has been done. */ -int early_read_config_byte(struct pci_controller*, int, int, int, u8*); -int early_read_config_word(struct pci_controller*, int, int, int, u16*); -int early_read_config_dword(struct pci_controller*, int, int, int, u32*); -int early_write_config_byte(struct pci_controller*, int, int, int, u8); -int early_write_config_word(struct pci_controller*, int, int, int, u16); -int early_write_config_dword(struct pci_controller*, int, int, int, u32); - #endif /* _XTENSA_PCI_BRIDGE_H */ From d17b66417308996e7e64b270a3c7f3c1fbd4cfc8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 23 Feb 2022 01:30:23 +0000 Subject: [PATCH 221/380] MIPS: fix fortify panic when copying asm exception handlers With KCFLAGS="-O3", I was able to trigger a fortify-source memcpy() overflow panic on set_vi_srs_handler(). Although O3 level is not supported in the mainline, under some conditions that may've happened with any optimization settings, it's just a matter of inlining luck. The panic itself is correct, more precisely, 50/50 false-positive and not at the same time. From the one side, no real overflow happens. Exception handler defined in asm just gets copied to some reserved places in the memory. But the reason behind is that C code refers to that exception handler declares it as `char`, i.e. something of 1 byte length. It's obvious that the asm function itself is way more than 1 byte, so fortify logics thought we are going to past the symbol declared. The standard way to refer to asm symbols from C code which is not supposed to be called from C is to declare them as `extern const u8[]`. This is fully correct from any point of view, as any code itself is just a bunch of bytes (including 0 as it is for syms like _stext/_etext/etc.), and the exact size is not known at the moment of compilation. Adjust the type of the except_vec_vi_*() and related variables. Make set_handler() take `const` as a second argument to avoid cast-away warnings and give a little more room for optimization. Signed-off-by: Alexander Lobakin Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/setup.h | 2 +- arch/mips/kernel/traps.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203d..8c56b862fd9c 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index a486486b2355..246c6a6b0261 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2091,19 +2091,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? + rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2311,7 +2311,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); From 97bf0395c226907e1a9b908511a35192bf1e09bb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 4 Mar 2022 08:24:27 -0800 Subject: [PATCH 222/380] mips: DEC: honor CONFIG_MIPS_FP_SUPPORT=n Include the DECstation interrupt handler in opting out of FPU support. Fixes a linker error: mips-linux-ld: arch/mips/dec/int-handler.o: in function `fpu': (.text+0x148): undefined reference to `handle_fpe_int' Fixes: 183b40f992c8 ("MIPS: Allow FP support to be disabled") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Paul Burton Cc: Thomas Bogendoerfer Cc: Maciej W. Rozycki Cc: linux-mips@vger.kernel.org Acked-by: Maciej W. Rozycki Signed-off-by: Thomas Bogendoerfer --- arch/mips/dec/int-handler.S | 6 +++--- arch/mips/dec/setup.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index ea5b5a83f1e1..011d1d678840 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! @@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index a8a30bb1dee8..82b00e45ce50 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -746,7 +746,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; From 244eae91a94c6dab82b3232967d10eeb9dfa21c6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 4 Mar 2022 20:16:23 +0000 Subject: [PATCH 223/380] DEC: Limit PMAX memory probing to R3k systems Recent tightening of the opcode table in binutils so as to consistently disallow the assembly or disassembly of CP0 instructions not supported by the processor architecture chosen has caused a regression like below: arch/mips/dec/prom/locore.S: Assembler messages: arch/mips/dec/prom/locore.S:29: Error: opcode not supported on this processor: r4600 (mips3) `rfe' in a piece of code used to probe for memory with PMAX DECstation models, which have non-REX firmware. Those computers always have an R2000 CPU and consequently the exception handler used in memory probing uses the RFE instruction, which those processors use. While adding 64-bit support this code was correctly excluded for 64-bit configurations, however it should have also been excluded for irrelevant 32-bit configurations. Do this now then, and only enable PMAX memory probing for R3k systems. Reported-by: Jan-Benedict Glaw Reported-by: Sudip Mukherjee Signed-off-by: Maciej W. Rozycki Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org # v2.6.12+ Signed-off-by: Thomas Bogendoerfer --- arch/mips/dec/prom/Makefile | 2 +- arch/mips/include/asm/dec/prom.h | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42..2bad87551203 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06..1e1247add1cf 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. */ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and From 6ddcba9d480b6bcced4223a729794dfa6becb7eb Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 4 Mar 2022 21:13:11 +0000 Subject: [PATCH 224/380] MIPS: Sanitise Cavium switch cases in TLB handler synthesizers It makes no sense to fall through to `break'. Therefore reorder the switch statements so as to have the Cavium cases first, followed by the default case, which improves readability and pacifies code analysis tools. No change in semantics, assembly produced is exactly the same. Reported-by: kernel test robot Signed-off-by: Maciej W. Rozycki Fixes: bc431d2153cc ("MIPS: Fix fall-through warnings for Clang") Signed-off-by: Thomas Bogendoerfer --- arch/mips/mm/tlbex.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index d9df2c43b15c..8dbbd99fc7e8 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -2159,16 +2159,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - fallthrough; - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. */ @@ -2235,15 +2233,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. */ From 34275ac292ae141ebc1296b72f005f71b25998a7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 6 Mar 2022 15:16:48 +0000 Subject: [PATCH 225/380] mips: Always permit to build u-boot images The platforms where the kernel should be loaded above 0x8000.0000 do not support loading u-boot images, that doesn't mean that we shouldn't be able to generate them. Additionally, since commit 79876cc1d7b8 ("MIPS: new Kconfig option ZBOOT_LOAD_ADDRESS"), the $(zload-y) variable was no longer hardcoded, which made it impossible to use the uzImage.bin target. Fixes: 79876cc1d7b8 ("MIPS: new Kconfig option ZBOOT_LOAD_ADDRESS") Cc: Signed-off-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 47ca5488434a..bb236de13133 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -339,14 +339,12 @@ drivers-$(CONFIG_PM) += arch/mips/power/ boot-y := vmlinux.bin boot-y += vmlinux.ecoff boot-y += vmlinux.srec -ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0) boot-y += uImage boot-y += uImage.bin boot-y += uImage.bz2 boot-y += uImage.gz boot-y += uImage.lzma boot-y += uImage.lzo -endif boot-y += vmlinux.itb boot-y += vmlinux.gz.itb boot-y += vmlinux.bz2.itb @@ -358,9 +356,7 @@ bootz-y := vmlinuz bootz-y += vmlinuz.bin bootz-y += vmlinuz.ecoff bootz-y += vmlinuz.srec -ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin -endif bootz-y += vmlinuz.itb # From 3f059a7e8c13c62addc1808d13a41d1f6dc24bd1 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 29 Aug 2021 20:49:09 +0800 Subject: [PATCH 226/380] mips: remove reference to "newer Loongson-3" Newest Loongson-3 processors have moved to use LoongArch architecture. Sadly, the LL/SC issue is still existing on both latest Loongson-3 processors using MIPS64 (Loongson-3A4000) and LoongArch (Loongson-3A5000). As it's very unlikely there will be new Loongson-3 processors using MIPS64, let's stop people from false hoping. Signed-off-by: Xi Ruoyao Cc: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 451ee7abd22a..3f58b45fc953 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1342,19 +1342,14 @@ config LOONGSON3_ENHANCEMENT new Loongson-3 machines only, please say 'Y' here. config CPU_LOONGSON3_WORKAROUNDS - bool "Old Loongson-3 LLSC Workarounds" + bool "Loongson-3 LLSC Workarounds" default y if SMP depends on CPU_LOONGSON64 help Loongson-3 processors have the llsc issues which require workarounds. Without workarounds the system may hang unexpectedly. - Newer Loongson-3 will fix these issues and no workarounds are needed. - The workarounds have no significant side effect on them but may - decrease the performance of the system so this option should be - disabled unless the kernel is intended to be run on old systems. - - If unsure, please say Y. + Say Y, unless you know what you are doing. config CPU_LOONGSON3_CPUCFG_EMULATION bool "Emulate the CPUCFG instruction on older Loongson cores" From 40562fe4fa3d94c7462ec909ab89b075e26c59ac Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:11:28 -0800 Subject: [PATCH 227/380] powerpc/pseries/vas: Use common names in VAS capability structure nr_total/nr_used_credits provides credits usage to user space via sysfs and the same interface can be used on PowerNV in future. Changed with proper naming so that applicable on both pseries and PowerNV. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f4313e9f198ee4f8d4fa4d015d8d1873e17851e6.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 10 +++++----- arch/powerpc/platforms/pseries/vas.h | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index d243ddc58827..18aae037ffe9 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -310,8 +310,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, cop_feat_caps = &caps->caps; - if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > - atomic_read(&cop_feat_caps->target_lpar_creds)) { + if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > + atomic_read(&cop_feat_caps->nr_total_credits)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; @@ -385,7 +385,7 @@ out_free: free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: - atomic_dec(&cop_feat_caps->used_lpar_creds); + atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); return ERR_PTR(rc); } @@ -445,7 +445,7 @@ static int vas_deallocate_window(struct vas_window *vwin) } list_del(&win->win_list); - atomic_dec(&caps->used_lpar_creds); + atomic_dec(&caps->nr_used_credits); mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -521,7 +521,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); - atomic_set(&caps->target_lpar_creds, + atomic_set(&caps->nr_total_credits, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 4ecb3fcabd10..d6ea8ab8b07a 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -72,9 +72,8 @@ struct vas_cop_feat_caps { }; /* Total LPAR available credits. Can be different from max LPAR */ /* credits due to DLPAR operation */ - atomic_t target_lpar_creds; - atomic_t used_lpar_creds; /* Used credits so far */ - u16 avail_lpar_creds; /* Remaining available credits */ + atomic_t nr_total_credits; /* Total credits assigned to LPAR */ + atomic_t nr_used_credits; /* Used credits so far */ }; /* From 976410cd2cb4c6ed53bd12c192fc46bbcc0fbce7 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:12:04 -0800 Subject: [PATCH 228/380] powerpc/pseries/vas: Save PID in pseries_vas_window struct The kernel sets the VAS window with PID when it is opened in the hypervisor. During DLPAR operation, windows can be closed and reopened in the hypervisor when the credit is available. So saves this PID in pseries_vas_window struct when the window is opened initially and reuse it later during DLPAR operation. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a57cbe6d292fe49ad55a0b49c5679d6a24d8fe73.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 9 +++++---- arch/powerpc/platforms/pseries/vas.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 18aae037ffe9..1035446f985b 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -107,7 +107,6 @@ static int h_deallocate_vas_window(u64 winid) static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; - u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in Linux VAS implementation. @@ -115,7 +114,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, - win->vas_win.winid, lpid, 0, + win->vas_win.winid, win->pid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); @@ -124,8 +123,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win) if (rc == H_SUCCESS) return 0; - pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", - rc, win->vas_win.winid, lpid); + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", + rc, win->vas_win.winid, win->pid); return -EIO; } @@ -338,6 +337,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, } } + txwin->pid = mfspr(SPRN_PID); + /* * Allocate / Deallocate window hcalls and setup / free IRQs * have to be protected with mutex. diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index d6ea8ab8b07a..2872532ed72a 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -114,6 +114,7 @@ struct pseries_vas_window { u64 domain[6]; /* Associativity domain Ids */ /* this window is allocated */ u64 util; + u32 pid; /* PID associated with this window */ /* List of windows opened which is used for LPM */ struct list_head win_list; From 1fe3a33ba0a37e7aa0df0acbe31d5dda7610c16e Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:12:41 -0800 Subject: [PATCH 229/380] powerpc/vas: Add paste address mmap fault handler The user space opens VAS windows and issues NX requests by pasting CRB on the corresponding paste address mmap. When the system lost credits due to core removal, the kernel has to close the window in the hypervisor and make the window inactive by unmapping this paste address. Also the OS has to handle NX request page faults if the user space issue NX requests. This handler maps the new paste address with the same VMA when the window is active again (due to core add with DLPAR). Otherwise returns paste failure. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3956e1c1fdfde69127055ff1c0256c7d71104030.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 10 ++++ arch/powerpc/platforms/book3s/vas-api.c | 68 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e09..27251af18c65 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,12 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL 2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * VAS window Linux status bits + */ +#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ + /* vas mmap() */ + /* * Get/Set bit fields */ @@ -59,6 +65,9 @@ struct vas_user_win_ref { struct pid *pid; /* PID of owner */ struct pid *tgid; /* Thread group ID of owner */ struct mm_struct *mm; /* Linux process mm_struct */ + struct mutex mmap_mutex; /* protects paste address mmap() */ + /* with DLPAR close/open windows */ + struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */ }; /* @@ -67,6 +76,7 @@ struct vas_user_win_ref { struct vas_window { u32 winid; u32 wcreds_max; /* Window credits */ + u32 status; /* Window status used in OS */ enum vas_cop_type cop; struct vas_user_win_ref task_ref; char *dbgname; diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4d82c92ddd52..217b4a624d09 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return PTR_ERR(txwin); } + mutex_init(&txwin->task_ref.mmap_mutex); cp_inst->txwin = txwin; return 0; @@ -350,6 +351,70 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * This fault handler is invoked when the core generates page fault on + * the paste address. Happens if the kernel closes window in hypervisor + * (on pseries) due to lost credit or the paste address is not mapped. + */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + vm_fault_t fault; + u64 paste_addr; + + /* + * window is not opened. Shouldn't expect this error. + */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("%s(): Unexpected fault on paste address with TX window closed\n", + __func__); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* + * When the LPAR lost credits due to core removal or during + * migration, invalidate the existing mapping for the current + * paste addresses and set windows in-active (zap_page_range in + * reconfig_close_windows()). + * New mapping will be done later after migration or new credits + * available. So continue to receive faults if the user space + * issue NX request. + */ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("%s(): No previous mapping with paste address\n", + __func__); + return VM_FAULT_SIGBUS; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + /* + * The window may be inactive due to lost credit (Ex: core + * removal with DLPAR). If the window is active again when + * the credit is available, map the new paste address at the + * the window virtual address. + */ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + fault = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(&txwin->task_ref.mmap_mutex); + return fault; + } + } + mutex_unlock(&txwin->task_ref.mmap_mutex); + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) { struct coproc_instance *cp_inst = fp->private_data; @@ -398,6 +463,9 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, paste_addr, vma->vm_start, rc); + txwin->task_ref.vma = vma; + vma->vm_ops = &vas_vm_ops; + return rc; } From b5c63d90cc2de8ac6724fec84d1d72cfebcae41d Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:13:15 -0800 Subject: [PATCH 230/380] powerpc/vas: Return paste instruction failure if no active window The VAS window may not be active if the system looses credits and the NX generates page fault when it receives request on unmap paste address. The kernel handles the fault by remap new paste address if the window is active again, Otherwise return the paste instruction failure if the executed instruction that caused the fault was a paste. Signed-off-by: Nicholas Piggin Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/492b9aefd593061d51dda67ee4d2fc449c000dce.camel@linux.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 2 + arch/powerpc/platforms/book3s/vas-api.c | 54 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 9675303b724e..82f1f0041c6f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,6 +262,8 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 +#define PPC_INST_PASTE 0x7c20070d +#define PPC_INST_PASTE_MASK 0xfc2007ff #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 217b4a624d09..82f32781c5d2 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -351,6 +351,41 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance NIP, and return 0. Else return error code. + */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* + * If we couldn't translate the instruction, the driver should + * return success without handling the fault, it will be retried + * or the instruction fetch will fault. + */ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* + * Not a paste instruction, driver may fail the fault. + */ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4); /* Emulate the paste */ + + return 0; +} + /* * This fault handler is invoked when the core generates page fault on * the paste address. Happens if the kernel closes window in hypervisor @@ -364,6 +399,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) struct vas_window *txwin; vm_fault_t fault; u64 paste_addr; + int ret; /* * window is not opened. Shouldn't expect this error. @@ -408,6 +444,24 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) } mutex_unlock(&txwin->task_ref.mmap_mutex); + /* + * Received this fault due to closing the actual window. + * It can happen during migration or lost credits. + * Since no mapping, return the paste instruction failure + * to the user space. + */ + ret = do_fail_paste(); + /* + * The user space can retry several times until success (needed + * for migration) or should fallback to SW compression or + * manage with the existing open windows if available. + * Looking at sysfs interface, it can determine whether these + * failures are coming during migration or core removal: + * nr_used_credits > nr_total_credits when lost credits + */ + if (!ret || (ret == -EAGAIN)) + return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } From 6a8d4ca891aa5f9402973eab5d7d9cf3929678b7 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:13:54 -0800 Subject: [PATCH 231/380] powerpc/vas: Map paste address only if window is active The paste address mapping is done with mmap() after the window is opened with ioctl. The partition has to close VAS windows in the hypervisor if it lost credits due to DLPAR core removal. But the kernel marks these windows inactive until the previously lost credits are available later. If the window is inactive due to DLPAR after this mmap(), the paste instruction returns failure until the the OS reopens this window again. Before the user space issuing mmap(), there is a possibility of happening DLPAR core removal event which causes the corresponding window inactive. So if the window is not active, return mmap() failure with -EACCES and expects the user space reissue mmap() when the window is active or open a new window when the credit is available. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bbb203c26b324534e25658cb1dbbcb5160a2f93a.camel@linux.ibm.com --- arch/powerpc/platforms/book3s/vas-api.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 82f32781c5d2..f9a1615b74da 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -497,10 +497,29 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EACCES; } + /* + * The initial mmap is done after the window is opened + * with ioctl. But before mmap(), this window can be closed in + * the hypervisor due to lost credit (core removal on pseries). + * So if the window is not active, return mmap() failure with + * -EACCES and expects the user space reissue mmap() when it + * is active again or open new window when the credit is available. + * mmap_mutex protects the paste address mmap() with DLPAR + * close/open event and allows mmap() only when the window is + * active. + */ + mutex_lock(&txwin->task_ref.mmap_mutex); + if (txwin->status != VAS_WIN_ACTIVE) { + pr_err("%s(): Window is not active\n", __func__); + rc = -EACCES; + goto out; + } + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); if (!paste_addr) { pr_err("%s(): Window paste address failed\n", __func__); - return -EINVAL; + rc = -EINVAL; + goto out; } pfn = paste_addr >> PAGE_SHIFT; @@ -520,6 +539,8 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) txwin->task_ref.vma = vma; vma->vm_ops = &vas_vm_ops; +out: + mutex_unlock(&txwin->task_ref.mmap_mutex); return rc; } From 8ef7b9e1765a52c8023d9133a2438ac9f6da486a Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:14:28 -0800 Subject: [PATCH 232/380] powerpc/pseries/vas: Close windows with DLPAR core removal The hypervisor assigns vas credits (windows) for each LPAR based on the number of cores configured in that system. The OS is expected to release credits when cores are removed, and may allocate more when cores are added. So there is a possibility of using excessive credits (windows) in the LPAR and the hypervisor expects the system to close the excessive windows so that NX load can be equally distributed across all LPARs in the system. When the OS closes the excessive windows in the hypervisor, it sets the window status inactive and invalidates window virtual address mapping. The user space receives paste instruction failure if any NX requests are issued on the inactive window. Then the user space can use with the available open windows or retry NX requests until this window active again. This patch also adds the notifier for core removal/add to close windows in the hypervisor if the system lost credits (core removal) and reopen windows in the hypervisor when the previously lost credits are available. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/108928f9c00a48cc6a722315d482d07cf66acf5a.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 2 + arch/powerpc/platforms/pseries/vas.c | 207 +++++++++++++++++++++++++-- arch/powerpc/platforms/pseries/vas.h | 3 + 3 files changed, 204 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 27251af18c65..6baf7b9ffed4 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -34,6 +34,8 @@ */ #define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ /* vas mmap() */ +/* Window is closed in the hypervisor due to lost credit */ +#define VAS_WIN_NO_CRED_CLOSE 0x00000001 /* * Get/Set bit fields diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 1035446f985b..a297720bcdae 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -370,13 +370,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, if (rc) goto out_free; - vas_user_win_add_mm_context(&txwin->vas_win.task_ref); txwin->win_type = cop_feat_caps->win_type; mutex_lock(&vas_pseries_mutex); - list_add(&txwin->win_list, &caps->list); + /* + * Possible to lose the acquired credit with DLPAR core + * removal after the window is opened. So if there are any + * closed windows (means with lost credits), do not give new + * window to user space. New windows will be opened only + * after the existing windows are reopened when credits are + * available. + */ + if (!caps->nr_close_wins) { + list_add(&txwin->win_list, &caps->list); + caps->nr_open_windows++; + mutex_unlock(&vas_pseries_mutex); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + return &txwin->vas_win; + } mutex_unlock(&vas_pseries_mutex); - return &txwin->vas_win; + put_vas_user_win_ref(&txwin->vas_win.task_ref); + rc = -EBUSY; + pr_err("No credit is available to allocate window\n"); out_free: /* @@ -439,14 +454,24 @@ static int vas_deallocate_window(struct vas_window *vwin) caps = &vascaps[win->win_type].caps; mutex_lock(&vas_pseries_mutex); - rc = deallocate_free_window(win); - if (rc) { - mutex_unlock(&vas_pseries_mutex); - return rc; - } + /* + * VAS window is already closed in the hypervisor when + * lost the credit. So just remove the entry from + * the list, remove task references and free vas_window + * struct. + */ + if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) { + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + } else + vascaps[win->win_type].nr_close_wins--; list_del(&win->win_list); atomic_dec(&caps->nr_used_credits); + vascaps[win->win_type].nr_open_windows--; mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -501,6 +526,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(&vcaps->list); + vcaps->feat = feat; caps = &vcaps->caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, @@ -539,6 +565,168 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } +/* + * The hypervisor reduces the available credits if the LPAR lost core. It + * means the excessive windows should not be active and the user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excessive windows and unmap the paste address + * such that the user space receives paste instruction failure. Then up to + * the user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) +{ + struct pseries_vas_window *win, *tmp; + struct vas_user_win_ref *task_ref; + struct vm_area_struct *vma; + int rc = 0; + + list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { + /* + * This window is already closed due to lost credit + * before. Go for next window. + */ + if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) + continue; + + task_ref = &win->vas_win.task_ref; + mutex_lock(&task_ref->mmap_mutex); + vma = task_ref->vma; + /* + * Number of available credits are reduced, So select + * and close windows. + */ + win->vas_win.status |= VAS_WIN_NO_CRED_CLOSE; + + mmap_write_lock(task_ref->mm); + /* + * vma is set in the original mapping. But this mapping + * is done with mmap() after the window is opened with ioctl. + * so we may not see the original mapping if the core remove + * is done before the original mmap() and after the ioctl. + */ + if (vma) + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + + mmap_write_unlock(task_ref->mm); + mutex_unlock(&task_ref->mmap_mutex); + /* + * Close VAS window in the hypervisor, but do not + * free vas_window struct since it may be reused + * when the credit is available later (DLPAR with + * adding cores). This struct will be used + * later when the process issued with close(FD). + */ + rc = deallocate_free_window(win); + if (rc) + return rc; + + vcap->nr_close_wins++; + + if (!--excess_creds) + break; + } + + return 0; +} + +/* + * Get new VAS capabilities when the core add/removal configuration + * changes. Reconfig window configurations based on the credits + * availability from this new capabilities. + */ +static int vas_reconfig_capabilties(u8 type) +{ + struct hv_vas_cop_feat_caps *hv_caps; + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds; + struct vas_caps *vcaps; + int rc = 0, nr_active_wins; + + if (type >= VAS_MAX_FEAT_TYPE) { + pr_err("Invalid credit type %d\n", type); + return -EINVAL; + } + + vcaps = &vascaps[type]; + caps = &vcaps->caps; + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + + mutex_lock(&vas_pseries_mutex); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + new_nr_creds = be16_to_cpu(hv_caps->target_lpar_creds); + + old_nr_creds = atomic_read(&caps->nr_total_credits); + + atomic_set(&caps->nr_total_credits, new_nr_creds); + /* + * The total number of available credits may be decreased or + * inceased with DLPAR operation. Means some windows have to be + * closed / reopened. Hold the vas_pseries_mutex so that the + * the user space can not open new windows. + */ + if (old_nr_creds > new_nr_creds) { + /* + * # active windows is more than new LPAR available + * credits. So close the excessive windows. + * On pseries, each window will have 1 credit. + */ + nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; + if (nr_active_wins > new_nr_creds) + rc = reconfig_close_windows(vcaps, + nr_active_wins - new_nr_creds); + } + +out: + mutex_unlock(&vas_pseries_mutex); + kfree(hv_caps); + return rc; +} +/* + * Total number of default credits available (target_credits) + * in LPAR depends on number of cores configured. It varies based on + * whether processors are in shared mode or dedicated mode. + * Get the notifier when CPU configuration is changed with DLPAR + * operation so that get the new target_credits (vas default capabilities) + * and then update the existing windows usage if needed. + */ +static int pseries_vas_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *dn = rd->dn; + const __be32 *intserv = NULL; + int len, rc = 0; + + if ((action == OF_RECONFIG_ATTACH_NODE) || + (action == OF_RECONFIG_DETACH_NODE)) + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + /* + * Processor config is not changed + */ + if (!intserv) + return NOTIFY_OK; + + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE); + if (rc) + pr_err("Failed reconfig VAS capabilities with DLPAR\n"); + + return rc; +} + +static struct notifier_block pseries_vas_nb = { + .notifier_call = pseries_vas_notifier, +}; + static int __init pseries_vas_init(void) { struct hv_vas_cop_feat_caps *hv_cop_caps; @@ -592,6 +780,9 @@ static int __init pseries_vas_init(void) goto out_cop; } + if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); + pr_info("GZIP feature is available\n"); out_cop: diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 2872532ed72a..701363cfd7c1 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -83,6 +83,9 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ + int nr_open_windows; /* Number of successful open windows */ + u8 feat; /* Feature type */ }; /* From c656cfe571a9b8b882e31177f554bd79141fc015 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:15:04 -0800 Subject: [PATCH 233/380] powerpc/pseries/vas: Reopen windows with DLPAR core add VAS windows can be closed in the hypervisor due to lost credits when the core is removed and the kernel gets fault for NX requests on these inactive windows. If the NX requests are issued on these inactive windows, OS gets page faults and the paste failure will be returned to the user space. If the lost credits are available later with core add, reopen these windows and set them active. Later when the OS sees page faults on these active windows, it creates mapping on the new paste address. Then the user space can continue to use these windows and send HW compression requests to NX successfully. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d9f360e21355e6826142c81146acfa9b60bc7ecc.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 91 +++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index a297720bcdae..96178dd58adf 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -565,6 +565,88 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } +/* + * VAS windows can be closed due to lost credits when the core is + * removed. So reopen them if credits are available due to DLPAR + * core add and set the window active status. When NX sees the page + * fault on the unmapped paste address, the kernel handles the fault + * by setting the remapping to new paste address if the window is + * active. + */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = &vcaps->caps; + struct pseries_vas_window *win = NULL, *tmp; + int rc, mv_ents = 0; + + /* + * Nothing to do if there are no closed windows. + */ + if (!vcaps->nr_close_wins) + return 0; + + /* + * For the core removal, the hypervisor reduces the credits + * assigned to the LPAR and the kernel closes VAS windows + * in the hypervisor depends on reduced credits. The kernel + * uses LIFO (the last windows that are opened will be closed + * first) and expects to open in the same order when credits + * are available. + * For example, 40 windows are closed when the LPAR lost 2 cores + * (dedicated). If 1 core is added, this LPAR can have 20 more + * credits. It means the kernel can reopen 20 windows. So move + * 20 entries in the VAS windows lost and reopen next 20 windows. + */ + if (vcaps->nr_close_wins > creds) + mv_ents = vcaps->nr_close_wins - creds; + + list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { + if (!mv_ents) + break; + + mv_ents--; + } + + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { + /* + * Nothing to do on this window if it is not closed + * with VAS_WIN_NO_CRED_CLOSE + */ + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) + continue; + + rc = allocate_setup_window(win, (u64 *)&domain[0], + caps->win_type); + if (rc) + return rc; + + rc = h_modify_vas_window(win); + if (rc) + goto out; + + mutex_lock(&win->vas_win.task_ref.mmap_mutex); + /* + * Set window status to active + */ + win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE; + mutex_unlock(&win->vas_win.task_ref.mmap_mutex); + win->win_type = caps->win_type; + if (!--vcaps->nr_close_wins) + break; + } + + return 0; +out: + /* + * Window modify HCALL failed. So close the window to the + * hypervisor and return. + */ + free_irq_setup(win); + h_deallocate_vas_window(win->vas_win.winid); + return rc; +} + /* * The hypervisor reduces the available credits if the LPAR lost core. It * means the excessive windows should not be active and the user space @@ -673,7 +755,14 @@ static int vas_reconfig_capabilties(u8 type) * closed / reopened. Hold the vas_pseries_mutex so that the * the user space can not open new windows. */ - if (old_nr_creds > new_nr_creds) { + if (old_nr_creds < new_nr_creds) { + /* + * If the existing target credits is less than the new + * target, reopen windows if they are closed due to + * the previous DLPAR (core removal). + */ + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds); + } else { /* * # active windows is more than new LPAR available * credits. So close the excessive windows. From b903737bc522e0ef3f45a2a60c364ff547572c9b Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:15:36 -0800 Subject: [PATCH 234/380] powerpc/pseries/vas: sysfs interface to export capabilities The hypervisor provides the available VAS GZIP capabilities such as default or QoS window type and the target available credits in each type. This patch creates sysfs entries and exports the target, used and the available credits for each feature. This interface can be used by the user space to determine the credits usage or to set the target credits in the case of QoS type (for DLPAR). /sys/devices/vas/vas0/gzip/default_capabilities (default GZIP capabilities) nr_total_credits /* Total credits available. Can be /* changed with DLPAR operation */ nr_used_credits /* Used credits */ /sys/devices/vas/vas0/gzip/qos_capabilities (QoS GZIP capabilities) nr_total_credits nr_used_credits Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/702d8b626ebfac2b52f4995eebeafe1c9a6fcb75.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/vas-sysfs.c | 226 +++++++++++++++++++++ arch/powerpc/platforms/pseries/vas.c | 6 + arch/powerpc/platforms/pseries/vas.h | 6 + 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index ee60b59024b4..29b522d2c755 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -29,6 +29,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o -obj-$(CONFIG_PPC_VAS) += vas.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c new file mode 100644 index 000000000000..e24d3edb3021 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2022-23 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +#ifdef CONFIG_SYSFS +static struct kobject *pseries_vas_kobj; +static struct kobject *gzip_caps_kobj; + +struct vas_caps_entry { + struct kobject kobj; + struct vas_cop_feat_caps *caps; +}; + +#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) + +#define sysfs_caps_entry_read(_name) \ +static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ +{ \ + return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \ +} + +struct vas_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct vas_cop_feat_caps *, char *); + ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); +}; + +#define VAS_ATTR_RO(_name) \ + sysfs_caps_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0444, _name##_show, NULL); + +/* + * Create sysfs interface: + * /sys/devices/vas/vas0/gzip/default_capabilities + * This directory contains the following VAS GZIP capabilities + * for the defaule credit type. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits + * Total number of default credits assigned to the LPAR which + * can be changed with DLPAR operation. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits + * Number of credits used by the user space. One credit will + * be assigned for each window open. + * + * /sys/devices/vas/vas0/gzip/qos_capabilities + * This directory contains the following VAS GZIP capabilities + * for the Quality of Service (QoS) credit type. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits + * Total number of QoS credits assigned to the LPAR. The user + * has to define this value using HMC interface. It can be + * changed dynamically by the user. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits + * Number of credits used by the user space. + */ + +VAS_ATTR_RO(nr_total_credits); +VAS_ATTR_RO(nr_used_credits); + +static struct attribute *vas_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + NULL, +}; + +static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + + if (!entry->show) + return -EIO; + + return entry->show(caps, buf); +} + +static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + if (!entry->store) + return -EIO; + + return entry->store(caps, buf, count); +} + +static void vas_type_release(struct kobject *kobj) +{ + struct vas_caps_entry *centry = to_caps_entry(kobj); + kfree(centry); +} + +static const struct sysfs_ops vas_sysfs_ops = { + .show = vas_type_show, + .store = vas_type_store, +}; + +static struct kobj_type vas_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_capab_attrs, +}; + +static char *vas_caps_kobj_name(struct vas_cop_feat_caps *caps, + struct kobject **kobj) +{ + if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) { + *kobj = gzip_caps_kobj; + return "qos_capabilities"; + } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) { + *kobj = gzip_caps_kobj; + return "default_capabilities"; + } else + return "Unknown"; +} + +/* + * Add feature specific capability dir entry. + * Ex: VDefGzip or VQosGzip + */ +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + struct vas_caps_entry *centry; + struct kobject *kobj = NULL; + int ret = 0; + char *name; + + centry = kzalloc(sizeof(*centry), GFP_KERNEL); + if (!centry) + return -ENOMEM; + + kobject_init(¢ry->kobj, &vas_attr_type); + centry->caps = caps; + name = vas_caps_kobj_name(caps, &kobj); + + if (kobj) { + ret = kobject_add(¢ry->kobj, kobj, "%s", name); + + if (ret) { + pr_err("VAS: sysfs kobject add / event failed %d\n", + ret); + kobject_put(¢ry->kobj); + } + } + + return ret; +} + +static struct miscdevice vas_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vas", +}; + +/* + * Add VAS and VasCaps (overall capabilities) dir entries. + */ +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + int ret; + + ret = misc_register(&vas_miscdev); + if (ret < 0) { + pr_err("%s: register vas misc device failed\n", __func__); + return ret; + } + + /* + * The hypervisor does not expose multiple VAS instances, but can + * see multiple VAS instances on PowerNV. So create 'vas0' directory + * on pseries. + */ + pseries_vas_kobj = kobject_create_and_add("vas0", + &vas_miscdev.this_device->kobj); + if (!pseries_vas_kobj) { + pr_err("Failed to create VAS sysfs entry\n"); + return -ENOMEM; + } + + if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) || + (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) { + gzip_caps_kobj = kobject_create_and_add("gzip", + pseries_vas_kobj); + if (!gzip_caps_kobj) { + pr_err("Failed to create VAS GZIP capability entry\n"); + kobject_put(pseries_vas_kobj); + return -ENOMEM; + } + } + + return 0; +} + +#else +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + return 0; +} + +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + return 0; +} +#endif diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 96178dd58adf..ca0ad191229d 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -560,6 +560,10 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } } + rc = sysfs_add_vas_caps(caps); + if (rc) + return rc; + copypaste_feat = true; return 0; @@ -844,6 +848,8 @@ static int __init pseries_vas_init(void) caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + sysfs_pseries_vas_init(&caps_all); + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); if (!hv_cop_caps) { rc = -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 701363cfd7c1..f1bdb776021e 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -30,6 +30,9 @@ #define VAS_COPY_PASTE_USER_MODE 0x00000001 #define VAS_COP_OP_USER_MODE 0x00000010 +#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 +#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 + /* * Co-processor feature - GZIP QoS windows or GZIP default windows */ @@ -125,4 +128,7 @@ struct pseries_vas_window { char *name; int fault_virq; }; + +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); #endif /* _VAS_H */ From 45f06eac30e5abebecc66e41e7c89d5b4413bac1 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 28 Feb 2022 17:16:10 -0800 Subject: [PATCH 235/380] powerpc/pseries/vas: Add 'update_total_credits' entry for QoS capabilities pseries supports two types of credits - Default (uses normal priority FIFO) and Qality of service (QoS uses high priority FIFO). The user decides the number of QoS credits and sets this value with HMC interface. The total credits for QoS capabilities can be changed dynamically with HMC interface which invokes drmgr to communicate to the kernel. This patch creats 'update_total_credits' entry for QoS capabilities so that drmgr command can write the new target QoS credits in sysfs. Instead of using this value, the kernel gets the new QoS capabilities from the hypervisor whenever update_total_credits is updated to make sure sync with the QoS target credits in the hypervisor. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b01ef31a0f964686d00243e7de7f09c73c07e69e.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas-sysfs.c | 54 +++++++++++++++++++--- arch/powerpc/platforms/pseries/vas.c | 2 +- arch/powerpc/platforms/pseries/vas.h | 1 + 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index e24d3edb3021..4a7fcde5afc0 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -25,6 +25,27 @@ struct vas_caps_entry { #define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) +/* + * This function is used to get the notification from the drmgr when + * QoS credits are changed. Though receiving the target total QoS + * credits here, get the official QoS capabilities from the hypervisor. + */ +static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps, + const char *buf, size_t count) +{ + int err; + u16 creds; + + err = kstrtou16(buf, 0, &creds); + if (!err) + err = vas_reconfig_capabilties(caps->win_type); + + if (err) + return -EINVAL; + + return count; +} + #define sysfs_caps_entry_read(_name) \ static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ { \ @@ -63,17 +84,29 @@ struct vas_sysfs_entry { * changed dynamically by the user. * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits * Number of credits used by the user space. + * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits + * Update total QoS credits dynamically */ VAS_ATTR_RO(nr_total_credits); VAS_ATTR_RO(nr_used_credits); -static struct attribute *vas_capab_attrs[] = { +static struct vas_sysfs_entry update_total_credits_attribute = + __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); + +static struct attribute *vas_def_capab_attrs[] = { &nr_total_credits_attribute.attr, &nr_used_credits_attribute.attr, NULL, }; +static struct attribute *vas_qos_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + &update_total_credits_attribute.attr, + NULL, +}; + static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -118,19 +151,29 @@ static const struct sysfs_ops vas_sysfs_ops = { .store = vas_type_store, }; -static struct kobj_type vas_attr_type = { +static struct kobj_type vas_def_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_capab_attrs, + .default_attrs = vas_def_capab_attrs, }; -static char *vas_caps_kobj_name(struct vas_cop_feat_caps *caps, +static struct kobj_type vas_qos_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_qos_capab_attrs, +}; + +static char *vas_caps_kobj_name(struct vas_caps_entry *centry, struct kobject **kobj) { + struct vas_cop_feat_caps *caps = centry->caps; + if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) { + kobject_init(¢ry->kobj, &vas_qos_attr_type); *kobj = gzip_caps_kobj; return "qos_capabilities"; } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) { + kobject_init(¢ry->kobj, &vas_def_attr_type); *kobj = gzip_caps_kobj; return "default_capabilities"; } else @@ -152,9 +195,8 @@ int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) if (!centry) return -ENOMEM; - kobject_init(¢ry->kobj, &vas_attr_type); centry->caps = caps; - name = vas_caps_kobj_name(caps, &kobj); + name = vas_caps_kobj_name(centry, &kobj); if (kobj) { ret = kobject_add(¢ry->kobj, kobj, "%s", name); diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index ca0ad191229d..591c7597db5a 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -722,7 +722,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * changes. Reconfig window configurations based on the credits * availability from this new capabilities. */ -static int vas_reconfig_capabilties(u8 type) +int vas_reconfig_capabilties(u8 type) { struct hv_vas_cop_feat_caps *hv_caps; struct vas_cop_feat_caps *caps; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index f1bdb776021e..4ddb1001a0aa 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -130,5 +130,6 @@ struct pseries_vas_window { }; int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int vas_reconfig_capabilties(u8 type); int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); #endif /* _VAS_H */ From 278fe1cc2205a05bfd92c794be3d207372b17289 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sun, 27 Feb 2022 23:51:28 -0800 Subject: [PATCH 236/380] powerpc/pseries/vas: Define global hv_cop_caps struct The coprocessor capabilities struct is used to get default and QoS capabilities from the hypervisor during init, DLPAR event and migration. So instead of allocating this struct for each event, define global struct and reuse it which allows the migration code to avoid adding an error path. Also disable copy/paste feature flag if any capabilities HCALL is failed. Signed-off-by: Haren Myneni Reviewed-by: Nicholas Piggin Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/57da6a270fcb9308cd57be7c88037029343080f7.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 49 ++++++++++++---------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 591c7597db5a..3bb219f54806 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -26,6 +26,7 @@ static struct vas_all_caps caps_all; static bool copypaste_feat; +static struct hv_vas_cop_feat_caps hv_cop_caps; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); @@ -724,7 +725,6 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) */ int vas_reconfig_capabilties(u8 type) { - struct hv_vas_cop_feat_caps *hv_caps; struct vas_cop_feat_caps *caps; int old_nr_creds, new_nr_creds; struct vas_caps *vcaps; @@ -738,17 +738,13 @@ int vas_reconfig_capabilties(u8 type) vcaps = &vascaps[type]; caps = &vcaps->caps; - hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); - if (!hv_caps) - return -ENOMEM; - mutex_lock(&vas_pseries_mutex); rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, - (u64)virt_to_phys(hv_caps)); + (u64)virt_to_phys(&hv_cop_caps)); if (rc) goto out; - new_nr_creds = be16_to_cpu(hv_caps->target_lpar_creds); + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); old_nr_creds = atomic_read(&caps->nr_total_credits); @@ -780,7 +776,6 @@ int vas_reconfig_capabilties(u8 type) out: mutex_unlock(&vas_pseries_mutex); - kfree(hv_caps); return rc; } /* @@ -822,9 +817,8 @@ static struct notifier_block pseries_vas_nb = { static int __init pseries_vas_init(void) { - struct hv_vas_cop_feat_caps *hv_cop_caps; struct hv_vas_all_caps *hv_caps; - int rc; + int rc = 0; /* * Linux supports user space COPY/PASTE only with Radix @@ -850,38 +844,37 @@ static int __init pseries_vas_init(void) sysfs_pseries_vas_init(&caps_all); - hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); - if (!hv_cop_caps) { - rc = -ENOMEM; - goto out; - } /* * QOS capabilities available */ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, - VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); if (rc) - goto out_cop; + goto out; } /* * Default capabilities available */ - if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, - VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); - if (rc) - goto out_cop; + VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); + + if (!rc && copypaste_feat) { + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); + + pr_info("GZIP feature is available\n"); + } else { + /* + * Should not happen, but only when get default + * capabilities HCALL failed. So disable copy paste + * feature. + */ + copypaste_feat = false; } - if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR)) - of_reconfig_notifier_register(&pseries_vas_nb); - - pr_info("GZIP feature is available\n"); - -out_cop: - kfree(hv_cop_caps); out: kfree(hv_caps); return rc; From 716d7a2e3764cb79061371767bff1a691adb4e7f Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sun, 27 Feb 2022 23:52:08 -0800 Subject: [PATCH 237/380] powerpc/pseries/vas: Modify reconfig open/close functions for migration VAS is a hardware engine stays on the chip. So when the partition migrates, all VAS windows on the source system have to be closed and reopen them on the destination after migration. The kernel has to consider both DLPAR CPU and migration events to take action on VAS windows. So using VAS_WIN_NO_CRED_CLOSE and VAS_WIN_MIGRATE_CLOSE status bits and windows will be reopened after migration only after both status bits are cleared. This patch make changes to the current reconfig_open/close_windows functions to support migration: - Set VAS_WIN_MIGRATE_CLOSE to the window status when closes and reopen windows with the same status during resume. - Continue to close all windows even if deallocate HCALL failed (should not happen) since no way to stop migration with the current LPM implementation. - If the DLPAR CPU event happens while migration is in progress, set VAS_WIN_NO_CRED_CLOSE to the window status. Close window happens with the first event (migration or DLPAR) and Reopen window happens only with the last event (migration or DLPAR). Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0aad580387cb58379496b4cbbd7c5596e9ea70be.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 2 + arch/powerpc/platforms/pseries/vas.c | 90 ++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 6baf7b9ffed4..83afcb6c194b 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -36,6 +36,8 @@ /* vas mmap() */ /* Window is closed in the hypervisor due to lost credit */ #define VAS_WIN_NO_CRED_CLOSE 0x00000001 +/* Window is closed due to migration */ +#define VAS_WIN_MIGRATE_CLOSE 0x00000002 /* * Get/Set bit fields diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 3bb219f54806..fbcf311da0ec 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -457,11 +457,12 @@ static int vas_deallocate_window(struct vas_window *vwin) mutex_lock(&vas_pseries_mutex); /* * VAS window is already closed in the hypervisor when - * lost the credit. So just remove the entry from - * the list, remove task references and free vas_window + * lost the credit or with migration. So just remove the entry + * from the list, remove task references and free vas_window * struct. */ - if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) { + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { rc = deallocate_free_window(win); if (rc) { mutex_unlock(&vas_pseries_mutex); @@ -578,12 +579,14 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, * by setting the remapping to new paste address if the window is * active. */ -static int reconfig_open_windows(struct vas_caps *vcaps, int creds) +static int reconfig_open_windows(struct vas_caps *vcaps, int creds, + bool migrate) { long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; struct vas_cop_feat_caps *caps = &vcaps->caps; struct pseries_vas_window *win = NULL, *tmp; int rc, mv_ents = 0; + int flag; /* * Nothing to do if there are no closed windows. @@ -602,8 +605,10 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) * (dedicated). If 1 core is added, this LPAR can have 20 more * credits. It means the kernel can reopen 20 windows. So move * 20 entries in the VAS windows lost and reopen next 20 windows. + * For partition migration, reopen all windows that are closed + * during resume. */ - if (vcaps->nr_close_wins > creds) + if ((vcaps->nr_close_wins > creds) && !migrate) mv_ents = vcaps->nr_close_wins - creds; list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { @@ -613,12 +618,35 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) mv_ents--; } + /* + * Open windows if they are closed only with migration or + * DLPAR (lost credit) before. + */ + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { /* - * Nothing to do on this window if it is not closed - * with VAS_WIN_NO_CRED_CLOSE + * This window is closed with DLPAR and migration events. + * So reopen the window with the last event. + * The user space is not suspended with the current + * migration notifier. So the user space can issue DLPAR + * CPU hotplug while migration in progress. In this case + * this window will be opened with the last event. */ - if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) + if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + win->vas_win.status &= ~flag; + continue; + } + + /* + * Nothing to do on this window if it is not closed + * with this flag + */ + if (!(win->vas_win.status & flag)) continue; rc = allocate_setup_window(win, (u64 *)&domain[0], @@ -634,7 +662,7 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) /* * Set window status to active */ - win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE; + win->vas_win.status &= ~flag; mutex_unlock(&win->vas_win.task_ref.mmap_mutex); win->win_type = caps->win_type; if (!--vcaps->nr_close_wins) @@ -661,20 +689,32 @@ out: * the user space to fall back to SW compression and manage with the * existing windows. */ -static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, + bool migrate) { struct pseries_vas_window *win, *tmp; struct vas_user_win_ref *task_ref; struct vm_area_struct *vma; - int rc = 0; + int rc = 0, flag; + + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { /* * This window is already closed due to lost credit - * before. Go for next window. + * or for migration before. Go for next window. + * For migration, nothing to do since this window + * closed for DLPAR and will be reopened even on + * the destination system with other DLPAR operation. */ - if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) + if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || + (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { + win->vas_win.status |= flag; continue; + } task_ref = &win->vas_win.task_ref; mutex_lock(&task_ref->mmap_mutex); @@ -683,7 +723,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * Number of available credits are reduced, So select * and close windows. */ - win->vas_win.status |= VAS_WIN_NO_CRED_CLOSE; + win->vas_win.status |= flag; mmap_write_lock(task_ref->mm); /* @@ -706,12 +746,24 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * later when the process issued with close(FD). */ rc = deallocate_free_window(win); - if (rc) + /* + * This failure is from the hypervisor. + * No way to stop migration for these failures. + * So ignore error and continue closing other windows. + */ + if (rc && !migrate) return rc; vcap->nr_close_wins++; - if (!--excess_creds) + /* + * For migration, do not depend on lpar_creds in case if + * mismatch with the hypervisor value (should not happen). + * So close all active windows in the list and will be + * reopened windows based on the new lpar_creds on the + * destination system during resume. + */ + if (!migrate && !--excess_creds) break; } @@ -761,7 +813,8 @@ int vas_reconfig_capabilties(u8 type) * target, reopen windows if they are closed due to * the previous DLPAR (core removal). */ - rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, + false); } else { /* * # active windows is more than new LPAR available @@ -771,7 +824,8 @@ int vas_reconfig_capabilties(u8 type) nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; if (nr_active_wins > new_nr_creds) rc = reconfig_close_windows(vcaps, - nr_active_wins - new_nr_creds); + nr_active_wins - new_nr_creds, + false); } out: From 37e6764895ef7431f45ff603a548549d409993d2 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 2 Mar 2022 00:51:58 -0800 Subject: [PATCH 238/380] powerpc/pseries/vas: Add VAS migration handler Since the VAS windows belong to the VAS hardware resource, the hypervisor expects the partition to close them on source partition and reopen them after the partition migrated on the destination machine. This handler is called before pseries_suspend() to close these windows and again invoked after migration. All active windows for both default and QoS types will be closed and mark them inactive and reopened after migration with this handler. During the migration, the user space receives paste instruction failure if it issues copy/paste on these inactive windows. The current migration implementation does not freeze the user space and applications can continue to open VAS windows while migration is in progress. So when the migration_in_progress flag is set, VAS open window API returns -EBUSY. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/05e45ff4f8babd2490ccb7ae923884f4aa21a7e5.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 5 ++ arch/powerpc/platforms/pseries/vas.c | 98 ++++++++++++++++++++++- arch/powerpc/platforms/pseries/vas.h | 14 ++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 94077fa91959..78f3f74c7056 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -26,6 +26,7 @@ #include #include #include "pseries.h" +#include "vas.h" /* vas_migration_handler() */ #include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -669,12 +670,16 @@ static int pseries_migrate_partition(u64 handle) if (ret) return ret; + vas_migration_handler(VAS_SUSPEND); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else pseries_cancel_migration(handle, ret); + vas_migration_handler(VAS_RESUME); + return ret; } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index fbcf311da0ec..1f59d78c77a1 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -30,6 +30,7 @@ static struct hv_vas_cop_feat_caps hv_cop_caps; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); +static bool migration_in_progress; static long hcall_return_busy_check(long rc) { @@ -356,7 +357,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - rc = allocate_setup_window(txwin, (u64 *)&domain[0], + if (migration_in_progress) + rc = -EBUSY; + else + rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); mutex_unlock(&vas_pseries_mutex); if (rc) @@ -869,6 +873,98 @@ static struct notifier_block pseries_vas_nb = { .notifier_call = pseries_vas_notifier, }; +/* + * For LPM, all windows have to be closed on the source partition + * before migration and reopen them on the destination partition + * after migration. So closing windows during suspend and + * reopen them during resume. + */ +int vas_migration_handler(int action) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds = 0; + struct vas_caps *vcaps; + int i, rc = 0; + + /* + * NX-GZIP is not enabled. Nothing to do for migration. + */ + if (!copypaste_feat) + return rc; + + mutex_lock(&vas_pseries_mutex); + + if (action == VAS_SUSPEND) + migration_in_progress = true; + else + migration_in_progress = false; + + for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { + vcaps = &vascaps[i]; + caps = &vcaps->caps; + old_nr_creds = atomic_read(&caps->nr_total_credits); + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + /* + * Should not happen. But incase print messages, close + * all windows in the list during suspend and reopen + * windows based on new lpar_creds on the destination + * system. + */ + if (old_nr_creds != new_nr_creds) { + pr_err("Target credits mismatch with the hypervisor\n"); + pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", + action, old_nr_creds, new_nr_creds); + pr_err("Used creds: %d, Active creds: %d\n", + atomic_read(&caps->nr_used_credits), + vcaps->nr_open_windows - vcaps->nr_close_wins); + } + } else { + pr_err("state(%d): Get VAS capabilities failed with %d\n", + action, rc); + /* + * We can not stop migration with the current lpm + * implementation. So continue closing all windows in + * the list (during suspend) and return without + * opening windows (during resume) if VAS capabilities + * HCALL failed. + */ + if (action == VAS_RESUME) + goto out; + } + + switch (action) { + case VAS_SUSPEND: + rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, + true); + break; + case VAS_RESUME: + atomic_set(&caps->nr_total_credits, new_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds, true); + break; + default: + /* should not happen */ + pr_err("Invalid migration action %d\n", action); + rc = -EINVAL; + goto out; + } + + /* + * Ignore errors during suspend and return for resume. + */ + if (rc && (action == VAS_RESUME)) + goto out; + } + +out: + mutex_unlock(&vas_pseries_mutex); + return rc; +} + static int __init pseries_vas_init(void) { struct hv_vas_all_caps *hv_caps; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 4ddb1001a0aa..34177881e998 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -33,6 +33,11 @@ #define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 #define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 +enum vas_migrate_action { + VAS_SUSPEND, + VAS_RESUME, +}; + /* * Co-processor feature - GZIP QoS windows or GZIP default windows */ @@ -132,4 +137,13 @@ struct pseries_vas_window { int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); int vas_reconfig_capabilties(u8 type); int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); + +#ifdef CONFIG_PPC_VAS +int vas_migration_handler(int action); +#else +static inline int vas_migration_handler(int action) +{ + return 0; +} +#endif #endif /* _VAS_H */ From fa1321b11bd01752f5be2415e74a0e1a7c378262 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Mon, 28 Feb 2022 15:24:33 +0100 Subject: [PATCH 239/380] powerpc/sysdev: fix incorrect use to determine if list is empty 'gtm' will *always* be set by list_for_each_entry(). It is incorrect to assume that the iterator value will be NULL if the list is empty. Instead of checking the pointer it should be checked if the list is empty. Fixes: 83ff9dcf375c ("powerpc/sysdev: implement FSL GTM support") Signed-off-by: Jakob Koschel Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220228142434.576226-1-jakobkoschel@gmail.com --- arch/powerpc/sysdev/fsl_gtm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7..39186ad6b3c3 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } From d4679ac8ea2e5078704aa1c026db36580cc1bf9a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Feb 2022 22:34:49 +1100 Subject: [PATCH 240/380] powerpc/64s: Don't use DSISR for SLB faults Since commit 46ddcb3950a2 ("powerpc/mm: Show if a bad page fault on data is read or write.") we use page_fault_is_write(regs->dsisr) in __bad_page_fault() to determine if the fault is for a read or write, and change the message printed accordingly. But SLB faults, aka Data Segment Interrupts, don't set DSISR (Data Storage Interrupt Status Register) to a useful value. All ISA versions from v2.03 through v3.1 specify that the Data Segment Interrupt sets DSISR "to an undefined value". As far as I can see there's no mention of SLB faults setting DSISR in any BookIV content either. This manifests as accesses that should be a read being incorrectly reported as writes, for example, using the xmon "dump" command: 0:mon> d 0x5deadbeef0000000 5deadbeef0000000 [359526.415354][ C6] BUG: Unable to handle kernel data access on write at 0x5deadbeef0000000 [359526.415611][ C6] Faulting instruction address: 0xc00000000010a300 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf400] pc: c00000000010a300: mread+0x90/0x190 If we disassemble the PC, we see a load instruction: 0:mon> di c00000000010a300 c00000000010a300 89490000 lbz r10,0(r9) We can also see in exceptions-64s.S that the data_access_slb block doesn't set IDSISR=1, which means it doesn't load DSISR into pt_regs. So the value we're using to determine if the fault is a read/write is some stale value in pt_regs from a previous page fault. Rework the printing logic to separate the SLB fault case out, and only print read/write in the cases where we can determine it. The result looks like eg: 0:mon> d 0x5deadbeef0000000 5deadbeef0000000 [ 721.779525][ C6] BUG: Unable to handle kernel data access at 0x5deadbeef0000000 [ 721.779697][ C6] Faulting instruction address: 0xc00000000014cbe0 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf390] 0:mon> d 0 0000000000000000 [ 742.793242][ C6] BUG: Kernel NULL pointer dereference at 0x00000000 [ 742.793316][ C6] Faulting instruction address: 0xc00000000014cbe0 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf390] Fixes: 46ddcb3950a2 ("powerpc/mm: Show if a bad page fault on data is read or write.") Reported-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Link: https://lore.kernel.org/r/20220222113449.319193-1-mpe@ellerman.id.au --- arch/powerpc/mm/fault.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a9..7ba6d3eff636 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -567,18 +567,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? "write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", From 591b4b268435f00d2f0b81f786c2c7bd5ef66416 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Feb 2022 12:58:21 +1100 Subject: [PATCH 241/380] powerpc/code-patching: Pre-map patch area Paul reported a warning with DEBUG_ATOMIC_SLEEP=y: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:256 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 preempt_count: 0, expected: 0 ... Call Trace: dump_stack_lvl+0xa0/0xec (unreliable) __might_resched+0x2f4/0x310 kmem_cache_alloc+0x220/0x4b0 __pud_alloc+0x74/0x1d0 hash__map_kernel_page+0x2cc/0x390 do_patch_instruction+0x134/0x4a0 arch_jump_label_transform+0x64/0x78 __jump_label_update+0x148/0x180 static_key_enable_cpuslocked+0xd0/0x120 static_key_enable+0x30/0x50 check_kvm_guest+0x60/0x88 pSeries_smp_probe+0x54/0xb0 smp_prepare_cpus+0x3e0/0x430 kernel_init_freeable+0x20c/0x43c kernel_init+0x30/0x1a0 ret_from_kernel_thread+0x5c/0x64 Peter pointed out that this is because do_patch_instruction() has disabled interrupts, but then map_patch_area() calls map_kernel_page() then hash__map_kernel_page() which does a sleeping memory allocation. We only see the warning in KVM guests with SMT enabled, which is not particularly common, or on other platforms if CONFIG_KPROBES is disabled, also not common. The reason we don't see it in most configurations is that another path that happens to have interrupts enabled has allocated the required page tables for us, eg. there's a path in kprobes init that does that. That's just pure luck though. As Christophe suggested, the simplest solution is to do a dummy map/unmap when we initialise the patching, so that any required page table levels are pre-allocated before the first call to do_patch_instruction(). This works because the unmap doesn't free any page tables that were allocated by the map, it just clears the PTE, leaving the page table levels there for the next map. Reported-by: Paul Menzel Debugged-by: Peter Zijlstra Suggested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220223015821.473097-1-mpe@ellerman.id.au --- arch/powerpc/lib/code-patching.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 906d43463366..00c68e7fb11e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + this_cpu_write(text_poke_area, area); return 0; From acd7408d2748533d767387cb4308692fba543658 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:35 +0530 Subject: [PATCH 242/380] powerpc/bpf: Skip branch range validation during first pass During the first pass, addrs[] is still being populated. So, all branches to following instructions will appear to be going to the start of the JIT program. Ignore branch range validation for such instructions and assume those to be in range. Branch range validation will happen during the second pass after addrs[] is setup properly. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bc517413d11636e20dbfc88503dad14bcbe391e2.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b75507fc8f6b..25a7190bcee9 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -27,7 +27,7 @@ #define PPC_JMP(dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ @@ -41,7 +41,7 @@ #define PPC_BCC_SHORT(cond, dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_cond_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ From bafb5898de5d2f15133774cb049fe55720b9c92f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:36 +0530 Subject: [PATCH 243/380] powerpc/bpf: Emit a single branch instruction for known short branch ranges PPC_BCC() emits two instructions to accommodate scenarios where we need to branch outside the range of a conditional branch. PPC_BCC_SHORT() emits a single branch instruction and can be used when the branch is known to be within a conditional branch range. Convert some of the uses of PPC_BCC() in the powerpc BPF JIT over to PPC_BCC_SHORT() where we know the branch range. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/edbca01377d1d5f472868bf6d8962b0a0d85b96f.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 8 ++++---- arch/powerpc/net/bpf_jit_comp64.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43643f1c1034..5ba5340a6387 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -229,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -238,7 +238,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); @@ -251,7 +251,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLWI(_R3, 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); @@ -842,7 +842,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (BPF_MODE(code) == BPF_PROBE_MEM) { PPC_LI32(_R0, TASK_SIZE - off); EMIT(PPC_RAW_CMPLW(src_reg, _R0)); - PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * For BPF_DW case, "li reg_h,0" would be needed when diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e1e8c934308a..b1ed8611091d 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -225,7 +225,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -233,7 +233,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; @@ -251,7 +251,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); @@ -807,7 +807,7 @@ emit_clear: else /* BOOK3S_64 */ PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); - PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * Check if 'off' is word aligned because PPC_BPF_LL() From 0ffdbce6f4a89bb7c0002904d6438ec83cf05ce7 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:37 +0530 Subject: [PATCH 244/380] powerpc/bpf: Handle large branch ranges with BPF_EXIT In some scenarios, it is possible that the program epilogue is outside the branch range for a BPF_EXIT instruction. Instead of rejecting such programs, emit epilogue as an alternate exit point from the program. Track the location of the same so that subsequent exits can take either of the two paths. Reported-by: Jordan Niethe Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/33aa2e92645a92712be23b18035a2c6dcb92ff8d.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 2 ++ arch/powerpc/net/bpf_jit_comp.c | 22 +++++++++++++++++++++- arch/powerpc/net/bpf_jit_comp32.c | 7 +++++-- arch/powerpc/net/bpf_jit_comp64.c | 7 +++++-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 25a7190bcee9..e58cf29bb0cf 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -148,6 +148,7 @@ struct codegen_context { unsigned int stack_size; int b2p[ARRAY_SIZE(b2p)]; unsigned int exentry_idx; + unsigned int alt_exit_addr; }; #ifdef CONFIG_PPC32 @@ -183,6 +184,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 56dd1f4e3e44..141e64585b64 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -89,6 +89,22 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, return 0; } +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) +{ + if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { + PPC_JMP(exit_addr); + } else if (ctx->alt_exit_addr) { + if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4)))) + return -1; + PPC_JMP(ctx->alt_exit_addr); + } else { + ctx->alt_exit_addr = ctx->idx * 4; + bpf_jit_build_epilogue(image, ctx); + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -177,8 +193,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * If we have seen a tail call, we need a second pass. * This is because bpf_jit_emit_common_epilogue() is called * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + * We also need a second pass if we ended up with too large + * a program so as to ensure BPF_EXIT branches are in range. */ - if (cgctx.seen & SEEN_TAILCALL) { + if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; @@ -193,6 +211,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * calculate total size from idx. */ bpf_jit_build_prologue(0, &cgctx); + addrs[fp->len] = cgctx.idx * 4; bpf_jit_build_epilogue(0, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; @@ -233,6 +252,7 @@ skip_init_ctx: for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; + cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 5ba5340a6387..8e743b7bf8f5 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -937,8 +937,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b1ed8611091d..371bd5a16859 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -871,8 +871,11 @@ emit_clear: * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; From c2067f7f88830cdd020c775ffefe84a8177337af Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:38 +0530 Subject: [PATCH 245/380] powerpc64/bpf: Do not save/restore LR on each call to bpf_stf_barrier() Instead of saving and restoring LR before each invocation to bpf_stf_barrier(), set SEEN_FUNC flag so that we save/restore LR in prologue/epilogue. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4446f25478d82a2a4ac9dab2ebdfd88ddf923eb7.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 371bd5a16859..27ac2fc76702 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -690,11 +690,10 @@ emit_clear: EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: - EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); + ctx->seen |= SEEN_FUNC; PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); break; case STF_BARRIER_NONE: break; From 1d4866d5652f7a19dcbed0c4e366c3402c7775b7 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:39 +0530 Subject: [PATCH 246/380] powerpc64/bpf: Use r12 for constant blinding In preparation for preserving kernel toc in r2, switch BPF_REG_AX from r2 to r12. r12 is not used by bpf JIT except during external helper/bpf calls, or with BPF_NOSPEC. These sequences aren't emitted when BPF_REG_AX is used for constant blinding and other purposes. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e109f98617eacb4512c17a48525e94eda42889e6.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index b63b35e45e55..82cdfee41278 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -56,7 +56,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = { /* frame pointer aka BPF_REG_10 */ [BPF_REG_FP] = 31, /* eBPF jit internal registers */ - [BPF_REG_AX] = 2, + [BPF_REG_AX] = 12, [TMP_REG_1] = 9, [TMP_REG_2] = 10 }; From 4eeac2b0aaadc3d1943d348d8565f7cfb93272b9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:40 +0530 Subject: [PATCH 247/380] powerpc64: Set PPC64_ELF_ABI_v[1|2] macros to 1 Set macros to 1 so that they can be used with __is_defined(). Suggested-by: Christophe Leroy Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/abad4868416ddfd42893f99c0cad8e5faf998095.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index 97da77bc48c9..84078c28c1a2 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -13,9 +13,9 @@ #ifdef __powerpc64__ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define PPC64_ELF_ABI_v2 +#define PPC64_ELF_ABI_v2 1 #else -#define PPC64_ELF_ABI_v1 +#define PPC64_ELF_ABI_v1 1 #endif #endif /* __powerpc64__ */ From b10cb163c4b31b03ac5014abbfd0b868913fd8e3 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:41 +0530 Subject: [PATCH 248/380] powerpc64/bpf elfv2: Setup kernel TOC in r2 on entry In preparation for using kernel TOC, load the same in r2 on entry. With elfv1, the kernel TOC is already setup by our caller. We adjust the number of instructions to skip on a tail call accordingly. We get rid of the #ifdef in bpf_jit_emit_tail_call() since FUNCTION_DESCR_SIZE is itself under a #ifdef. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/18a05a4ceec14a8617c9dd4b7128d0afa83fd14e.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 27ac2fc76702..44314ee60155 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -73,6 +73,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + if (__is_defined(PPC64_ELF_ABI_v2)) + PPC_BPF_LL(_R2, _R13, offsetof(struct paca_struct, kernel_toc)); + /* * Initialize tail_call_cnt if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are @@ -87,8 +90,6 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_NOP()); } -#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to @@ -217,6 +218,10 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ int b2p_bpf_array = b2p[BPF_REG_2]; int b2p_index = b2p[BPF_REG_3]; + int bpf_tailcall_prologue_size = 8; + + if (__is_defined(PPC64_ELF_ABI_v2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* * if (index >= array->map.max_entries) @@ -255,13 +260,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); -#ifdef PPC64_ELF_ABI_v1 - /* skip past the function descriptor */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); -#else - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); -#endif + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* tear down stack, restore NVRs, ... */ From 43d636f8b4fd2ee668e75e835fae2fcf4bc0f699 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:42 +0530 Subject: [PATCH 249/380] powerpc64/bpf elfv1: Do not load TOC before calling functions BPF helpers always reside in core kernel and all BPF programs use the kernel TOC. As such, there is no need to load the TOC before calling helpers or other BPF functions. Drop code to do the same. Add a check to ensure we don't proceed if this assumption ever changes in future. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a3cd3da4d24d95d845cd10382b1af083600c9074.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 2 +- arch/powerpc/net/bpf_jit_comp.c | 4 +++- arch/powerpc/net/bpf_jit_comp32.c | 8 +++++-- arch/powerpc/net/bpf_jit_comp64.c | 39 ++++++++++++++++--------------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index e58cf29bb0cf..ea384ae836cc 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -178,7 +178,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 141e64585b64..635f7448ff79 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -59,7 +59,9 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, */ tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; /* * Restore ctx->idx here. This is safe as the length diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 8e743b7bf8f5..511d2a203e7d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -193,7 +193,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { s32 rel = (s32)func - (s32)(image + ctx->idx); @@ -209,6 +209,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_MTCTR(_R0)); EMIT(PPC_RAW_BCTRL()); } + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -961,7 +963,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); } - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 44314ee60155..e9fd4694226f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -147,9 +147,13 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, - u64 func) +static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { + unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + + if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + return -EINVAL; + #ifdef PPC64_ELF_ABI_v1 /* func points to the function descriptor */ PPC_LI64(b2p[TMP_REG_2], func); @@ -157,25 +161,23 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); /* ... and move it to CTR */ EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); EMIT(PPC_RAW_MTCTR(12)); #endif EMIT(PPC_RAW_BCTRL()); + + return 0; } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; + if (WARN_ON_ONCE(func && is_module_text_address(func))) + return -EINVAL; + /* Load function address into r12 */ PPC_LI64(12, func); @@ -193,19 +195,14 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_NOP()); #ifdef PPC64_ELF_ABI_v1 - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, 12, 8); /* Load actual entry point from function descriptor */ PPC_BPF_LL(12, 12, 0); #endif EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -890,9 +887,13 @@ emit_clear: return ret; if (func_addr_fixed) - bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + if (ret) + return ret; + /* move return value from r3 to BPF_REG_0 */ EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); break; From feb6307289d85262c5aed04d6f192d38abba7c45 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:43 +0530 Subject: [PATCH 250/380] powerpc64/bpf: Optimize instruction sequence used for function calls When calling BPF helpers, we load the function address to call into a register. This can result in upto 5 instructions. Optimize this by instead using the kernel toc in r2 and adjusting offset to the BPF helper. This works since all BPF helpers are part of kernel text, and all BPF programs/functions utilize the kernel TOC. Further more: - load the actual function entry address in elf v1, rather than loading it through the function descriptor address. - load the Local Entry Point (LEP) in elf v2 skipping TOC setup. - consolidate code across elf abi v1 and v2 by using r12 on both. Reported-by: Anton Blanchard Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1233c7544e60dcb021c52b1f840b0f21a87b33ed.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e9fd4694226f..bff200723e72 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -150,22 +150,20 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + long reladdr; if (WARN_ON_ONCE(!core_kernel_text(func_addr))) return -EINVAL; -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to CTR */ - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTCTR(12)); -#endif + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); + return -ERANGE; + } + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); return 0; @@ -178,6 +176,9 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func if (WARN_ON_ONCE(func && is_module_text_address(func))) return -EINVAL; + /* skip past descriptor if elf v1 */ + func += FUNCTION_DESCR_SIZE; + /* Load function address into r12 */ PPC_LI64(12, func); @@ -194,11 +195,6 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); -#ifdef PPC64_ELF_ABI_v1 - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(12, 12, 0); -#endif - EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); From 74bbe3f08463c48a27088be4823a5803b7f7d9a1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:44 +0530 Subject: [PATCH 251/380] powerpc/bpf: Rename PPC_BL_ABS() to PPC_BL() PPC_BL_ABS() is just doing a relative branch with link. The name suggests that it is for branching to an absolute address, which is incorrect. Rename the macro to a more appropriate PPC_BL(). Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f0e57b6c7a6ee40dba645535b70da46f46e8af5e.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 6 +++--- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index ea384ae836cc..dd1b338ba064 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -34,9 +34,9 @@ EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* blr; (unconditional 'branch' with link) to absolute address */ -#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ - (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) +/* bl (unconditional 'branch' with link) */ +#define PPC_BL(dest) EMIT(PPC_INST_BL | (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) + /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 511d2a203e7d..b72fac52c3ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -198,7 +198,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func s32 rel = (s32)func - (s32)(image + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL_ABS(func); + PPC_BL(func); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); From 391c271f4deb7356482d12f962a6fc018b6a3fb0 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:45 +0530 Subject: [PATCH 252/380] powerpc64/bpf: Convert some of the uses of PPC_BPF_[LL|STL] to PPC_BPF_[LD|STD] PPC_BPF_[LL|STL] are macros meant for scenarios where we may have to deal with a non-word aligned offset. Limit their usage to only those scenarios by converting the rest to just use PPC_BPF_[LD|STD]. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0eb472428165a307f6fdaf22b0c33cbf13a9a635.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index bff200723e72..411ac41dba42 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -74,7 +74,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; if (__is_defined(PPC64_ELF_ABI_v2)) - PPC_BPF_LL(_R2, _R13, offsetof(struct paca_struct, kernel_toc)); + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); /* * Initialize tail_call_cnt if we do tail calls. @@ -84,7 +84,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (ctx->seen & SEEN_TAILCALL) { EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ - PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -97,7 +97,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); } PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); @@ -110,7 +110,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + EMIT(PPC_RAW_STD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) @@ -125,13 +125,13 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + EMIT(PPC_RAW_LD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_LD(0, 1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(0)); } } @@ -229,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); @@ -237,12 +237,12 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * tail_call_cnt++; */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) @@ -252,7 +252,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func))); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); @@ -628,7 +628,7 @@ bpf_alu32_trunc: break; case 64: /* Store the value to stack and then use byte-reverse loads */ - PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); + EMIT(PPC_RAW_STD(dst_reg, 1, bpf_jit_stack_local(ctx))); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); From 794abc08d75e9f2833f493090af14b748e182c5f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:46 +0530 Subject: [PATCH 253/380] powerpc64/bpf: Get rid of PPC_BPF_[LL|STL|STLU] macros All these macros now have a single user. Expand their usage in place. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e0526fc7633a34f983a7a330712b55bdfaf20482.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 22 ---------------------- arch/powerpc/net/bpf_jit_comp64.c | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 82cdfee41278..199348b72966 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -64,28 +64,6 @@ const int b2p[MAX_BPF_JIT_REG + 2] = { /* PPC NVR range -- update this if we ever use NVRs below r27 */ #define BPF_PPC_NVR_MIN 27 -/* - * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, - * so ensure that it isn't in use already. - */ -#define PPC_BPF_LL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_LDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_LD(r, base, i)); \ - } while(0) -#define PPC_BPF_STL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_STDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_STD(r, base, i)); \ - } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) - #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 411ac41dba42..eeda636cd7be 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -100,7 +100,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); } - PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_STDU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -726,7 +726,12 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_BPF_STL(src_reg, dst_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, b2p[TMP_REG_2])); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); + } break; /* @@ -802,9 +807,8 @@ emit_clear: PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* - * Check if 'off' is word aligned because PPC_BPF_LL() - * (BPF_DW case) generates two instructions if 'off' is not - * word-aligned and one instruction otherwise. + * Check if 'off' is word aligned for BPF_DW, because + * we might generate two instructions. */ if (BPF_SIZE(code) == BPF_DW && (off & 3)) PPC_JMP((ctx->idx + 3) * 4); @@ -823,7 +827,12 @@ emit_clear: EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); break; case BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } break; } From 7b187dcdb5d348aa916dcda769313512c08e85a5 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:47 +0530 Subject: [PATCH 254/380] powerpc/bpf: Cleanup bpf_jit.h - PPC_EX32() is only used by ppc32 JIT. Move it to bpf_jit_comp32.c - PPC_LI64() is only valid in ppc64. #ifdef it - PPC_FUNC_ADDR() is not used anymore. Remove it. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/58f5b66b2f8546bbbee620f62103a8e97a63eb7c.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 10 +--------- arch/powerpc/net/bpf_jit_comp32.c | 2 ++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index dd1b338ba064..42a9adda31eb 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -59,10 +59,7 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) -#ifdef CONFIG_PPC32 -#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) -#endif - +#ifdef CONFIG_PPC64 #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -85,11 +82,6 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) - -#ifdef CONFIG_PPC64 -#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -#else -#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif /* diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index b72fac52c3ca..1dda7e3a3e9b 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -36,6 +36,8 @@ /* BPF register usage */ #define TMP_REG (MAX_BPF_JIT_REG + 0) +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) + /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 1] = { /* function return value */ From 576a6c3a00c1a2a3645e039b126b52f6c7755e54 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:48 +0530 Subject: [PATCH 255/380] powerpc/bpf: Move bpf_jit64.h into bpf_jit_comp64.c There is no need for a separate header anymore. Move the contents of bpf_jit64.h into bpf_jit_comp64.c Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b873a8e6eff7d91bf2a2cabdd53082aadfe20761.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 69 ------------------------------- arch/powerpc/net/bpf_jit_comp64.c | 54 +++++++++++++++++++++++- 2 files changed, 53 insertions(+), 70 deletions(-) delete mode 100644 arch/powerpc/net/bpf_jit64.h diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h deleted file mode 100644 index 199348b72966..000000000000 --- a/arch/powerpc/net/bpf_jit64.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit64.h: BPF JIT compiler for PPC64 - * - * Copyright 2016 Naveen N. Rao - * IBM Corporation - */ -#ifndef _BPF_JIT64_H -#define _BPF_JIT64_H - -#include "bpf_jit.h" - -/* - * Stack layout: - * Ensure the top half (upto local_tmp_var) stays consistent - * with our redzone usage. - * - * [ prev sp ] <------------- - * [ nv gpr save area ] 5*8 | - * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 16 | - * fp (r31) --> [ ebpf stack space ] upto 512 | - * [ frame header ] 32/112 | - * sp (r1) ---> [ stack pointer ] -------------- - */ - -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (5*8) -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 24 -/* stack frame excluding BPF stack, ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) - -#ifndef __ASSEMBLY__ - -/* BPF register usage */ -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { - /* function return value */ - [BPF_REG_0] = 8, - /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, - /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 12, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 -}; - -/* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index eeda636cd7be..3e4ed5560947 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,7 +17,59 @@ #include #include -#include "bpf_jit64.h" +#include "bpf_jit.h" + +/* + * Stack layout: + * Ensure the top half (upto local_tmp_var) stays consistent + * with our redzone usage. + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 5*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 16 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (5*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 24 +/* stack frame excluding BPF stack, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + +/* BPF register usage */ +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +/* BPF to ppc register mappings */ +const int b2p[MAX_BPF_JIT_REG + 2] = { + /* function return value */ + [BPF_REG_0] = 8, + /* function arguments */ + [BPF_REG_1] = 3, + [BPF_REG_2] = 4, + [BPF_REG_3] = 5, + [BPF_REG_4] = 6, + [BPF_REG_5] = 7, + /* non volatile registers */ + [BPF_REG_6] = 27, + [BPF_REG_7] = 28, + [BPF_REG_8] = 29, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 31, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 12, + [TMP_REG_1] = 9, + [TMP_REG_2] = 10 +}; + +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN 27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { From 036d559c0bdea75bf4840ba6780790d08572480c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:49 +0530 Subject: [PATCH 256/380] powerpc/bpf: Use _Rn macros for GPRs Use _Rn macros to specify register names to make their usage clear. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7df626b8cdc6141d4295ac16137c82ad570b6637.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 30 +++++++------- arch/powerpc/net/bpf_jit_comp64.c | 68 +++++++++++++++---------------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 1dda7e3a3e9b..1c86b489232a 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -41,23 +41,23 @@ /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 1] = { /* function return value */ - [BPF_REG_0] = 12, + [BPF_REG_0] = _R12, /* function arguments */ - [BPF_REG_1] = 4, - [BPF_REG_2] = 6, - [BPF_REG_3] = 8, - [BPF_REG_4] = 10, - [BPF_REG_5] = 22, + [BPF_REG_1] = _R4, + [BPF_REG_2] = _R6, + [BPF_REG_3] = _R8, + [BPF_REG_4] = _R10, + [BPF_REG_5] = _R22, /* non volatile registers */ - [BPF_REG_6] = 24, - [BPF_REG_7] = 26, - [BPF_REG_8] = 28, - [BPF_REG_9] = 30, + [BPF_REG_6] = _R24, + [BPF_REG_7] = _R26, + [BPF_REG_8] = _R28, + [BPF_REG_9] = _R30, /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 18, + [BPF_REG_FP] = _R18, /* eBPF jit internal registers */ - [BPF_REG_AX] = 20, - [TMP_REG] = 31, /* 32 bits */ + [BPF_REG_AX] = _R20, + [TMP_REG] = _R31, /* 32 bits */ }; static int bpf_to_ppc(struct codegen_context *ctx, int reg) @@ -66,8 +66,8 @@ static int bpf_to_ppc(struct codegen_context *ctx, int reg) } /* PPC NVR range -- update this if we ever use NVRs below r17 */ -#define BPF_PPC_NVR_MIN 17 -#define BPF_PPC_TC 16 +#define BPF_PPC_NVR_MIN _R17 +#define BPF_PPC_TC _R16 static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 3e4ed5560947..ac06efa70223 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -48,28 +48,28 @@ /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 2] = { /* function return value */ - [BPF_REG_0] = 8, + [BPF_REG_0] = _R8, /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, + [BPF_REG_1] = _R3, + [BPF_REG_2] = _R4, + [BPF_REG_3] = _R5, + [BPF_REG_4] = _R6, + [BPF_REG_5] = _R7, /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, + [BPF_REG_6] = _R27, + [BPF_REG_7] = _R28, + [BPF_REG_8] = _R29, + [BPF_REG_9] = _R30, /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, + [BPF_REG_FP] = _R31, /* eBPF jit internal registers */ - [BPF_REG_AX] = 12, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 + [BPF_REG_AX] = _R12, + [TMP_REG_1] = _R9, + [TMP_REG_2] = _R10 }; /* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 +#define BPF_PPC_NVR_MIN _R27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { @@ -136,7 +136,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (ctx->seen & SEEN_TAILCALL) { EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8))); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -149,10 +149,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - EMIT(PPC_RAW_STDU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -162,11 +162,11 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_STD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + EMIT(PPC_RAW_STD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -177,14 +177,14 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_LD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + EMIT(PPC_RAW_LD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - EMIT(PPC_RAW_LD(0, 1, PPC_LR_STKOFF)); - EMIT(PPC_RAW_MTLR(0)); + EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); } } } @@ -194,7 +194,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, b2p[BPF_REG_0])); EMIT(PPC_RAW_BLR()); } @@ -232,7 +232,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func func += FUNCTION_DESCR_SIZE; /* Load function address into r12 */ - PPC_LI64(12, func); + PPC_LI64(_R12, func); /* For bpf-to-bpf function calls, the callee's address is unknown * until the last extra pass. As seen above, we use PPC_LI64() to @@ -247,7 +247,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); return 0; @@ -281,7 +281,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); @@ -289,7 +289,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * tail_call_cnt++; */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); @@ -680,8 +680,8 @@ bpf_alu32_trunc: break; case 64: /* Store the value to stack and then use byte-reverse loads */ - EMIT(PPC_RAW_STD(dst_reg, 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); } else { @@ -736,8 +736,8 @@ emit_clear: break; case STF_BARRIER_FALLBACK: ctx->seen |= SEEN_FUNC; - PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(12)); + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); break; case STF_BARRIER_NONE: @@ -952,7 +952,7 @@ emit_clear: return ret; /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); + EMIT(PPC_RAW_MR(b2p[BPF_REG_0], _R3)); break; /* From 3a3fc9bf103974d9a886fa37087d5d491c806e00 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 14 Feb 2022 16:11:50 +0530 Subject: [PATCH 257/380] powerpc64/bpf: Store temp registers' bpf to ppc mapping In bpf_jit_build_body(), the mapping of TMP_REG_1 and TMP_REG_2's bpf register to ppc register is evalulated at every use despite not changing. Instead, determine the ppc register once and store the result. Signed-off-by: Jordan Niethe [Rebased, converted additional usage sites] Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0944e2f0fa6dd254ea401f1c946fb6c9a5294278.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 197 +++++++++++++----------------- 1 file changed, 86 insertions(+), 111 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index ac06efa70223..b4de0c35c8a4 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -357,6 +357,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 dst_reg = b2p[insn[i].dst_reg]; u32 src_reg = b2p[insn[i].src_reg]; u32 size = BPF_SIZE(code); + u32 tmp1_reg = b2p[TMP_REG_1]; + u32 tmp2_reg = b2p[TMP_REG_2]; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -407,8 +409,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -418,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm > -32768 && imm <= 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -434,32 +436,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_ALU) - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; @@ -478,35 +476,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } } - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg)); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg)); break; } goto bpf_alu32_trunc; @@ -528,8 +514,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ @@ -540,8 +526,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); @@ -557,8 +543,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); @@ -659,11 +645,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 32: /* @@ -671,28 +657,28 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 64: /* Store the value to stack and then use byte-reverse loads */ EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg)); } else { - EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); - EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp2_reg, 4)); + EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg)); } break; } @@ -731,7 +717,7 @@ emit_clear: break; case STF_BARRIER_SYNC_ORI: EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: @@ -751,36 +737,36 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } if (off % 4) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], off)); - EMIT(PPC_RAW_STDX(src_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); } else { EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); } @@ -800,14 +786,14 @@ emit_clear: /* *(u32 *)(dst + off) += src */ /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -820,11 +806,11 @@ emit_clear: } /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -850,12 +836,12 @@ emit_clear: * set dst_reg=0 and move on. */ if (BPF_MODE(code) == BPF_PROBE_MEM) { - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off)); if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) - PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + PPC_LI64(tmp2_reg, 0x8000000000000000ul); else /* BOOK3S_64 */ - PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); - EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); + PPC_LI64(tmp2_reg, PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg)); PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* @@ -880,8 +866,8 @@ emit_clear: break; case BPF_DW: if (off % 4) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], off)); - EMIT(PPC_RAW_LDX(dst_reg, src_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); } else { EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); } @@ -925,7 +911,7 @@ emit_clear: * we'll just fall through to the epilogue. */ if (i != flen - 1) { - ret = bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr); + ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr); if (ret) return ret; } @@ -1058,14 +1044,10 @@ cond_branch: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg)); } else { - int tmp_reg = b2p[TMP_REG_1]; - - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -1094,14 +1076,12 @@ cond_branch: EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); /* ... but unsigned comparison */ if (is_jmp32) - EMIT(PPC_RAW_CMPLW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPLD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg)); } break; } @@ -1126,13 +1106,11 @@ cond_branch: else EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (is_jmp32) - EMIT(PPC_RAW_CMPW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg)); } break; } @@ -1141,19 +1119,16 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); + EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm)); else { - int tmp_reg = b2p[TMP_REG_1]; - - PPC_LI32(tmp_reg, imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, - tmp_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, + tmp1_reg)); } else { - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, - tmp_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, + 0, 0, 31)); } } break; From 49c3af43e65fbcc13860e0cf5fb2507b13e9724c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Feb 2022 16:11:51 +0530 Subject: [PATCH 258/380] powerpc/bpf: Simplify bpf_to_ppc() and adopt it for powerpc64 Convert bpf_to_ppc() to a macro to help simplify its usage since codegen_context is available in all places it is used. Adopt it also for powerpc64 for uniformity and get rid of the global b2p structure. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/09f0540ce3e0cd4120b5b33993b5e73b6ef9e979.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 11 ++-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/powerpc/net/bpf_jit_comp32.c | 98 +++++++++++++++---------------- arch/powerpc/net/bpf_jit_comp64.c | 93 ++++++++++++++--------------- 4 files changed, 102 insertions(+), 108 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 42a9adda31eb..979701d360da 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -119,12 +119,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#ifdef CONFIG_PPC64 -extern const int b2p[MAX_BPF_JIT_REG + 2]; -#else -extern const int b2p[MAX_BPF_JIT_REG + 1]; -#endif - struct codegen_context { /* * This is used to track register usage as well @@ -138,11 +132,13 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; - int b2p[ARRAY_SIZE(b2p)]; + int b2p[MAX_BPF_JIT_REG + 2]; unsigned int exentry_idx; unsigned int alt_exit_addr; }; +#define bpf_to_ppc(r) (ctx->b2p[r]) + #ifdef CONFIG_PPC32 #define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ #else @@ -170,6 +166,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } +void bpf_jit_init_reg_mapping(struct codegen_context *ctx); int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 635f7448ff79..fc160d33c839 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -72,13 +72,13 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; #ifdef CONFIG_PPC32 - PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); - PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); for (j = ctx->idx - addrs[i] / 4; j < 4; j++) EMIT(PPC_RAW_NOP()); #else func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); - PPC_LI64(b2p[insn[i].dst_reg], func_addr); + PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); /* overwrite rest with nops */ for (j = ctx->idx - addrs[i] / 4; j < 5; j++) EMIT(PPC_RAW_NOP()); @@ -179,7 +179,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); - memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + bpf_jit_init_reg_mapping(&cgctx); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 1c86b489232a..e46ed1e8c6ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -33,42 +33,38 @@ /* stack frame, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) -/* BPF register usage */ -#define TMP_REG (MAX_BPF_JIT_REG + 0) - #define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 1] = { - /* function return value */ - [BPF_REG_0] = _R12, - /* function arguments */ - [BPF_REG_1] = _R4, - [BPF_REG_2] = _R6, - [BPF_REG_3] = _R8, - [BPF_REG_4] = _R10, - [BPF_REG_5] = _R22, - /* non volatile registers */ - [BPF_REG_6] = _R24, - [BPF_REG_7] = _R26, - [BPF_REG_8] = _R28, - [BPF_REG_9] = _R30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = _R18, - /* eBPF jit internal registers */ - [BPF_REG_AX] = _R20, - [TMP_REG] = _R31, /* 32 bits */ -}; - -static int bpf_to_ppc(struct codegen_context *ctx, int reg) -{ - return ctx->b2p[reg]; -} - /* PPC NVR range -- update this if we ever use NVRs below r17 */ #define BPF_PPC_NVR_MIN _R17 #define BPF_PPC_TC _R16 +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R12; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R4; + ctx->b2p[BPF_REG_2] = _R6; + ctx->b2p[BPF_REG_3] = _R8; + ctx->b2p[BPF_REG_4] = _R10; + ctx->b2p[BPF_REG_5] = _R22; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R24; + ctx->b2p[BPF_REG_7] = _R26; + ctx->b2p[BPF_REG_8] = _R28; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R18; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R20; + ctx->b2p[TMP_REG] = _R31; /* 32 bits */ +} + static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) @@ -118,8 +114,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* @@ -128,7 +124,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); else EMIT(PPC_RAW_NOP()); @@ -150,15 +146,15 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -178,7 +174,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); @@ -223,8 +219,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r5-r6/BPF_REG_2 - pointer to bpf_array * r7-r8/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); - int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); /* * if (index >= array->map.max_entries) @@ -270,7 +266,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -294,11 +290,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; - u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; - u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; @@ -960,17 +956,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (ret < 0) return ret; - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); if (ret) return ret; - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b4de0c35c8a4..585f257da045 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -46,27 +46,28 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ /* function return value */ - [BPF_REG_0] = _R8, + ctx->b2p[BPF_REG_0] = _R8; /* function arguments */ - [BPF_REG_1] = _R3, - [BPF_REG_2] = _R4, - [BPF_REG_3] = _R5, - [BPF_REG_4] = _R6, - [BPF_REG_5] = _R7, + ctx->b2p[BPF_REG_1] = _R3; + ctx->b2p[BPF_REG_2] = _R4; + ctx->b2p[BPF_REG_3] = _R5; + ctx->b2p[BPF_REG_4] = _R6; + ctx->b2p[BPF_REG_5] = _R7; /* non volatile registers */ - [BPF_REG_6] = _R27, - [BPF_REG_7] = _R28, - [BPF_REG_8] = _R29, - [BPF_REG_9] = _R30, + ctx->b2p[BPF_REG_6] = _R27; + ctx->b2p[BPF_REG_7] = _R28; + ctx->b2p[BPF_REG_8] = _R29; + ctx->b2p[BPF_REG_9] = _R30; /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = _R31, + ctx->b2p[BPF_REG_FP] = _R31; /* eBPF jit internal registers */ - [BPF_REG_AX] = _R12, - [TMP_REG_1] = _R9, - [TMP_REG_2] = _R10 -}; + ctx->b2p[BPF_REG_AX] = _R12; + ctx->b2p[TMP_REG_1] = _R9; + ctx->b2p[TMP_REG_2] = _R10; +} /* PPC NVR range -- update this if we ever use NVRs below r27 */ #define BPF_PPC_NVR_MIN _R27 @@ -79,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); } /* @@ -134,9 +135,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, -(BPF_PPC_STACK_SAVE + 8))); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -161,12 +162,12 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_STD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -176,8 +177,8 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_LD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { @@ -194,7 +195,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(_R3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); } @@ -261,8 +262,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r4/BPF_REG_2 - pointer to bpf_array * r5/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = b2p[BPF_REG_2]; - int b2p_index = b2p[BPF_REG_3]; + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); int bpf_tailcall_prologue_size = 8; if (__is_defined(PPC64_ELF_ABI_v2)) @@ -272,42 +273,42 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); - EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); - EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -354,11 +355,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = b2p[insn[i].dst_reg]; - u32 src_reg = b2p[insn[i].src_reg]; + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 size = BPF_SIZE(code); - u32 tmp1_reg = b2p[TMP_REG_1]; - u32 tmp2_reg = b2p[TMP_REG_2]; + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -938,7 +939,7 @@ emit_clear: return ret; /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3)); break; /* From 0f54bddefe7f5e4c98bea6f945ebdf85d1c44117 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 7 Jan 2022 19:44:26 +0530 Subject: [PATCH 259/380] powerpc/pseries: Parse control memory access error Add support to parse and log control memory access error for pseries. These changes are made according to PAPR v2.11 10.3.2.2.12. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220107141428.67862-1-ganeshgr@linux.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 36 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 74c9b1b5bc66..2a158e828c99 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -60,11 +60,17 @@ struct pseries_mc_errorlog { * XX 2: Reserved. * XXX 3: Type of UE error. * - * For error_type != MC_ERROR_TYPE_UE + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB * XXXXXXXX * X 1: Effective address provided. * XXXXX 5: Reserved. * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. */ u8 sub_err_type; u8 reserved_1[6]; @@ -80,6 +86,7 @@ struct pseries_mc_errorlog { #define MC_ERROR_TYPE_TLB 0x04 #define MC_ERROR_TYPE_D_CACHE 0x05 #define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 /* RTAS pseries MCE error sub types */ #define MC_ERROR_UE_INDETERMINATE 0 @@ -90,6 +97,7 @@ struct pseries_mc_errorlog { #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 #define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 #define MC_ERROR_SLB_PARITY 0 #define MC_ERROR_SLB_MULTIHIT 1 @@ -103,6 +111,9 @@ struct pseries_mc_errorlog { #define MC_ERROR_TLB_MULTIHIT 2 #define MC_ERROR_TLB_INDETERMINATE 3 +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) { switch (mlog->error_type) { @@ -112,6 +123,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; default: return 0; } @@ -658,7 +671,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_ERAT: @@ -675,7 +688,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_TLB: @@ -692,7 +705,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_D_CACHE: @@ -701,6 +714,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, case MC_ERROR_TYPE_I_CACHE: mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; From 0f4ef8a3bf784f250abc7d0155ae4e9fa22d8210 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 7 Jan 2022 19:44:27 +0530 Subject: [PATCH 260/380] selftests/powerpc: Add test for real address error handling Add test for real address or control memory address access error handling, using NX-GZIP engine. The error is injected by accessing the control memory address using illegal instruction, on successful handling the process attempting to access control memory address using illegal instruction receives SIGBUS. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220107141428.67862-2-ganeshgr@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 3 +- tools/testing/selftests/powerpc/mce/Makefile | 7 ++ .../selftests/powerpc/mce/inject-ra-err.c | 65 +++++++++++++++++++ tools/testing/selftests/powerpc/mce/vas-api.h | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/mce/Makefile create mode 100644 tools/testing/selftests/powerpc/mce/inject-ra-err.c create mode 120000 tools/testing/selftests/powerpc/mce/vas-api.h diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0830e63818c1..4830372d7416 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -31,7 +31,8 @@ SUB_DIRS = alignment \ vphn \ math \ ptrace \ - security + security \ + mce endif diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile new file mode 100644 index 000000000000..2424513982d9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -0,0 +1,7 @@ +#SPDX-License-Identifier: GPL-2.0-or-later + +TEST_GEN_PROGS := inject-ra-err + +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c new file mode 100644 index 000000000000..94323c34d9a6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vas-api.h" +#include "utils.h" + +static bool faulted; + +static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + regs->nip += 4; +} + +static int test_ra_error(void) +{ + struct vas_tx_win_open_attr attr; + int fd, *paste_addr; + char *devname = "/dev/crypto/nx-gzip"; + struct sigaction act = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + memset(&attr, 0, sizeof(attr)); + attr.version = 1; + attr.vas_id = 0; + + SKIP_IF(access(devname, F_OK)); + + fd = open(devname, O_RDWR); + FAIL_IF(fd < 0); + FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0); + FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0); + + paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL); + + /* The following assignment triggers exception */ + mb(); + *paste_addr = 1; + mb(); + + FAIL_IF(!faulted); + + return 0; +} + +int main(void) +{ + return test_harness(test_ra_error, "inject-ra-err"); +} + diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h new file mode 120000 index 000000000000..1455c1bcd351 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/vas-api.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file From 0a182611d149b5b747014fbb230ec35b20a45c86 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 7 Jan 2022 19:44:28 +0530 Subject: [PATCH 261/380] powerpc/mce: Modify the real address error logging messages To avoid ambiguity, modify the strings in real address error logging messages to "foreign/control memory" from "foreign", Since the error discriptions in P9 user manual and P10 user manual are different for same type of errors. P9 User Manual for MCE: DSISR:59 Host real address to foreign space during translation. DSISR:60 Host real address to foreign space on a load or store access. P10 User Manual for MCE: DSISR:59 D-side tablewalk used a host real address in the control memory address range. DSISR:60 D-side operand access to control memory address space. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220107141428.67862-3-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 2503dd4713b9..811c867ad6c6 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -401,14 +401,14 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_ra_types[] = { "Indeterminate", "Instruction fetch (bad)", - "Instruction fetch (foreign)", + "Instruction fetch (foreign/control memory)", "Page table walk ifetch (bad)", - "Page table walk ifetch (foreign)", + "Page table walk ifetch (foreign/control memory)", "Load (bad)", "Store (bad)", "Page table walk Load/Store (bad)", - "Page table walk Load/Store (foreign)", - "Load/Store (foreign)", + "Page table walk Load/Store (foreign/control memory)", + "Load/Store (foreign/control memory)", }; static const char *mc_link_types[] = { "Indeterminate", From cc15ff3275694fedc33cd3d53212a43eec7aa0bc Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 20 Jan 2022 17:49:31 +0530 Subject: [PATCH 262/380] powerpc/mce: Avoid using irq_work_queue() in realmode In realmode mce handler we use irq_work_queue() to defer the processing of mce events, irq_work_queue() can only be called when translation is enabled because it touches memory outside RMA, hence we enable translation before calling irq_work_queue and disable on return, though it is not safe to do in realmode. To avoid this, program the decrementer and call the event processing functions from timer handler. Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220120121931.517974-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/machdep.h | 2 + arch/powerpc/include/asm/mce.h | 13 +++++ arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/kernel/mce.c | 60 +++++++++++++----------- arch/powerpc/kernel/time.c | 2 + arch/powerpc/platforms/pseries/pseries.h | 1 + arch/powerpc/platforms/pseries/ras.c | 32 +------------ arch/powerpc/platforms/pseries/setup.c | 1 + 8 files changed, 53 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 06ac7ef07c85..358d171ae8e0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -94,6 +94,8 @@ struct machdep_calls { /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); + void (*machine_check_log_err)(void); + /* Motherboard/chipset features. This is a kind of general purpose * hook used to control some machine specific features (like reset * lines, chip power control, etc...). diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 331d944280b8..c9f0936bd3c9 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -235,8 +235,21 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +void mce_irq_work_queue(void); int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_run_irq_context_handlers(void); +#else +static inline void mce_run_irq_context_handlers(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void set_mce_pending_irq_work(void); +void clear_mce_pending_irq_work(void); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); void flush_erat(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 295573a82c66..8330968ca346 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -288,6 +288,7 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_BOOK3S_64 struct mce_info *mce_info; + u8 mce_pending_irq_work; #endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 811c867ad6c6..671d727d06f7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -28,19 +28,9 @@ #include "setup.h" -static void machine_check_process_queued_event(struct irq_work *work); -static void machine_check_ue_irq_work(struct irq_work *work); static void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); -static struct irq_work mce_event_process_work = { - .func = machine_check_process_queued_event, -}; - -static struct irq_work mce_ue_event_irq_work = { - .func = machine_check_ue_irq_work, -}; - static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -89,6 +79,13 @@ static void mce_set_error_info(struct machine_check_event *mce, } } +void mce_irq_work_queue(void) +{ + /* Raise decrementer interrupt */ + arch_irq_work_raise(); + set_mce_pending_irq_work(); +} + /* * Decode and save high level MCE information into per cpu buffer which * is an array of machine_check_event structure. @@ -217,7 +214,7 @@ void release_mce_event(void) get_mce_event(NULL, true); } -static void machine_check_ue_irq_work(struct irq_work *work) +static void machine_check_ue_work(void) { schedule_work(&mce_ue_event_work); } @@ -239,7 +236,7 @@ static void machine_check_ue_event(struct machine_check_event *evt) evt, sizeof(*evt)); /* Queue work to process this event later. */ - irq_work_queue(&mce_ue_event_irq_work); + mce_irq_work_queue(); } /* @@ -249,7 +246,6 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; - unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -263,20 +259,7 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* - * Queue irq work to process this event later. Before - * queuing the work enable translation for non radix LPAR, - * as irq_work_queue may try to access memory outside RMO - * region. - */ - if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - irq_work_queue(&mce_event_process_work); - mtmsr(msr); - } else { - irq_work_queue(&mce_event_process_work); - } + mce_irq_work_queue(); } void mce_common_process_ue(struct pt_regs *regs, @@ -338,7 +321,7 @@ static void machine_process_ue_event(struct work_struct *work) * process pending MCE event from the mce event queue. This function will be * called during syscall exit. */ -static void machine_check_process_queued_event(struct irq_work *work) +static void machine_check_process_queued_event(void) { int index; struct machine_check_event *evt; @@ -363,6 +346,27 @@ static void machine_check_process_queued_event(struct irq_work *work) } } +void set_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 1; +} + +void clear_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 0; +} + +void mce_run_irq_context_handlers(void) +{ + if (unlikely(local_paca->mce_pending_irq_work)) { + if (ppc_md.machine_check_log_err) + ppc_md.machine_check_log_err(); + machine_check_process_queued_event(); + machine_check_ue_work(); + clear_mce_pending_irq_work(); + } +} + void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecdd..17598bba54eb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -70,6 +70,7 @@ #include #include #include +#include /* powerpc clocksource/clockevent code */ @@ -638,6 +639,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) if (test_irq_work_pending()) { clear_irq_work_pending(); + mce_run_irq_context_handlers(); irq_work_run(); } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 56c9ef9052e9..af162aeeae86 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -21,6 +21,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); extern long pseries_machine_check_realmode(struct pt_regs *regs); +void pSeries_machine_check_log_err(void); #ifdef CONFIG_SMP extern void smp_init_pseries(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 2a158e828c99..f12516c3998c 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); static int ras_check_exception_token; -static void mce_process_errlog_event(struct irq_work *work); -static struct irq_work mce_errlog_process_work = { - .func = mce_process_errlog_event, -}; - #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -745,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); - unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -759,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) error_type = mce_log->error_type; disposition = mce_handle_err_realmode(disposition, error_type); - - /* - * Enable translation as we will be accessing per-cpu variables - * in save_mce_event() which may fall outside RMO region, also - * leave it enabled because subsequently we will be queuing work - * to workqueues where again per-cpu variables accessed, besides - * fwnmi_release_errinfo() crashes when called in realmode on - * pseries. - * Note: All the realmode handling like flushing SLB entries for - * SLB multihit is done by now. - */ out: - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); - - /* - * Queue irq work to log this rtas event later. - * irq_work_queue uses per-cpu variables, so do this in virt - * mode as well. - */ - irq_work_queue(&mce_errlog_process_work); - - mtmsr(msr); - return disposition; } /* * Process MCE rtas errlog event. */ -static void mce_process_errlog_event(struct irq_work *work) +void pSeries_machine_check_log_err(void) { struct rtas_error_log *err; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..069d7b3bb142 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1086,6 +1086,7 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_early = pseries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, + .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, From 3c14b73454cf9f6e2146443fdfbdfb912c0efed3 Mon Sep 17 00:00:00 2001 From: "Pratik R. Sampat" Date: Thu, 17 Feb 2022 16:23:20 +0530 Subject: [PATCH 263/380] powerpc/pseries: Interface to represent PAPR firmware attributes Adds a syscall interface to represent the energy and frequency related PAPR attributes on the system using the new H_CALL "H_GET_ENERGY_SCALE_INFO". H_GET_EM_PARMS H_CALL was previously responsible for exporting this information in the lparcfg, however the H_GET_EM_PARMS H_CALL will be deprecated P10 onwards. The H_GET_ENERGY_SCALE_INFO H_CALL is of the following call format: hcall( uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info uint64 flags, // Per the flag request uint64 firstAttributeId,// The attribute id uint64 bufferAddress, // Guest physical address of the output buffer uint64 bufferSize // The size in bytes of the output buffer ); As specified in PAPR+ v2.11, section 14.14.3. This H_CALL can query either all the attributes at once with firstAttributeId = 0, flags = 0 as well as query only one attribute at a time with firstAttributeId = id, flags = 1. The output buffer consists of the following 1. number of attributes - 8 bytes 2. array offset to the data location - 8 bytes 3. version info - 1 byte 4. A data array of size num attributes, which contains the following: a. attribute ID - 8 bytes b. attribute value in number - 8 bytes c. attribute name in string - 64 bytes d. attribute value in string - 64 bytes The new H_CALL exports information in direct string value format, hence a new interface has been introduced in /sys/firmware/papr/energy_scale_info to export this information to userspace so that the firmware can add new values without the need for the kernel to be changed. The H_CALL returns the name, numeric value and string value (if exists) The format of exposing the sysfs information is as follows: /sys/firmware/papr/energy_scale_info/ |-- / |-- desc |-- value |-- value_desc (if exists) |-- / |-- desc |-- value |-- value_desc (if exists) ... The energy information that is exported is useful for userspace tools such as powerpc-utils. Currently these tools infer the "power_mode_data" value in the lparcfg, which in turn is obtained from the to be deprecated H_GET_EM_PARMS H_CALL. On future platforms, such userspace utilities will have to look at the data returned from the new H_CALL being populated in this new sysfs interface and report this information directly without the need of interpretation. Signed-off-by: Pratik R. Sampat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220217105321.52941-2-psampat@linux.ibm.com --- .../sysfs-firmware-papr-energy-scale-info | 29 ++ arch/powerpc/include/asm/firmware.h | 4 +- arch/powerpc/include/asm/hvcall.h | 3 +- arch/powerpc/kvm/trace_hv.h | 1 + arch/powerpc/platforms/pseries/Makefile | 3 +- arch/powerpc/platforms/pseries/firmware.c | 1 + .../pseries/papr_platform_attributes.c | 361 ++++++++++++++++++ 7 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info create mode 100644 arch/powerpc/platforms/pseries/papr_platform_attributes.c diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info new file mode 100644 index 000000000000..141a6b371469 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info @@ -0,0 +1,29 @@ +What: /sys/firmware/papr/energy_scale_info +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Directory hosting a set of platform attributes like + energy/frequency on Linux running as a PAPR guest. + + Each file in a directory contains a platform + attribute hierarchy pertaining to performance/ + energy-savings mode and processor frequency. + +What: /sys/firmware/papr/energy_scale_info/ +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Energy, frequency attributes directory for POWERVM servers + +What: /sys/firmware/papr/energy_scale_info//desc +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: String description of the energy attribute of + +What: /sys/firmware/papr/energy_scale_info//value +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Numeric value of the energy attribute of + +What: /sys/firmware/papr/energy_scale_info//value_desc +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: String value of the energy attribute of diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 9b702d2b80fb..8dddd34b8ecf 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -54,6 +54,7 @@ #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) +#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) #ifndef __ASSEMBLY__ @@ -74,7 +75,8 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | + FW_FEATURE_ENERGY_SCALE_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9bcf345cb208..48f510ba9f4a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -323,7 +323,8 @@ #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C -#define MAX_HCALL_OPCODE H_SCM_FLUSH +#define H_GET_ENERGY_SCALE_INFO 0x450 +#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 830a126e095d..38cd0ed0a617 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -115,6 +115,7 @@ {H_VASI_STATE, "H_VASI_STATE"}, \ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_GET_ENERGY_SCALE_INFO, "H_GET_ENERGY_SCALE_INFO"}, \ {H_SET_MPP, "H_SET_MPP"}, \ {H_GET_MPP, "H_GET_MPP"}, \ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 29b522d2c755..9764e1a2ed5c 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ - pci.o pci_dlpar.o eeh_pseries.o msi.o + pci.o pci_dlpar.o eeh_pseries.o msi.o \ + papr_platform_attributes.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index f162156b7b68..09c119b2f623 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -66,6 +66,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, + {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c new file mode 100644 index 000000000000..515150417bb3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Platform energy and frequency attributes driver + * + * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a + * directory structure containing files in keyword - value pairs that specify + * energy and frequency configuration of the system. + * + * The format of exposing the sysfs information is as follows: + * /sys/firmware/papr/energy_scale_info/ + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * + * Copyright 2022 IBM Corp. + */ + +#include +#include + +#include "pseries.h" + +/* + * Flag attributes to fetch either all or one attribute from the HCALL + * flag = BE(0) => fetch all attributes with firstAttributeId = 0 + * flag = BE(1) => fetch a single attribute with firstAttributeId = id + */ +#define ESI_FLAGS_ALL 0 +#define ESI_FLAGS_SINGLE (1ull << 63) + +#define KOBJ_MAX_ATTRS 3 + +#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr) +#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute) +#define CURR_MAX_ESI_ATTRS 8 + +struct energy_scale_attribute { + __be64 id; + __be64 val; + u8 desc[64]; + u8 value_desc[64]; +} __packed; + +struct h_energy_scale_info_hdr { + __be64 num_attrs; + __be64 array_offset; + u8 data_header_version; +} __packed; + +struct papr_attr { + u64 id; + struct kobj_attribute kobj_attr; +}; + +struct papr_group { + struct attribute_group pg; + struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; +}; + +static struct papr_group *papr_groups; +/* /sys/firmware/papr */ +static struct kobject *papr_kobj; +/* /sys/firmware/papr/energy_scale_info */ +static struct kobject *esi_kobj; + +/* + * Energy modes can change dynamically hence making a new hcall each time the + * information needs to be retrieved + */ +static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct energy_scale_attribute *curr_esi; + struct h_energy_scale_info_hdr *hdr; + char *buf; + + buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + +retry: + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE, + id, virt_to_phys(buf), + esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); + if (temp_buf) + buf = temp_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO"); + ret = -EIO; + goto out_buf; + } + + hdr = (struct h_energy_scale_info_hdr *) buf; + curr_esi = (struct energy_scale_attribute *) + (buf + be64_to_cpu(hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs) + * sizeof(struct energy_scale_attribute))) { + ret = -EIO; + goto out_buf; + } + + *esi = *curr_esi; + +out_buf: + kfree(buf); + + return ret; +} + +/* + * Extract and export the description of the energy scale attributes + */ +static ssize_t desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.desc); +} + +/* + * Extract and export the numeric value of the energy scale attributes + */ +static ssize_t val_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val)); +} + +/* + * Extract and export the value description in string format of the energy + * scale attributes + */ +static ssize_t val_desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.value_desc); +} + +static struct papr_ops_info { + const char *attr_name; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr, + char *buf); +} ops_info[KOBJ_MAX_ATTRS] = { + { "desc", desc_show }, + { "value", val_show }, + { "value_desc", val_desc_show }, +}; + +static void add_attr(u64 id, int index, struct papr_attr *attr) +{ + attr->id = id; + sysfs_attr_init(&attr->kobj_attr.attr); + attr->kobj_attr.attr.name = ops_info[index].attr_name; + attr->kobj_attr.attr.mode = 0444; + attr->kobj_attr.show = ops_info[index].show; +} + +static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc) +{ + int i; + + for (i = 0; i < KOBJ_MAX_ATTRS; i++) { + if (!strcmp(ops_info[i].attr_name, "value_desc") && + !show_val_desc) { + continue; + } + add_attr(id, i, &pg->pgattrs[i]); + pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr; + } + + return sysfs_create_group(esi_kobj, &pg->pg); +} + + +static int __init papr_init(void) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct h_energy_scale_info_hdr *esi_hdr; + struct energy_scale_attribute *esi_attrs; + uint64_t num_attrs; + char *esi_buf; + + if (!firmware_has_feature(FW_FEATURE_LPAR) || + !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) { + return -ENXIO; + } + + esi_buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (esi_buf == NULL) + return -ENOMEM; + /* + * hcall( + * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info + * uint64 flags, // Per the flag request + * uint64 firstAttributeId, // The attribute id + * uint64 bufferAddress, // Guest physical address of the output buffer + * uint64 bufferSize); // The size in bytes of the output buffer + */ +retry: + + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0, + virt_to_phys(esi_buf), esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_esi_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL); + if (temp_esi_buf) + esi_buf = temp_esi_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret); + goto out_free_esi_buf; + } + + esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf; + num_attrs = be64_to_cpu(esi_hdr->num_attrs); + esi_attrs = (struct energy_scale_attribute *) + (esi_buf + be64_to_cpu(esi_hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(esi_hdr->array_offset) + + (num_attrs * sizeof(struct energy_scale_attribute))) { + goto out_free_esi_buf; + } + + papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL); + if (!papr_groups) + goto out_free_esi_buf; + + papr_kobj = kobject_create_and_add("papr", firmware_kobj); + if (!papr_kobj) { + pr_warn("kobject_create_and_add papr failed\n"); + goto out_papr_groups; + } + + esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj); + if (!esi_kobj) { + pr_warn("kobject_create_and_add energy_scale_info failed\n"); + goto out_kobj; + } + + /* Allocate the groups before registering */ + for (idx = 0; idx < num_attrs; idx++) { + papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1, + sizeof(*papr_groups[idx].pg.attrs), + GFP_KERNEL); + if (!papr_groups[idx].pg.attrs) + goto out_pgattrs; + + papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld", + be64_to_cpu(esi_attrs[idx].id)); + if (papr_groups[idx].pg.name == NULL) + goto out_pgattrs; + } + + for (idx = 0; idx < num_attrs; idx++) { + bool show_val_desc = true; + + /* Do not add the value desc attr if it does not exist */ + if (strnlen(esi_attrs[idx].value_desc, + sizeof(esi_attrs[idx].value_desc)) == 0) + show_val_desc = false; + + if (add_attr_group(be64_to_cpu(esi_attrs[idx].id), + &papr_groups[idx], + show_val_desc)) { + pr_warn("Failed to create papr attribute group %s\n", + papr_groups[idx].pg.name); + idx = num_attrs; + goto out_pgattrs; + } + } + + kfree(esi_buf); + return 0; +out_pgattrs: + for (i = 0; i < idx ; i++) { + kfree(papr_groups[i].pg.attrs); + kfree(papr_groups[i].pg.name); + } + kobject_put(esi_kobj); +out_kobj: + kobject_put(papr_kobj); +out_papr_groups: + kfree(papr_groups); +out_free_esi_buf: + kfree(esi_buf); + + return -ENOMEM; +} + +machine_device_initcall(pseries, papr_init); From 57201d657eb76d735298405d3200a3b1f67197e1 Mon Sep 17 00:00:00 2001 From: "Pratik R. Sampat" Date: Thu, 17 Feb 2022 16:23:21 +0530 Subject: [PATCH 264/380] selftest/powerpc: Add PAPR sysfs attributes sniff test Include a testcase to check if the sysfs files for energy and frequency related have its related attribute files exist and populated Signed-off-by: Pratik R. Sampat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220217105321.52941-3-psampat@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 1 + .../powerpc/papr_attributes/.gitignore | 2 + .../powerpc/papr_attributes/Makefile | 7 ++ .../powerpc/papr_attributes/attr_test.c | 107 ++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 tools/testing/selftests/powerpc/papr_attributes/.gitignore create mode 100644 tools/testing/selftests/powerpc/papr_attributes/Makefile create mode 100644 tools/testing/selftests/powerpc/papr_attributes/attr_test.c diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 4830372d7416..6ba95cd19e42 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -30,6 +30,7 @@ SUB_DIRS = alignment \ eeh \ vphn \ math \ + papr_attributes \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/papr_attributes/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore new file mode 100644 index 000000000000..d5f42b6d9e99 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +attr_test diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile new file mode 100644 index 000000000000..e899712d49db --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := attr_test + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c new file mode 100644 index 000000000000..bab0dc06e90b --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PAPR Energy attributes sniff test + * This checks if the papr folders and contents are populated relating to + * the energy and frequency attributes + * + * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +enum energy_freq_attrs { + POWER_PERFORMANCE_MODE = 1, + IDLE_POWER_SAVER_STATUS = 2, + MIN_FREQ = 3, + STAT_FREQ = 4, + MAX_FREQ = 6, + PROC_FOLDING_STATUS = 8 +}; + +enum type { + INVALID, + STR_VAL, + NUM_VAL +}; + +int value_type(int id) +{ + int val_type; + + switch (id) { + case POWER_PERFORMANCE_MODE: + case IDLE_POWER_SAVER_STATUS: + val_type = STR_VAL; + break; + case MIN_FREQ: + case STAT_FREQ: + case MAX_FREQ: + case PROC_FOLDING_STATUS: + val_type = NUM_VAL; + break; + default: + val_type = INVALID; + } + + return val_type; +} + +int verify_energy_info(void) +{ + const char *path = "/sys/firmware/papr/energy_scale_info"; + struct dirent *entry; + struct stat s; + DIR *dirp; + + if (stat(path, &s) || !S_ISDIR(s.st_mode)) + return -1; + dirp = opendir(path); + + while ((entry = readdir(dirp)) != NULL) { + char file_name[64]; + int id, attr_type; + FILE *f; + + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + id = atoi(entry->d_name); + attr_type = value_type(id); + if (attr_type == INVALID) + return -1; + + /* Check if the files exist and have data in them */ + sprintf(file_name, "%s/%d/desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + sprintf(file_name, "%s/%d/value", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + if (attr_type == STR_VAL) { + sprintf(file_name, "%s/%d/value_desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + } + } + + return 0; +} + +int main(void) +{ + return test_harness(verify_energy_info, "papr_attributes"); +} From 9bdb2eec3dde4d66b71cb4bbaebde0caed2cf0e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 13:01:56 +0100 Subject: [PATCH 265/380] powerpc/ftrace: Don't use lmw/stmw in ftrace_regs_caller() For the same reason as commit a85c728cb5e1 ("powerpc/32: Don't use lmw/stmw for saving/restoring non volatile regs"), don't use lmw/stmw in ftrace_regs_caller(). Use the same macros for PPC32 and PPC64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec286d2cc6989668a96f14543275437d2f3f0e3a.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 89639e64acd1..76dab07fd8fd 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -43,18 +43,16 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) -#ifdef CONFIG_PPC64 SAVE_GPRS(2, 11, r1) +#ifdef CONFIG_PPC64 /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif SAVE_GPRS(12, 31, r1) -#else - stmw r2, GPR2(r1) -#endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -120,11 +118,7 @@ ftrace_regs_call: #endif /* Restore gprs */ -#ifdef CONFIG_PPC64 REST_GPRS(2, 31, r1) -#else - lmw r2, GPR2(r1) -#endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) From 228216716cb5f9a19d70bfc40bdc0d6a7fb7e72f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 13:01:57 +0100 Subject: [PATCH 266/380] powerpc/ftrace: Refactor ftrace_{regs_}caller ftrace_caller() and frace_regs_caller() have now a lot in common. Refactor them using GAS macros. Signed-off-by: Christophe Leroy Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9d7df9e4fc98a86051489f61d3c9bc67f92f7e27.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 147 ++++++-------------- 1 file changed, 45 insertions(+), 102 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 76dab07fd8fd..630b2de9957b 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -32,7 +32,7 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. */ -_GLOBAL(ftrace_regs_caller) +.macro ftrace_regs_entry allregs /* Save the original return address in A's stack frame */ #ifdef CONFIG_MPROFILE_KERNEL PPC_STL r0,LRSAVE(r1) @@ -43,7 +43,7 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_GPRS(2, 11, r1) + SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 /* Ok to continue? */ @@ -52,17 +52,29 @@ _GLOBAL(ftrace_regs_caller) beq ftrace_no_trace #endif - SAVE_GPRS(12, 31, r1) + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(11, 31, r1) + .else +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) +#endif + .endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE PPC_STL r8, GPR1(r1) + .if \allregs == 1 /* Load special regs for save below */ mfmsr r8 mfctr r9 mfxer r10 mfcr r11 + .else + /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ + li r8, 0 + .endif /* Get the _mcount() call site out of LR */ mflr r7 @@ -96,19 +108,17 @@ _GLOBAL(ftrace_regs_caller) /* Save special regs */ PPC_STL r8, _MSR(r1) + .if \allregs == 1 PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) PPC_STL r11, _CCR(r1) + .endif /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_FRAME_OVERHEAD +.endm - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - +.macro ftrace_regs_exit allregs /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 @@ -118,7 +128,14 @@ ftrace_regs_call: #endif /* Restore gprs */ + .if \allregs == 1 REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#ifdef CONFIG_LIVEPATCH_64 + REST_GPR(14, r1) +#endif + .endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) @@ -137,6 +154,25 @@ ftrace_regs_call: bne- livepatch_handler #endif bctr /* jump after _mcount site */ +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + ftrace_regs_exit 1 + +_GLOBAL(ftrace_caller) + ftrace_regs_entry 0 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + ftrace_regs_exit 0 _GLOBAL(ftrace_stub) blr @@ -151,99 +187,6 @@ ftrace_no_trace: bctr #endif -_GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ -#ifdef CONFIG_MPROFILE_KERNEL - PPC_STL r0, LRSAVE(r1) -#endif - - /* Create our stack frame + pt_regs */ - PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace -#endif - - /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE - PPC_STL r8, GPR1(r1) - - /* Get the _mcount() call site out of LR */ - mflr r7 - PPC_STL r7, _NIP(r1) - -#ifdef CONFIG_PPC64 - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3, r2, function_trace_op@toc@ha - addi r3, r3, function_trace_op@toc@l - ld r5, 0(r3) -#else - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) -#endif - -#ifdef CONFIG_LIVEPATCH_64 - SAVE_GPR(14, r1) - mr r14, r7 /* remember old NIP */ -#endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - PPC_STL r0, _LINK(r1) - mr r4, r0 - - /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ - li r8, 0 - PPC_STL r8, _MSR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - PPC_LL r3, _NIP(r1) - mtctr r3 - -#ifdef CONFIG_LIVEPATCH_64 - cmpd r14, r3 /* has NIP been altered? */ - REST_GPR(14, r1) -#endif - - /* Restore gprs */ - REST_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - /* Restore callee's TOC */ - ld r2, 24(r1) -#endif - - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - -#ifdef CONFIG_LIVEPATCH_64 - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - - bctr /* jump after _mcount site */ - #ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As From a5f04d1f2724db036ba4087873c0691881932bc9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 13:01:58 +0100 Subject: [PATCH 267/380] powerpc/ftrace: Regroup PPC64 specific operations in ftrace_mprofile.S CONFIG_MPROFILE_KERNEL is only for PPC64 and ftrace_mprofile.o is build on PPC64 only when CONFIG_MPROFILE_KERNEL is defined. Move saving of r0 inside #ifdef PPC64 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/619dfb672bf4f1b777a4b3f8b4f14e637fea2716.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 630b2de9957b..f5d31c458e6b 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -33,11 +33,6 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs - /* Save the original return address in A's stack frame */ -#ifdef CONFIG_MPROFILE_KERNEL - PPC_STL r0,LRSAVE(r1) -#endif - /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) @@ -46,6 +41,8 @@ SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 + /* Save the original return address in A's stack frame */ + std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 From 2ca48dbb210767b9e7166d7d47febc8fcd1da6e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 13:01:59 +0100 Subject: [PATCH 268/380] powerpc/ftrace: Use STK_GOT in ftrace_mprofile.S Instead of open coding offset value 24, use STK_GOT when accessing got register in stack. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9042bb30fa972056715fe5b6598a7c8049681293.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index f5d31c458e6b..4fa23e260cab 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -82,7 +82,7 @@ #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) + std r2, STK_GOT(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ addis r3,r2,function_trace_op@toc@ha @@ -140,7 +140,7 @@ #ifdef CONFIG_PPC64 /* Restore callee's TOC */ - ld r2, 24(r1) + ld r2, STK_GOT(r1) #endif /* Pop our stack frame */ From e86debbbb5f89c2575110cfdce89d1820577aa94 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Mar 2022 18:04:02 +0100 Subject: [PATCH 269/380] powerpc: Cleanup asm-prototypes.c Last call to sys_swapcontext() from ASM was removed by commit fbcee2ebe8ed ("powerpc/32: Always save non volatile GPRs at syscall entry") sys_debug_setcontext() prototype not needed anymore since commit f3675644e172 ("powerpc/syscalls: signal_{32, 64} - switch to SYSCALL_DEFINE") sys_switch_endian() prototype not needed anymore since commit 81dac8177862 ("powerpc/64: Make sys_switch_endian() traceable") Signed-off-by: Christophe Leroy [mpe: Keep _mcount() prototype to avoid modpost errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3ed660a585df2080ea8412ec20fbf652f5bf013a.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 41b8a1e1144a..cca69093f03d 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -57,12 +57,7 @@ int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); /* signals, syscalls and interrupts */ -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size); #ifdef CONFIG_PPC32 -long sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg); int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp); @@ -81,7 +76,6 @@ unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); -long sys_switch_endian(void); /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, From e15c703be48edc3b2f96b66d4f548dc88b44266c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Mar 2022 18:04:03 +0100 Subject: [PATCH 270/380] powerpc/smp: Declare current_set static current_set extern not needed anymore since commit eafd825ed710 ("powerpc/64: Simplify __secondary_start paca->kstack handling") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a55eb65c9d7319f0af3c31e3f6ba36522f10003d.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 1 - arch/powerpc/kernel/smp.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index cca69093f03d..ce4e355c96a2 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -20,7 +20,6 @@ #include /* SMP */ -extern struct task_struct *current_set[NR_CPUS]; extern struct task_struct *secondary_current; void start_secondary(void *unused); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b7fd6a72aa76..7e30a6fe5adf 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -716,7 +716,7 @@ void smp_send_stop(void) } #endif /* CONFIG_NMI_IPI */ -struct task_struct *current_set[NR_CPUS]; +static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { From a4abd55a2490fd6407ddb6810e41f64ebd66d3af Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Mar 2022 18:04:04 +0100 Subject: [PATCH 271/380] powerpc/kexec: Declare kexec_paca static kexec_paca is exclusively used in kexec/core_64.c Declare it static. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/094983ee851644165b7700c73cac63cfe20596cd.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 2 -- arch/powerpc/kexec/core_64.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ce4e355c96a2..eede369ba94f 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -24,9 +24,7 @@ extern struct task_struct *secondary_current; void start_secondary(void *unused); /* kexec */ -struct paca_struct; struct kimage; -extern struct paca_struct kexec_paca; void kexec_copy_flush(struct kimage *image); /* pseries hcall tracing */ diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 635b5fc30b53..2d49dce129f2 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -291,7 +291,7 @@ static union thread_union kexec_stack __init_task_data = * For similar reasons to the stack above, the kexecing CPU needs to be on a * static PACA; we switch to kexec_paca. */ -struct paca_struct kexec_paca; +static struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, From 6fad9ddc7d9f6dfb8a53c192f746c26e0d72ee35 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 5 Mar 2022 21:01:49 -0800 Subject: [PATCH 272/380] xtensa: rename PT_SIZE to PT_KERNEL_SIZE PT_SIZE is used by the xtensa port to designate kernel exception frame size. In preparation for struct pt_regs size change rename PT_SIZE to PT_KERNEL_SIZE for clarity and change its definition to always cover only the kernel exception frame. Signed-off-by: Max Filippov Reviewed-by: Kees Cook --- arch/xtensa/kernel/asm-offsets.c | 2 +- arch/xtensa/kernel/entry.S | 14 +++++++------- arch/xtensa/kernel/vectors.S | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index dc5c83cad9be..6c2fc3e1d77c 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -63,7 +63,7 @@ int main(void) DEFINE(PT_AREG15, offsetof (struct pt_regs, areg[15])); DEFINE(PT_WINDOWBASE, offsetof (struct pt_regs, windowbase)); DEFINE(PT_WINDOWSTART, offsetof(struct pt_regs, windowstart)); - DEFINE(PT_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_KERNEL_SIZE, offsetof(struct pt_regs, areg[16])); DEFINE(PT_AREG_END, offsetof (struct pt_regs, areg[XCHAL_NUM_AREGS])); DEFINE(PT_USER_SIZE, offsetof(struct pt_regs, areg[XCHAL_NUM_AREGS])); DEFINE(PT_XTREGS_OPT, offsetof(struct pt_regs, xtregs_opt)); diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index a1029a5b6a1d..77a7c8da3ff5 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -341,8 +341,8 @@ KABI_W _bbsi.l a2, 3, 1f /* Copy spill slots of a0 and a1 to imitate movsp * in order to keep exception stack continuous */ - l32i a3, a1, PT_SIZE - l32i a0, a1, PT_SIZE + 4 + l32i a3, a1, PT_KERNEL_SIZE + l32i a0, a1, PT_KERNEL_SIZE + 4 s32e a3, a1, -16 s32e a0, a1, -12 #endif @@ -706,12 +706,12 @@ kernel_exception_exit: addi a0, a1, -16 l32i a3, a0, 0 l32i a4, a0, 4 - s32i a3, a1, PT_SIZE+0 - s32i a4, a1, PT_SIZE+4 + s32i a3, a1, PT_KERNEL_SIZE + 0 + s32i a4, a1, PT_KERNEL_SIZE + 4 l32i a3, a0, 8 l32i a4, a0, 12 - s32i a3, a1, PT_SIZE+8 - s32i a4, a1, PT_SIZE+12 + s32i a3, a1, PT_KERNEL_SIZE + 8 + s32i a4, a1, PT_KERNEL_SIZE + 12 /* Common exception exit. * We restore the special register and the current window frame, and @@ -821,7 +821,7 @@ ENTRY(debug_exception) bbsi.l a2, PS_UM_BIT, 2f # jump if user mode - addi a2, a1, -16-PT_SIZE # assume kernel stack + addi a2, a1, -16 - PT_KERNEL_SIZE # assume kernel stack 3: l32i a0, a3, DT_DEBUG_SAVE s32i a1, a2, PT_AREG1 diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 407ece204e7c..1073fe4a584d 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -88,7 +88,7 @@ ENDPROC(_UserExceptionVector) * Kernel exception vector. (Exceptions with PS.UM == 0, PS.EXCM == 0) * * We get this exception when we were already in kernel space. - * We decrement the current stack pointer (kernel) by PT_SIZE and + * We decrement the current stack pointer (kernel) by PT_KERNEL_SIZE and * jump to the first-level handler associated with the exception cause. * * Note: we need to preserve space for the spill region. @@ -100,7 +100,7 @@ ENTRY(_KernelExceptionVector) xsr a3, excsave1 # save a3, and get dispatch table wsr a2, depc # save a2 - addi a2, a1, -16-PT_SIZE # adjust stack pointer + addi a2, a1, -16 - PT_KERNEL_SIZE # adjust stack pointer s32i a0, a2, PT_AREG0 # save a0 to ESF rsr a0, exccause # retrieve exception cause s32i a0, a2, PT_DEPC # mark it as a regular exception From 5b835d4cade1723c5944a231cc685801f5a5cc6c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 5 Mar 2022 21:05:54 -0800 Subject: [PATCH 273/380] xtensa: use XCHAL_NUM_AREGS as pt_regs::areg size struct pt_regs is used to access both kernel and user exception frames. User exception frames may contain up to XCHAL_NUM_AREG registers that task creation and signal delivery code may access, but pt_regs::areg array has only 16 entries that cover only the kernel exception frame. This results in the following build error: arch/xtensa/kernel/process.c: In function 'copy_thread': arch/xtensa/kernel/process.c:262:52: error: array subscript 53 is above array bounds of 'long unsigned int[16]' [-Werror=array-bounds] 262 | put_user(regs->areg[caller_ars+1], Change struct pt_regs::areg size to XCHAL_NUM_AREGS so that it covers the whole user exception frame. Adjust task_pt_regs and drop additional register copying code from copy_thread now that the whole user exception stack frame is copied. Reported-by: Kees Cook Signed-off-by: Max Filippov Reviewed-by: Kees Cook --- arch/xtensa/include/asm/ptrace.h | 7 +++---- arch/xtensa/kernel/process.c | 10 ---------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index b109416dc07e..308f209a4740 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -44,6 +44,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * This struct defines the way the registers are stored on the @@ -77,14 +78,12 @@ struct pt_regs { /* current register frame. * Note: The ESF for kernel exceptions ends after 16 registers! */ - unsigned long areg[16]; + unsigned long areg[XCHAL_NUM_AREGS]; }; -#include - # define arch_has_single_step() (1) # define task_pt_regs(tsk) ((struct pt_regs*) \ - (task_stack_page(tsk) + KERNEL_STACK_SIZE - (XCHAL_NUM_AREGS-16)*4) - 1) + (task_stack_page(tsk) + KERNEL_STACK_SIZE) - 1) # define user_mode(regs) (((regs)->ps & 0x00000020)!=0) # define instruction_pointer(regs) ((regs)->pc) # define return_pointer(regs) (MAKE_PC_FROM_RA((regs)->areg[0], \ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index bd80df890b1e..e8bfbca5f001 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -232,10 +232,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, p->thread.ra = MAKE_RA_FOR_CALL( (unsigned long)ret_from_fork, 0x1); - /* This does not copy all the regs. - * In a bout of brilliance or madness, - * ARs beyond a0-a15 exist past the end of the struct. - */ *childregs = *regs; childregs->areg[1] = usp; childregs->areg[2] = 0; @@ -265,14 +261,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, childregs->wmask = 1; childregs->windowstart = 1; childregs->windowbase = 0; - } else { - int len = childregs->wmask & ~0xf; - memcpy(&childregs->areg[XCHAL_NUM_AREGS - len/4], - ®s->areg[XCHAL_NUM_AREGS - len/4], len); } - childregs->syscall = regs->syscall; - if (clone_flags & CLONE_SETTLS) childregs->threadptr = tls; } else { From 97dfad194ca8de04c7292d4f4c8dc493c0d20f85 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 17 Feb 2022 14:24:15 +0000 Subject: [PATCH 274/380] iommu/arm-smmu: Account for PMU interrupts In preparation for SMMUv2 PMU support, rejig our IRQ setup code to account for PMU interrupts as additional resources. We can simplify the whole flow by only storing the context IRQs, since the global IRQs are devres-managed and we never refer to them beyond the initial request. CC: Lad Prabhakar Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b2a40caaf1622eb35c555074a0d72f4f0513cff9.1645106346.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 93 +++++++++++++-------------- drivers/iommu/arm/arm-smmu/arm-smmu.h | 5 +- 2 files changed, 46 insertions(+), 52 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4bc75c4ce402..e50fcf37af58 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -807,7 +807,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, * Request context fault interrupt. Do this last to avoid the * handler seeing a half-initialised domain state. */ - irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + irq = smmu->irqs[cfg->irptndx]; if (smmu->impl && smmu->impl->context_fault) context_fault = smmu->impl->context_fault; @@ -858,7 +858,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) arm_smmu_write_context_bank(smmu, cfg->cbndx); if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { - irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + irq = smmu->irqs[cfg->irptndx]; devm_free_irq(smmu->dev, irq, domain); } @@ -1951,8 +1951,8 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) return ret; } -static int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { struct device *dev = smmu->dev; struct acpi_iort_node *node = @@ -1968,7 +1968,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, return ret; /* Ignore the configuration access interrupt */ - smmu->num_global_irqs = 1; + *global_irqs = 1; + *pmu_irqs = 0; if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK) smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; @@ -1976,25 +1977,24 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, return 0; } #else -static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { return -ENODEV; } #endif -static int arm_smmu_device_dt_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { const struct arm_smmu_match_data *data; - struct device *dev = &pdev->dev; + struct device *dev = smmu->dev; bool legacy_binding; - if (of_property_read_u32(dev->of_node, "#global-interrupts", - &smmu->num_global_irqs)) { - dev_err(dev, "missing #global-interrupts property\n"); - return -ENODEV; - } + if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs)) + return dev_err_probe(dev, -ENODEV, + "missing #global-interrupts property\n"); + *pmu_irqs = 0; data = of_device_get_match_data(dev); smmu->version = data->version; @@ -2073,6 +2073,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; int num_irqs, i, err; + u32 global_irqs, pmu_irqs; irqreturn_t (*global_fault)(int irq, void *dev); smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); @@ -2083,10 +2084,9 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->dev = dev; if (dev->of_node) - err = arm_smmu_device_dt_probe(pdev, smmu); + err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs); else - err = arm_smmu_device_acpi_probe(pdev, smmu); - + err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs); if (err) return err; @@ -2105,31 +2105,25 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (IS_ERR(smmu)) return PTR_ERR(smmu); - num_irqs = 0; - while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { - num_irqs++; - if (num_irqs > smmu->num_global_irqs) - smmu->num_context_irqs++; - } + num_irqs = platform_irq_count(pdev); - if (!smmu->num_context_irqs) { - dev_err(dev, "found %d interrupts but expected at least %d\n", - num_irqs, smmu->num_global_irqs + 1); - return -ENODEV; - } + smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs; + if (smmu->num_context_irqs <= 0) + return dev_err_probe(dev, -ENODEV, + "found %d interrupts but expected at least %d\n", + num_irqs, global_irqs + pmu_irqs + 1); - smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs), - GFP_KERNEL); - if (!smmu->irqs) { - dev_err(dev, "failed to allocate %d irqs\n", num_irqs); - return -ENOMEM; - } + smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs, + sizeof(*smmu->irqs), GFP_KERNEL); + if (!smmu->irqs) + return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n", + smmu->num_context_irqs); - for (i = 0; i < num_irqs; ++i) { - int irq = platform_get_irq(pdev, i); + for (i = 0; i < smmu->num_context_irqs; i++) { + int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i); if (irq < 0) - return -ENODEV; + return irq; smmu->irqs[i] = irq; } @@ -2165,17 +2159,18 @@ static int arm_smmu_device_probe(struct platform_device *pdev) else global_fault = arm_smmu_global_fault; - for (i = 0; i < smmu->num_global_irqs; ++i) { - err = devm_request_irq(smmu->dev, smmu->irqs[i], - global_fault, - IRQF_SHARED, - "arm-smmu global fault", - smmu); - if (err) { - dev_err(dev, "failed to request global IRQ %d (%u)\n", - i, smmu->irqs[i]); - return err; - } + for (i = 0; i < global_irqs; i++) { + int irq = platform_get_irq(pdev, i); + + if (irq < 0) + return irq; + + err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED, + "arm-smmu global fault", smmu); + if (err) + return dev_err_probe(dev, err, + "failed to request global IRQ %d (%u)\n", + i, irq); } err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 432de2f742c3..2b9b42fb6f30 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -318,11 +318,10 @@ struct arm_smmu_device { unsigned long pa_size; unsigned long pgsize_bitmap; - u32 num_global_irqs; - u32 num_context_irqs; + int num_context_irqs; + int num_clks; unsigned int *irqs; struct clk_bulk_data *clks; - int num_clks; spinlock_t global_sync_lock; From d64e5e9120a6afc8ebb9e9b46c1302f13b16b68d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 11 Nov 2021 14:31:46 +0100 Subject: [PATCH 275/380] s390/ap/zcrypt: debug feature improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds some debug feature improvements related to some failures happened in the past. With CEX8 the max request and response sizes have been extended but the user space applications did not rework their code and thus ran into receive buffer issues. This ffdc patch here helps with additional checks and debug feature messages in debugging and pointing to the root cause of some failures related to wrong buffer sizes. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_queue.c | 3 ++- drivers/s390/crypto/zcrypt_api.c | 19 +++++++++++++++ drivers/s390/crypto/zcrypt_msgtype50.c | 8 +++++++ drivers/s390/crypto/zcrypt_msgtype6.c | 32 +++++++++++++++++++++++--- 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 1901449768dd..921a82a41e66 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -455,7 +455,7 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + if (aq->config && aq->dev_state > AP_DEV_STATE_UNINITIATED) return ap_jumptable[aq->sm_state][event](aq); else return AP_SM_WAIT_NONE; @@ -915,6 +915,7 @@ void ap_queue_init_state(struct ap_queue *aq) spin_lock_bh(&aq->lock); aq->dev_state = AP_DEV_STATE_OPERATING; aq->sm_state = AP_SM_STATE_RESET_START; + aq->last_err_rc = 0; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 9811ab81f3c4..00f6859c4e72 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -714,6 +714,8 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -822,6 +824,8 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -940,6 +944,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, xcRB->user_defined, *domain); rc = -ENODEV; goto out; } @@ -1112,6 +1118,17 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + if (targets && target_num == 1) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, (int) targets->ap_id, + (int) targets->dom_id); + } else if (targets) { + ZCRYPT_DBF_DBG("%s no match for %d target addrs => ENODEV\n", + __func__, (int) target_num); + } else { + ZCRYPT_DBF_DBG("%s no match for address ff.ffff => ENODEV\n", + __func__); + } rc = -ENODEV; goto out_free; } @@ -1188,6 +1205,8 @@ static long zcrypt_rng(char *buffer) spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index f42e8c511184..b6dcdd2a66d4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -497,6 +497,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send me cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } @@ -542,6 +546,10 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send crt cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 8582dd0d6969..df283729191a 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -714,17 +714,31 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq, char *data = reply->msg; /* Copy CPRB to user */ + if (xcRB->reply_control_blk_length < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s reply_control_blk_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_control_blk_length, + msg->fmt2.count1); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr, data + msg->fmt2.offset1, msg->fmt2.count1)) return -EFAULT; xcRB->reply_control_blk_length = msg->fmt2.count1; /* Copy data buffer to user */ - if (msg->fmt2.count2) + if (msg->fmt2.count2) { + if (xcRB->reply_data_length < msg->fmt2.count2) { + ZCRYPT_DBF_DBG("%s reply_data_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_data_length, + msg->fmt2.count2); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_data_addr, data + msg->fmt2.offset2, msg->fmt2.count2)) return -EFAULT; + } xcRB->reply_data_length = msg->fmt2.count2; + return 0; } @@ -744,8 +758,12 @@ static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, struct type86_fmt2_msg *msg = reply->msg; char *data = reply->msg; - if (xcRB->resp_len < msg->fmt2.count1) - return -EINVAL; + if (xcRB->resp_len < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s resp_len %u < required %u => EMSGSIZE\n", + __func__, (unsigned int)xcRB->resp_len, + msg->fmt2.count1); + return -EMSGSIZE; + } /* Copy response CPRB to user */ if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp, @@ -1167,6 +1185,10 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } @@ -1272,6 +1294,10 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } From 985214af939b9935dac94aa6fb56c85039fb77e8 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 16 Nov 2021 14:54:19 +0100 Subject: [PATCH 276/380] s390/zcrypt: CEX8S exploitation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds CEX8 exploitation support for the AP bus code, the zcrypt device driver zoo and the vfio device driver. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/zcrypt.h | 2 +- drivers/s390/crypto/ap_bus.c | 12 ++--- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/vfio_ap_drv.c | 2 + drivers/s390/crypto/zcrypt_cex4.c | 75 ++++++++++++++++++++--------- 5 files changed, 63 insertions(+), 29 deletions(-) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 22fd202856bc..2f04a5499d74 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -288,7 +288,7 @@ struct zcrypt_device_matrix_ext { * 0x08: CEX3A * 0x0a: CEX4 * 0x0b: CEX5 - * 0x0c: CEX6 and CEX7 + * 0x0c: CEX6, CEX7 or CEX8 * 0x0d: device is disabled * * ZCRYPT_QDEPTH_MASK diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 555cc3394fe3..0f8f4230dbc2 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1589,24 +1589,24 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) AP_QID_QUEUE(qid), rawtype); return 0; } - /* up to CEX7 known and fully supported */ - if (rawtype <= AP_DEVICE_TYPE_CEX7) + /* up to CEX8 known and fully supported */ + if (rawtype <= AP_DEVICE_TYPE_CEX8) return rawtype; /* - * unknown new type > CEX7, check for compatibility + * unknown new type > CEX8, check for compatibility * to the highest known and supported type which is - * currently CEX7 with the help of the QACT function. + * currently CEX8 with the help of the QACT function. */ if (ap_qact_available()) { struct ap_queue_status status; union ap_qact_ap_info apinfo = {0}; apinfo.mode = (func >> 26) & 0x07; - apinfo.cat = AP_DEVICE_TYPE_CEX7; + apinfo.cat = AP_DEVICE_TYPE_CEX8; status = ap_qact(qid, 0, &apinfo); if (status.response_code == AP_RESPONSE_NORMAL && apinfo.cat >= AP_DEVICE_TYPE_CEX2A - && apinfo.cat <= AP_DEVICE_TYPE_CEX7) + && apinfo.cat <= AP_DEVICE_TYPE_CEX8) comp_type = apinfo.cat; } if (!comp_type) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index dc6f563e8787..5700d4024681 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -70,6 +70,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX5 11 #define AP_DEVICE_TYPE_CEX6 12 #define AP_DEVICE_TYPE_CEX7 13 +#define AP_DEVICE_TYPE_CEX8 14 /* * Known function facilities diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 82b6ffee06c5..29ebd54f8919 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -41,6 +41,8 @@ static struct ap_device_id ap_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of sibling */ }, }; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 06024bbe9a58..fe5664c7589e 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright IBM Corp. 2012, 2019 + * Copyright IBM Corp. 2012, 2022 * Author(s): Holger Dengler */ @@ -36,8 +36,8 @@ #define CEX4_CLEANUP_TIME (900*HZ) MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX4/CEX5/CEX6/CEX7 Cryptographic Card device driver, " \ - "Copyright IBM Corp. 2019"); +MODULE_DESCRIPTION("CEX[45678] Cryptographic Card device driver, " \ + "Copyright IBM Corp. 2022"); MODULE_LICENSE("GPL"); static struct ap_device_id zcrypt_cex4_card_ids[] = { @@ -49,6 +49,8 @@ static struct ap_device_id zcrypt_cex4_card_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { /* end of list */ }, }; @@ -63,6 +65,8 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of list */ }, }; @@ -395,7 +399,7 @@ static const struct attribute_group ep11_queue_attr_grp = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always + * Probe function for CEX[45678] card device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. @@ -414,6 +418,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 6, 9, 20, 17, 65, 438, 0, 0}; static const int CEX7A_SPEED_IDX[NUM_OPS] = { 6, 8, 17, 15, 54, 362, 0, 0}; + static const int CEX8A_SPEED_IDX[NUM_OPS] = { + 6, 8, 17, 15, 54, 362, 0, 0}; static const int CEX4C_SPEED_IDX[NUM_OPS] = { 59, 69, 308, 83, 278, 2204, 209, 40}; @@ -423,6 +429,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 16, 20, 32, 27, 77, 455, 24, 9}; static const int CEX7C_SPEED_IDX[NUM_OPS] = { 14, 16, 26, 23, 64, 376, 23, 8}; + static const int CEX8C_SPEED_IDX[NUM_OPS] = { + 14, 16, 26, 23, 64, 376, 23, 8}; static const int CEX4P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 50}; @@ -432,6 +440,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 0, 0, 0, 0, 0, 0, 0, 9}; static const int CEX7P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 8}; + static const int CEX8P_SPEED_IDX[NUM_OPS] = { + 0, 0, 0, 0, 0, 0, 0, 8}; struct ap_card *ac = to_ap_card(&ap_dev->device); struct zcrypt_card *zc; @@ -455,13 +465,20 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX6A"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6A_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7A"; + zc->speed_rating = CEX7A_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8A"; + zc->speed_rating = CEX8A_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. */ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7A_SPEED_IDX; } zc->min_mod_size = CEX4A_MIN_MOD_SIZE; if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) && @@ -477,32 +494,39 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } else if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) { zc->type_string = "CEX4C"; - /* wrong user space type, must be CEX4 + zc->speed_rating = CEX4C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX4C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5C"; - /* wrong user space type, must be CEX5 + zc->speed_rating = CEX5C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX5C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6C"; - /* wrong user space type, must be CEX6 - * just keep it for cca compatibility - */ - zc->user_space_type = ZCRYPT_CEX3C; zc->speed_rating = CEX6C_SPEED_IDX; - } else { - zc->type_string = "CEX7C"; - /* wrong user space type, must be CEX7 + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { + zc->type_string = "CEX7C"; zc->speed_rating = CEX7C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; + } else { + zc->type_string = "CEX8C"; + zc->speed_rating = CEX8C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -520,13 +544,20 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX6P"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6P_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7P"; + zc->speed_rating = CEX7P_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8P"; + zc->speed_rating = CEX8P_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. */ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7P_SPEED_IDX; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -563,7 +594,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver + * This is called to remove the CEX[45678] card driver * information if an AP card device is removed. */ static void zcrypt_cex4_card_remove(struct ap_device *ap_dev) @@ -587,7 +618,7 @@ static struct ap_driver zcrypt_cex4_card_driver = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always + * Probe function for CEX[45678] queue device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. @@ -653,7 +684,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver + * This is called to remove the CEX[45678] queue driver * information if an AP queue device is removed. */ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) From a7e701dba1234adbfbacad5ce19656c5606728da Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 17 Nov 2021 15:38:39 +0100 Subject: [PATCH 277/380] s390/zcrypt: handle checkstopped cards with new state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A crypto card may be in checkstopped state. With this patch this is handled as a new state in the ap card and ap queue structs. There is also a new card sysfs attribute /sys/devices/ap/cardxx/chkstop and a new queue sysfs attribute /sys/devices/ap/cardxx/xx.yyyy/chkstop displaying the checkstop state of the card or queue. Please note that the queue's checkstop state is only a copy of the card's checkstop state but makes maintenance much easier. The checkstop state expressed here is the result of an RC 0x04 (CHECKSTOP) during an AP command, mostly the PQAP(TAPQ) command which is 'testing' the queue. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 78 ++++++++++++++++++++++++++------ drivers/s390/crypto/ap_bus.h | 2 + drivers/s390/crypto/ap_card.c | 11 +++++ drivers/s390/crypto/ap_queue.c | 18 +++++++- drivers/s390/crypto/zcrypt_api.c | 26 +++++------ 5 files changed, 105 insertions(+), 30 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0f8f4230dbc2..fdf16cb70881 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -323,7 +323,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); * false otherwise. */ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, - int *q_depth, int *q_ml, bool *q_decfg) + int *q_depth, int *q_ml, bool *q_decfg, bool *q_cstop) { struct ap_queue_status status; union { @@ -366,6 +366,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, *q_depth = tapq_info.tapq_gr2.qd; *q_ml = tapq_info.tapq_gr2.ml; *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; switch (*q_type) { /* For CEX2 and CEX3 the available functions * are not reflected by the facilities bits. @@ -1710,7 +1711,7 @@ static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac) */ static inline void ap_scan_domains(struct ap_card *ac) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1739,7 +1740,8 @@ static inline void ap_scan_domains(struct ap_card *ac) continue; } /* domain is valid, get info from this APQN */ - if (!ap_queue_info(qid, &type, &func, &depth, &ml, &decfg)) { + if (!ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) { if (aq) { AP_DBF_INFO("%s(%d,%d) queue_info() failed, rm queue dev\n", __func__, ac->id, dom); @@ -1758,6 +1760,7 @@ static inline void ap_scan_domains(struct ap_card *ac) } aq->card = ac; aq->config = !decfg; + aq->chkstop = chkstop; dev = &aq->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = &ac->ap_dev.device; @@ -1774,13 +1777,43 @@ static inline void ap_scan_domains(struct ap_card *ac) if (decfg) AP_DBF_INFO("%s(%d,%d) new (decfg) queue dev created\n", __func__, ac->id, dom); + else if (chkstop) + AP_DBF_INFO("%s(%d,%d) new (chkstop) queue dev created\n", + __func__, ac->id, dom); else AP_DBF_INFO("%s(%d,%d) new queue dev created\n", __func__, ac->id, dom); goto put_dev_and_continue; } - /* Check config state on the already existing queue device */ + /* handle state changes on already existing queue device */ spin_lock_bh(&aq->lock); + /* checkstop state */ + if (chkstop && !aq->chkstop) { + /* checkstop on */ + aq->chkstop = true; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = AP_RESPONSE_CHECKSTOPPED; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop on\n", + __func__, ac->id, dom); + /* 'receive' pending messages with -EAGAIN */ + ap_flush_queue(aq); + goto put_dev_and_continue; + } else if (!chkstop && aq->chkstop) { + /* checkstop off */ + aq->chkstop = false; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_OPERATING; + aq->sm_state = AP_SM_STATE_RESET_START; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop off\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + /* config state change */ if (decfg && aq->config) { /* config off this queue device */ aq->config = false; @@ -1789,14 +1822,13 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->last_err_rc = AP_RESPONSE_DECONFIGURED; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config off\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config off\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); /* 'receive' pending messages with -EAGAIN */ ap_flush_queue(aq); goto put_dev_and_continue; - } - if (!decfg && !aq->config) { + } else if (!decfg && !aq->config) { /* config on this queue device */ aq->config = true; if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { @@ -1804,8 +1836,8 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->sm_state = AP_SM_STATE_RESET_START; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config on\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config on\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); goto put_dev_and_continue; } @@ -1832,7 +1864,7 @@ put_dev_and_continue: */ static inline void ap_scan_adapter(int ap) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1866,8 +1898,8 @@ static inline void ap_scan_adapter(int ap) for (dom = 0; dom <= ap_max_domain_id; dom++) if (ap_test_config_usage_domain(dom)) { qid = AP_MKQID(ap, dom); - if (ap_queue_info(qid, &type, &func, - &depth, &ml, &decfg)) + if (ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) break; } if (dom > ap_max_domain_id) { @@ -1912,13 +1944,25 @@ static inline void ap_scan_adapter(int ap) put_device(dev); ac = NULL; } else { + /* handle checkstop state change */ + if (chkstop && !ac->chkstop) { + /* checkstop on */ + ac->chkstop = true; + AP_DBF_INFO("%s(%d) card dev checkstop on\n", + __func__, ap); + } else if (!chkstop && ac->chkstop) { + /* checkstop off */ + ac->chkstop = false; + AP_DBF_INFO("%s(%d) card dev checkstop off\n", + __func__, ap); + } + /* handle config state change */ if (decfg && ac->config) { ac->config = false; AP_DBF_INFO("%s(%d) card dev config off\n", __func__, ap); ap_send_config_uevent(&ac->ap_dev, ac->config); - } - if (!decfg && !ac->config) { + } else if (!decfg && !ac->config) { ac->config = true; AP_DBF_INFO("%s(%d) card dev config on\n", __func__, ap); @@ -1942,6 +1986,7 @@ static inline void ap_scan_adapter(int ap) return; } ac->config = !decfg; + ac->chkstop = chkstop; dev = &ac->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = ap_root_device; @@ -1966,6 +2011,9 @@ static inline void ap_scan_adapter(int ap) if (decfg) AP_DBF_INFO("%s(%d) new (decfg) card dev type=%d func=0x%08x created\n", __func__, ap, type, func); + else if (chkstop) + AP_DBF_INFO("%s(%d) new (chkstop) card dev type=%d func=0x%08x created\n", + __func__, ap, type, func); else AP_DBF_INFO("%s(%d) new card dev type=%d func=0x%08x created\n", __func__, ap, type, func); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 5700d4024681..703d6782ce65 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -179,6 +179,7 @@ struct ap_card { int id; /* AP card number. */ unsigned int maxmsgsize; /* AP msg limit for this card */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; @@ -191,6 +192,7 @@ struct ap_queue { spinlock_t lock; /* Per device lock. */ enum ap_dev_state dev_state; /* queue device state */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ bool interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 196325a66662..6b2170cf186e 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -174,6 +174,16 @@ static ssize_t config_store(struct device *dev, static DEVICE_ATTR_RW(config); +static ssize_t chkstop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_card *ac = to_ap_card(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->chkstop ? 1 : 0); +} + +static DEVICE_ATTR_RO(chkstop); + static ssize_t max_msg_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -194,6 +204,7 @@ static struct attribute *ap_card_dev_attrs[] = { &dev_attr_pendingq_count.attr, &dev_attr_modalias.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, &dev_attr_max_msg_size.attr, NULL }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 921a82a41e66..205045cd998d 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -455,7 +455,8 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - if (aq->config && aq->dev_state > AP_DEV_STATE_UNINITIATED) + if (aq->config && !aq->chkstop && + aq->dev_state > AP_DEV_STATE_UNINITIATED) return ap_jumptable[aq->sm_state][event](aq); else return AP_SM_WAIT_NONE; @@ -615,6 +616,20 @@ static ssize_t config_show(struct device *dev, static DEVICE_ATTR_RO(config); +static ssize_t chkstop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc; + + spin_lock_bh(&aq->lock); + rc = scnprintf(buf, PAGE_SIZE, "%d\n", aq->chkstop ? 1 : 0); + spin_unlock_bh(&aq->lock); + return rc; +} + +static DEVICE_ATTR_RO(chkstop); + #ifdef CONFIG_ZCRYPT_DEBUG static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -729,6 +744,7 @@ static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_reset.attr, &dev_attr_interrupt.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, #ifdef CONFIG_ZCRYPT_DEBUG &dev_attr_states.attr, &dev_attr_last_err_rc.attr, diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 00f6859c4e72..8b81e6385d55 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -671,7 +671,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -692,7 +692,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -781,7 +781,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -802,7 +802,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo_crt || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -895,7 +895,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* Check for user selected CCA card */ @@ -918,9 +918,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check for device useable and eligible */ - if (!zq->online || - !zq->ops->send_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_cprb || + !zq->queue->config || zq->queue->chkstop || (tdom != AUTOSEL_DOM && tdom != AP_QID_QUEUE(zq->queue->qid))) continue; @@ -1068,7 +1067,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable EP11 card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x04000000)) continue; /* Check for user selected EP11 card */ @@ -1091,9 +1090,8 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ - if (!zq->online || - !zq->ops->send_ep11_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_ep11_cprb || + !zq->queue->config || zq->queue->chkstop || (targets && !is_desired_ep11_queue(zq->queue->qid, target_num, targets))) @@ -1182,7 +1180,7 @@ static long zcrypt_rng(char *buffer) spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* get weight index of the card device */ @@ -1192,7 +1190,7 @@ static long zcrypt_rng(char *buffer) for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rng || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt)) continue; From 383366b58016361cc8a2e4c585b7d581eb76263a Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 23 Nov 2021 15:16:06 +0100 Subject: [PATCH 278/380] s390/zcrypt: Support CPRB minor version T7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a new CPRB minor version T7 to be supported with this patch. Together with this the functions which extract the CPRB data from userspace and prepare the AP message do now check the CPRB minor version and provide some info in the flag field of the ap message struct for further processing. The 3 functions doing this job have been renamed to prep_cca_ap_msg, prep_ep11_ap_msg and prep_rng_ap_msg to reflect their job better (old was get..fc). This patch also introduces two new flags to be used internal with the flag field of the struct ap_message: AP_MSG_FLAG_USAGE is set when prep_cca_ap_msg or prep_ep11_ap_msg come to the conclusion that this is a ordinary crypto load CPRB (which means T2 for CCA CPRBs and no admin bit for EP11 CPRBs). AP_MSG_FLAG_ADMIN is set when prep_cca_ap_msg or prep_ep11_ap_msg think, this is an administrative (control) crypto load CPRB (which means T3, T5, T6 or T7 for CCA CPRBs and admin bit set for EP11 CPRBs). Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 4 +- drivers/s390/crypto/zcrypt_api.c | 6 +-- drivers/s390/crypto/zcrypt_msgtype50.c | 4 +- drivers/s390/crypto/zcrypt_msgtype50.h | 4 +- drivers/s390/crypto/zcrypt_msgtype6.c | 56 +++++++++++++++++++------- drivers/s390/crypto/zcrypt_msgtype6.h | 13 +++--- 6 files changed, 60 insertions(+), 27 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 703d6782ce65..8fd5a17bdf99 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -251,7 +251,9 @@ struct ap_message { struct ap_message *); }; -#define AP_MSG_FLAG_SPECIAL 1 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_SPECIAL 0x0001 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_USAGE 0x0002 /* CCA, EP11: usage (no admin) msg */ +#define AP_MSG_FLAG_ADMIN 0x0004 /* CCA, EP11: admin (=control) msg */ /** * ap_init_message() - Initialize ap_message. diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 8b81e6385d55..af3e83541886 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -876,7 +876,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, } #endif - rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain); + rc = prep_cca_ap_msg(userspace, xcRB, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -1058,7 +1058,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, } } - rc = get_ep11cprb_fc(userspace, xcrb, &ap_msg, &func_code); + rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code); if (rc) goto out_free; @@ -1171,7 +1171,7 @@ static long zcrypt_rng(char *buffer) trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB); ap_init_message(&ap_msg); - rc = get_rng_fc(&ap_msg, &func_code, &domain); + rc = prep_rng_ap_msg(&ap_msg, &func_code, &domain); if (rc) goto out; diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index b6dcdd2a66d4..259145aa393f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -156,7 +156,7 @@ struct type80_hdr { unsigned char reserved3[8]; } __packed; -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) { if (!mex->inputdatalength) @@ -172,7 +172,7 @@ unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) return 0; } -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) { if (!crt->inputdatalength) diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 66bec4f45c56..eb49f06bed29 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -20,8 +20,8 @@ #define MSGTYPE_ADJUSTMENT 0x08 /* type04 extension (not needed in type50) */ -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *, int *); -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *, int *); +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fc); +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fc); void zcrypt_msgtype50_init(void); void zcrypt_msgtype50_exit(void); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index df283729191a..98755b57104f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -472,6 +472,7 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, *fcode = (msg->hdr.function_code[0] << 8) | msg->hdr.function_code[1]; *dom = (unsigned short *)&msg->cprbx.domain; + /* check subfunction, US and AU need special flag with NQAP */ if (memcmp(function_code, "US", 2) == 0 || memcmp(function_code, "AU", 2) == 0) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; @@ -481,6 +482,23 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* check CPRB minor version, set info bits in ap_message flag field */ + switch (*(unsigned short *)(&msg->cprbx.func_id[0])) { + case 0x5432: /* "T2" */ + ap_msg->flags |= AP_MSG_FLAG_USAGE; + break; + case 0x5433: /* "T3" */ + case 0x5435: /* "T5" */ + case 0x5436: /* "T6" */ + case 0x5437: /* "T7" */ + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + break; + default: + ZCRYPT_DBF_DBG("%s unknown CPRB minor version '%c%c'\n", + __func__, msg->cprbx.func_id[0], + msg->cprbx.func_id[1]); + } + /* copy data block */ if (xcRB->request_data_length && z_copy_from_user(userspace, req_data, xcRB->request_data_address, @@ -568,6 +586,12 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* set info bits in ap_message flag field */ + if (msg->cprbx.flags & 0x80) + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + else + ap_msg->flags |= AP_MSG_FLAG_USAGE; + return 0; } @@ -1131,15 +1155,17 @@ out_free: } /* - * Fetch function code from cprb. - * Extracting the fc requires to copy the cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare a CCA AP msg request. + * Prepare a CCA AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB, - struct ap_message *ap_msg, - unsigned int *func_code, unsigned short **dom) +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB, + struct ap_message *ap_msg, + unsigned int *func_code, unsigned short **dom) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, @@ -1193,15 +1219,17 @@ out: } /* - * Fetch function code from ep11 cprb. - * Extracting the fc requires to copy the ep11 cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare an EP11 AP msg request. + * Prepare an EP11 AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb, - struct ap_message *ap_msg, - unsigned int *func_code) +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *func_code) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1301,8 +1329,8 @@ out: return rc; } -unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, - unsigned int *domain) +int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code, + unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 155c73514bac..ec960d01cca0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -94,11 +94,14 @@ struct type86_fmt2_ext { unsigned int offset4; /* 0x00000000 */ } __packed; -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *, struct ap_message *, - unsigned int *, unsigned short **); -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *, struct ap_message *, - unsigned int *); -unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *); +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, + struct ap_message *ap_msg, + unsigned int *fc, unsigned short **dom); +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *fc); +int prep_rng_ap_msg(struct ap_message *ap_msg, + int *fc, unsigned int *dom); #define LOW 10 #define MEDIUM 100 From 252a1ff777639ad13978a614f2cde1f0c43a7c2f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 23 Nov 2021 16:02:47 +0100 Subject: [PATCH 279/380] s390/zcrypt: change reply buffer size offering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of offering the user space given receive buffer size to the crypto card firmware as limit for the reply message offer the internal per queue reply buffer size. As the queue's reply buffer is always adjusted to the max message size possible for this card this may offer more buffer space. However, now it is important to check the user space reply buffer on pushing back the reply. If the reply does not fit into the user space provided buffer the ioctl will fail with errno EMSGSIZE. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_msgtype6.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 98755b57104f..be3f46a014f4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1197,6 +1197,21 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, { int rc; struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct { + struct type6_hdr hdr; + struct CPRBX cprbx; + /* ... more data blocks ... */ + } __packed * msg = ap_msg->msg; + + /* + * Set the queue's reply buffer length minus 128 byte padding + * as reply limit for the card firmware. + */ + msg->hdr.FromCardLen1 = min_t(unsigned int, msg->hdr.FromCardLen1, + zq->reply.bufsize - 128); + if (msg->hdr.FromCardLen2) + msg->hdr.FromCardLen2 = + zq->reply.bufsize - msg->hdr.FromCardLen1 - 128; init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); @@ -1277,7 +1292,6 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * unsigned int dom_val; /* domain id */ } __packed * payload_hdr = NULL; - /* * The target domain field within the cprb body/payload block will be * replaced by the usage domain for non-management commands only. @@ -1309,6 +1323,13 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * AP_QID_QUEUE(zq->queue->qid); } + /* + * Set the queue's reply buffer length minus the two prepend headers + * as reply limit for the card firmware. + */ + msg->hdr.FromCardLen1 = zq->reply.bufsize - + sizeof(struct type86_hdr) - sizeof(struct type86_fmt2_ext); + init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); if (rc) From 1024063effc3ba86d1fec0f2ee0a9259a1065ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Christ?= Date: Mon, 10 Jan 2022 13:33:30 +0100 Subject: [PATCH 280/380] s390/zcrypt: Provide target domain for EP11 cprbs to scheduling function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scheduling function will get an extension which will process the target_id value from an EP11 cprb. This patch extracts the value during preparation of the ap message. Signed-off-by: Jürgen Christ Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- drivers/s390/crypto/zcrypt_msgtype6.c | 10 +++++++--- drivers/s390/crypto/zcrypt_msgtype6.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index af3e83541886..80e2a306709a 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1021,7 +1021,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, struct ep11_target_dev *targets; unsigned short target_num; unsigned int wgt = 0, pref_wgt = 0; - unsigned int func_code; + unsigned int func_code, domain; struct ap_message ap_msg; int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; @@ -1058,7 +1058,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, } } - rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code); + rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain); if (rc) goto out_free; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index be3f46a014f4..57d885158cf0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -510,7 +510,8 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg, struct ep11_urb *xcRB, - unsigned int *fcode) + unsigned int *fcode, + unsigned int *domain) { unsigned int lfmt; static struct type6_hdr static_type6_ep11_hdr = { @@ -592,6 +593,8 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap else ap_msg->flags |= AP_MSG_FLAG_USAGE; + *domain = msg->cprbx.target_id; + return 0; } @@ -1244,7 +1247,7 @@ out: */ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, struct ap_message *ap_msg, - unsigned int *func_code) + unsigned int *func_code, unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1260,7 +1263,8 @@ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code); + return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, + func_code, domain); } /* diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index ec960d01cca0..9da4f4175c44 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -99,7 +99,7 @@ int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, unsigned int *fc, unsigned short **dom); int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, struct ap_message *ap_msg, - unsigned int *fc); + unsigned int *fc, unsigned int *dom); int prep_rng_ap_msg(struct ap_message *ap_msg, int *fc, unsigned int *dom); From 9d792ef17f18734bca823910b89254dec37b50c5 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 14 Dec 2021 15:54:16 +0100 Subject: [PATCH 281/380] s390/airq: use DMA memory for summary indicators Protected virtualization guests have to use shared pages for airq notifier bit vectors and summary bytes or bits, thus these need to be allocated as DMA coherent memory. Commit b50623e5db80 ("s390/airq: use DMA memory for adapter interrupts") took care of the notifier bit vectors, but omitted to take care of the summary bytes/bits. In practice this omission is not a big deal, because the summary ain't necessarily allocated here, but can be supplied by the driver. Currently all the I/O we have for SE guests is virtio-ccw, and virtio-ccw uses a self-allocated array of summary indicators. Let us cover all our bases nevertheless! Signed-off-by: Halil Pasic Reviewed-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/cio/airq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index e56535c99888..c0ed364bf446 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -44,7 +44,7 @@ int register_adapter_interrupt(struct airq_struct *airq) if (!airq->handler || airq->isc > MAX_ISC) return -EINVAL; if (!airq->lsi_ptr) { - airq->lsi_ptr = kzalloc(1, GFP_KERNEL); + airq->lsi_ptr = cio_dma_zalloc(1); if (!airq->lsi_ptr) return -ENOMEM; airq->flags |= AIRQ_PTR_ALLOCATED; @@ -79,7 +79,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq) synchronize_rcu(); isc_unregister(airq->isc); if (airq->flags & AIRQ_PTR_ALLOCATED) { - kfree(airq->lsi_ptr); + cio_dma_free(airq->lsi_ptr, 1); airq->lsi_ptr = NULL; airq->flags &= ~AIRQ_PTR_ALLOCATED; } From a156f09c9063bc6c11502fc49a6d006489f25eb3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 10:45:43 +0100 Subject: [PATCH 282/380] s390/extable: sort amode31 extable early The early program check handler is active before the amode31 extable is sorted. Therefore in case a program check happens early within the amode31 code the extable entry might not be found. Fix this by sorting the amode31 extable early. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 6 ++++++ arch/s390/kernel/traps.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b8cfac4918d9..3dae0c04d33b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -293,6 +293,11 @@ static void __init check_image_bootable(void) disabled_wait(); } +static void __init sort_amode31_extable(void) +{ + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); +} + void __init startup_init(void) { sclp_early_adjust_va(); @@ -301,6 +306,7 @@ void __init startup_init(void) time_early_init(); init_kernel_storage_key(); lockdep_off(); + sort_amode31_extable(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 2b780786fc68..c2b1ffe88acf 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -291,7 +291,6 @@ static void __init test_monitor_call(void) void __init trap_init(void) { - sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); local_mcck_enable(); test_monitor_call(); } From 1952954569d1907eaf6df4b15bb23969e57b6599 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 10:53:34 +0100 Subject: [PATCH 283/380] s390/extable: search amode31 extable last It is very unlikely that an exception happens within the amode31 text section, therefore safe a couple of cycles for the common case, and search the amode31 extable last. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ff16ce0d04ee..1336541103e7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -230,13 +230,13 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) const struct exception_table_entry *s390_search_extables(unsigned long addr) { const struct exception_table_entry *fixup; + size_t num; - fixup = search_extable(__start_amode31_ex_table, - __stop_amode31_ex_table - __start_amode31_ex_table, - addr); - if (!fixup) - fixup = search_exception_tables(addr); - return fixup; + fixup = search_exception_tables(addr); + if (fixup) + return fixup; + num = __stop_amode31_ex_table - __start_amode31_ex_table; + return search_extable(__start_amode31_ex_table, num, addr); } static noinline void do_no_context(struct pt_regs *regs) From d09a307fde1c943d23ccb9fecc9a0e1a569732ad Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 11:22:12 +0100 Subject: [PATCH 284/380] s390/extable: move EX_TABLE define to asm-extable.h Follow arm64 and riscv and move the EX_TABLE define to asm-extable.h which is a lot less generic than the current linkage.h. Also make sure that all files which contain EX_TABLE usages actually include the new header file. This should make sure that the files always compile and there won't be any random compile breakage due to other header file dependencies. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/hypfs/hypfs_vm.c | 1 + arch/s390/include/asm/ap.h | 1 + arch/s390/include/asm/asm-extable.h | 21 +++++++++++++++++++++ arch/s390/include/asm/cpu_mf.h | 1 + arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/fpu/api.h | 1 + arch/s390/include/asm/futex.h | 1 + arch/s390/include/asm/linkage.h | 18 ------------------ arch/s390/include/asm/mmu.h | 1 + arch/s390/include/asm/uaccess.h | 1 + arch/s390/kernel/diag.c | 1 + arch/s390/kernel/early.c | 1 + arch/s390/kernel/entry.S | 1 + arch/s390/kernel/entry.h | 1 + arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/sysinfo.c | 1 + arch/s390/kernel/text_amode31.S | 1 + arch/s390/kernel/traps.c | 1 + arch/s390/lib/uaccess.c | 1 + arch/s390/mm/fault.c | 1 + arch/s390/mm/maccess.c | 1 + arch/s390/mm/page-states.c | 1 + arch/s390/net/bpf_jit_comp.c | 1 + arch/s390/pci/pci_clp.c | 1 + arch/s390/pci/pci_insn.c | 1 + arch/s390/pci/pci_mmio.c | 1 + drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/char/diag_ftp.c | 1 + drivers/s390/char/sclp.h | 1 + drivers/s390/cio/ioasm.c | 1 + 30 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 arch/s390/include/asm/asm-extable.h diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e8f15dbb89d0..3765c2d81df5 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c0c8a1f6c35d..ae75da592ccb 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -13,6 +13,7 @@ #define _ASM_S390_AP_H_ #include +#include /** * The ap_qid_t identifier of an ap queue. diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h new file mode 100644 index 000000000000..620390f17f0c --- /dev/null +++ b/arch/s390/include/asm/asm-extable.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H + +#include +#include + +#define __EX_TABLE(_section, _fault, _target) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 8;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.quad 0;) \ + stringify_in_c(.previous) + +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target) +#define EX_TABLE_AMODE31(_fault, _target) \ + __EX_TABLE(.amode31.ex_table, _fault, _target) + +#endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e3f12db46cfc..78bb336600bf 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -10,6 +10,7 @@ #define _ASM_S390_CPU_MF_H #include +#include #include asm(".include \"asm/cpu_mf-insn.h\"\n"); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index bdcd64f0c1d7..56e99c286d12 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -11,6 +11,7 @@ #include #include +#include enum diag_stat_enum { DIAG_STAT_X008, diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index a959b815a58b..b714ed0ef688 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -45,6 +45,7 @@ #define _ASM_S390_FPU_API_H #include +#include void save_fpu_regs(void); void load_fpu_regs(void); diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index c22debfcebf1..e08c882dccaa 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1ffea75b8ebc..c76777b15fec 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,27 +2,9 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#include #include #define __ALIGN .align 16, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -/* - * Helper macro for exception table entries - */ - -#define __EX_TABLE(_section, _fault, _target) \ - stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 8;) \ - stringify_in_c(.long (_fault) - .;) \ - stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.quad 0;) \ - stringify_in_c(.previous) - -#define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target) -#define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target) - #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index e12ff0f29d1a..82aae78e1315 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -4,6 +4,7 @@ #include #include +#include typedef struct { spinlock_t lock; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f14f4ade15a9..2c029ee89b7c 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,6 +13,7 @@ /* * User space memory access functions */ +#include #include #include #include diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 76a656b2146f..a778714e4d8b 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3dae0c04d33b..9d151808d03e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1f6df6d4a914..3781de26f207 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6083090be1f4..56e5e3712fbb 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a93142785bbc..28ae7df26c4a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index ef3f2659876c..b5e364358ce4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index 868e4a604110..2c8b14cc5556 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -6,6 +6,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c2b1ffe88acf..309cb0503feb 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "entry.h" diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 8a5d21461889..f846ebe038fa 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1336541103e7..adaf5f5c9a45 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 4cc5020f4e18..421efa46946b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 18a6381097a9..d5ea09d78938 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index df5d4da06643..f884d1b9ca79 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index be077b39da33..63f3e057c168 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 4dd58b196cea..1710d006ee93 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index c5b35ea129cf..080c88620723 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index db5987281010..e9edf3b6ed7c 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include #include diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c index 6bf1058de873..36bbd6b6e210 100644 --- a/drivers/s390/char/diag_ftp.c +++ b/drivers/s390/char/diag_ftp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 8a30e77db469..86dd2cde0f78 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 180913007824..acf1edd36549 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -5,6 +5,7 @@ #include +#include #include #include #include From 0741ec112ca67ea95dc8158a7bc7547ae36cbecc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 11:37:52 +0100 Subject: [PATCH 285/380] s390/extable: move extable related functions to mm/extable.c Just like arm64, riscv, and x86 move extable related functions to mm/extable.c. This is currently only one function, but this will change with subsequent changes. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/Makefile | 2 +- arch/s390/mm/extable.c | 16 ++++++++++++++++ arch/s390/mm/fault.c | 12 ------------ 3 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 arch/s390/mm/extable.c diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index cd67e94c16aa..57e4f3a24829 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,7 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o pageattr.o pgtable.o pgalloc.o +obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 000000000000..a4eb3d8aae7b --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +const struct exception_table_entry *s390_search_extables(unsigned long addr) +{ + const struct exception_table_entry *fixup; + size_t num; + + fixup = search_exception_tables(addr); + if (fixup) + return fixup; + num = __stop_amode31_ex_table - __start_amode31_ex_table; + return search_extable(__start_amode31_ex_table, num, addr); +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index adaf5f5c9a45..caa4ab0ff80a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -228,18 +228,6 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -const struct exception_table_entry *s390_search_extables(unsigned long addr) -{ - const struct exception_table_entry *fixup; - size_t num; - - fixup = search_exception_tables(addr); - if (fixup) - return fixup; - num = __stop_amode31_ex_table - __start_amode31_ex_table; - return search_extable(__start_amode31_ex_table, num, addr); -} - static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; From cfa45c5e0d36b87f99e76f1060526eac032dd624 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 13:31:33 +0100 Subject: [PATCH 286/380] s390/base: pass pt_regs to early program check handler Pass pt_regs to early program check handler like it is done for every other interrupt and exception handler. Also the passed pt_regs can be changed by the called function and the changes register contents and psw contents will be taken into account when returning. In addition the return psw will not be copied to the program check old psw in lowcore, but to the usual return psw location, like it is also done by the regular program check handler. This allows also to get rid of the code that disabled lowcore protection when changing the return address. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/base.S | 22 ++++++++++++++++------ arch/s390/kernel/early.c | 14 +++----------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8fd9772c7370..022cf0925e56 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -313,7 +313,7 @@ static __always_inline void __noreturn disabled_wait(void) * Basic Program Check Handler. */ extern void s390_base_pgm_handler(void); -extern void (*s390_base_pgm_handler_fn)(void); +extern void (*s390_base_pgm_handler_fn)(struct pt_regs *regs); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f7fe4033df36..172c23c8ca00 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -14,18 +14,28 @@ GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 +__PT_R0 = __PT_GPRS +__PT_R8 = __PT_GPRS + 64 + ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + lgr %r2,%r11 larl %r1,s390_base_pgm_handler_fn lg %r9,0(%r1) ltgr %r9,%r9 jz 1f BASR_EX %r14,%r9 - lmg %r0,%r15,__LC_SAVE_AREA_SYNC - lpswe __LC_PGM_OLD_PSW -1: lpswe disabled_wait_psw-0b(%r13) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW +1: larl %r13,disabled_wait_psw + lpswe 0(%r13) ENDPROC(s390_base_pgm_handler) .align 8 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 9d151808d03e..5715d1aab173 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -149,22 +149,14 @@ static __init void setup_topology(void) topology_max_mnest = max_mnest; } -static void early_pgm_check_handler(void) +static void early_pgm_check_handler(struct pt_regs *regs) { const struct exception_table_entry *fixup; - unsigned long cr0, cr0_new; - unsigned long addr; - addr = S390_lowcore.program_old_psw.addr; - fixup = s390_search_extables(addr); + fixup = s390_search_extables(regs->psw.addr); if (!fixup) disabled_wait(); - /* Disable low address protection before storing into lowcore. */ - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~(1UL << 28); - __ctl_load(cr0_new, 0, 0); - S390_lowcore.program_old_psw.addr = extable_fixup(fixup); - __ctl_load(cr0, 0, 0); + regs->psw.addr = extable_fixup(fixup); } static noinline __init void setup_lowcore_early(void) From 46fee16f571250d6cef74af73ffd47512da981a2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 14:29:25 +0100 Subject: [PATCH 287/380] s390/extable: add and use fixup_exception helper function Add and use fixup_exception helper function in order to remove the duplicated exception handler fixup code at several places. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/extable.h | 13 ++----------- arch/s390/kernel/early.c | 6 +----- arch/s390/kernel/kprobes.c | 5 +---- arch/s390/kernel/traps.c | 10 ++-------- arch/s390/mm/extable.c | 15 +++++++++++++++ arch/s390/mm/fault.c | 7 +------ 6 files changed, 22 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 8511f0e59290..d39d7159832a 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -49,17 +49,6 @@ ex_fixup_handler(const struct exception_table_entry *x) return (ex_handler_t)((unsigned long)&x->handler + x->handler); } -static inline bool ex_handle(const struct exception_table_entry *x, - struct pt_regs *regs) -{ - ex_handler_t handler = ex_fixup_handler(x); - - if (unlikely(handler)) - return handler(x, regs); - regs->psw.addr = extable_fixup(x); - return true; -} - #define ARCH_HAS_RELATIVE_EXTABLE static inline void swap_ex_entry_fixup(struct exception_table_entry *a, @@ -78,4 +67,6 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, } #define swap_ex_entry_fixup swap_ex_entry_fixup +bool fixup_exception(struct pt_regs *regs); + #endif diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5715d1aab173..08cc86a0db90 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -151,12 +151,8 @@ static __init void setup_topology(void) static void early_pgm_check_handler(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - - fixup = s390_search_extables(regs->psw.addr); - if (!fixup) + if (!fixup_exception(regs)) disabled_wait(); - regs->psw.addr = extable_fixup(fixup); } static noinline __init void setup_lowcore_early(void) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index e27a7d3b0364..7e2910e4172b 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -465,7 +465,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe *p = kprobe_running(); - const struct exception_table_entry *entry; switch(kcb->kprobe_status) { case KPROBE_HIT_SS: @@ -487,10 +486,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. */ - entry = s390_search_extables(regs->psw.addr); - if (entry && ex_handle(entry, regs)) + if (fixup_exception(regs)) return 1; - /* * fixup_exception() could not handle it, * Let do_page_fault() fix it. diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 309cb0503feb..7f0fadd10d68 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -54,9 +54,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); } else { - const struct exception_table_entry *fixup; - fixup = s390_search_extables(regs->psw.addr); - if (!fixup || !ex_handle(fixup, regs)) + if (!fixup_exception(regs)) die(regs, str); } } @@ -245,16 +243,12 @@ static void space_switch_exception(struct pt_regs *regs) static void monitor_event_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - if (user_mode(regs)) return; switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: - fixup = s390_search_extables(regs->psw.addr); - if (fixup) - ex_handle(fixup, regs); + fixup_exception(regs); break; case BUG_TRAP_TYPE_WARN: break; diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index a4eb3d8aae7b..d6ca75570dcf 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -14,3 +14,18 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr) num = __stop_amode31_ex_table - __start_amode31_ex_table; return search_extable(__start_amode31_ex_table, num, addr); } + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + ex_handler_t handler; + + ex = s390_search_extables(instruction_pointer(regs)); + if (!ex) + return false; + handler = ex_fixup_handler(ex); + if (unlikely(handler)) + return handler(ex, regs); + regs->psw.addr = extable_fixup(ex); + return true; +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index caa4ab0ff80a..e173b6187ad5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -230,13 +230,8 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) static noinline void do_no_context(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - - /* Are we prepared to handle this kernel fault? */ - fixup = s390_search_extables(regs->psw.addr); - if (fixup && ex_handle(fixup, regs)) + if (fixup_exception(regs)) return; - /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. From 3d66718cd62d45f3210f047248eab9e76d227e47 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 14:52:42 +0100 Subject: [PATCH 288/380] s390/extable: convert to relative table with data Follow arm64, riscv, and x86 and change extable layout to common "relative table with data". This allows to get rid of s390 specific code in sorttable.c. The main difference to before is that extable entries do not contain a relative function pointer anymore. Instead data and type fields are added. The type field is used to indicate which exception handler needs to be called, while the data field is currently unused. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/asm-extable.h | 15 ++++++---- arch/s390/include/asm/extable.h | 36 ++++++++++++------------ arch/s390/mm/extable.c | 21 ++++++++++---- arch/s390/net/bpf_jit_comp.c | 5 ++-- scripts/sorttable.c | 43 +---------------------------- 5 files changed, 46 insertions(+), 74 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 620390f17f0c..61484a5f1209 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -5,17 +5,22 @@ #include #include -#define __EX_TABLE(_section, _fault, _target) \ +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 + +#define __EX_TABLE(_section, _fault, _target, _type) \ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 8;) \ + stringify_in_c(.align 4;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.quad 0;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.short 0;) \ stringify_in_c(.previous) #define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) #define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target) + __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP) #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index d39d7159832a..af6ba52743e9 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -25,7 +25,7 @@ struct exception_table_entry { int insn, fixup; - long handler; + short type, data; }; extern struct exception_table_entry *__start_amode31_ex_table; @@ -38,17 +38,6 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } -typedef bool (*ex_handler_t)(const struct exception_table_entry *, - struct pt_regs *); - -static inline ex_handler_t -ex_fixup_handler(const struct exception_table_entry *x) -{ - if (likely(!x->handler)) - return NULL; - return (ex_handler_t)((unsigned long)&x->handler + x->handler); -} - #define ARCH_HAS_RELATIVE_EXTABLE static inline void swap_ex_entry_fixup(struct exception_table_entry *a, @@ -58,15 +47,26 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler; - if (a->handler) - a->handler += delta; - b->handler = tmp.handler; - if (b->handler) - b->handler -= delta; + a->type = b->type; + b->type = tmp.type; + a->data = b->data; + b->data = tmp.data; } #define swap_ex_entry_fixup swap_ex_entry_fixup +#ifdef CONFIG_BPF_JIT + +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); + +#else /* !CONFIG_BPF_JIT */ + +static inline bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + return false; +} + +#endif /* CONFIG_BPF_JIT */ + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index d6ca75570dcf..ac6b736ac883 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include const struct exception_table_entry *s390_search_extables(unsigned long addr) @@ -15,17 +17,24 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr) return search_extable(__start_amode31_ex_table, num, addr); } +static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; - ex_handler_t handler; ex = s390_search_extables(instruction_pointer(regs)); if (!ex) return false; - handler = ex_fixup_handler(ex); - if (unlikely(handler)) - return handler(ex, regs); - regs->psw.addr = extable_fixup(ex); - return true; + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + } + panic("invalid exception table entry"); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f884d1b9ca79..a1a3a10c514c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -622,8 +622,7 @@ static int get_probe_mem_regno(const u8 *insn) return insn[1] >> 4; } -static bool ex_handler_bpf(const struct exception_table_entry *x, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { int regno; u8 *insn; @@ -678,7 +677,7 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, /* JIT bug - landing pad and extable must be close. */ return -1; ex->fixup = delta; - ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + ex->type = EX_TYPE_BPF; jit->excnt++; } return 0; diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3a8ea5ed553d..d00504c5f530 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -261,45 +261,6 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) } } -static void s390_sort_relative_table(char *extab_image, int image_size) -{ - int i; - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) + i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4)); - /* - * 0 is a special self-relative handler value, which means that - * handler should be ignored. It is safe, because it means that - * handler field points to itself, which should never happen. - * When creating extable-relative values, keep it as 0, since - * this should never occur either: it would mean that handler - * field points to the first extable entry. - */ - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler += i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } - - qsort(extab_image, image_size / 16, 16, compare_relative_table); - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) - i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4)); - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler -= i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } -} - static int do_file(char const *const fname, void *addr) { int rc = -1; @@ -340,12 +301,10 @@ static int do_file(char const *const fname, void *addr) case EM_386: case EM_AARCH64: case EM_RISCV: + case EM_S390: case EM_X86_64: custom_sort = sort_relative_table_with_data; break; - case EM_S390: - custom_sort = s390_sort_relative_table; - break; case EM_PARISC: case EM_PPC: case EM_PPC64: From 484a8ed8b7d145ff38889e4598a4804e9d7e8ca6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 15:02:46 +0100 Subject: [PATCH 289/380] s390/extable: add dedicated uaccess handler This is more or less a combination of commit 2e77a62cb3a6 ("arm64: extable: add a dedicated uaccess handler") and commit 4b5305decc84 ("x86/extable: Extend extable functionality"). To describe the problem that needs to solved let's cite the full arm64 commit message: ------ For inline assembly, we place exception fixups out-of-line in the `.fixup` section such that these are out of the way of the fast path. This has a few drawbacks: * Since the fixup code is anonymous, backtraces will symbolize fixups as offsets from the nearest prior symbol, currently `__entry_tramp_text_end`. This is confusing, and painful to debug without access to the relevant vmlinux. * Since the exception handler adjusts the PC to execute the fixup, and the fixup uses a direct branch back into the function it fixes, backtraces of fixups miss the original function. This is confusing, and violates requirements for RELIABLE_STACKTRACE (and therefore LIVEPATCH). * Inline assembly and associated fixups are generated from templates, and we have many copies of logically identical fixups which only differ in which specific registers are written to and which address is branched to at the end of the fixup. This is potentially wasteful of I-cache resources, and makes it hard to add additional logic to fixups without significant bloat. This patch address all three concerns for inline uaccess fixups by adding a dedicated exception handler which updates registers in exception context and subsequent returns back into the function which faulted, removing the need for fixups specialized to each faulting instruction. Other than backtracing, there should be no functional change as a result of this patch. ------ Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/asm-extable.h | 27 +++++++++++++++++++++++++++ arch/s390/include/asm/uaccess.h | 24 ++++++------------------ arch/s390/mm/extable.c | 10 ++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 61484a5f1209..fb62df5e16a2 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -8,6 +8,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS 3 #define __EX_TABLE(_section, _fault, _target, _type) \ stringify_in_c(.section _section,"a";) \ @@ -18,9 +19,35 @@ stringify_in_c(.short 0;) \ stringify_in_c(.previous) +#define __EX_TABLE_UA(_section, _fault, _target, _type, _reg) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.macro extable_reg reg;) \ + stringify_in_c(.set found, 0;) \ + stringify_in_c(.set regnr, 0;) \ + stringify_in_c(.irp rs,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15;) \ + stringify_in_c(.ifc "\reg", "%%\rs";) \ + stringify_in_c(.set found, 1;) \ + stringify_in_c(.short regnr;) \ + stringify_in_c(.endif;) \ + stringify_in_c(.set regnr, regnr+1;) \ + stringify_in_c(.endr;) \ + stringify_in_c(.ifne (found != 1);) \ + stringify_in_c(.error "extable_reg: bad register argument";) \ + stringify_in_c(.endif;) \ + stringify_in_c(.endm;) \ + stringify_in_c(extable_reg _reg;) \ + stringify_in_c(.purgem extable_reg;) \ + stringify_in_c(.previous) + #define EX_TABLE(_fault, _target) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP) +#define EX_TABLE_UA(_fault, _target, _reg) \ + __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UACCESS, _reg) #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2c029ee89b7c..44b18800721a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -81,14 +81,10 @@ union oac { "0: mvcos %[_to],%[_from],%[_size]\n" \ "1: xr %[rc],%[rc]\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %[rc],%[retval]\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%[rc]) EX_TABLE_UA(1b,2b,%[rc]) \ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ - [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ + [spec] "d" (oac_spec.val) \ : "cc", "0"); \ __rc; \ }) @@ -295,13 +291,9 @@ int __noreturn __put_kernel_bad(void); "0: " insn " %2,%1\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+Q" (*(to)) \ - : "d" (val), "K" (-EFAULT) \ + : "d" (val) \ : "cc"); \ __rc; \ }) @@ -342,13 +334,9 @@ int __noreturn __get_kernel_bad(void); "0: " insn " %1,%2\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+d" (val) \ - : "Q" (*(from)), "K" (-EFAULT) \ + : "Q" (*(from)) \ : "cc"); \ __rc; \ }) diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index ac6b736ac883..8ac8ad2474a0 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -23,6 +24,13 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } +static bool ex_handler_uaccess(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->gprs[ex->data] = -EFAULT; + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -35,6 +43,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS: + return ex_handler_uaccess(ex, regs); } panic("invalid exception table entry"); } From 7fc8c362e782042bf36ceeb9343f8217d3d7dbb9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 27 Feb 2022 21:32:54 +0100 Subject: [PATCH 290/380] s390/bpf: encode register within extable entry Instead of decoding the instruction that faulted to get the register which needs to be zeroed, simply encode its number into the extable entries during code generation. This allows to get rid of a bit of code, and is also what other architectures are doing. Acked-by: Alexander Gordeev Reviewed-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a1a3a10c514c..e1e57a30ac66 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -624,16 +624,8 @@ static int get_probe_mem_regno(const u8 *insn) bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { - int regno; - u8 *insn; - regs->psw.addr = extable_fixup(x); - insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); - regno = get_probe_mem_regno(insn); - if (WARN_ON_ONCE(regno < 0)) - /* JIT bug - unexpected instruction. */ - return false; - regs->gprs[regno] = 0; + regs->gprs[x->data] = 0; return true; } @@ -641,16 +633,17 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, int probe_prg, int nop_prg) { struct exception_table_entry *ex; + int reg, prg; s64 delta; u8 *insn; - int prg; int i; if (!fp->aux->extable) /* Do nothing during early JIT passes. */ return 0; insn = jit->prg_buf + probe_prg; - if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + reg = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(reg < 0)) /* JIT bug - unexpected probe instruction. */ return -1; if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) @@ -678,6 +671,7 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, return -1; ex->fixup = delta; ex->type = EX_TYPE_BPF; + ex->data = reg; jit->excnt++; } return 0; From df5a95f481c4ef62199fb7e25263a035b4a337d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 18:36:46 +0100 Subject: [PATCH 291/380] s390: remove .fixup section The only user is gone. Remove the section. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vmlinux.lds.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 42c43521878f..2e526f11b91e 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -49,7 +49,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ From 6b1ca46ab3aacc905eaaae0f52cb3311f76d7bd6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 3 Mar 2022 16:38:34 +0100 Subject: [PATCH 292/380] s390/test_unwind: avoid build warning with W=1 Fix the following build warning with W=1 arch/s390/lib/test_unwind.c:172:21: warning: variable 'fops' set but not used [-Wunused-but-set-variable] struct ftrace_ops *fops; Reported-by: kernel test robot Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index b209014ce426..07ef89eeb85a 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -210,19 +210,18 @@ static noinline int test_unwind_ftraced_func(struct unwindme *u) static int test_unwind_ftrace(struct unwindme *u) { - struct ftrace_ops *fops; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_ops *fops; -#ifndef CONFIG_DYNAMIC_FTRACE - kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); - fops = NULL; /* used */ -#else fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); fops->func = test_unwind_ftrace_handler; fops->flags = FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RECURSION | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT; +#else + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); #endif ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); From 50b7c4688da9c3c9045c4db58e8a4ace58d28603 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Mar 2022 15:14:06 +0100 Subject: [PATCH 293/380] s390/asm-offsets: remove unused defines Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/asm-offsets.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index a496b08ea5d1..c253d3faf443 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -50,9 +50,7 @@ int main(void) BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); - OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); - OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ @@ -123,9 +121,6 @@ int main(void) OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); - OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); - OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); OFFSET(__LC_LAST_BREAK, lowcore, last_break); From 0ecf337fa2e4a6d1f35d7fbb9efb7ca0069a1683 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Mar 2022 15:15:33 +0100 Subject: [PATCH 294/380] s390/signal: fix typo in comments Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index cca142fbb516..eee1ad3e1b29 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -89,7 +89,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) _sigregs32 user_sregs; int i; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 307f5d99514d..5ff8d915ec7a 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -141,7 +141,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) From 52b739e2780c7a15e5be06f1691d92ba5f962f79 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 11:15:27 +0100 Subject: [PATCH 295/380] s390/traps: get rid of magic cast for program interruption code Add a proper union in lowcore to reflect architecture and get rid of a "magic" cast in order to read the full program interruption code. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 9 +++++++-- arch/s390/kernel/traps.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 1262f5003acf..28a2c6ba795e 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -34,8 +34,13 @@ struct lowcore { __u32 ext_int_code_addr; }; __u32 svc_int_code; /* 0x0088 */ - __u16 pgm_ilc; /* 0x008c */ - __u16 pgm_code; /* 0x008e */ + union { + struct { + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + }; + __u32 pgm_int_code; + }; __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ __u8 per_code; /* 0x0096 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 7f0fadd10d68..a3c94dfcbe16 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -297,7 +297,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) unsigned int trapnr; irqentry_state_t state; - regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_code = S390_lowcore.pgm_int_code; regs->int_parm_long = S390_lowcore.trans_exc_code; state = irqentry_enter(regs); From 998e78004fe4dd9219b039efe763e19d10d9a6f9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 10:59:05 +0100 Subject: [PATCH 296/380] s390/traps: get rid of magic cast for per code Add a proper union in lowcore to reflect architecture and get rid of a "magic" cast in order to read the full per code. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 9 +++++++-- arch/s390/kernel/traps.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 28a2c6ba795e..9829d6b44a20 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -43,8 +43,13 @@ struct lowcore { }; __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ - __u8 per_code; /* 0x0096 */ - __u8 per_atmid; /* 0x0097 */ + union { + struct { + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ + }; + __u16 per_code_combined; + }; __u64 per_address; /* 0x0098 */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a3c94dfcbe16..674c65019434 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -322,7 +322,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) set_thread_flag(TIF_PER_TRAP); ev->address = S390_lowcore.per_address; - ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->cause = S390_lowcore.per_code_combined; ev->paid = S390_lowcore.per_access_id; } else { /* PER event in kernel is kprobes */ From 7d8484c4151d94f3c1d257e65e0c1a2ec3c945e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 11:11:05 +0100 Subject: [PATCH 297/380] s390/irq: use assignment instead of cast Change struct ext_code to contain a union which allows to simply assign the int_code instead of using a cast. In order to keep the patch small the anonymous union is embedded within the existing struct instead of changing the struct ext_code to a union. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/irq.h | 9 +++++++-- arch/s390/kernel/irq.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 9f75d67b8c20..89902f754740 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -81,8 +81,13 @@ static __always_inline void inc_irq_stat(enum interruption_class irq) } struct ext_code { - unsigned short subcode; - unsigned short code; + union { + struct { + unsigned short subcode; + unsigned short code; + }; + unsigned int int_code; + }; }; typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index cb7099682340..3033f616e256 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -342,7 +342,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) struct ext_int_info *p; int index; - ext_code = *(struct ext_code *) ®s->int_code; + ext_code.int_code = regs->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) set_cpu_flag(CIF_NOHZ_DELAY); From 2268169c14e5f00377512932cf2b7bc78e4577ad Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Feb 2022 16:43:23 +0100 Subject: [PATCH 298/380] s390: remove unused expoline to BC instructions This reverts commit 6deaa3bbca80 ("s390: extend expoline to BC instructions"). Expolines to BC instructions were added to be utilized by commit de5cb6eb514e ("s390: use expoline thunks in the BPF JIT"). But corresponding code has been removed by commit e1cf4befa297 ("bpf, s390x: remove ld_abs/ld_ind"). And compiler does not generate such expolines as well. Compared to regular expolines, expolines to BC instructions contain displacement and all possible variations cannot be generated in advance, making kpatch support more complicated. So, remove those to avoid future usages. Acked-by: Heiko Carstens Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 57 ----------------------------- arch/s390/kernel/nospec-branch.c | 25 +++---------- 2 files changed, 5 insertions(+), 77 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 0033dcd663b1..c419c9e87265 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -35,18 +35,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm - .macro __THUNK_PROLOG_BC d0,r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1 - .endm - .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r2\()use_r\r1 .endm - .macro __THUNK_BC d0,r1,r2 - jg __s390_indirect_branch_\d0\()_\r2\()use_\r1 - .endm - .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 .endm @@ -89,23 +81,6 @@ _LC_BR_R1 = __LC_BR_R1 .endif .endm - .macro __DECODE_DRR expand,disp,reg,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \reg,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r2 - \expand \disp,\r1,\r2 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_DRR failed" - .endif - .endm - .macro __THUNK_EX_BR reg,ruse # Be very careful when adding instructions to this macro! # The ALTERNATIVE replacement code has a .+10 which targets @@ -126,42 +101,17 @@ _LC_BR_R1 = __LC_BR_R1 555: br \reg .endm - .macro __THUNK_EX_BC disp,reg,ruse -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,556f - j . -#else - larl \ruse,556f - ex 0,0(\ruse) - j . -#endif -556: b \disp(\reg) - .endm - .macro GEN_BR_THUNK reg,ruse=%r1 __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse __THUNK_EPILOG .endm - .macro GEN_B_THUNK disp,reg,ruse=%r1 - __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse - __THUNK_EX_BC \disp,\reg,\ruse - __THUNK_EPILOG - .endm - .macro BR_EX reg,ruse=%r1 557: __DECODE_RR __THUNK_BR,\reg,\ruse .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection - .endm - - .macro B_EX disp,reg,ruse=%r1 -558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse - .pushsection .s390_indirect_branches,"a",@progbits - .long 558b-. - .popsection .endm .macro BASR_EX rsave,rtarget,ruse=%r1 @@ -173,17 +123,10 @@ _LC_BR_R1 = __LC_BR_R1 #else .macro GEN_BR_THUNK reg,ruse=%r1 - .endm - - .macro GEN_B_THUNK disp,reg,ruse=%r1 .endm .macro BR_EX reg,ruse=%r1 br \reg - .endm - - .macro B_EX disp,reg,ruse=%r1 - b \disp(\reg) .endm .macro BASR_EX rsave,rtarget,ruse=%r1 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 60e6fec27bba..c302e0a7d38f 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -105,6 +105,7 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, branch, sizeof(branch)); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -125,34 +126,18 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - /* Check for unconditional branch 0x07f? or 0x47f???? */ - if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) continue; - - memcpy(insnbuf + 2, branch, sizeof(branch)); switch (type) { case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brcl to b, replace with bc + nopr */ - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brcl to br, replace with bcr + nop */ - } break; case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brasl to b, replace with bas + nopr */ - insnbuf[0] = 0x4d; - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brasl to br, replace with basr + nop */ - insnbuf[0] = 0x0d; - } break; } From 76222808fc253cb9ea66c3e0e8d1946933f25b70 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 4 Mar 2022 18:04:05 +0100 Subject: [PATCH 299/380] powerpc: Move C prototypes out of asm-prototypes.h We originally added asm-prototypes.h in commit 42f5b4cacd78 ("powerpc: Introduce asm-prototypes.h"). It's purpose was for prototypes of C functions that are only called from asm, in order to fix sparse warnings about missing prototypes. A few months later Nick added a different use case in commit 4efca4ed05cb ("kbuild: modversions for EXPORT_SYMBOL() for asm") for C prototypes for exported asm functions. This is basically the inverse of our original usage. Since then we've added various prototypes to asm-prototypes.h for both reasons, meaning we now need to unstitch it all. Dispatch prototypes of C functions into relevant headers and keep only the prototypes for functions defined in assembly. For the time being, leave prom_init() there because moving it into asm/prom.h or asm/setup.h conflicts with drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.o This will be fixed later by untaggling asm/pci.h and asm/prom.h or by renaming the function in shadowrom.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62d46904eca74042097acf4cb12c175e3067f3d1.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 49 ------------------- arch/powerpc/include/asm/ftrace.h | 3 ++ arch/powerpc/include/asm/hvcall.h | 5 ++ arch/powerpc/include/asm/interrupt.h | 11 +++++ arch/powerpc/include/asm/kexec.h | 2 + arch/powerpc/include/asm/processor.h | 8 +++ arch/powerpc/include/asm/setup.h | 7 +++ arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/include/asm/syscalls.h | 4 ++ arch/powerpc/kernel/early_32.c | 1 - arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/mce.c | 1 - arch/powerpc/kernel/ptrace/ptrace.c | 1 - arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/kernel/smp.c | 1 - arch/powerpc/kernel/syscalls.c | 1 - arch/powerpc/kernel/tau_6xx.c | 1 - arch/powerpc/kernel/time.c | 1 - arch/powerpc/kernel/trace/ftrace.c | 1 - arch/powerpc/kexec/core_64.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/kvm/book3s_hv_rm_xive.c | 1 - arch/powerpc/lib/vmx-helper.c | 1 - arch/powerpc/mm/book3s64/slb.c | 1 - arch/powerpc/mm/fault.c | 1 - arch/powerpc/platforms/powernv/idle.c | 1 - .../platforms/powernv/opal-tracepoints.c | 1 - arch/powerpc/platforms/pseries/lpar.c | 1 - 29 files changed, 43 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index eede369ba94f..d995c65d18ab 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -19,19 +19,6 @@ #include -/* SMP */ -extern struct task_struct *secondary_current; -void start_secondary(void *unused); - -/* kexec */ -struct kimage; -void kexec_copy_flush(struct kimage *image); - -/* pseries hcall tracing */ -extern struct static_key hcall_tracepoint_key; -void __trace_hcall_entry(unsigned long opcode, unsigned long *args); -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); - /* Ultravisor */ #if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM) long ucall_norets(unsigned long opcode, ...); @@ -47,43 +34,12 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* VMX copying */ -int enter_vmx_usercopy(void); -int exit_vmx_usercopy(void); -int enter_vmx_ops(void); -void *exit_vmx_ops(void *dest); - -/* signals, syscalls and interrupts */ -#ifdef CONFIG_PPC32 -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, - struct __kernel_old_timeval __user *tvp); -unsigned long __init early_init(unsigned long dt_ptr); -void __init machine_init(u64 dt_ptr); -#endif -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); -#ifdef CONFIG_PPC64 -unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); -unsigned long interrupt_exit_user_restart(struct pt_regs *regs); -unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); -#endif - -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, unsigned long r6, unsigned long r7, unsigned long kbase); -/* setup */ -void __init early_setup(unsigned long dt_ptr); -void early_setup_secondary(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); @@ -94,11 +50,6 @@ extern int __ucmpdi2(u64, u64); /* tracing */ void _mcount(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long sp); - -void pnv_power9_force_smt4_catch(void); -void pnv_power9_force_smt4_release(void); /* Transaction memory related */ void tm_enable(void); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ff034ae4e472..d83758acd1c7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -19,6 +19,9 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp); + struct dyn_arch_ftrace { struct module *mod; }; diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 48f510ba9f4a..d92a20a85395 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -501,6 +501,11 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +/* pseries hcall tracing */ +extern struct static_key hcall_tracepoint_key; +void __trace_hcall_entry(unsigned long opcode, unsigned long *args); +void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); + struct hvcall_mpp_data { unsigned long entitled_mem; unsigned long mapped_mem; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index f3b2c93a5db1..f964ef5c57d8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -636,6 +636,17 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 8ebdd23d987c..2aefe14e1442 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -96,6 +96,8 @@ static inline bool kdump_in_progress(void) void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; +void kexec_copy_flush(struct kimage *image); + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2c8686d9e964..39c25021030f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -411,6 +411,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); +void pnv_power9_force_smt4_catch(void); +void pnv_power9_force_smt4_release(void); extern int fix_alignment(struct pt_regs *); @@ -427,6 +429,12 @@ extern int fix_alignment(struct pt_regs *); int do_mathemu(struct pt_regs *regs); +/* VMX copying */ +int enter_vmx_usercopy(void); +int exit_vmx_usercopy(void); +int enter_vmx_ops(void); +void *exit_vmx_ops(void *dest); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..049ca26893e6 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -76,6 +76,13 @@ static inline void setup_spectre_v2(void) {} #endif void __init do_btb_flush_fixups(void); +#ifdef CONFIG_PPC32 +unsigned long __init early_init(unsigned long dt_ptr); +void __init machine_init(u64 dt_ptr); +#endif +void __init early_setup(unsigned long dt_ptr); +void early_setup_secondary(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 007332a4a732..60ab739a5e3b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -60,6 +60,9 @@ struct smp_ops_t { #endif }; +extern struct task_struct *secondary_current; + +void start_secondary(void *unused); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 7ee66ae5444d..a2b13e55254f 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -18,6 +18,10 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, unsigned long fd, unsigned long pgoff); asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct __kernel_old_timeval __user *tvp); +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); #ifdef CONFIG_COMPAT unsigned long compat_sys_mmap2(unsigned long addr, size_t len, diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index ef2ad4945904..03f1135ef64f 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -8,7 +8,6 @@ #include #include #include -#include /* * We're called here very early in the boot. diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7cd6ce3ec423..784ea3289c84 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -5,7 +5,6 @@ #include #include /* for show_regs */ -#include #include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2cf31a97126c..752fb182eacb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 671d727d06f7..18173199b79d 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "setup.h" diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 1212a812a7ab..55742ef1f991 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -22,7 +22,6 @@ #include #include -#include #include #define CREATE_TRACE_POINTS diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..e547066a06aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,7 +67,6 @@ #include #include #include -#include #include "setup.h" diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7e30a6fe5adf..de0f6f09a5dd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 825931e400df..c4f5b4ce926f 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -35,7 +35,6 @@ #include #include #include -#include static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8e83d19fe8fa..828d0f4106d2 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "setup.h" diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 17598bba54eb..958e2929776f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -69,7 +69,6 @@ #include #include #include -#include #include /* powerpc clocksource/clockevent code */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index f21b8fbd418e..4ee04aacf9f1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 2d49dce129f2..6cc7793b8420 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7d6d91338c3f..7e52d0beee77 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index 6f18632e30e9..dd9880731bd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..f76a50291fd7 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include #include #include -#include int enter_vmx_usercopy(void) { diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 31f4cef3adac..81091b9587f6 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -9,7 +9,6 @@ * Copyright (C) 2002 Anton Blanchard , IBM */ -#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7ba6d3eff636..d53fed4eccbd 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,7 +35,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 9942289f379b..a6677a111aca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index f16a43540e30..91b36541b9e5 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,6 @@ #include #include #include -#include #ifdef CONFIG_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f8899d506ea4..760581c5752f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "pseries.h" From 1a76e520ee1831a81dabf8a9a58c6453f700026e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 4 Mar 2022 17:12:22 +1100 Subject: [PATCH 300/380] powerpc/64e: Tie PPC_BOOK3E_64 to PPC_FSL_BOOK3E Since the IBM A2 CPU support was removed, see commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated pieces"), the only 64-bit Book3E CPUs we support are Freescale (NXP) ones. However our Kconfig still allows configurating a kernel that has 64-bit Book3E support, but no Freescale CPU support enabled. Such a kernel would never boot, it doesn't know about any CPUs. It also causes build errors, as reported by lkp, because PPC_BARRIER_NOSPEC is not enabled in such a configuration: powerpc64-linux-ld: arch/powerpc/net/bpf_jit_comp64.o:(.toc+0x0): undefined reference to `powerpc_security_features' To fix this, force PPC_FSL_BOOK3E to be selected whenever we are building a 64-bit Book3E kernel. Reported-by: kernel test robot Reported-by: Naveen N. Rao Suggested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220304061222.2478720-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87bc1929ee5a..e2e1fec91c6e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -107,6 +107,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" + select PPC_FSL_BOOK3E select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -295,7 +296,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - select FSL_EMB_PERFMON + imply FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP From d601fd24e6964967f115f036a840f4f28488f63f Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 2 Mar 2022 10:19:59 +0800 Subject: [PATCH 301/380] powerpc/secvar: fix refcount leak in format_show() Refcount leak will happen when format_show returns failure in multiple cases. Unified management of of_node_put can fix this problem. Signed-off-by: Hangyu Hua Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220302021959.10959-1-hbh25y@gmail.com --- arch/powerpc/kernel/secvar-sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083..1ee4640a2641 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; From 5986f6b6575ac830ede9648cfb64353c58067a9f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 8 Mar 2022 18:09:28 +0800 Subject: [PATCH 302/380] powerpc/spufs: Fix build warning when CONFIG_PROC_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/powerpc/platforms/cell/spufs/sched.c:1055:12: warning: ‘show_spu_loadavg’ defined but not used [-Wunused-function] static int show_spu_loadavg(struct seq_file *s, void *private) ^~~~~~~~~~~~~~~~ Move it into #ifdef block to fix this, also remove unneeded semicolon. Signed-off-by: YueHaibing Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220308100928.23540-1-yuehaibing@huawei.com --- arch/powerpc/platforms/cell/spufs/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d058f6233e66..99bd027a7f7c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1052,6 +1052,7 @@ void spuctx_switch_state(struct spu_context *ctx, } } +#ifdef CONFIG_PROC_FS static int show_spu_loadavg(struct seq_file *s, void *private) { int a, b, c; @@ -1073,7 +1074,8 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -}; +} +#endif int __init spu_sched_init(void) { From 6b3a3e12f8e6eea47428bb39aaf58832b50bb379 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Tue, 8 Mar 2022 10:14:14 +1100 Subject: [PATCH 303/380] powerpc: declare unmodified attribute_group usages const Inspired by (bd75b4ef4977: Constify static attribute_group structs), accepted by linux-next, reported: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20220210202805.7750-4-rikard.falkeborn@gmail.com/ Nearly all singletons of type struct attribute_group are never modified, and so are candidates for being const. Declare them as const. Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220307231414.86560-1-rmclure@linux.ibm.com --- arch/powerpc/include/asm/spu.h | 4 ++-- arch/powerpc/perf/generic-compat-pmu.c | 4 ++-- arch/powerpc/perf/hv-24x7.c | 6 +++--- arch/powerpc/perf/hv-gpci.c | 8 ++++---- arch/powerpc/perf/imc-pmu.c | 6 +++--- arch/powerpc/perf/isa207-common.c | 2 +- arch/powerpc/perf/power10-pmu.c | 6 +++--- arch/powerpc/perf/power7-pmu.c | 4 ++-- arch/powerpc/perf/power8-pmu.c | 4 ++-- arch/powerpc/perf/power9-pmu.c | 6 +++--- arch/powerpc/platforms/cell/cbe_thermal.c | 2 +- arch/powerpc/platforms/cell/spu_base.c | 4 ++-- arch/powerpc/platforms/powernv/opal-core.c | 2 +- arch/powerpc/platforms/powernv/opal-dump.c | 2 +- arch/powerpc/platforms/powernv/opal-flash.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- arch/powerpc/platforms/pseries/power.c | 2 +- 17 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 8a2d11ba0dae..96ad4510c895 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -249,8 +249,8 @@ void unregister_spu_syscalls(struct spufs_calls *calls); int spu_add_dev_attr(struct device_attribute *attr); void spu_remove_dev_attr(struct device_attribute *attr); -int spu_add_dev_attr_group(struct attribute_group *attrs); -void spu_remove_dev_attr_group(struct attribute_group *attrs); +int spu_add_dev_attr_group(const struct attribute_group *attrs); +void spu_remove_dev_attr_group(const struct attribute_group *attrs); extern void notify_spus_active(void); extern void do_notify_spus_active(void); diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index b6e25f75109d..f3db88aee4dd 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -130,7 +130,7 @@ static struct attribute *generic_compat_events_attr[] = { NULL }; -static struct attribute_group generic_compat_pmu_events_group = { +static const struct attribute_group generic_compat_pmu_events_group = { .name = "events", .attrs = generic_compat_events_attr, }; @@ -146,7 +146,7 @@ static struct attribute *generic_compat_pmu_format_attr[] = { NULL, }; -static struct attribute_group generic_compat_pmu_format_group = { +static const struct attribute_group generic_compat_pmu_format_group = { .name = "format", .attrs = generic_compat_pmu_format_attr, }; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1e8aa934e37e..12c1777187fc 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -204,7 +204,7 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; @@ -1148,7 +1148,7 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; @@ -1162,7 +1162,7 @@ static struct attribute *if_attrs[] = { NULL, }; -static struct attribute_group if_group = { +static const struct attribute_group if_group = { .name = "interface", .bin_attrs = if_bin_attrs, .attrs = if_attrs, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c756228a081f..5eb60ed5b5e8 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -65,12 +65,12 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; -static struct attribute_group event_group = { +static const struct attribute_group event_group = { .name = "events", .attrs = hv_gpci_event_attrs, }; @@ -126,11 +126,11 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; -static struct attribute_group interface_group = { +static const struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e7583fbcc8fa..526d4b767534 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -71,7 +71,7 @@ static struct attribute *imc_format_attrs[] = { NULL, }; -static struct attribute_group imc_format_group = { +static const struct attribute_group imc_format_group = { .name = "format", .attrs = imc_format_attrs, }; @@ -90,7 +90,7 @@ static struct attribute *trace_imc_format_attrs[] = { NULL, }; -static struct attribute_group trace_imc_format_group = { +static const struct attribute_group trace_imc_format_group = { .name = "format", .attrs = trace_imc_format_attrs, }; @@ -125,7 +125,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group imc_pmu_cpumask_attr_group = { +static const struct attribute_group imc_pmu_cpumask_attr_group = { .attrs = imc_pmu_cpumask_attrs, }; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4037ea652522..a74d382ecbb7 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -37,7 +37,7 @@ static struct attribute *isa207_pmu_format_attr[] = { NULL, }; -struct attribute_group isa207_pmu_format_group = { +const struct attribute_group isa207_pmu_format_group = { .name = "format", .attrs = isa207_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 0975ad0b42c4..d3398100a60f 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -200,12 +200,12 @@ static struct attribute *power10_events_attr[] = { NULL }; -static struct attribute_group power10_pmu_events_group_dd1 = { +static const struct attribute_group power10_pmu_events_group_dd1 = { .name = "events", .attrs = power10_events_attr_dd1, }; -static struct attribute_group power10_pmu_events_group = { +static const struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power10_pmu_format_attr[] = { NULL, }; -static struct attribute_group power10_pmu_format_group = { +static const struct attribute_group power10_pmu_format_group = { .name = "format", .attrs = power10_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 99b5ba314ea7..a74211410b8d 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -405,7 +405,7 @@ static struct attribute *power7_events_attr[] = { NULL }; -static struct attribute_group power7_pmu_events_group = { +static const struct attribute_group power7_pmu_events_group = { .name = "events", .attrs = power7_events_attr, }; @@ -417,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = { NULL, }; -static struct attribute_group power7_pmu_format_group = { +static const struct attribute_group power7_pmu_format_group = { .name = "format", .attrs = power7_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f21194b5604a..e37b1e714d2b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -92,7 +92,7 @@ enum { */ /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -182,7 +182,7 @@ static struct attribute *power8_events_attr[] = { NULL }; -static struct attribute_group power8_pmu_events_group = { +static const struct attribute_group power8_pmu_events_group = { .name = "events", .attrs = power8_events_attr, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 4b7c17e36100..c9eb5232e68b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -96,7 +96,7 @@ extern u64 PERF_REG_EXTENDED_MASK; #define PVR_POWER9_CUMULUS 0x00002000 /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; int p9_dd21_bl_ev[] = { PM_MRK_ST_DONE_L2, @@ -217,7 +217,7 @@ static struct attribute *power9_events_attr[] = { NULL }; -static struct attribute_group power9_pmu_events_group = { +static const struct attribute_group power9_pmu_events_group = { .name = "events", .attrs = power9_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power9_pmu_format_attr[] = { NULL, }; -static struct attribute_group power9_pmu_format_group = { +static const struct attribute_group power9_pmu_format_group = { .name = "format", .attrs = power9_pmu_format_attr, }; diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2ece77f49bc3..abb5e527b4db 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -255,7 +255,7 @@ static struct attribute *spu_attributes[] = { NULL, }; -static struct attribute_group spu_attribute_group = { +static const struct attribute_group spu_attribute_group = { .name = "thermal", .attrs = spu_attributes, }; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 83cea9e7ee72..2eecba3345c3 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -490,7 +490,7 @@ int spu_add_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_add_dev_attr); -int spu_add_dev_attr_group(struct attribute_group *attrs) +int spu_add_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; int rc = 0; @@ -529,7 +529,7 @@ void spu_remove_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_remove_dev_attr_group(struct attribute_group *attrs) +void spu_remove_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0331f1973f0e..b97bc179f65a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -603,7 +603,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { }; -static struct attribute_group mpipl_group = { +static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, .bin_attrs = mpipl_bin_attr, }; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 410ed5b9de29..16c5860f1372 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -150,7 +150,7 @@ static struct attribute *initiate_attrs[] = { NULL, }; -static struct attribute_group initiate_attr_group = { +static const struct attribute_group initiate_attr_group = { .attrs = initiate_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 7e7d38b17420..18481a8c52fa 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -512,7 +512,7 @@ static struct attribute *image_op_attrs[] = { NULL /* need to NULL terminate the list of attributes */ }; -static struct attribute_group image_op_attr_group = { +static const struct attribute_group image_op_attr_group = { .attrs = image_op_attrs, }; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 20aafd387840..1238b94b3cc1 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -1039,7 +1039,7 @@ static struct attribute *papr_nd_attributes[] = { NULL, }; -static struct attribute_group papr_nd_attribute_group = { +static const struct attribute_group papr_nd_attribute_group = { .name = "papr", .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index ee343ec6ab94..3676cb297767 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -51,7 +51,7 @@ static struct attribute *g[] = { NULL, }; -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = g, }; From 434d2defa93b59b8e845b49bd6882cefd009f516 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 1 Mar 2022 14:26:23 +0530 Subject: [PATCH 304/380] iommu/amd: Call memunmap in error path Unmap old_devtb in error path. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220301085626.87680-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6b5af568f3d5..f7e7d208063c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -980,6 +980,7 @@ static bool copy_device_table(void) get_order(dev_table_size)); if (old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); + memunmap(old_devtb); return false; } @@ -1010,6 +1011,7 @@ static bool copy_device_table(void) if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || (int_tab_len != DTE_INTTABLEN)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); + memunmap(old_devtb); return false; } From 3bf01426a574b1fe0ec21aabb7148aff777e623f Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 1 Mar 2022 14:26:24 +0530 Subject: [PATCH 305/380] iommu/amd: Clean up function declarations Remove unused declarations and add static keyword as needed. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220301085626.87680-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 4 ---- drivers/iommu/amd/init.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 416815a525d6..94d33626d692 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -14,10 +14,6 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); -extern int amd_iommu_init_devices(void); -extern void amd_iommu_uninit_devices(void); -extern void amd_iommu_init_notifier(void); extern int amd_iommu_init_api(void); #ifdef CONFIG_AMD_IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f7e7d208063c..07d6dd302989 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -661,7 +661,7 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) * This function resets the command buffer if the IOMMU stopped fetching * commands from it. */ -void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) { iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); From c1d5b57a1ebb97beb34ab8fe6347fb2ba65674b8 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 1 Mar 2022 14:26:25 +0530 Subject: [PATCH 306/380] iommu/amd: Remove unused struct fault.devid This variable has not been used since it was introduced. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220301085626.87680-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 58da08cc3d01..2daf37c21b85 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -71,7 +71,6 @@ struct fault { struct pasid_state *state; struct mm_struct *mm; u64 address; - u16 devid; u32 pasid; u16 tag; u16 finish; From 9f968fc70d85558b37cd721a4c08143f69ff9990 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 1 Mar 2022 14:26:26 +0530 Subject: [PATCH 307/380] iommu/amd: Improve amd_iommu_v2_exit() During module exit, the current logic loops through all possible 16-bit device ID space to search for existing devices and clean up device state structures. This can be simplified by looping through the device state list. Also, refactor various clean up logic into free_device_state() for better reusability. Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220301085626.87680-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 2daf37c21b85..c72969ac4956 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -24,7 +24,6 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Joerg Roedel "); -#define MAX_DEVICES 0x10000 #define PRI_QUEUE_SIZE 512 struct pri_queue { @@ -124,6 +123,15 @@ static void free_device_state(struct device_state *dev_state) { struct iommu_group *group; + /* Get rid of any remaining pasid states */ + free_pasid_states(dev_state); + + /* + * Wait until the last reference is dropped before freeing + * the device state. + */ + wait_event(dev_state->wq, !atomic_read(&dev_state->count)); + /* * First detach device from domain - No more PRI requests will arrive * from that device after it is unbound from the IOMMUv2 domain. @@ -849,15 +857,7 @@ void amd_iommu_free_device(struct pci_dev *pdev) spin_unlock_irqrestore(&state_lock, flags); - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - put_device_state(dev_state); - /* - * Wait until the last reference is dropped before freeing - * the device state. - */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); free_device_state(dev_state); } EXPORT_SYMBOL(amd_iommu_free_device); @@ -954,8 +954,8 @@ out: static void __exit amd_iommu_v2_exit(void) { - struct device_state *dev_state; - int i; + struct device_state *dev_state, *next; + unsigned long flags; if (!amd_iommu_v2_supported()) return; @@ -968,18 +968,18 @@ static void __exit amd_iommu_v2_exit(void) * The loop below might call flush_workqueue(), so call * destroy_workqueue() after it */ - for (i = 0; i < MAX_DEVICES; ++i) { - dev_state = get_device_state(i); - - if (dev_state == NULL) - continue; + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry_safe(dev_state, next, &state_list, list) { WARN_ON_ONCE(1); put_device_state(dev_state); - amd_iommu_free_device(dev_state->pdev); + list_del(&dev_state->list); + free_device_state(dev_state); } + spin_unlock_irqrestore(&state_lock, flags); + destroy_workqueue(iommu_wq); } From 0b0057cc4193c7cd9c0829a440e4901b29ce4ff8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Feb 2022 09:51:32 +0100 Subject: [PATCH 308/380] powerpc/bitops: Force inlining of fls() Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to the following functions being copied several times in vmlinux: 31 times __ilog2_u32() 34 times fls() Disassembly follows: c00f476c : c00f476c: 7c 63 00 34 cntlzw r3,r3 c00f4770: 20 63 00 20 subfic r3,r3,32 c00f4774: 4e 80 00 20 blr c00f4778 <__ilog2_u32>: c00f4778: 94 21 ff f0 stwu r1,-16(r1) c00f477c: 7c 08 02 a6 mflr r0 c00f4780: 90 01 00 14 stw r0,20(r1) c00f4784: 4b ff ff e9 bl c00f476c c00f4788: 80 01 00 14 lwz r0,20(r1) c00f478c: 38 63 ff ff addi r3,r3,-1 c00f4790: 7c 08 03 a6 mtlr r0 c00f4794: 38 21 00 10 addi r1,r1,16 c00f4798: 4e 80 00 20 blr When forcing inlining of fls(), we get c0008b80 <__ilog2_u32>: c0008b80: 7c 63 00 34 cntlzw r3,r3 c0008b84: 20 63 00 1f subfic r3,r3,31 c0008b88: 4e 80 00 20 blr vmlinux size gets reduced by 1 kbyte with that change. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/adc9c9d6378f6b5008246ca717993d7870188efb.1644569473.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ea5d27dda8cf..344fba3b16eb 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -287,7 +287,7 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) * fls: find last (most-significant) bit set. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int fls(unsigned int x) +static __always_inline int fls(unsigned int x) { int lz; @@ -305,7 +305,7 @@ static inline int fls(unsigned int x) * 32-bit fls calls. */ #ifdef CONFIG_PPC64 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { int lz; From 792993919349fefba20f58ae4843c80e8b01f518 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Feb 2022 15:16:51 +0100 Subject: [PATCH 309/380] powerpc/64: Force inlining of prevent_user_access() and set_kuap() A ppc64_defconfig build exhibits about 10 copied of prevent_user_access(). It also have one copy of set_kuap(). c000000000017340 <.prevent_user_access.constprop.0>: c00000000001a038: 4b ff d3 09 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001aabc: 4b ff c8 85 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001ab38: 4b ff c8 09 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001ade0: 4b ff c5 61 bl c000000000017340 <.prevent_user_access.constprop.0> c000000000039b90 <.prevent_user_access.constprop.0>: c00000000003ac08: 4b ff ef 89 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003b9d0: 4b ff e1 c1 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003ba54: 4b ff e1 3d bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003bbfc: 4b ff df 95 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000015dde0 <.prevent_user_access.constprop.0>: c0000000001612c0: 4b ff cb 21 bl c00000000015dde0 <.prevent_user_access.constprop.0> c000000000161b54: 4b ff c2 8d bl c00000000015dde0 <.prevent_user_access.constprop.0> c000000000188cf0 <.prevent_user_access.constprop.0>: c00000000018d658: 4b ff b6 99 bl c000000000188cf0 <.prevent_user_access.constprop.0> c00000000030fe20 <.prevent_user_access.constprop.0>: c0000000003123d4: 4b ff da 4d bl c00000000030fe20 <.prevent_user_access.constprop.0> c000000000313970: 4b ff c4 b1 bl c00000000030fe20 <.prevent_user_access.constprop.0> c0000000005e6bd0 <.prevent_user_access.constprop.0>: c0000000005e7d8c: 4b ff ee 45 bl c0000000005e6bd0 <.prevent_user_access.constprop.0> c0000000007bcae0 <.prevent_user_access.constprop.0>: c0000000007bda10: 4b ff f0 d1 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007bda54: 4b ff f0 8d bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007bdd28: 4b ff ed b9 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007c0390: 4b ff c7 51 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c00000000094e4f0 <.prevent_user_access.constprop.0>: c000000000950e40: 4b ff d6 b1 bl c00000000094e4f0 <.prevent_user_access.constprop.0> c00000000097d2d0 <.prevent_user_access.constprop.0>: c0000000009813fc: 4b ff be d5 bl c00000000097d2d0 <.prevent_user_access.constprop.0> c000000000acd540 <.prevent_user_access.constprop.0>: c000000000ad1d60: 4b ff b7 e1 bl c000000000acd540 <.prevent_user_access.constprop.0> c000000000e5d680 <.prevent_user_access.constprop.0>: c000000000e64b60: 4b ff 8b 21 bl c000000000e5d680 <.prevent_user_access.constprop.0> c000000000e64b6c: 4b ff 8b 15 bl c000000000e5d680 <.prevent_user_access.constprop.0> c000000000e64c38: 4b ff 8a 49 bl c000000000e5d680 <.prevent_user_access.constprop.0> When building signal_64.c with -Winline the following messages appear: ./arch/powerpc/include/asm/book3s/64/kup.h:331:20: error: inlining failed in call to 'set_kuap': call is unlikely and code size would grow [-Werror=inline] ./arch/powerpc/include/asm/book3s/64/kup.h:401:20: error: inlining failed in call to 'prevent_user_access.constprop': call is unlikely and code size would grow [-Werror=inline] Those functions are used on hot pathes and have been expected to be inlined at all time. Force them inline. This patch reduces the kernel text size by 700 bytes, confirming that not inlining those functions is not worth it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eff9b2b211957fa2e8707e46f31674097fd563a3.1644588972.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/kup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 69fcf63eec94..54cf46808157 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -328,7 +328,7 @@ static inline unsigned long get_kuap(void) return mfspr(SPRN_AMR); } -static inline void set_kuap(unsigned long value) +static __always_inline void set_kuap(unsigned long value) { if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; @@ -398,7 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) From 9f5196065eeb96fee1a15f2eae31fe1fc7623ade Mon Sep 17 00:00:00 2001 From: jing yangyang Date: Thu, 19 Aug 2021 19:49:01 -0700 Subject: [PATCH 310/380] powerpc/ps3: remove unneeded semicolons Eliminate the following coccicheck warnings: ./arch/powerpc/platforms/ps3/system-bus.c:606:2-3: Unneeded semicolon ./arch/powerpc/platforms/ps3/system-bus.c:765:2-3: Unneeded semicolon Reported-by: Zeal Robot Signed-off-by: jing yangyang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/01647102607ce9640c9f27786d976109a3c4ea7e.1629197153.git.jing.yangyang@zte.com.cn --- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c8b50fec56bf..b637bf292047 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, default: /* not happned */ BUG(); - }; + } result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, &bus_addr, iopte_flag); @@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) break; default: BUG(); - }; + } dev->core.of_node = NULL; set_dev_node(&dev->core, 0); From 3fd46e551f67f4303c3276a0d6cd20baf2d192c4 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 23 Feb 2022 15:02:23 +0800 Subject: [PATCH 311/380] powerpc: 8xx: fix a return value error in mpc8xx_pic_init mpc8xx_pic_init() should return -ENOMEM instead of 0 when irq_domain_add_linear() return NULL. This cause mpc8xx_pics_init to continue executing even if mpc8xx_pic_host is NULL. Fixes: cc76404feaed ("powerpc/8xx: Fix possible device node reference leak") Signed-off-by: Hangyu Hua Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220223070223.26845-1-hbh25y@gmail.com --- arch/powerpc/platforms/8xx/pic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d6..04a6abf14c29 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; From d799769188529abc6cbf035a10087a51f7832b6b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 9 Mar 2022 17:18:22 +1100 Subject: [PATCH 312/380] powerpc/64: Add UADDR64 relocation support When ld detects unaligned relocations, it emits R_PPC64_UADDR64 relocations instead of R_PPC64_RELATIVE. Currently R_PPC64_UADDR64 are detected by arch/powerpc/tools/relocs_check.sh and expected not to work. Below is a simple chunk to trigger this behaviour (this disables optimization for the demonstration purposes only, this also happens with -O1/-O2 when CONFIG_PRINTK_INDEX=y, for example): \#pragma GCC push_options \#pragma GCC optimize ("O0") struct entry { const char *file; int line; } __attribute__((packed)); static const struct entry e1 = { .file = __FILE__, .line = __LINE__ }; static const struct entry e2 = { .file = __FILE__, .line = __LINE__ }; ... prom_printf("e1=%s %lx %lx\n", e1.file, (unsigned long) e1.file, mfmsr()); prom_printf("e2=%s %lx\n", e2.file, (unsigned long) e2.file); \#pragma GCC pop_options This adds support for UADDR64 for 64bit. This reuses __dynamic_symtab from the 32bit code which supports more relocation types already. Because RELACOUNT includes only R_PPC64_RELATIVE, this replaces it with RELASZ which is the size of all relocation records. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220309061822.168173-1-aik@ozlabs.ru --- arch/powerpc/kernel/reloc_64.S | 67 +++++++++++++++++++++--------- arch/powerpc/kernel/vmlinux.lds.S | 2 - arch/powerpc/tools/relocs_check.sh | 7 +--- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 02d4719bf43a..232e4549defe 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -8,8 +8,10 @@ #include RELA = 7 -RELACOUNT = 0x6ffffff9 +RELASZ = 8 +RELAENT = 9 R_PPC64_RELATIVE = 22 +R_PPC64_UADDR64 = 43 /* * r3 = desired final address of kernel @@ -25,29 +27,38 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ + ld r13,(p_sym - 0b)(r12) + add r13,r13,r12 /* r13 has runtime addr of .dynsym */ /* - * Scan the dynamic section for the RELA and RELACOUNT entries. + * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. */ li r7,0 li r8,0 -1: ld r6,0(r11) /* get tag */ +.Ltags: + ld r6,0(r11) /* get tag */ cmpdi r6,0 - beq 4f /* end of list */ + beq .Lend_of_list /* end of list */ cmpdi r6,RELA bne 2f ld r7,8(r11) /* get RELA pointer in r7 */ - b 3f -2: addis r6,r6,(-RELACOUNT)@ha - cmpdi r6,RELACOUNT@l + b 4f +2: cmpdi r6,RELASZ bne 3f - ld r8,8(r11) /* get RELACOUNT value in r8 */ -3: addi r11,r11,16 - b 1b -4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + ld r8,8(r11) /* get RELASZ value in r8 */ + b 4f +3: cmpdi r6,RELAENT + bne 4f + ld r12,8(r11) /* get RELAENT value in r12 */ +4: addi r11,r11,16 + b .Ltags +.Lend_of_list: + cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */ cmpdi cr1,r8,0 - beq 6f - beq cr1,6f + beq .Lout + beq cr1,.Lout + cmpdi r12,0 + beq .Lout /* * Work out linktime address of _stext and hence the @@ -62,23 +73,39 @@ _GLOBAL(relocate) /* * Run through the list of relocations and process the - * R_PPC64_RELATIVE ones. + * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones. */ + divd r8,r8,r12 /* RELASZ / RELAENT */ mtctr r8 -5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */ +.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ cmpdi r0,R_PPC64_RELATIVE - bne 6f + bne .Luaddr64 ld r6,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ + b .Lstore +.Luaddr64: + srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + clrldi r0,r0,32 + cmpdi r0,R_PPC64_UADDR64 + bne .Lnext + ld r6,0(r9) + ld r0,16(r9) + mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ + add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ + ld r14,8(r14) + add r0,r0,r14 +.Lstore: add r0,r0,r3 stdx r0,r7,r6 - addi r9,r9,24 - bdnz 5b - -6: blr +.Lnext: + add r9,r9,r12 + bdnz .Lrels +.Lout: + blr .balign 8 p_dyn: .8byte __dynamic_start - 0b p_rela: .8byte __rela_dyn_start - 0b +p_sym: .8byte __dynamic_symtab - 0b p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2bcca818136a..fe22d940412f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,9 +281,7 @@ SECTIONS . = ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_PPC32 __dynamic_symtab = .; -#endif *(.dynsym) } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 014e00e74d2b..63792af00417 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -39,6 +39,7 @@ $objdump -R "$vmlinux" | # R_PPC_NONE grep -F -w -v 'R_PPC64_RELATIVE R_PPC64_NONE +R_PPC64_UADDR64 R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA @@ -54,9 +55,3 @@ fi num_bad=$(echo "$bad_relocs" | wc -l) echo "WARNING: $num_bad bad relocations" echo "$bad_relocs" - -# If we see this type of relocation it's an idication that -# we /may/ be using an old version of binutils. -if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then - echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" -fi From 8c9ab55c0fbdc76cb876140c2dad75a610bb23ef Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 28 Dec 2021 01:20:20 -0800 Subject: [PATCH 313/380] xtensa: add missing XCHAL_HAVE_WINDOWED check Add missing preprocessor conditions to secondary reset vector code. Fixes: 09af39f649da ("xtensa: use register window specific opcodes only when present") Signed-off-by: Max Filippov --- arch/xtensa/kernel/mxhead.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/xtensa/kernel/mxhead.S b/arch/xtensa/kernel/mxhead.S index 9f3843742726..b702c0908b1f 100644 --- a/arch/xtensa/kernel/mxhead.S +++ b/arch/xtensa/kernel/mxhead.S @@ -37,11 +37,13 @@ _SetupOCD: * xt-gdb to single step via DEBUG exceptions received directly * by ocd. */ +#if XCHAL_HAVE_WINDOWED movi a1, 1 movi a0, 0 wsr a1, windowstart wsr a0, windowbase rsync +#endif movi a1, LOCKLEVEL wsr a1, ps From dbf4ed894c0fd85d421f7b3b9758ce95398d2925 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 18 Oct 2021 04:29:27 -0700 Subject: [PATCH 314/380] xtensa: add helpers for division, remainder and shifts Don't rely on libgcc presence, build own versions of the helpers with correct ABI. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/asmmacro.h | 34 ++++++++ arch/xtensa/lib/Makefile | 2 + arch/xtensa/lib/ashldi3.S | 28 ++++++ arch/xtensa/lib/ashrdi3.S | 28 ++++++ arch/xtensa/lib/divsi3.S | 74 ++++++++++++++++ arch/xtensa/lib/lshrdi3.S | 28 ++++++ arch/xtensa/lib/modsi3.S | 87 +++++++++++++++++++ arch/xtensa/lib/mulsi3.S | 133 +++++++++++++++++++++++++++++ arch/xtensa/lib/udivsi3.S | 68 +++++++++++++++ arch/xtensa/lib/umodsi3.S | 57 +++++++++++++ 10 files changed, 539 insertions(+) create mode 100644 arch/xtensa/lib/ashldi3.S create mode 100644 arch/xtensa/lib/ashrdi3.S create mode 100644 arch/xtensa/lib/divsi3.S create mode 100644 arch/xtensa/lib/lshrdi3.S create mode 100644 arch/xtensa/lib/modsi3.S create mode 100644 arch/xtensa/lib/mulsi3.S create mode 100644 arch/xtensa/lib/udivsi3.S create mode 100644 arch/xtensa/lib/umodsi3.S diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 809c507d1825..3190157950fa 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -191,6 +191,40 @@ #endif .endm + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 +1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm + #define XTENSA_STACK_ALIGNMENT 16 #if defined(__XTENSA_WINDOWED_ABI__) diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile index 9437ca51f18a..5848c133f7ea 100644 --- a/arch/xtensa/lib/Makefile +++ b/arch/xtensa/lib/Makefile @@ -4,5 +4,7 @@ # lib-y += memcopy.o memset.o checksum.o \ + ashldi3.o ashrdi3.o lshrdi3.o \ + divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \ usercopy.o strncpy_user.o strnlen_user.o lib-$(CONFIG_PCI) += pci-auto.o diff --git a/arch/xtensa/lib/ashldi3.S b/arch/xtensa/lib/ashldi3.S new file mode 100644 index 000000000000..67fb0da9e432 --- /dev/null +++ b/arch/xtensa/lib/ashldi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__ashldi3) + + abi_entry_default + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + abi_ret_default + +.Llow_only: + sll uh, ul + movi ul, 0 + abi_ret_default + +ENDPROC(__ashldi3) diff --git a/arch/xtensa/lib/ashrdi3.S b/arch/xtensa/lib/ashrdi3.S new file mode 100644 index 000000000000..cbf052c512cc --- /dev/null +++ b/arch/xtensa/lib/ashrdi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__ashrdi3) + + abi_entry_default + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + abi_ret_default + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + abi_ret_default + +ENDPROC(__ashrdi3) diff --git a/arch/xtensa/lib/divsi3.S b/arch/xtensa/lib/divsi3.S new file mode 100644 index 000000000000..b044b4744a8b --- /dev/null +++ b/arch/xtensa/lib/divsi3.S @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__divsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ + abi_ret_default + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */ + abi_ret_default + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + abi_ret_default + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__divsi3) diff --git a/arch/xtensa/lib/lshrdi3.S b/arch/xtensa/lib/lshrdi3.S new file mode 100644 index 000000000000..129ef8d1725b --- /dev/null +++ b/arch/xtensa/lib/lshrdi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__lshrdi3) + + abi_entry_default + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + srl uh, uh + abi_ret_default + +.Lhigh_only: + srl ul, uh + movi uh, 0 + abi_ret_default + +ENDPROC(__lshrdi3) diff --git a/arch/xtensa/lib/modsi3.S b/arch/xtensa/lib/modsi3.S new file mode 100644 index 000000000000..d00e77181e20 --- /dev/null +++ b/arch/xtensa/lib/modsi3.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__modsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + abi_ret_default + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__modsi3) + +#if !XCHAL_HAVE_NSA + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .size __nsau_data, . - __nsau_data +#endif /* !XCHAL_HAVE_NSA */ diff --git a/arch/xtensa/lib/mulsi3.S b/arch/xtensa/lib/mulsi3.S new file mode 100644 index 000000000000..91a9d7c62f96 --- /dev/null +++ b/arch/xtensa/lib/mulsi3.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +ENTRY(__mulsi3) + + abi_entry_default + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + abi_ret_default +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. */ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + abi_ret_default + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + abi_ret_default + +ENDPROC(__mulsi3) diff --git a/arch/xtensa/lib/udivsi3.S b/arch/xtensa/lib/udivsi3.S new file mode 100644 index 000000000000..d2477e0786cf --- /dev/null +++ b/arch/xtensa/lib/udivsi3.S @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__udivsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + abi_ret_default + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + abi_ret_default + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + abi_ret_default + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__udivsi3) diff --git a/arch/xtensa/lib/umodsi3.S b/arch/xtensa/lib/umodsi3.S new file mode 100644 index 000000000000..5f031bfa0354 --- /dev/null +++ b/arch/xtensa/lib/umodsi3.S @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__umodsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + abi_ret_default + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__umodsi3) From 19c5699f9affbd6824f4db0e19fcdc9d609a5e6b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 18 Oct 2021 05:38:06 -0700 Subject: [PATCH 315/380] xtensa: don't link with libgcc libgcc may be absent or may have different ABI than the kernel. Don't link with it. Drop declarations and export for helpers that are not implemented. Use generic versions of DI mode multiplication and comparisons. Drop register window spilling helper as it should never be used by the compiler-generated code. Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 3 +++ arch/xtensa/Makefile | 6 +----- arch/xtensa/kernel/xtensa_ksyms.c | 14 -------------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8ac599aa6d99..b324f6743073 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -20,6 +20,9 @@ config XTENSA select DMA_REMAP if MMU select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW + select GENERIC_LIB_CMPDI2 + select GENERIC_LIB_MULDI3 + select GENERIC_LIB_UCMPDI2 select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select HAVE_ARCH_AUDITSYSCALL diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index ee2769519eaf..725cd4cd8b30 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -51,13 +51,9 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Find libgcc.a - -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - head-y := arch/xtensa/kernel/head.o -libs-y += arch/xtensa/lib/ $(LIBGCC) +libs-y += arch/xtensa/lib/ boot := arch/xtensa/boot diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d79edbb98d2a..b0bc8897c924 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -59,32 +59,18 @@ extern long long __ashldi3(long long, int); extern long long __lshrdi3(long long, int); extern int __divsi3(int, int); extern int __modsi3(int, int); -extern long long __muldi3(long long, long long); extern int __mulsi3(int, int); extern unsigned int __udivsi3(unsigned int, unsigned int); extern unsigned int __umodsi3(unsigned int, unsigned int); -extern unsigned long long __umoddi3(unsigned long long, unsigned long long); -extern unsigned long long __udivdi3(unsigned long long, unsigned long long); -extern int __ucmpdi2(int, int); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(__divsi3); EXPORT_SYMBOL(__modsi3); -EXPORT_SYMBOL(__muldi3); EXPORT_SYMBOL(__mulsi3); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); -EXPORT_SYMBOL(__udivdi3); -EXPORT_SYMBOL(__umoddi3); -EXPORT_SYMBOL(__ucmpdi2); - -void __xtensa_libgcc_window_spill(void) -{ - BUG(); -} -EXPORT_SYMBOL(__xtensa_libgcc_window_spill); unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { From c20e1117d9aa22c558646a1060ddd1dd042fb107 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 23 Apr 2021 00:34:44 -0700 Subject: [PATCH 316/380] xtensa: add kernel ABI selection to Kconfig Add choice to use default or call0 ABI for the kernel code. If call0 ABI is chosen add '-mabi=call0' to the flags. The toolchain support for this option is rather new so only enable it when the compiler supports it. Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 35 +++++++++++++++++++++++++++++++++++ arch/xtensa/Makefile | 4 ++++ 2 files changed, 39 insertions(+) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index b324f6743073..cf03ed54c8f0 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -92,6 +92,9 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN def_bool !CPU_BIG_ENDIAN +config CC_HAVE_CALL0_ABI + def_bool $(success,test "$(shell,echo __XTENSA_CALL0_ABI__ | $(CC) -mabi=call0 -E -P - 2>/dev/null)" = 1) + menu "Processor type and features" choice @@ -250,6 +253,38 @@ config FAST_SYSCALL_SPILL_REGISTERS If unsure, say N. +choice + prompt "Kernel ABI" + default KERNEL_ABI_DEFAULT + help + Select ABI for the kernel code. This ABI is independent of the + supported userspace ABI and any combination of the + kernel/userspace ABI is possible and should work. + + In case both kernel and userspace support only call0 ABI + all register windows support code will be omitted from the + build. + + If unsure, choose the default ABI. + +config KERNEL_ABI_DEFAULT + bool "Default ABI" + help + Select this option to compile kernel code with the default ABI + selected for the toolchain. + Normally cores with windowed registers option use windowed ABI and + cores without it use call0 ABI. + +config KERNEL_ABI_CALL0 + bool "Call0 ABI" if CC_HAVE_CALL0_ABI + help + Select this option to compile kernel code with call0 ABI even with + toolchain that defaults to windowed ABI. + When this option is not selected the default toolchain ABI will + be used for the kernel code. + +endchoice + config USER_ABI_CALL0 bool diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 725cd4cd8b30..5097caa7bf0c 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -35,6 +35,10 @@ KBUILD_CFLAGS += -ffreestanding -D__linux__ KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,) KBUILD_CFLAGS += $(call cc-option,-mno-serialize-volatile,) +ifneq ($(CONFIG_KERNEL_ABI_CALL0),) +KBUILD_CFLAGS += -mabi=call0 +KBUILD_AFLAGS += -mabi=call0 +endif KBUILD_AFLAGS += -mlongcalls -mtext-section-literals From 89b184f980f012421fb22c939b50b3dfd7fe16b3 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 28 Dec 2021 01:28:53 -0800 Subject: [PATCH 317/380] xtensa: make secondary reset vector support conditional Whether xtensa cores start from primary or secondary reset vector is configurable and may be chosen by board designer or controlled at runtime. When secondary reset vector is unused its location in memory may not be writable. Make secondary reset vector support conditional and don't build and load secondary reset vector code when it is disabled. Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 9 +++++++++ arch/xtensa/kernel/Makefile | 3 ++- arch/xtensa/kernel/setup.c | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index cf03ed54c8f0..a02c3388f521 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -227,6 +227,15 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. +config SECONDARY_RESET_VECTOR + bool "Secondary cores use alternative reset vector" + default y + depends on HAVE_SMP + help + Secondary cores may be configured to use alternative reset vector, + or all cores may use primary reset vector. + Say Y here to supply handler for the alternative reset location. + config FAST_SYSCALL_XTENSA bool "Enable fast atomic syscalls" default n diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index d4082c6a121b..5fd6cd15e0fb 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -13,7 +13,8 @@ obj-$(CONFIG_MMU) += pci-dma.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o -obj-$(CONFIG_SMP) += smp.o mxhead.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SECONDARY_RESET_VECTOR) += mxhead.o obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 3f1842891482..9191738f9941 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -349,7 +349,7 @@ void __init setup_arch(char **cmdline_p) #endif /* CONFIG_VECTORS_ADDR */ -#ifdef CONFIG_SMP +#ifdef CONFIG_SECONDARY_RESET_VECTOR mem_reserve(__pa(_SecondaryResetVector_text_start), __pa(_SecondaryResetVector_text_end)); #endif diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index eee270a039a4..965a3952c47b 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -207,7 +207,7 @@ SECTIONS RELOCATE_ENTRY(_xip_data, .data); RELOCATE_ENTRY(_xip_init_data, .init.data); #endif -#if defined(CONFIG_SMP) +#if defined(CONFIG_SECONDARY_RESET_VECTOR) RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); #endif @@ -303,7 +303,7 @@ SECTIONS #define LAST .DoubleExceptionVector.text #endif -#if defined(CONFIG_SMP) +#if defined(CONFIG_SECONDARY_RESET_VECTOR) SECTION_VECTOR4 (_SecondaryResetVector_text, .SecondaryResetVector.text, From 213556235526abffe4b7fdec2aabf4d751a7c0f9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:31 +0000 Subject: [PATCH 318/380] dt-bindings: soc/microchip: update syscontroller compatibles The Polarfire SoC is currently using two different compatible string prefixes. Fix this by changing "polarfire-soc-*" strings to "mpfs-*" in its system controller in order to match the compatible string used in the soc binding and device tree Reviewed-by: Geert Uytterhoeven Signed-off-by: Conor Dooley Acked-by: Rob Herring Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- ...larfire-soc-mailbox.yaml => microchip,mpfs-mailbox.yaml} | 6 +++--- ...s-controller.yaml => microchip,mpfs-sys-controller.yaml} | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) rename Documentation/devicetree/bindings/mailbox/{microchip,polarfire-soc-mailbox.yaml => microchip,mpfs-mailbox.yaml} (82%) rename Documentation/devicetree/bindings/soc/microchip/{microchip,polarfire-soc-sys-controller.yaml => microchip,mpfs-sys-controller.yaml} (75%) diff --git a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml similarity index 82% rename from Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml rename to Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml index bbb173ea483c..082d397d3e89 100644 --- a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: "http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#" +$id: "http://devicetree.org/schemas/mailbox/microchip,mpfs-mailbox.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller @@ -11,7 +11,7 @@ maintainers: properties: compatible: - const: microchip,polarfire-soc-mailbox + const: microchip,mpfs-mailbox reg: items: @@ -38,7 +38,7 @@ examples: #address-cells = <2>; #size-cells = <2>; mbox: mailbox@37020000 { - compatible = "microchip,polarfire-soc-mailbox"; + compatible = "microchip,mpfs-mailbox"; reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>; interrupt-parent = <&L1>; interrupts = <96>; diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml similarity index 75% rename from Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml rename to Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml index 2cd3bc6bd8d6..f699772fedf3 100644 --- a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml +++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#" +$id: "http://devicetree.org/schemas/soc/microchip/microchip,mpfs-sys-controller.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller @@ -19,7 +19,7 @@ properties: maxItems: 1 compatible: - const: microchip,polarfire-soc-sys-controller + const: microchip,mpfs-sys-controller required: - compatible @@ -30,6 +30,6 @@ additionalProperties: false examples: - | syscontroller: syscontroller { - compatible = "microchip,polarfire-soc-sys-controller"; + compatible = "microchip,mpfs-sys-controller"; mboxes = <&mbox 0>; }; From b435a1728c9f6811f6f337ebe80e3aca210f1b17 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:32 +0000 Subject: [PATCH 319/380] dt-bindings: soc/microchip: add info about services to mpfs sysctrl The services actually provided by the system controller are not documented so add some words about what the system controller can actually do. Add a link to the oneline documentation with the specific details of each individual service. Also, drop the unneeded label from the example. Signed-off-by: Conor Dooley Reviewed-by: Rob Herring Signed-off-by: Palmer Dabbelt --- .../soc/microchip/microchip,mpfs-sys-controller.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml index f699772fedf3..b0dae51e1d42 100644 --- a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml +++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml @@ -10,9 +10,14 @@ maintainers: - Conor Dooley description: | - The PolarFire SoC system controller is communicated with via a mailbox. - This document describes the bindings for the client portion of that mailbox. + PolarFire SoC devices include a microcontroller acting as the system controller, + which provides "services" to the main processor and to the FPGA fabric. These + services include hardware rng, reprogramming of the FPGA and verfification of the + eNVM contents etc. More information on these services can be found online, at + https://onlinedocs.microchip.com/pr/GUID-1409CF11-8EF9-4C24-A94E-70979A688632-en-US-1/index.html + Communication with the system controller is done via a mailbox, of which the client + portion is documented here. properties: mboxes: @@ -29,7 +34,7 @@ additionalProperties: false examples: - | - syscontroller: syscontroller { + syscontroller { compatible = "microchip,mpfs-sys-controller"; mboxes = <&mbox 0>; }; From 4cbcc0d7b397b903b4b2ba749c36b8837d5dc782 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:33 +0000 Subject: [PATCH 320/380] dt-bindings: rtc: add bindings for microchip mpfs rtc Add device tree bindings for the real time clock on the Microchip PolarFire SoC. Signed-off-by: Daire McNamara Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Reviewed-by: Rob Herring Acked-by: Alexandre Belloni Signed-off-by: Palmer Dabbelt --- .../bindings/rtc/microchip,mfps-rtc.yaml | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml diff --git a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml new file mode 100644 index 000000000000..a2e984ea3553 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/microchip,mfps-rtc.yaml# + +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PolarFire Soc (MPFS) RTC Device Tree Bindings + +allOf: + - $ref: rtc.yaml# + +maintainers: + - Daire McNamara + - Lewis Hanly + +properties: + compatible: + enum: + - microchip,mpfs-rtc + + reg: + maxItems: 1 + + interrupts: + items: + - description: | + RTC_WAKEUP interrupt + - description: | + RTC_MATCH, asserted when the content of the Alarm register is equal + to that of the RTC's count register. + + clocks: + maxItems: 1 + + clock-names: + items: + - const: rtc + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + rtc@20124000 { + compatible = "microchip,mpfs-rtc"; + reg = <0x20124000 0x1000>; + clocks = <&clkcfg 21>; + clock-names = "rtc"; + interrupts = <80>, <81>; + }; +... From 735806d8a68e96fd7cfca8497dc9dd672ce22236 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:34 +0000 Subject: [PATCH 321/380] dt-bindings: gpio: add bindings for microchip mpfs gpio Add device tree bindings for the gpio controller on the Microchip PolarFire SoC. Reviewed-by: Rob Herring Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Acked-by: Bartosz Golaszewski Signed-off-by: Palmer Dabbelt --- .../bindings/gpio/microchip,mpfs-gpio.yaml | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml diff --git a/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml new file mode 100644 index 000000000000..110651eafa70 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/microchip,mpfs-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip MPFS GPIO Controller Device Tree Bindings + +maintainers: + - Conor Dooley + +properties: + compatible: + items: + - enum: + - microchip,mpfs-gpio + + reg: + maxItems: 1 + + interrupts: + description: + Interrupt mapping, one per GPIO. Maximum 32 GPIOs. + minItems: 1 + maxItems: 32 + + interrupt-controller: true + + clocks: + maxItems: 1 + + "#gpio-cells": + const: 2 + + "#interrupt-cells": + const: 1 + + ngpios: + description: + The number of GPIOs available. + minimum: 1 + maximum: 32 + default: 32 + + gpio-controller: true + +required: + - compatible + - reg + - interrupts + - "#interrupt-cells" + - interrupt-controller + - "#gpio-cells" + - gpio-controller + - clocks + +additionalProperties: false + +examples: + - | + gpio@20122000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x20122000 0x1000>; + clocks = <&clkcfg 25>; + interrupt-parent = <&plic>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <1>; + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + }; +... From df77f7735786ece2fcd8875b036a511ffcadfab6 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:35 +0000 Subject: [PATCH 322/380] dt-bindings: pwm: add microchip corepwm binding Add device tree bindings for the Microchip fpga fabric based "core" PWM controller. Reviewed-by: Rob Herring Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Acked-by: Uwe Kleine-K=F6nig Acked-by: Thierry Reding Signed-off-by: Palmer Dabbelt --- .../bindings/pwm/microchip,corepwm.yaml | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml diff --git a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml new file mode 100644 index 000000000000..a7fae1772a81 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip IP corePWM controller bindings + +maintainers: + - Conor Dooley + +description: | + corePWM is an 16 channel pulse width modulator FPGA IP + + https://www.microsemi.com/existing-parts/parts/152118 + +allOf: + - $ref: pwm.yaml# + +properties: + compatible: + items: + - const: microchip,corepwm-rtl-v4 + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + "#pwm-cells": + const: 2 + + microchip,sync-update-mask: + description: | + Depending on how the IP is instantiated, there are two modes of operation. + In synchronous mode, all channels are updated at the beginning of the PWM period, + and in asynchronous mode updates happen as the control registers are written. + A 16 bit wide "SHADOW_REG_EN" parameter of the IP core controls whether synchronous + mode is possible for each channel, and is set by the bitstream programmed to the + FPGA. If the IP core is instantiated with SHADOW_REG_ENx=1, both registers that + control the duty cycle for channel x have a second "shadow"/buffer reg synthesised. + At runtime a bit wide register exposed to APB can be used to toggle on/off + synchronised mode for all channels it has been synthesised for. + Each bit of "microchip,sync-update-mask" corresponds to a PWM channel & represents + whether synchronous mode is possible for the PWM channel. + + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + + microchip,dac-mode-mask: + description: | + Optional, per-channel Low Ripple DAC mode is possible on this IP core. It creates + a minimum period pulse train whose High/Low average is that of the chosen duty + cycle. This "DAC" will have far better bandwidth and ripple performance than the + standard PWM algorithm can achieve. A 16 bit DAC_MODE module parameter of the IP + core, set at instantiation and by the bitstream programmed to the FPGA, determines + whether a given channel operates in regular PWM or DAC mode. + Each bit corresponds to a PWM channel & represents whether DAC mode is enabled + for that channel. + + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + +required: + - compatible + - reg + - clocks + +additionalProperties: false + +examples: + - | + pwm@41000000 { + compatible = "microchip,corepwm-rtl-v4"; + microchip,sync-update-mask = /bits/ 32 <0>; + clocks = <&clkcfg 30>; + reg = <0x41000000 0xF0>; + #pwm-cells = <2>; + }; From 6546f920868e13ef2fe027e791dcdca49483cb6d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:36 +0000 Subject: [PATCH 323/380] riscv: dts: microchip: use clk defines for icicle kit Update the Microchip Icicle kit device tree by replacing clock related magic numbers with their defined counterparts. Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../microchip/microchip-mpfs-icicle-kit.dts | 2 +- .../boot/dts/microchip/microchip-mpfs.dtsi | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index 0c748ae1b006..6d19ba196f12 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -31,7 +31,7 @@ memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x40000000>; - clocks = <&clkcfg 26>; + clocks = <&clkcfg CLK_DDRC>; }; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 869aaf0d5c06..717e39b30a15 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Microchip Technology Inc */ /dts-v1/; +#include "dt-bindings/clock/microchip,mpfs-clock.h" / { #address-cells = <2>; @@ -14,7 +15,6 @@ #size-cells = <0>; cpu@0 { - clock-frequency = <0>; compatible = "sifive,e51", "sifive,rocket0", "riscv"; device_type = "cpu"; i-cache-block-size = <64>; @@ -22,6 +22,7 @@ i-cache-size = <16384>; reg = <0>; riscv,isa = "rv64imac"; + clocks = <&clkcfg CLK_CPU>; status = "disabled"; cpu0_intc: interrupt-controller { @@ -32,7 +33,6 @@ }; cpu@1 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -48,6 +48,7 @@ mmu-type = "riscv,sv39"; reg = <1>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -59,7 +60,6 @@ }; cpu@2 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -75,6 +75,7 @@ mmu-type = "riscv,sv39"; reg = <2>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -86,7 +87,6 @@ }; cpu@3 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -102,6 +102,7 @@ mmu-type = "riscv,sv39"; reg = <3>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -113,7 +114,6 @@ }; cpu@4 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -129,6 +129,7 @@ mmu-type = "riscv,sv39"; reg = <4>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; cpu4_intc: interrupt-controller { @@ -210,7 +211,7 @@ interrupt-parent = <&plic>; interrupts = <90>; current-speed = <115200>; - clocks = <&clkcfg 8>; + clocks = <&clkcfg CLK_MMUART0>; status = "disabled"; }; @@ -222,7 +223,7 @@ interrupt-parent = <&plic>; interrupts = <91>; current-speed = <115200>; - clocks = <&clkcfg 9>; + clocks = <&clkcfg CLK_MMUART1>; status = "disabled"; }; @@ -234,7 +235,7 @@ interrupt-parent = <&plic>; interrupts = <92>; current-speed = <115200>; - clocks = <&clkcfg 10>; + clocks = <&clkcfg CLK_MMUART2>; status = "disabled"; }; @@ -246,7 +247,7 @@ interrupt-parent = <&plic>; interrupts = <93>; current-speed = <115200>; - clocks = <&clkcfg 11>; + clocks = <&clkcfg CLK_MMUART3>; status = "disabled"; }; @@ -256,7 +257,7 @@ reg = <0x0 0x20008000 0x0 0x1000>; interrupt-parent = <&plic>; interrupts = <88>, <89>; - clocks = <&clkcfg 6>; + clocks = <&clkcfg CLK_MMC>; max-frequency = <200000000>; status = "disabled"; }; @@ -267,7 +268,7 @@ interrupt-parent = <&plic>; interrupts = <64>, <65>, <66>, <67>; local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 4>, <&clkcfg 2>; + clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>; clock-names = "pclk", "hclk"; status = "disabled"; #address-cells = <1>; @@ -280,7 +281,7 @@ interrupt-parent = <&plic>; interrupts = <70>, <71>, <72>, <73>; local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 5>, <&clkcfg 2>; + clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; status = "disabled"; clock-names = "pclk", "hclk"; #address-cells = <1>; From 72560c6559b82b2ded469856f7a0bb425fba9b7e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:37 +0000 Subject: [PATCH 324/380] riscv: dts: microchip: add fpga fabric section to icicle kit Split the device tree for the Microchip MPFS into two sections by adding microchip-mpfs-fabric.dtsi, which contains peripherals contained in the FPGA fabric. Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../dts/microchip/microchip-mpfs-fabric.dtsi | 25 +++++++++++++++++++ .../microchip/microchip-mpfs-icicle-kit.dts | 8 ++++++ .../boot/dts/microchip/microchip-mpfs.dtsi | 1 + 3 files changed, 34 insertions(+) create mode 100644 arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi new file mode 100644 index 000000000000..854320e17b28 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2021 Microchip Technology Inc */ + +/ { + core_pwm0: pwm@41000000 { + compatible = "microchip,corepwm-rtl-v4"; + reg = <0x0 0x41000000 0x0 0xF0>; + microchip,sync-update-mask = /bits/ 32 <0>; + #pwm-cells = <2>; + clocks = <&clkcfg CLK_FIC3>; + status = "disabled"; + }; + + i2c2: i2c@44000000 { + compatible = "microchip,corei2c-rtl-v7"; + reg = <0x0 0x44000000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clkcfg CLK_FIC3>; + interrupt-parent = <&plic>; + interrupts = <122>; + clock-frequency = <100000>; + status = "disabled"; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index 6d19ba196f12..ab803f71626a 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -68,6 +68,10 @@ sd-uhs-sdr104; }; +&i2c2 { + status = "okay"; +}; + &emac0 { phy-mode = "sgmii"; phy-handle = <&phy0>; @@ -86,3 +90,7 @@ ti,fifo-depth = <0x01>; }; }; + +&core_pwm0 { + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 717e39b30a15..c7d73756c9b8 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -3,6 +3,7 @@ /dts-v1/; #include "dt-bindings/clock/microchip,mpfs-clock.h" +#include "microchip-mpfs-fabric.dtsi" / { #address-cells = <2>; From c5094f371008c5296a91594b6a24d506383afadb Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:38 +0000 Subject: [PATCH 325/380] riscv: dts: microchip: refactor icicle kit device tree Assorted minor changes to the MPFS/Icicle kit device tree: - rename serial to mmuart to match microchip documentation - move phy0 inside mac1 node to match phy configuration - add labels where missing (cpus, cache controller) - add missing address cells & interrupts to MACs Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../microchip/microchip-mpfs-icicle-kit.dts | 37 +++++----- .../boot/dts/microchip/microchip-mpfs.dtsi | 69 +++++++++---------- 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index ab803f71626a..c51bd7cf500f 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ +/* Copyright (c) 2020-2021 Microchip Technology Inc */ /dts-v1/; @@ -13,11 +13,11 @@ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs"; aliases { - ethernet0 = &emac1; - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; + ethernet0 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; }; chosen { @@ -39,19 +39,19 @@ clock-frequency = <600000000>; }; -&serial0 { +&mmuart0 { status = "okay"; }; -&serial1 { +&mmuart1 { status = "okay"; }; -&serial2 { +&mmuart2 { status = "okay"; }; -&serial3 { +&mmuart3 { status = "okay"; }; @@ -61,7 +61,10 @@ bus-width = <4>; disable-wp; cap-sd-highspeed; + cap-mmc-highspeed; card-detect-delay = <200>; + mmc-ddr-1_8v; + mmc-hs200-1_8v; sd-uhs-sdr12; sd-uhs-sdr25; sd-uhs-sdr50; @@ -72,22 +75,22 @@ status = "okay"; }; -&emac0 { +&mac0 { phy-mode = "sgmii"; phy-handle = <&phy0>; - phy0: ethernet-phy@8 { - reg = <8>; - ti,fifo-depth = <0x01>; - }; }; -&emac1 { +&mac1 { status = "okay"; phy-mode = "sgmii"; phy-handle = <&phy1>; phy1: ethernet-phy@9 { reg = <9>; - ti,fifo-depth = <0x01>; + ti,fifo-depth = <0x1>; + }; + phy0: ethernet-phy@8 { + reg = <8>; + ti,fifo-depth = <0x1>; }; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index c7d73756c9b8..62bd00092bcc 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ +/* Copyright (c) 2020-2021 Microchip Technology Inc */ /dts-v1/; #include "dt-bindings/clock/microchip,mpfs-clock.h" @@ -15,7 +15,7 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; device_type = "cpu"; i-cache-block-size = <64>; @@ -33,7 +33,7 @@ }; }; - cpu@1 { + cpu1: cpu@1 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -60,7 +60,7 @@ }; }; - cpu@2 { + cpu2: cpu@2 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -87,7 +87,7 @@ }; }; - cpu@3 { + cpu3: cpu@3 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -114,7 +114,7 @@ }; }; - cpu@4 { + cpu4: cpu@4 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -152,8 +152,9 @@ compatible = "simple-bus"; ranges; - cache-controller@2010000 { + cctrllr: cache-controller@2010000 { compatible = "sifive,fu540-c000-ccache", "cache"; + reg = <0x0 0x2010000 0x0 0x1000>; cache-block-size = <64>; cache-level = <2>; cache-sets = <1024>; @@ -161,10 +162,9 @@ cache-unified; interrupt-parent = <&plic>; interrupts = <1>, <2>, <3>; - reg = <0x0 0x2010000 0x0 0x1000>; }; - clint@2000000 { + clint: clint@2000000 { compatible = "sifive,fu540-c000-clint", "sifive,clint0"; reg = <0x0 0x2000000 0x0 0xC000>; interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, @@ -174,6 +174,15 @@ <&cpu4_intc 3>, <&cpu4_intc 7>; }; + dma@3000000 { + compatible = "sifive,fu540-c000-pdma"; + reg = <0x0 0x3000000 0x0 0x8000>; + interrupt-parent = <&plic>; + interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>, + <30>; + #dma-cells = <1>; + }; + plic: interrupt-controller@c000000 { compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; reg = <0x0 0xc000000 0x0 0x4000000>; @@ -188,15 +197,6 @@ riscv,ndev = <186>; }; - dma@3000000 { - compatible = "sifive,fu540-c000-pdma"; - reg = <0x0 0x3000000 0x0 0x8000>; - interrupt-parent = <&plic>; - interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>, - <30>; - #dma-cells = <1>; - }; - clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; reg = <0x0 0x20002000 0x0 0x1000>; @@ -204,7 +204,7 @@ #clock-cells = <1>; }; - serial0: serial@20000000 { + mmuart0: serial@20000000 { compatible = "ns16550a"; reg = <0x0 0x20000000 0x0 0x400>; reg-io-width = <4>; @@ -216,7 +216,7 @@ status = "disabled"; }; - serial1: serial@20100000 { + mmuart1: serial@20100000 { compatible = "ns16550a"; reg = <0x0 0x20100000 0x0 0x400>; reg-io-width = <4>; @@ -228,7 +228,7 @@ status = "disabled"; }; - serial2: serial@20102000 { + mmuart2: serial@20102000 { compatible = "ns16550a"; reg = <0x0 0x20102000 0x0 0x400>; reg-io-width = <4>; @@ -240,7 +240,7 @@ status = "disabled"; }; - serial3: serial@20104000 { + mmuart3: serial@20104000 { compatible = "ns16550a"; reg = <0x0 0x20104000 0x0 0x400>; reg-io-width = <4>; @@ -257,37 +257,36 @@ compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; reg = <0x0 0x20008000 0x0 0x1000>; interrupt-parent = <&plic>; - interrupts = <88>, <89>; + interrupts = <88>; clocks = <&clkcfg CLK_MMC>; max-frequency = <200000000>; status = "disabled"; }; - emac0: ethernet@20110000 { + mac0: ethernet@20110000 { compatible = "cdns,macb"; reg = <0x0 0x20110000 0x0 0x2000>; + #address-cells = <1>; + #size-cells = <0>; interrupt-parent = <&plic>; - interrupts = <64>, <65>, <66>, <67>; + interrupts = <64>, <65>, <66>, <67>, <68>, <69>; local-mac-address = [00 00 00 00 00 00]; clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>; clock-names = "pclk", "hclk"; status = "disabled"; - #address-cells = <1>; - #size-cells = <0>; }; - emac1: ethernet@20112000 { + mac1: ethernet@20112000 { compatible = "cdns,macb"; reg = <0x0 0x20112000 0x0 0x2000>; - interrupt-parent = <&plic>; - interrupts = <70>, <71>, <72>, <73>; - local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; - status = "disabled"; - clock-names = "pclk", "hclk"; #address-cells = <1>; #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <70>, <71>, <72>, <73>, <74>, <75>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; + clock-names = "pclk", "hclk"; + status = "disabled"; }; - }; }; From 5b28df37d311ef95e6349af97e423842817bf06a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:39 +0000 Subject: [PATCH 326/380] riscv: dts: microchip: update peripherals in icicle kit device tree Assorted minor changes to the MPFS/Icicle kit device tree: - enable mmuart4 instead of mmuart0 - remove sifive pdma - split memory node to match updated fpga design - move stdout path to serial1 to avoid collision with bootloader running on the e51 Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../microchip/microchip-mpfs-icicle-kit.dts | 23 +++++++++++++------ .../boot/dts/microchip/microchip-mpfs.dtsi | 23 +++++++++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index c51bd7cf500f..dc5f351b10c4 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -18,20 +18,29 @@ serial1 = &mmuart1; serial2 = &mmuart2; serial3 = &mmuart3; + serial4 = &mmuart4; }; chosen { - stdout-path = "serial0:115200n8"; + stdout-path = "serial1:115200n8"; }; cpus { timebase-frequency = ; }; - memory@80000000 { + ddrc_cache_lo: memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x0 0x40000000>; + reg = <0x0 0x80000000 0x0 0x2e000000>; clocks = <&clkcfg CLK_DDRC>; + status = "okay"; + }; + + ddrc_cache_hi: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x0 0x0 0x40000000>; + clocks = <&clkcfg CLK_DDRC>; + status = "okay"; }; }; @@ -39,10 +48,6 @@ clock-frequency = <600000000>; }; -&mmuart0 { - status = "okay"; -}; - &mmuart1 { status = "okay"; }; @@ -55,6 +60,10 @@ status = "okay"; }; +&mmuart4 { + status = "okay"; +}; + &mmc { status = "okay"; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 62bd00092bcc..5e7aaaf42cde 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -174,15 +174,6 @@ <&cpu4_intc 3>, <&cpu4_intc 7>; }; - dma@3000000 { - compatible = "sifive,fu540-c000-pdma"; - reg = <0x0 0x3000000 0x0 0x8000>; - interrupt-parent = <&plic>; - interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>, - <30>; - #dma-cells = <1>; - }; - plic: interrupt-controller@c000000 { compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; reg = <0x0 0xc000000 0x0 0x4000000>; @@ -213,7 +204,7 @@ interrupts = <90>; current-speed = <115200>; clocks = <&clkcfg CLK_MMUART0>; - status = "disabled"; + status = "disabled"; /* Reserved for the HSS */ }; mmuart1: serial@20100000 { @@ -252,6 +243,18 @@ status = "disabled"; }; + mmuart4: serial@20106000 { + compatible = "ns16550a"; + reg = <0x0 0x20106000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <94>; + clocks = <&clkcfg CLK_MMUART4>; + current-speed = <115200>; + status = "disabled"; + }; + /* Common node entry for emmc/sd */ mmc: mmc@20008000 { compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; From 528a5b1f2556dda1feddf4d6299813981182e6c6 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:40 +0000 Subject: [PATCH 327/380] riscv: dts: microchip: add new peripherals to icicle kit device tree Add new peripherals to the MPFS, and enable them in the Icicle kit device tree: 2x SPI, QSPI, 3x GPIO, 2x I2C, Real Time Counter, PCIE controller, USB host & system controller. Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../microchip/microchip-mpfs-icicle-kit.dts | 53 ++++++ .../boot/dts/microchip/microchip-mpfs.dtsi | 160 ++++++++++++++++++ 2 files changed, 213 insertions(+) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index dc5f351b10c4..cd2fe80fa81a 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -80,6 +80,26 @@ sd-uhs-sdr104; }; +&spi0 { + status = "okay"; +}; + +&spi1 { + status = "okay"; +}; + +&qspi { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + +&i2c1 { + status = "okay"; +}; + &i2c2 { status = "okay"; }; @@ -103,6 +123,39 @@ }; }; +&gpio2 { + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "host"; +}; + +&mbox { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&pcie { + status = "okay"; +}; + &core_pwm0 { status = "okay"; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 5e7aaaf42cde..c5c9d1360de0 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -266,6 +266,66 @@ status = "disabled"; }; + spi0: spi@20108000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20108000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <54>; + clocks = <&clkcfg CLK_SPI0>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + spi1: spi@20109000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20109000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <55>; + clocks = <&clkcfg CLK_SPI1>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + qspi: spi@21000000 { + compatible = "microchip,mpfs-qspi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x21000000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <85>; + clocks = <&clkcfg CLK_QSPI>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + i2c0: i2c@2010a000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 0x2010a000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <58>; + clocks = <&clkcfg CLK_I2C0>; + clock-frequency = <100000>; + status = "disabled"; + }; + + i2c1: i2c@2010b000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 0x2010b000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <61>; + clocks = <&clkcfg CLK_I2C1>; + clock-frequency = <100000>; + status = "disabled"; + }; + mac0: ethernet@20110000 { compatible = "cdns,macb"; reg = <0x0 0x20110000 0x0 0x2000>; @@ -291,5 +351,105 @@ clock-names = "pclk", "hclk"; status = "disabled"; }; + + gpio0: gpio@20120000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20120000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO0>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio1: gpio@20121000 { + compatible = "microchip,mpfs-gpio"; + reg = <000 0x20121000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO1>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio2: gpio@20122000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20122000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO2>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + rtc: rtc@20124000 { + compatible = "microchip,mpfs-rtc"; + reg = <0x0 0x20124000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <80>, <81>; + clocks = <&clkcfg CLK_RTC>; + clock-names = "rtc"; + status = "disabled"; + }; + + usb: usb@20201000 { + compatible = "microchip,mpfs-musb"; + reg = <0x0 0x20201000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <86>, <87>; + clocks = <&clkcfg CLK_USB>; + interrupt-names = "dma","mc"; + status = "disabled"; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + microchip,axi-m-atr0 = <0x10 0x0>; + status = "disabled"; + pcie_intc: legacy-interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; + + mbox: mailbox@37020000 { + compatible = "microchip,mpfs-mailbox"; + reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>; + interrupt-parent = <&plic>; + interrupts = <96>; + #mbox-cells = <1>; + status = "disabled"; + }; + + syscontroller: syscontroller { + compatible = "microchip,mpfs-sys-controller"; + mboxes = <&mbox 0>; + }; }; }; From 48e8641c2bf0a5b2a80666b2fe2b50d6b7c42709 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 14 Feb 2022 13:58:41 +0000 Subject: [PATCH 328/380] MAINTAINERS: update riscv/microchip entry Update the RISC-V/Microchip entry by adding the microchip dts directory and myself as maintainer Reviewed-by: Lewis Hanly Signed-off-by: Conor Dooley Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..779a550dc95b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16575,8 +16575,10 @@ K: riscv RISC-V/MICROCHIP POLARFIRE SOC SUPPORT M: Lewis Hanly +M: Conor Dooley L: linux-riscv@lists.infradead.org S: Supported +F: arch/riscv/boot/dts/microchip/ F: drivers/mailbox/mailbox-mpfs.c F: drivers/soc/microchip/ F: include/soc/microchip/mpfs.h From 6c2797cd51218a451a52edc2ca533f89d1a2af10 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 8 Mar 2022 10:49:58 +0100 Subject: [PATCH 329/380] s390/pci: make zpci_set_irq()/zpci_clear_irq() static Commit c1e18c17bda68 ("s390/pci: add zpci_set_irq()/zpci_clear_irq()") made zpci_set_irq()/zpci_clear_irq() non-static in preparation for using them in zpci_hot_reset_device(). The version of zpci_hot_reset_device() that was finally merged however exploits the fact that IRQs and DMA is implicitly disabled by clp_disable_fh() so the call to zpci_clear_irq() was never added. There are no other calls outside pci_irq.c so lets make both functions static. Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 3 --- arch/s390/pci/pci_irq.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 90824be5ce9a..fdb9745ee998 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -283,9 +283,6 @@ int zpci_dma_exit_device(struct zpci_dev *zdev); int __init zpci_irq_init(void); void __init zpci_irq_exit(void); -int zpci_set_irq(struct zpci_dev *zdev); -int zpci_clear_irq(struct zpci_dev *zdev); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 2b6062c486f5..500cd2dbdf53 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -99,7 +99,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) } /* Register adapter interruptions */ -int zpci_set_irq(struct zpci_dev *zdev) +static int zpci_set_irq(struct zpci_dev *zdev) { int rc; @@ -115,7 +115,7 @@ int zpci_set_irq(struct zpci_dev *zdev) } /* Clear adapter interruptions */ -int zpci_clear_irq(struct zpci_dev *zdev) +static int zpci_clear_irq(struct zpci_dev *zdev) { int rc; From eed38cd2f46f6c4f97c730abd8f23a19db0afbc8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 28 Feb 2022 13:15:59 +0100 Subject: [PATCH 330/380] s390/nospec: generate single register thunks if possible Currently assembler generated expoline thunks are always in a form __s390_indirect_jump_rXuse_rX even when exrl instruction is available and no additional register is utilized. Generate __s390_indirect_jump_rX versions using a single register if the kernel is built for z10 or newer machine, which have exrl instruction available. Thunks generated are identical to the ones generated by the compiler. This helps to reduce the number of thunks for newer machines generations. Acked-by: Sumanth Korikkar Acked-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index c419c9e87265..4397eae15e34 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -31,6 +31,19 @@ _LC_BR_R1 = __LC_BR_R1 .popsection .endm +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390_indirect_jump_r\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390_indirect_jump_r\r2 + .endm +#else .macro __THUNK_PROLOG_BR r1,r2 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm @@ -42,6 +55,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 .endm +#endif .macro __DECODE_RR expand,reg,ruse .set __decode_fail,1 From 1d2ad084800edad81cdc955304272742b10721c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 6 Mar 2022 20:56:07 +0100 Subject: [PATCH 331/380] s390/nospec: add an option to use thunk-extern Currently with -mindirect-branch=thunk and -mfunction-return=thunk compiler options expoline thunks are put into individual COMDAT group sections. s390 is the only architecture which has group sections and it has implications for kpatch and objtool tools support. Using -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern is an alternative, which comes with a need to generate all required expoline thunks manually. Unfortunately modules area is too far away from the kernel image, and expolines from the kernel image cannon be used. But since all new distributions (except Debian) build kernels for machine generations newer than z10, where "exrl" instruction is available, that leaves only 16 expolines thunks possible. Provide an option to build the kernel with -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern for z10 or newer. This also requires to postlink expoline thunks into all modules explicitly. Currently modules already contain most expolines anyhow. Unfortunately -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern options support is broken in gcc <= 11.2. Additional compile test is required to verify proper gcc support. Acked-by: Ilya Leoshkevich Co-developed-by: Sumanth Korikkar Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 15 +++++++++++++++ arch/s390/Makefile | 14 +++++++++----- arch/s390/include/asm/nospec-insn.h | 10 ++++++++++ arch/s390/lib/Makefile | 1 + arch/s390/lib/expoline.S | 12 ++++++++++++ arch/s390/tools/gcc-thunk-extern.sh | 24 ++++++++++++++++++++++++ scripts/mod/modpost.c | 5 +++++ 7 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 arch/s390/lib/expoline.S create mode 100755 arch/s390/tools/gcc-thunk-extern.sh diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a492376d6e3f..115b8cddefee 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -585,6 +585,7 @@ config KERNEL_NOBP config EXPOLINE def_bool n + depends on $(cc-option,-mindirect-branch=thunk) prompt "Avoid speculative indirect branches in the kernel" help Compile the kernel with the expoline compiler options to guard @@ -595,6 +596,20 @@ config EXPOLINE If unsure, say N. +config EXPOLINE_EXTERN + def_bool n + depends on EXPOLINE + depends on HAVE_MARCH_Z10_FEATURES + depends on CC_IS_GCC && GCC_VERSION >= 110200 + depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC)) + prompt "Generate expolines as extern functions." + help + This option is required for some tooling like kpatch. The kernel is + compiled with -mindirect-branch=thunk-extern and requires a newer + compiler. + + If unsure, say N. + choice prompt "Expoline default" depends on EXPOLINE diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 2edf25b44c4b..bd75128b7d79 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -81,14 +81,18 @@ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) endif ifdef CONFIG_EXPOLINE - ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) + ifdef CONFIG_EXPOLINE_EXTERN + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern + CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern + else CC_FLAGS_EXPOLINE := -mindirect-branch=thunk CC_FLAGS_EXPOLINE += -mfunction-return=thunk - CC_FLAGS_EXPOLINE += -mindirect-branch-table - export CC_FLAGS_EXPOLINE - cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE - aflags-y += -DCC_USING_EXPOLINE endif + CC_FLAGS_EXPOLINE += -mindirect-branch-table + export CC_FLAGS_EXPOLINE + cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 4397eae15e34..bbb5c4d84db9 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -18,7 +18,11 @@ _LC_BR_R1 = __LC_BR_R1 * the various thunks are merged into a single copy. */ .macro __THUNK_PROLOG_NAME name +#ifdef CONFIG_EXPOLINE_EXTERN + .pushsection .text,"ax",@progbits +#else .pushsection .text.\name,"axG",@progbits,\name,comdat +#endif .globl \name .hidden \name .type \name,@function @@ -115,7 +119,13 @@ _LC_BR_R1 = __LC_BR_R1 555: br \reg .endm +#ifdef CONFIG_EXPOLINE_EXTERN .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + .macro GEN_BR_THUNK_EXTERN reg,ruse=%r1 +#else + .macro GEN_BR_THUNK reg,ruse=%r1 +#endif __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse __THUNK_EPILOG diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 69feb8ed3312..5d415b3db6d1 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,6 +7,7 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline.S new file mode 100644 index 000000000000..92ed8409a7a4 --- /dev/null +++ b/arch/s390/lib/expoline.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include + +.macro GEN_ALL_BR_THUNK_EXTERN + .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GEN_BR_THUNK_EXTERN %r\r1 + .endr +.endm + +GEN_ALL_BR_THUNK_EXTERN diff --git a/arch/s390/tools/gcc-thunk-extern.sh b/arch/s390/tools/gcc-thunk-extern.sh new file mode 100755 index 000000000000..20bcbf6dd7ab --- /dev/null +++ b/arch/s390/tools/gcc-thunk-extern.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Borrowed from gcc: gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c +# Checks that we don't get error: section type conflict with ‘put_page’. + +cat << "END" | $@ -x c - -fno-PIE -march=z10 -mindirect-branch=thunk-extern -mfunction-return=thunk-extern -mindirect-branch-table -O2 -c -o /dev/null +int a; +int b (void); +void c (int); + +static void +put_page (void) +{ + if (b ()) + c (a); +} + +__attribute__ ((__section__ (".init.text"), __cold__)) void +d (void) +{ + put_page (); + put_page (); +} +END diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa33217914..dbc0aaf69e43 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -658,6 +658,11 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) strstarts(symname, "_savevr_") || strcmp(symname, ".TOC.") == 0) return 1; + + if (info->hdr->e_machine == EM_S390) + /* Expoline thunks are linked on all kernel modules during final link of .ko */ + if (strstarts(symname, "__s390_indirect_jump_r")) + return 1; /* Do not ignore this symbol */ return 0; } From 602bf1687e6f475de2fe29bb1ed81d03bdc06b6d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 6 Mar 2022 22:30:42 +0100 Subject: [PATCH 332/380] s390/nospec: align and size extern thunks Kernel has full control over how extern thunks generated by arch/s390/lib/expoline.S look like. Align them to 16 bytes like other symbols. Also set proper symbols size which is important for tooling. Acked-by: Ilya Leoshkevich Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index bbb5c4d84db9..c670c1dd1293 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -20,6 +20,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_PROLOG_NAME name #ifdef CONFIG_EXPOLINE_EXTERN .pushsection .text,"ax",@progbits + .align 16,0x07 #else .pushsection .text.\name,"axG",@progbits,\name,comdat #endif @@ -30,8 +31,11 @@ _LC_BR_R1 = __LC_BR_R1 CFI_STARTPROC .endm - .macro __THUNK_EPILOG + .macro __THUNK_EPILOG_NAME name CFI_ENDPROC +#ifdef CONFIG_EXPOLINE_EXTERN + .size \name, .-\name +#endif .popsection .endm @@ -40,6 +44,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm + .macro __THUNK_EPILOG_BR r1,r2 + __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1 + .endm + .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r1 .endm @@ -52,6 +60,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm + .macro __THUNK_EPILOG_BR r1,r2 + __THUNK_EPILOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 + .endm + .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r2\()use_r\r1 .endm @@ -128,7 +140,7 @@ _LC_BR_R1 = __LC_BR_R1 #endif __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse - __THUNK_EPILOG + __DECODE_RR __THUNK_EPILOG_BR,\reg,\ruse .endm .macro BR_EX reg,ruse=%r1 From 432b1cc78e985d3c783f1accb2507fbf5a87583d Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:06 +0100 Subject: [PATCH 333/380] s390/uaccess: Add copy_from/to_user_key functions Add copy_from/to_user_key functions, which perform storage key checking. These functions can be used by KVM for emulating instructions that need to be key checked. These functions differ from their non _key counterparts in include/linux/uaccess.h only in the additional key argument and must be kept in sync with those. Since the existing uaccess implementation on s390 makes use of move instructions that support having an additional access key supplied, we can implement raw_copy_from/to_user_key by enhancing the existing implementation. Signed-off-by: Janis Schoetterl-Glausch Acked-by: Heiko Carstens Reviewed-by: Christian Borntraeger Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-2-scgl@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 22 +++++++++ arch/s390/lib/uaccess.c | 81 +++++++++++++++++++++++++-------- 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 44b18800721a..70916e26777d 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -45,6 +45,28 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +unsigned long __must_check +_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(to, n, false))) + n = _copy_from_user_key(to, from, n, key); + return n; +} + +unsigned long __must_check +_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(from, n, true))) + n = _copy_to_user_key(to, from, n, key); + return n; +} + int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index f846ebe038fa..fe7803c653a2 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -60,11 +60,13 @@ static inline int copy_with_mvcos(void) #endif static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, .oac2.a = 1, }; @@ -95,19 +97,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr } static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%3\n" + "0: mvcp 0(%0,%2),0(%1),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" + "2: mvcp 0(%0,%2),0(%1),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -116,7 +118,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" + "4: mvcp 0(%4,%2),0(%1),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -124,24 +126,49 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + if (copy_with_mvcos()) + return copy_from_user_mvcos(to, from, n, key); + return copy_from_user_mvcp(to, from, n, key); +} + unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n); - return copy_from_user_mvcp(to, from, n); + return raw_copy_from_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_from_user); +unsigned long _copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + unsigned long res = n; + + might_fault(); + if (!should_fail_usercopy()) { + instrument_copy_from_user(to, from, n); + res = raw_copy_from_user_key(to, from, n, key); + } + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user_key); + static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, .oac1.a = 1, }; @@ -172,19 +199,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, } static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%3\n" + "0: mvcs 0(%0,%1),0(%2),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" + "2: mvcs 0(%0,%1),0(%2),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -193,7 +220,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" + "4: mvcs 0(%4,%1),0(%2),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -201,18 +228,36 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + if (copy_with_mvcos()) + return copy_to_user_mvcos(to, from, n, key); + return copy_to_user_mvcs(to, from, n, key); +} + unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n); - return copy_to_user_mvcs(to, from, n); + return raw_copy_to_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_to_user); +unsigned long _copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + might_fault(); + if (should_fail_usercopy()) + return n; + instrument_copy_to_user(to, from, n); + return raw_copy_to_user_key(to, from, n, key); +} +EXPORT_SYMBOL(_copy_to_user_key); + static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; From 4efd417f298bc23bc8b6ac5db5ff79af5ec92ac5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Feb 2022 22:43:31 +0100 Subject: [PATCH 334/380] s390: raise minimum supported machine generation to z10 Machine generations up to z9 (released in May 2006) have been officially out of service for several years now (z9 end of service - January 31, 2019). No distributions build kernels supporting those old machine generations anymore, except Debian, which seems to pick the oldest supported generation. The team supporting Debian on s390 has been notified about the change. Raising minimum supported machine generation to z10 helps to reduce maintenance cost and effectively remove code, which is not getting enough testing coverage due to lack of older hardware and distributions support. Besides that this unblocks some optimization opportunities and allows to use wider instruction set in asm files for future features implementation. Due to this change spectre mitigation and usercopy implementations could be drastically simplified and many newer instructions could be converted from ".insn" encoding to instruction names. Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 68 +---------- arch/s390/Makefile | 6 - arch/s390/include/asm/bitops.h | 12 -- arch/s390/include/asm/lowcore.h | 8 +- arch/s390/include/asm/nospec-insn.h | 34 ------ arch/s390/include/asm/timex.h | 4 - arch/s390/include/asm/uaccess.h | 18 --- arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/cache.c | 6 - arch/s390/kernel/ftrace.c | 17 +-- arch/s390/kernel/ftrace.h | 2 - arch/s390/kernel/mcount.S | 11 -- arch/s390/kernel/module.c | 12 +- arch/s390/kernel/nospec-branch.c | 6 - arch/s390/kernel/perf_cpum_cf.c | 11 +- arch/s390/kernel/processor.c | 22 +--- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 1 - arch/s390/kernel/uprobes.c | 16 +-- arch/s390/lib/uaccess.c | 170 ++-------------------------- arch/s390/mm/vmem.c | 8 +- arch/s390/net/bpf_jit_comp.c | 31 ++--- arch/s390/tools/gen_facilities.c | 8 -- 23 files changed, 38 insertions(+), 435 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 115b8cddefee..fe8d89176feb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,7 +120,6 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_ALLOCATOR @@ -230,20 +229,8 @@ source "kernel/livepatch/Kconfig" menu "Processor type and features" -config HAVE_MARCH_Z900_FEATURES - def_bool n - -config HAVE_MARCH_Z990_FEATURES - def_bool n - select HAVE_MARCH_Z900_FEATURES - -config HAVE_MARCH_Z9_109_FEATURES - def_bool n - select HAVE_MARCH_Z990_FEATURES - config HAVE_MARCH_Z10_FEATURES def_bool n - select HAVE_MARCH_Z9_109_FEATURES config HAVE_MARCH_Z196_FEATURES def_bool n @@ -269,41 +256,13 @@ choice prompt "Processor type" default MARCH_Z196 -config MARCH_Z900 - bool "IBM zSeries model z800 and z900" - select HAVE_MARCH_Z900_FEATURES - depends on $(cc-option,-march=z900) - help - Select this to enable optimizations for model z800/z900 (2064 and - 2066 series). This will enable some optimizations that are not - available on older ESA/390 (31 Bit) only CPUs. - -config MARCH_Z990 - bool "IBM zSeries model z890 and z990" - select HAVE_MARCH_Z990_FEATURES - depends on $(cc-option,-march=z990) - help - Select this to enable optimizations for model z890/z990 (2084 and - 2086 series). The kernel will be slightly faster but will not work - on older machines. - -config MARCH_Z9_109 - bool "IBM System z9" - select HAVE_MARCH_Z9_109_FEATURES - depends on $(cc-option,-march=z9-109) - help - Select this to enable optimizations for IBM System z9 (2094 and - 2096 series). The kernel will be slightly faster but will not work - on older machines. - config MARCH_Z10 bool "IBM System z10" select HAVE_MARCH_Z10_FEATURES depends on $(cc-option,-march=z10) help - Select this to enable optimizations for IBM System z10 (2097 and - 2098 series). The kernel will be slightly faster but will not work - on older machines. + Select this to enable optimizations for IBM System z10 (2097 and 2098 + series). This is the oldest machine generation currently supported. config MARCH_Z196 bool "IBM zEnterprise 114 and 196" @@ -352,15 +311,6 @@ config MARCH_Z15 endchoice -config MARCH_Z900_TUNE - def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT - -config MARCH_Z990_TUNE - def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT - -config MARCH_Z9_109_TUNE - def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT - config MARCH_Z10_TUNE def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT @@ -396,21 +346,8 @@ config TUNE_DEFAULT Tune the generated code for the target processor for which the kernel will be compiled. -config TUNE_Z900 - bool "IBM zSeries model z800 and z900" - depends on $(cc-option,-mtune=z900) - -config TUNE_Z990 - bool "IBM zSeries model z890 and z990" - depends on $(cc-option,-mtune=z990) - -config TUNE_Z9_109 - bool "IBM System z9" - depends on $(cc-option,-mtune=z9-109) - config TUNE_Z10 bool "IBM System z10" - depends on $(cc-option,-mtune=z10) config TUNE_Z196 bool "IBM zEnterprise 114 and 196" @@ -599,7 +536,6 @@ config EXPOLINE config EXPOLINE_EXTERN def_bool n depends on EXPOLINE - depends on HAVE_MARCH_Z10_FEATURES depends on CC_IS_GCC && GCC_VERSION >= 110200 depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC)) prompt "Generate expolines as extern functions." diff --git a/arch/s390/Makefile b/arch/s390/Makefile index bd75128b7d79..7a65bca1e5af 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -36,9 +36,6 @@ CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD -mflags-$(CONFIG_MARCH_Z900) := -march=z900 -mflags-$(CONFIG_MARCH_Z990) := -march=z990 -mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 mflags-$(CONFIG_MARCH_Z10) := -march=z10 mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 @@ -51,9 +48,6 @@ export CC_FLAGS_MARCH := $(mflags-y) aflags-y += $(mflags-y) cflags-y += $(mflags-y) -cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 -cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990 -cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 1d40630128a5..191dc7898b0f 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -256,8 +256,6 @@ static inline bool test_bit_inv(unsigned long nr, return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - /** * __flogr - find leftmost one * @word - The word to search @@ -376,16 +374,6 @@ static inline int fls(unsigned int word) return fls64(word); } -#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - -#include -#include -#include -#include -#include - -#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - #include #include #include diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9829d6b44a20..56002aeacabf 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -163,11 +163,9 @@ struct lowcore { __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ - /* br %r1 trampoline */ - __u16 br_r1_trampoline; /* 0x0400 */ - __u32 return_lpswe; /* 0x0402 */ - __u32 return_mcck_lpswe; /* 0x0406 */ - __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ + __u32 return_lpswe; /* 0x0400 */ + __u32 return_mcck_lpswe; /* 0x0404 */ + __u8 pad_0x040a[0x0e00-0x0408]; /* 0x0408 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index c670c1dd1293..62a99095f380 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -10,8 +10,6 @@ #ifdef CC_USING_EXPOLINE -_LC_BR_R1 = __LC_BR_R1 - /* * The expoline macros are used to create thunks in the same format * as gcc generates them. The 'comdat' section flag makes sure that @@ -39,7 +37,6 @@ _LC_BR_R1 = __LC_BR_R1 .popsection .endm -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES .macro __THUNK_PROLOG_BR r1,r2 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm @@ -55,23 +52,6 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r2 .endm -#else - .macro __THUNK_PROLOG_BR r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_EPILOG_BR r1,r2 - __THUNK_EPILOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_BR r1,r2 - jg __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_BRASL r1,r2,r3 - brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 - .endm -#endif .macro __DECODE_RR expand,reg,ruse .set __decode_fail,1 @@ -112,22 +92,8 @@ _LC_BR_R1 = __LC_BR_R1 .endm .macro __THUNK_EX_BR reg,ruse - # Be very careful when adding instructions to this macro! - # The ALTERNATIVE replacement code has a .+10 which targets - # the "br \reg" after the code has been patched. -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES exrl 0,555f j . -#else - .ifc \reg,%r1 - ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 - j . - .else - larl \ruse,555f - ex 0,0(\ruse) - j . - .endif -#endif 555: br \reg .endm diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 50d9b04ecbd1..ca9a8ab1261a 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -187,14 +187,10 @@ static inline unsigned long get_tod_clock(void) static inline unsigned long get_tod_clock_fast(void) { -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES unsigned long clk; asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; -#else - return get_tod_clock(); -#endif } static inline cycles_t get_cycles(void) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 70916e26777d..e8b4b1a7ec82 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -92,8 +92,6 @@ union oac { }; }; -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ @@ -187,22 +185,6 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign return rc; } -#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - size = raw_copy_to_user(ptr, x, size); - return size ? -EFAULT : 0; -} - -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - size = raw_copy_from_user(x, ptr, size); - return size ? -EFAULT : 0; -} - -#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c253d3faf443..7c74f0e17e5a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_GMAP, lowcore, gmap); - OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 8a9c3bf69f48..7ee3651d00ab 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -70,8 +70,6 @@ void show_cacheinfo(struct seq_file *m) struct cacheinfo *cache; int idx; - if (!test_facility(34)) - return; this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { cache = this_cpu_ci->info_list + idx; @@ -131,8 +129,6 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); @@ -156,8 +152,6 @@ int populate_cache_leaves(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); for (idx = 0, level = 0; level < this_cpu_ci->num_levels && idx < this_cpu_ci->num_leaves; idx++, level++) { diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index d8b96c5923c0..5e1f7bc00a25 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -58,15 +58,6 @@ asm( ); #ifdef CONFIG_EXPOLINE -asm( - " .align 16\n" - "ftrace_shared_hotpatch_trampoline_ex:\n" - " lmg %r0,%r1,2(%r1)\n" - " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n" - " j .\n" - "ftrace_shared_hotpatch_trampoline_ex_end:\n" -); - asm( " .align 16\n" "ftrace_shared_hotpatch_trampoline_exrl:\n" @@ -90,12 +81,8 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) tend = ftrace_shared_hotpatch_trampoline_br_end; #ifdef CONFIG_EXPOLINE if (!nospec_disable) { - tstart = ftrace_shared_hotpatch_trampoline_ex; - tend = ftrace_shared_hotpatch_trampoline_ex_end; - if (test_facility(35)) { /* exrl */ - tstart = ftrace_shared_hotpatch_trampoline_exrl; - tend = ftrace_shared_hotpatch_trampoline_exrl_end; - } + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; } #endif /* CONFIG_EXPOLINE */ if (end) diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h index 69e416f4c6b0..7f75a9616406 100644 --- a/arch/s390/kernel/ftrace.h +++ b/arch/s390/kernel/ftrace.h @@ -16,8 +16,6 @@ extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; extern const char ftrace_shared_hotpatch_trampoline_br[]; extern const char ftrace_shared_hotpatch_trampoline_br_end[]; -extern const char ftrace_shared_hotpatch_trampoline_ex[]; -extern const char ftrace_shared_hotpatch_trampoline_ex_end[]; extern const char ftrace_shared_hotpatch_trampoline_exrl[]; extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; extern const char ftrace_plt_template[]; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 6ace43d7e8d2..1326927a17b5 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -35,14 +35,8 @@ ENDPROC(ftrace_stub) .if \allregs == 1 # save psw mask # don't put any instructions clobbering CC before this point -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES epsw %r1,%r14 risbg %r14,%r1,0,31,32 -#else - epsw %r14,%r1 - sllg %r14,%r14,32 - lr %r14,%r1 -#endif .endif lgr %r1,%r15 @@ -58,12 +52,7 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS -#else - lghi %r14,_PIF_FTRACE_FULL_REGS - stg %r14,STACK_PTREGS_FLAGS(%r15) -#endif .else xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) .endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b032e556eeb7..c0dd72db77b8 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -517,15 +517,9 @@ int module_finalize(const Elf_Ehdr *hdr, ij = me->core_layout.base + me->arch.plt_offset + me->arch.plt_size - PLT_ENTRY_SIZE; - if (test_facility(35)) { - ij[0] = 0xc6000000; /* exrl %r0,.+10 */ - ij[1] = 0x0005a7f4; /* j . */ - ij[2] = 0x000007f1; /* br %r1 */ - } else { - ij[0] = 0x44000000 | (unsigned int) - offsetof(struct lowcore, br_r1_trampoline); - ij[1] = 0xa7f40000; /* j . */ - } + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . */ + ij[2] = 0x000007f1; /* br %r1 */ } secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index c302e0a7d38f..717bbcc056e5 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -118,12 +118,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) if (thunk[0] == 0xc6 && thunk[1] == 0x00) /* exrl %r0, */ br = thunk + (*(int *)(thunk + 2)) * 2; - else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 && - thunk[6] == 0x44 && thunk[7] == 0x00 && - (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 && - (thunk[1] & 0xf0) == (thunk[8] & 0xf0)) - /* larl %rx, + ex %r0,0(%rx) */ - br = thunk + (*(int *)(thunk + 2)) * 2; else continue; if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ee8707abdb6a..483ab5e10164 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1451,6 +1451,8 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) /* Get the CPU speed, try sampling facility first and CPU attributes second. */ static void cfdiag_get_cpu_speed(void) { + unsigned long mhz; + if (cpum_sf_avail()) { /* Sampling facility first */ struct hws_qsi_info_block si; @@ -1464,12 +1466,9 @@ static void cfdiag_get_cpu_speed(void) /* Fallback: CPU speed extract static part. Used in case * CPU Measurement Sampling Facility is turned off. */ - if (test_facility(34)) { - unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); - - if (mhz != -1UL) - cfdiag_cpu_speed = mhz & 0xffffffff; - } + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; } static int cfset_init(void) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index d9d4a806979e..7a74ea5f7531 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -172,8 +172,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) static int __init setup_hwcaps(void) { /* instructions named N3, "backported" to esa-mode */ - if (test_facility(0)) - elf_hwcap |= HWCAP_ESAN3; + elf_hwcap |= HWCAP_ESAN3; /* z/Architecture mode active */ elf_hwcap |= HWCAP_ZARCH; @@ -191,8 +190,7 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_LDISP; /* extended-immediate */ - if (test_facility(21)) - elf_hwcap |= HWCAP_EIMM; + elf_hwcap |= HWCAP_EIMM; /* extended-translation facility 3 enhancement */ if (test_facility(22) && test_facility(30)) @@ -262,21 +260,7 @@ static int __init setup_elf_platform(void) get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: + default: /* Use "z10" as default. */ strcpy(elf_platform, "z10"); break; case 0x2817: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 05327be3a982..84e23fcc1106 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -490,7 +490,6 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 368b58e4c2e7..127da1850b06 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -207,7 +207,6 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bd3ef121c379..b88345ef8bd9 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -177,9 +177,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = EMU_ADDRESSING; \ @@ -194,9 +192,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(ptr) __ptr = (ptr); \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)__ptr & mask) \ + if ((u64 __force)__ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (put_user(*(input), __ptr)) \ __rc = EMU_ADDRESSING; \ @@ -213,9 +209,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = EMU_ADDRESSING; \ @@ -327,10 +321,6 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) break; case 0xc6: switch (insn->opc1) { - case 0x02: /* pfdrl */ - if (!test_facility(34)) - rc = EMU_ILLEGAL_OP; - break; case 0x04: /* cghrl */ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); break; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index fe7803c653a2..a37f6fdc01e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -8,14 +8,10 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ -#include #include #include -#include #include #include -#include -#include #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) @@ -35,32 +31,8 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES -static DEFINE_STATIC_KEY_FALSE(have_mvcos); - -static int __init uaccess_init(void) -{ - if (test_facility(27)) - static_branch_enable(&have_mvcos); - return 0; -} -early_initcall(uaccess_init); - -static inline int copy_with_mvcos(void) -{ - if (static_branch_likely(&have_mvcos)) - return 1; - return 0; -} -#else -static inline int copy_with_mvcos(void) -{ - return 1; -} -#endif - -static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size, unsigned long key) +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -90,55 +62,12 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n, key); - return copy_from_user_mvcp(to, from, n, key); -} - unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { return raw_copy_from_user_key(to, from, n, 0); @@ -161,8 +90,8 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size, unsigned long key) +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -192,55 +121,12 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n, key); - return copy_to_user_mvcs(to, from, n, key); -} - unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { return raw_copy_to_user_key(to, from, n, 0); @@ -258,7 +144,7 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, } EXPORT_SYMBOL(_copy_to_user_key); -static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) +unsigned long __clear_user(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; union oac spec = { @@ -290,46 +176,4 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size : "cc", "memory", "0"); return size; } - -static inline unsigned long clear_user_xc(void __user *to, unsigned long size) -{ - unsigned long tmp1, tmp2; - - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - " xc 0(1,%1),0(%1)\n" - "0: aghi %0,257\n" - " la %2,255(%1)\n" /* %2 = ptr + 255 */ - " srl %2,12\n" - " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ - " slgr %2,%1\n" - " clgr %0,%2\n" /* clear crosses next page boundary? */ - " jnh 5f\n" - " aghi %2,-1\n" - "1: ex %2,0(%3)\n" - " aghi %2,1\n" - " slgr %0,%2\n" - " j 5f\n" - "2: xc 0(256,%1),0(%1)\n" - " la %1,256(%1)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,0(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - if (copy_with_mvcos()) - return clear_user_mvcos(to, size); - return clear_user_xc(to, size); -} EXPORT_SYMBOL(__clear_user); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 72c525ce7221..c2583f921ca8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -584,13 +584,9 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) { - /* - * Lowcore must be executable for LPSWE - * and expoline trampoline branch instructions. - */ + /* lowcore must be executable for LPSWE */ + if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - } pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e1e57a30ac66..aede9a3ca3f7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -570,15 +570,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) if (nospec_uses_trampoline()) { jit->r14_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r14 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); - /* ex 0,0(%r1) */ - EMIT4_DISP(0x44000000, REG_0, REG_1, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); /* j . */ EMIT4_PCREL(0xa7f40000, 0); } @@ -589,20 +582,12 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); - /* br %r1 */ - _EMIT2(0x07f1); - } else { - /* ex 0,S390_lowcore.br_r1_tampoline */ - EMIT4_DISP(0x44000000, REG_0, REG_0, - offsetof(struct lowcore, br_r1_trampoline)); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); } } diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 606324e56e4e..530dd941d140 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -27,24 +27,16 @@ static struct facility_def facility_defs[] = { */ .name = "FACILITIES_ALS", .bits = (int[]){ -#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES 0, /* N3 instructions */ 1, /* z/Arch mode installed */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES 18, /* long displacement facility */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES 21, /* extended-immediate facility */ 25, /* store clock fast */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES 27, /* mvcos */ 32, /* compare and swap and store */ 33, /* compare and swap and store 2 */ 34, /* general instructions extension */ 35, /* execute extensions */ -#endif #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 45, /* fast-BCR, etc. */ #endif From bedc96698f482ac5904998b16ba7a800ec71e220 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:18:14 +0100 Subject: [PATCH 335/380] s390/nospec: move to single register thunks Assembler generated expoline thunks were in a form __s390_indirect_jump_rXuse_rX when exrl instruction has not been available. Now with z10 as minimum supported machine generation there is no need for 2 register thunks, always generate __s390_indirect_jump_rX versions. Acked-by: Heiko Carstens Acked-by: Sumanth Korikkar Acked-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 74 +++++++++++++---------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 62a99095f380..2cfcd5ac3a8b 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -37,28 +37,41 @@ .popsection .endm - .macro __THUNK_PROLOG_BR r1,r2 + .macro __THUNK_PROLOG_BR r1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_EPILOG_BR r1,r2 + .macro __THUNK_EPILOG_BR r1 __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BR r1,r2 + .macro __THUNK_BR r1 jg __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BRASL r1,r2,r3 + .macro __THUNK_BRASL r1,r2 brasl \r1,__s390_indirect_jump_r\r2 .endm - .macro __DECODE_RR expand,reg,ruse + .macro __DECODE_R expand,reg .set __decode_fail,1 .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 .ifc \reg,%r\r1 + \expand \r1 + .set __decode_fail,0 + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_R failed" + .endif + .endm + + .macro __DECODE_RR expand,rsave,rtarget + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r2 + .ifc \rtarget,%r\r2 \expand \r1,\r2 .set __decode_fail,0 .endif @@ -70,68 +83,47 @@ .endif .endm - .macro __DECODE_RRR expand,rsave,rtarget,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rsave,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rtarget,%r\r2 - .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r3 - \expand \r1,\r2,\r3 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_RRR failed" - .endif - .endm - - .macro __THUNK_EX_BR reg,ruse + .macro __THUNK_EX_BR reg exrl 0,555f j . 555: br \reg .endm #ifdef CONFIG_EXPOLINE_EXTERN - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg .endm - .macro GEN_BR_THUNK_EXTERN reg,ruse=%r1 + .macro GEN_BR_THUNK_EXTERN reg #else - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg #endif - __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse - __THUNK_EX_BR \reg,\ruse - __DECODE_RR __THUNK_EPILOG_BR,\reg,\ruse + __DECODE_R __THUNK_PROLOG_BR,\reg + __THUNK_EX_BR \reg + __DECODE_R __THUNK_EPILOG_BR,\reg .endm - .macro BR_EX reg,ruse=%r1 -557: __DECODE_RR __THUNK_BR,\reg,\ruse + .macro BR_EX reg +557: __DECODE_R __THUNK_BR,\reg .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection .endm - .macro BASR_EX rsave,rtarget,ruse=%r1 -559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .macro BASR_EX rsave,rtarget +559: __DECODE_RR __THUNK_BRASL,\rsave,\rtarget .pushsection .s390_indirect_branches,"a",@progbits .long 559b-. .popsection .endm #else - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg .endm - .macro BR_EX reg,ruse=%r1 + .macro BR_EX reg br \reg .endm - .macro BASR_EX rsave,rtarget,ruse=%r1 + .macro BASR_EX rsave,rtarget basr \rsave,\rtarget .endm #endif /* CC_USING_EXPOLINE */ From 10bc15ba3a853723267016274b58be6c9a6e64da Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:38:23 +0100 Subject: [PATCH 336/380] s390: assume stckf is always present With z10 as minimum supported machine generation the store-clock-fast facility (25) is always present and checked in als code. Drop alternatives and always use stckf. Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 11 +++-------- arch/s390/kernel/vtime.c | 13 ++++++------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3781de26f207..8f15e418968a 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -98,11 +98,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro STCK savearea - ALTERNATIVE ".insn s,0xb2050000,\savearea", \ - ".insn s,0xb27c0000,\savearea", 25 - .endm - /* * The TSTMSK macro generates a test-under-mask instruction by * calculating the memory offset for the specified mask value. @@ -442,7 +437,7 @@ ENDPROC(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler ENTRY(\name) - STCK __LC_INT_CLOCK + stckf __LC_INT_CLOCK stpt __LC_SYS_ENTER_TIMER STBEAR __LC_LAST_BREAK BPOFF @@ -514,7 +509,7 @@ ENTRY(psw_idle) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON - STCK __CLOCK_IDLE_ENTER(%r2) + stckf __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) .globl psw_idle_exit @@ -526,7 +521,7 @@ ENDPROC(psw_idle) * Machine check handler routines */ ENTRY(mcck_int_handler) - STCK __LC_MCCK_CLOCK + stckf __LC_MCCK_CLOCK BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index f216a1b2f825..9436f3053b88 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -128,13 +128,12 @@ static int do_account_vtime(struct task_struct *tsk) timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */ - alternative_io("stpt %0\n .insn s,0xb2050000,%1\n", - "stpt %0\n .insn s,0xb27c0000,%1\n", - 25, - ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)), - ASM_NO_INPUT_CLOBBER("cc")); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stckf %1" /* Store current tod clock value */ + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; From 731efc9613ee073c8944d0d56616d421cf906b0b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:39:02 +0100 Subject: [PATCH 337/380] s390: convert ".insn" encoding to instruction names With z10 as minimum supported machine generation many ".insn" encodings could be now converted to instruction names. There are couple of exceptions - stfle is used from the als code built for z900 and cannot be converted - few ".insn" directives encode unsupported instruction formats The generated code is identical before/after this change. Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/chacha-s390.S | 20 ++++++++++---------- arch/s390/include/asm/cpu_mf.h | 12 ++++++------ arch/s390/include/asm/pgtable.h | 16 ++++++++-------- arch/s390/include/asm/processor.h | 3 +-- arch/s390/include/asm/timex.h | 2 +- arch/s390/include/asm/tlbflush.h | 4 +--- arch/s390/kernel/entry.S | 4 ++-- arch/s390/kernel/ftrace.c | 2 +- arch/s390/lib/uaccess.c | 12 ++++++------ arch/s390/mm/gmap.c | 2 +- 10 files changed, 37 insertions(+), 40 deletions(-) diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S index badf5c49717d..9b033622191c 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/crypto/chacha-s390.S @@ -312,7 +312,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -339,7 +339,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -366,7 +366,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -472,7 +472,7 @@ ENDPROC(chacha20_vx_4x) #define T3 %v30 ENTRY(chacha20_vx) - .insn rilu,0xc20e00000000,LEN,256 # clgfi LEN,256 + clgfi LEN,256 jle chacha20_vx_4x stmg %r6,%r7,6*8(SP) @@ -725,7 +725,7 @@ ENTRY(chacha20_vx) VPERM C0,C0,C0,BEPERM VPERM D0,D0,D0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D2,D2,T2 # +K[3]+2 @@ -754,7 +754,7 @@ ENTRY(chacha20_vx) VPERM C0,C1,C1,BEPERM VPERM D0,D1,D1,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -780,7 +780,7 @@ ENTRY(chacha20_vx) VPERM C0,C2,C2,BEPERM VPERM D0,D2,D2,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -807,7 +807,7 @@ ENTRY(chacha20_vx) VPERM C0,C3,C3,BEPERM VPERM D0,D3,D3,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D3,D2,T1 # K[3]+4 @@ -837,7 +837,7 @@ ENTRY(chacha20_vx) VPERM C0,C4,C4,BEPERM VPERM D0,D4,D4,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -864,7 +864,7 @@ ENTRY(chacha20_vx) VPERM C0,C5,C5,BEPERM VPERM D0,D5,D5,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 78bb336600bf..feaba12dbecb 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -160,7 +160,7 @@ struct hws_trailer_entry { /* Load program parameter */ static inline void lpp(void *pp) { - asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory"); + asm volatile("lpp 0(%0)\n" :: "a" (pp) : "memory"); } /* Query counter information */ @@ -169,7 +169,7 @@ static inline int qctri(struct cpumf_ctr_info *info) int rc = -EINVAL; asm volatile ( - "0: .insn s,0xb28e0000,%1\n" + "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(1b, 2b) @@ -183,7 +183,7 @@ static inline int lcctl(u64 ctl) int cc; asm volatile ( - " .insn s,0xb2840000,%1\n" + " lcctl %1\n" " ipm %0\n" " srl %0,28\n" : "=d" (cc) : "Q" (ctl) : "cc"); @@ -197,7 +197,7 @@ static inline int __ecctr(u64 ctr, u64 *content) int cc; asm volatile ( - " .insn rre,0xb2e40000,%0,%2\n" + " ecctr %0,%2\n" " ipm %1\n" " srl %1,28\n" : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); @@ -247,7 +247,7 @@ static inline int qsi(struct hws_qsi_info_block *info) int cc = 1; asm volatile( - "0: .insn s,0xb2860000,%1\n" + "0: qsi %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) @@ -262,7 +262,7 @@ static inline int lsctl(struct hws_lsctl_request_block *req) cc = 1; asm volatile( - "0: .insn s,0xb2870000,0(%1)\n" + "0: lsctl 0(%1)\n" "1: ipm %0\n" " srl %0,28\n" "2:\n" diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a3f26e3aa8b5..9df679152620 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -600,7 +600,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new unsigned long address = (unsigned long)ptr | 1; asm volatile( - " .insn rre,0xb98a0000,%[r1],%[address]" + " cspg %[r1],%[address]" : [r1] "+&d" (r1.pair), "+m" (*ptr) : [address] "d" (address) : "cc"); @@ -1052,7 +1052,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, if (__builtin_constant_p(opt) && opt == 0) { /* Invalidation + TLB flush for the pte */ asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + " ipte %[r1],%[r2],0,%[m4]" : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), [m4] "i" (local)); return; @@ -1061,7 +1061,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, /* Invalidate ptes with options + TLB flush of the ptes */ opt = opt | (asce & _ASCE_ORIGIN); asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (opt) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } @@ -1074,7 +1074,7 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr, /* Invalidate a range of ptes + TLB flush of the ptes */ do { asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (nr) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } while (nr != 255); @@ -1535,7 +1535,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), [m4] "i" (local) @@ -1543,7 +1543,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) @@ -1562,7 +1562,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)), [m4] "i" (local) @@ -1570,7 +1570,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 022cf0925e56..84ec63145325 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -225,8 +225,7 @@ static inline unsigned long __ecag(unsigned int asi, unsigned char parm) { unsigned long val; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (asi << 8 | parm)); + asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm)); return val; } diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index ca9a8ab1261a..2cfce42aa7fc 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -148,7 +148,7 @@ struct ptff_qui { asm volatile( \ " lgr 0,%[reg0]\n" \ " lgr 1,%[reg1]\n" \ - " .insn e,0x0104\n" \ + " ptff\n" \ " ipm %[rc]\n" \ " srl %[rc],28\n" \ : [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6448bb5be10c..a6e2cd89b609 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -25,9 +25,7 @@ static inline void __tlb_flush_idte(unsigned long asce) if (MACHINE_HAS_TLB_GUEST) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (opt), "a" (asce) : "cc"); + asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); } /* diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 8f15e418968a..a601a518b569 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -226,7 +226,7 @@ ENTRY(__switch_to) aghi %r3,__TASK_pid mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 BR_EX %r14 ENDPROC(__switch_to) @@ -648,7 +648,7 @@ ENTRY(mcck_int_handler) ENDPROC(mcck_int_handler) ENTRY(restart_int_handler) - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5e1f7bc00a25..1852d46babb1 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -62,7 +62,7 @@ asm( " .align 16\n" "ftrace_shared_hotpatch_trampoline_exrl:\n" " lmg %r0,%r1,2(%r1)\n" - " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */ + " exrl %r0,0f\n" " j .\n" "0: br %r1\n" "ftrace_shared_hotpatch_trampoline_exrl_end:\n" diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index a37f6fdc01e8..d7b3b193d108 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -45,7 +45,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "0: mvcos 0(%2),0(%1),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -56,7 +56,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "3: mvcos 0(%2),0(%1),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" @@ -104,7 +104,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "0: mvcos 0(%1),0(%2),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -115,7 +115,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "3: mvcos 0(%1),0(%2),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" @@ -155,7 +155,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + "0: mvcos 0(%1),0(%4),%0\n" " jz 4f\n" "1: algr %0,%2\n" " slgr %1,%2\n" @@ -165,7 +165,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) " slgr %3,%1\n" " clgr %0,%3\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + "3: mvcos 0(%1),0(%4),%3\n" " slgr %0,%3\n" " j 5f\n" "4: slgr %0,%0\n" diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a57224a4c141..af03cacf34ec 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1278,7 +1278,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) { asm volatile( - " .insn rrf,0xb98e0000,%0,%1,0,0" + " idte %0,0,%1" : : "a" (asce), "a" (vaddr) : "cc", "memory"); } From 63bf38ff5bc3d8cee6c6a089657876d0b669cae1 Mon Sep 17 00:00:00 2001 From: Tobias Huschle Date: Mon, 21 Feb 2022 12:55:52 +0100 Subject: [PATCH 338/380] s390/kprobes: Avoid additional kprobe in kretprobe handling So far, s390 registered a krobe on __kretprobe_trampoline which is called everytime a kretprobe fires. This kprobe would then determine the correct return address and adjust the psw accordingly, such that the kretprobe would branch to the appropriate address after completion. Some other archs handle kretprobes without such an additional kprobe. This approach is adopted to s390 with this patch. Furthermore, the __kretprobe_trampoline now uses an assembler function to correctly gather the register and psw content to be passed to the registered kretprobe handler as struct pt_regs. After completion, the register content and the psw are set based on the contents of said pt_regs struct. Note that a change to the psw address in struct pt_regs will not have an impact, as the probe will still return to the original return address of the probed function. The return address is now recovered by using the appropriate function arch_kretprobe_fixup_return. The no longer needed kprobe is removed. Reviewed-by: Vasily Gorbik Signed-off-by: Tobias Huschle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kprobes.h | 1 + arch/s390/kernel/Makefile | 4 ++- arch/s390/kernel/kprobes.c | 38 +++++++++--------------- arch/s390/kernel/mcount.S | 51 +++++++++++++++++++++++++++------ 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 5eb722c984e4..598095f4b924 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -71,6 +71,7 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); void __kretprobe_trampoline(void); +void trampoline_probe_handler(struct pt_regs *regs); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index be8007f367aa..c8d1b6aa823e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,9 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += kprobes_insn_page.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o +obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 7e2910e4172b..e32c14fd1282 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -372,33 +372,26 @@ static int kprobe_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_handler); -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -static void __used kretprobe_trampoline_holder(void) +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) { - asm volatile(".global __kretprobe_trampoline\n" - "__kretprobe_trampoline: bcr 0,0\n"); + /* Replace fake return address with real one. */ + regs->gprs[14] = (unsigned long)correct_ret_addr; } +NOKPROBE_SYMBOL(arch_kretprobe_fixup_return); /* - * Called when the probe at kretprobe trampoline is hit + * Called from __kretprobe_trampoline */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +void trampoline_probe_handler(struct pt_regs *regs) { - regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + kretprobe_trampoline_handler(regs, NULL); } NOKPROBE_SYMBOL(trampoline_probe_handler); +/* assembler function that handles the kretprobes must not be probed itself */ +NOKPROBE_SYMBOL(__kretprobe_trampoline); + /* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" @@ -551,18 +544,13 @@ int kprobe_exceptions_notify(struct notifier_block *self, } NOKPROBE_SYMBOL(kprobe_exceptions_notify); -static struct kprobe trampoline = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline); + return 0; } int arch_trampoline_kprobe(struct kprobe *p) { - return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline; + return 0; } NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 1326927a17b5..4786bfe02144 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -11,14 +11,6 @@ #include #include - GEN_BR_THUNK %r1 - GEN_BR_THUNK %r14 - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - BR_EX %r14 -ENDPROC(ftrace_stub) #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -29,6 +21,17 @@ ENDPROC(ftrace_stub) /* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 +#ifdef CONFIG_FUNCTION_TRACER + + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + BR_EX %r14 +ENDPROC(ftrace_stub) + .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller @@ -130,3 +133,35 @@ SYM_FUNC_START(return_to_handler) SYM_FUNC_END(return_to_handler) #endif +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_KPROBES + +SYM_FUNC_START(__kretprobe_trampoline) + + stg %r14,(__SF_GPRS+8*8)(%r15) + lay %r15,-STACK_FRAME_SIZE(%r15) + stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) + + # store original stack pointer in backchain and pt_regs + lay %r7,STACK_FRAME_SIZE(%r15) + stg %r7,__SF_BACKCHAIN(%r15) + stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) + + # store full psw + epsw %r2,%r3 + risbg %r3,%r2,0,31,32 + stg %r3,STACK_PTREGS_PSW(%r15) + larl %r1,__kretprobe_trampoline + stg %r1,STACK_PTREGS_PSW+8(%r15) + + lay %r2,STACK_PTREGS(%r15) + brasl %r14,trampoline_probe_handler + + mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) + lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) + lpswe __SF_EMPTY(%r15) + +SYM_FUNC_END(__kretprobe_trampoline) + +#endif /* CONFIG_KPROBES */ From c65f677b62d6180cc174e06f953f7fe860adf6d1 Mon Sep 17 00:00:00 2001 From: Tobias Huschle Date: Thu, 24 Feb 2022 16:14:21 +0100 Subject: [PATCH 339/380] s390/test_unwind: add kretprobe tests Add tests to verify that s390 kretprobes maintain a correct stack chain and ensure their proper function. Reviewed-by: Vasily Gorbik Signed-off-by: Tobias Huschle Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 07ef89eeb85a..c01f02887de4 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -132,6 +132,7 @@ static struct unwindme *unwindme; #define UWM_PGM 0x40 /* Unwind from program check handler */ #define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ #define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ +#define UWM_KRETPROBE 0x200 /* Unwind kretprobe handlers. */ static __always_inline unsigned long get_psw_addr(void) { @@ -143,6 +144,55 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } +static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct unwindme *u = unwindme; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); + + return 0; +} + +static noinline notrace void test_unwind_kretprobed_func(void) +{ + asm volatile(" nop\n"); +} + +static noinline void test_unwind_kretprobed_func_caller(void) +{ + test_unwind_kretprobed_func(); +} + +static int test_unwind_kretprobe(struct unwindme *u) +{ + int ret; + struct kretprobe my_kretprobe; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + + memset(&my_kretprobe, 0, sizeof(my_kretprobe)); + my_kretprobe.handler = kretprobe_ret_handler; + my_kretprobe.maxactive = 1; + my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func; + + ret = register_kretprobe(&my_kretprobe); + + if (ret < 0) { + kunit_err(current_test, "register_kretprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kretprobed_func_caller(); + unregister_kretprobe(&my_kretprobe); + unwindme = NULL; + return u->ret; +} + static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -254,6 +304,8 @@ static noinline int unwindme_func4(struct unwindme *u) return 0; } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { return test_unwind_kprobe(u); + } else if (u->flags & (UWM_KRETPROBE)) { + return test_unwind_kretprobe(u); } else if (u->flags & UWM_FTRACE) { return test_unwind_ftrace(u); } else { @@ -396,6 +448,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS), }; /* From 9d1f0ec9f71780e69ceb9d91697600c747d6e02e Mon Sep 17 00:00:00 2001 From: "Michael T. Kloos" Date: Mon, 7 Mar 2022 20:03:21 -0500 Subject: [PATCH 340/380] riscv: Fixed misaligned memory access. Fixed pointer comparison. Rewrote the RISC-V memmove() assembly implementation. The previous implementation did not check memory alignment and it compared 2 pointers with a signed comparison. The misaligned memory access would cause the kernel to crash on systems that did not emulate it in firmware and did not support it in hardware. Firmware emulation is slow and may not exist. The RISC-V spec does not guarantee that support for misaligned memory accesses will exist. It should not be depended on. This patch now checks for XLEN granularity of co-alignment between the pointers. Failing that, copying is done by loading from the 2 contiguous and naturally aligned XLEN memory locations containing the overlapping XLEN sized data to be copied. The data is shifted into the correct place and binary or'ed together on each iteration. The result is then stored into the corresponding naturally aligned XLEN sized location in the destination. For unaligned data at the terminations of the regions to be copied or for copies less than (2 * XLEN) in size, byte copy is used. This patch also now uses unsigned comparison for the pointers and migrates to the newer assembler annotations from the now deprecated ones. Signed-off-by: Michael T. Kloos Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/memmove.S | 352 +++++++++++++++++++++++++++++++++------ 1 file changed, 302 insertions(+), 50 deletions(-) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 07d1d2152ba5..e0609e1f0864 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -1,64 +1,316 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Michael T. Kloos + */ #include #include -ENTRY(__memmove) -WEAK(memmove) - move t0, a0 - move t1, a1 +SYM_FUNC_START(__memmove) +SYM_FUNC_START_WEAK(memmove) + /* + * Returns + * a0 - dest + * + * Parameters + * a0 - Inclusive first byte of dest + * a1 - Inclusive first byte of src + * a2 - Length of copy n + * + * Because the return matches the parameter register a0, + * we will not clobber or modify that register. + * + * Note: This currently only works on little-endian. + * To port to big-endian, reverse the direction of shifts + * in the 2 misaligned fixup copy loops. + */ - beq a0, a1, exit_memcpy - beqz a2, exit_memcpy - srli t2, a2, 0x2 + /* Return if nothing to do */ + beq a0, a1, return_from_memmove + beqz a2, return_from_memmove - slt t3, a0, a1 - beqz t3, do_reverse + /* + * Register Uses + * Forward Copy: a1 - Index counter of src + * Reverse Copy: a4 - Index counter of src + * Forward Copy: t3 - Index counter of dest + * Reverse Copy: t4 - Index counter of dest + * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest + * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest + * Both Copy Modes: t0 - Link / Temporary for load-store + * Both Copy Modes: t1 - Temporary for load-store + * Both Copy Modes: t2 - Temporary for load-store + * Both Copy Modes: a5 - dest to src alignment offset + * Both Copy Modes: a6 - Shift ammount + * Both Copy Modes: a7 - Inverse Shift ammount + * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops + */ - andi a2, a2, 0x3 - li t4, 1 - beqz t2, byte_copy + /* + * Solve for some register values now. + * Byte copy does not need t5 or t6. + */ + mv t3, a0 + add t4, a0, a2 + add a4, a1, a2 -word_copy: - lw t3, 0(a1) - addi t2, t2, -1 - addi a1, a1, 4 - sw t3, 0(a0) - addi a0, a0, 4 - bnez t2, word_copy - beqz a2, exit_memcpy - j byte_copy + /* + * Byte copy if copying less than (2 * SZREG) bytes. This can + * cause problems with the bulk copy implementation and is + * small enough not to bother. + */ + andi t0, a2, -(2 * SZREG) + beqz t0, byte_copy -do_reverse: - add a0, a0, a2 - add a1, a1, a2 - andi a2, a2, 0x3 - li t4, -1 - beqz t2, reverse_byte_copy + /* + * Now solve for t5 and t6. + */ + andi t5, t3, -SZREG + andi t6, t4, -SZREG + /* + * If dest(Register t3) rounded down to the nearest naturally + * aligned SZREG address, does not equal dest, then add SZREG + * to find the low-bound of SZREG alignment in the dest memory + * region. Note that this could overshoot the dest memory + * region if n is less than SZREG. This is one reason why + * we always byte copy if n is less than SZREG. + * Otherwise, dest is already naturally aligned to SZREG. + */ + beq t5, t3, 1f + addi t5, t5, SZREG + 1: -reverse_word_copy: - addi a1, a1, -4 - addi t2, t2, -1 - lw t3, 0(a1) - addi a0, a0, -4 - sw t3, 0(a0) - bnez t2, reverse_word_copy - beqz a2, exit_memcpy + /* + * If the dest and src are co-aligned to SZREG, then there is + * no need for the full rigmarole of a full misaligned fixup copy. + * Instead, do a simpler co-aligned copy. + */ + xor t0, a0, a1 + andi t1, t0, (SZREG - 1) + beqz t1, coaligned_copy + /* Fall through to misaligned fixup copy */ -reverse_byte_copy: - addi a0, a0, -1 - addi a1, a1, -1 +misaligned_fixup_copy: + bltu a1, a0, misaligned_fixup_copy_reverse +misaligned_fixup_copy_forward: + jal t0, byte_copy_until_aligned_forward + + andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a1, t3 /* Find the difference between src and dest */ + andi a1, a1, -SZREG /* Align the src pointer */ + addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Forward + * load_val0 = load_ptr[0]; + * do { + * load_val1 = load_ptr[1]; + * store_ptr += 2; + * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val0 = load_ptr[2]; + * load_ptr += 2; + * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t0, (0 * SZREG)(a1) + 1: + REG_L t1, (1 * SZREG)(a1) + addi t3, t3, (2 * SZREG) + srl t0, t0, a6 + sll t2, t1, a7 + or t2, t0, t2 + REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) + + beq t3, a2, 2f + + REG_L t0, (2 * SZREG)(a1) + addi a1, a1, (2 * SZREG) + srl t1, t1, a6 + sll t2, t0, a7 + or t2, t1, t2 + REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) + + bne t3, t6, 1b + 2: + mv t3, t6 /* Fix the dest pointer in case the loop was broken */ + + add a1, t3, a5 /* Restore the src pointer */ + j byte_copy_forward /* Copy any remaining bytes */ + +misaligned_fixup_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a4, t4 /* Find the difference between src and dest */ + andi a4, a4, -SZREG /* Align the src pointer */ + addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Reverse + * load_val1 = load_ptr[0]; + * do { + * load_val0 = load_ptr[-1]; + * store_ptr -= 2; + * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val1 = load_ptr[-2]; + * load_ptr -= 2; + * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t1, ( 0 * SZREG)(a4) + 1: + REG_L t0, (-1 * SZREG)(a4) + addi t4, t4, (-2 * SZREG) + sll t1, t1, a7 + srl t2, t0, a6 + or t2, t1, t2 + REG_S t2, ( 1 * SZREG)(t4) + + beq t4, a2, 2f + + REG_L t1, (-2 * SZREG)(a4) + addi a4, a4, (-2 * SZREG) + sll t0, t0, a7 + srl t2, t1, a6 + or t2, t0, t2 + REG_S t2, ( 0 * SZREG)(t4) + + bne t4, t5, 1b + 2: + mv t4, t5 /* Fix the dest pointer in case the loop was broken */ + + add a4, t4, a5 /* Restore the src pointer */ + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * Simple copy loops for SZREG co-aligned memory locations. + * These also make calls to do byte copies for any unaligned + * data at their terminations. + */ +coaligned_copy: + bltu a1, a0, coaligned_copy_reverse + +coaligned_copy_forward: + jal t0, byte_copy_until_aligned_forward + + 1: + REG_L t1, ( 0 * SZREG)(a1) + addi a1, a1, SZREG + addi t3, t3, SZREG + REG_S t1, (-1 * SZREG)(t3) + bne t3, t6, 1b + + j byte_copy_forward /* Copy any remaining bytes */ + +coaligned_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + 1: + REG_L t1, (-1 * SZREG)(a4) + addi a4, a4, -SZREG + addi t4, t4, -SZREG + REG_S t1, ( 0 * SZREG)(t4) + bne t4, t5, 1b + + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * These are basically sub-functions within the function. They + * are used to byte copy until the dest pointer is in alignment. + * At which point, a bulk copy method can be used by the + * calling code. These work on the same registers as the bulk + * copy loops. Therefore, the register values can be picked + * up from where they were left and we avoid code duplication + * without any overhead except the call in and return jumps. + */ +byte_copy_until_aligned_forward: + beq t3, t5, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t5, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +byte_copy_until_aligned_reverse: + beq t4, t6, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t6, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +/* + * Simple byte copy loops. + * These will byte copy until they reach the end of data to copy. + * At that point, they will call to return from memmove. + */ byte_copy: - lb t3, 0(a1) - addi a2, a2, -1 - sb t3, 0(a0) - add a1, a1, t4 - add a0, a0, t4 - bnez a2, byte_copy + bltu a1, a0, byte_copy_reverse -exit_memcpy: - move a0, t0 - move a1, t1 - ret -END(__memmove) +byte_copy_forward: + beq t3, t4, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t4, 1b + 2: + ret + +byte_copy_reverse: + beq t4, t3, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t3, 1b + 2: + +return_from_memmove: + ret + +SYM_FUNC_END(memmove) +SYM_FUNC_END(__memmove) From 4528668ca331f7ce5999b7746657b46db5b3b785 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 9 Mar 2022 09:17:10 +0000 Subject: [PATCH 341/380] mips: cdmm: Fix refcount leak in mips_cdmm_phys_base The of_find_compatible_node() function returns a node pointer with refcount incremented, We should use of_node_put() on it when done Add the missing of_node_put() to release the refcount. Fixes: 2121aa3e2312 ("mips: cdmm: Add mti,mips-cdmm dtb node support") Signed-off-by: Miaoqian Lin Acked-by: Serge Semin Signed-off-by: Thomas Bogendoerfer --- drivers/bus/mips_cdmm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 626dedd110cb..fca0d0669aa9 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -351,6 +351,7 @@ phys_addr_t __weak mips_cdmm_phys_base(void) np = of_find_compatible_node(NULL, NULL, "mti,mips-cdmm"); if (np) { err = of_address_to_resource(np, 0, &res); + of_node_put(np); if (!err) return res.start; } From 4d409ca3e5107a1b300d0f36550d61c5da4fd8a7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Mar 2022 10:50:32 -0800 Subject: [PATCH 342/380] MIPS: boot/compressed: Use array reference for image bounds As done with other image addresses in other architectures, use an explicit flexible array instead of "address of char", which can trip bounds checking done by the compiler. Found when building with -Warray-bounds: In file included from ./include/linux/byteorder/little_endian.h:5, from ./arch/mips/include/uapi/asm/byteorder.h:15, from ./arch/mips/include/asm/bitops.h:21, from ./include/linux/bitops.h:33, from ./include/linux/kernel.h:22, from arch/mips/boot/compressed/decompress.c:13: arch/mips/boot/compressed/decompress.c: In function 'decompress_kernel': ./include/asm-generic/unaligned.h:14:8: warning: array subscript -1 is outside array bounds of 'unsigned char[1]' [-Warray-bounds] 14 | __pptr->x; \ | ~~~~~~^~~ ./include/uapi/linux/byteorder/little_endian.h:35:51: note: in definition of macro '__le32_to_cpu' 35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) | ^ ./include/asm-generic/unaligned.h:32:21: note: in expansion of macro '__get_unaligned_t' 32 | return le32_to_cpu(__get_unaligned_t(__le32, p)); | ^~~~~~~~~~~~~~~~~ arch/mips/boot/compressed/decompress.c:29:37: note: while referencing '__image_end' 29 | extern unsigned char __image_begin, __image_end; | ^~~~~~~~~~~ Cc: Thomas Bogendoerfer Cc: Randy Dunlap Cc: linux-mips@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index aae1346a509a..5b38a802e101 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -26,7 +26,7 @@ unsigned long free_mem_ptr; unsigned long free_mem_end_ptr; /* The linker tells us where the image is. */ -extern unsigned char __image_begin, __image_end; +extern unsigned char __image_begin[], __image_end[]; /* debug interfaces */ #ifdef CONFIG_DEBUG_ZBOOT @@ -91,9 +91,9 @@ void decompress_kernel(unsigned long boot_heap_start) { unsigned long zimage_start, zimage_size; - zimage_start = (unsigned long)(&__image_begin); - zimage_size = (unsigned long)(&__image_end) - - (unsigned long)(&__image_begin); + zimage_start = (unsigned long)(__image_begin); + zimage_size = (unsigned long)(__image_end) - + (unsigned long)(__image_begin); puts("zimage at: "); puthex(zimage_start); @@ -121,7 +121,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = get_unaligned_le32((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)__image_end - 4); /* The device tree's address must be properly aligned */ image_size = ALIGN(image_size, STRUCT_ALIGNMENT); From b847bd64ea9f484510e27065cb2bccc58d9b829b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Mar 2022 14:09:39 -0800 Subject: [PATCH 343/380] MIPS: Only use current_stack_pointer on GCC Unfortunately, Clang did not have support for "sp" as a global register definition, and was crashing after the addition of current_stack_pointer. This has been fixed in Clang 14, but earlier Clang versions need to avoid this code, so add a versioned test and revert back to the open-coded asm instances. Fixes Clang build error: fatal error: error in backend: Invalid register name global variable Fixes: 200ed341b864 ("mips: Implement "current_stack_pointer"") Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/lkml/YikTQRql+il3HbrK@dev-arch.thelio-3990X Cc: Thomas Bogendoerfer Cc: Marc Zyngier Cc: Guenter Roeck Cc: Yanteng Si Cc: linux-mips@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 2 +- arch/mips/include/asm/thread_info.h | 2 ++ arch/mips/kernel/irq.c | 3 ++- arch/mips/lib/uncached.c | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3f58b45fc953..0dae5f1e61cc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,7 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT - select ARCH_HAS_CURRENT_STACK_POINTER + select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000 select ARCH_HAS_DEBUG_VIRTUAL if !64BIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4463348d2372..ecae7470faa4 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -69,7 +69,9 @@ static inline struct thread_info *current_thread_info(void) return __current_thread_info; } +#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER register unsigned long current_stack_pointer __asm__("sp"); +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index fc313c49a417..5e11582fe308 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -75,8 +75,9 @@ void __init init_IRQ(void) #ifdef CONFIG_DEBUG_STACKOVERFLOW static inline void check_stack_overflow(void) { - unsigned long sp = current_stack_pointer; + unsigned long sp; + __asm__ __volatile__("move %0, $sp" : "=r" (sp)); sp &= THREAD_MASK; /* diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index f8d4ca046c3e..f80a67c092b6 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -40,7 +40,9 @@ unsigned long run_uncached(void *func) register long ret __asm__("$2"); long lfunc = (long)func, ufunc; long usp; - long sp = current_stack_pointer; + long sp; + + __asm__("move %0, $sp" : "=r" (sp)); if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) usp = CKSEG1ADDR(sp); From 8755d57ba1ff910666572fab9e32890e8cc6ed3b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 11 Mar 2022 20:20:26 -0800 Subject: [PATCH 344/380] MIPS: RB532: fix return value of __setup handler __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from setup_kmac(). Fixes: 9e21c7e40b7e ("MIPS: RB532: Replace parse_mac_addr() with mac_pton().") Fixes: 73b4390fb234 ("[MIPS] Routerboard 532: Support for base system") Signed-off-by: Randy Dunlap From: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Phil Sutter Cc: Florian Fainelli Cc: Ralf Baechle Cc: Daniel Walter Signed-off-by: Thomas Bogendoerfer --- arch/mips/rb532/devices.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 04684990e28e..b7f6f782d9a1 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -301,11 +301,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); From 2bc5bab9a763d520937e4f3fe8df51c6a1eceb97 Mon Sep 17 00:00:00 2001 From: Yaliang Wang Date: Thu, 10 Mar 2022 19:31:16 +0800 Subject: [PATCH 345/380] MIPS: pgalloc: fix memory leak caused by pgd_free() pgd page is freed by generic implementation pgd_free() since commit f9cb654cb550 ("asm-generic: pgalloc: provide generic pgd_free()"), however, there are scenarios that the system uses more than one page as the pgd table, in such cases the generic implementation pgd_free() won't be applicable anymore. For example, when PAGE_SIZE_4KB is enabled and MIPS_VA_BITS_48 is not enabled in a 64bit system, the macro "PGD_ORDER" will be set as "1", which will cause allocating two pages as the pgd table. Well, at the same time, the generic implementation pgd_free() just free one pgd page, which will result in the memory leak. The memory leak can be easily detected by executing shell command: "while true; do ls > /dev/null; grep MemFree /proc/meminfo; done" Fixes: f9cb654cb550 ("asm-generic: pgalloc: provide generic pgd_free()") Signed-off-by: Yaliang Wang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/pgalloc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index c7925d0e9874..867e9c3db76e 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -15,6 +15,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE #include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -48,6 +49,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(unsigned long page); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGD_ORDER); +} + #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_pte_page_dtor(pte); \ From 89fa1268932420c64155bb4de9a9252a8c9e475d Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Mar 2022 14:50:10 +0800 Subject: [PATCH 346/380] MIPS: Remove redundant check in device_tree_init() In device_tree_init(), unflatten_and_copy_device_tree() checks initial_boot_params, so remove the redundant check. drivers/of/fdt.c void __init unflatten_and_copy_device_tree(void) { int size; void *dt; if (!initial_boot_params) { pr_warn("No valid device tree found, continuing without\n"); return; } ... } Signed-off-by: Tiezhu Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/setup.c | 3 --- arch/mips/pic32/pic32mzda/init.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c index 6fe3ffffcaa6..1973eb091e10 100644 --- a/arch/mips/loongson64/setup.c +++ b/arch/mips/loongson64/setup.c @@ -39,8 +39,5 @@ void __init plat_mem_setup(void) void __init device_tree_init(void) { - if (!initial_boot_params) - return; - unflatten_and_copy_device_tree(); } diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 764f2d022fae..a1860f2138dc 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -80,9 +80,6 @@ void __init prom_init(void) void __init device_tree_init(void) { - if (!initial_boot_params) - return; - unflatten_and_copy_device_tree(); } From cd04d58e829dd074b0af1c1556c5130227f9f23a Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Mar 2022 14:50:11 +0800 Subject: [PATCH 347/380] MIPS: Remove redundant definitions of device_tree_init() There exists many same definitions of device_tree_init() for various platforms, add a weak function in arch/mips/kernel/prom.c to clean up the related code. Signed-off-by: Tiezhu Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/ath79/setup.c | 5 ----- arch/mips/kernel/prom.c | 5 +++++ arch/mips/lantiq/prom.c | 5 ----- arch/mips/loongson64/setup.c | 5 ----- arch/mips/mti-malta/Makefile | 1 - arch/mips/mti-malta/malta-dt.c | 15 --------------- arch/mips/pic32/pic32mzda/init.c | 5 ----- arch/mips/ralink/of.c | 5 ----- 8 files changed, 5 insertions(+), 41 deletions(-) delete mode 100644 arch/mips/mti-malta/malta-dt.c diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 45d4d717e47c..4e18cdcf65a0 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -269,8 +269,3 @@ void __init arch_init_irq(void) { irqchip_init(); } - -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 6abebd57b218..7db6ff9aed7d 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -64,4 +64,9 @@ int __init __dt_register_buses(const char *bus0, const char *bus1) return 0; } +void __weak __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); +} + #endif diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index bc9f58fcbdf9..c731082a0c42 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -84,11 +84,6 @@ void __init plat_mem_setup(void) __dt_setup_arch(dtb); } -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} - void __init prom_init(void) { /* call the soc specific detetcion code and get it to fill soc_info */ diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c index 1973eb091e10..3cd11c2b308b 100644 --- a/arch/mips/loongson64/setup.c +++ b/arch/mips/loongson64/setup.c @@ -36,8 +36,3 @@ void __init plat_mem_setup(void) if (loongson_fdt_blob) __dt_setup_arch(loongson_fdt_blob); } - -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index 94c11f5eac74..13bbd12bfa65 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -6,7 +6,6 @@ # Copyright (C) 2008 Wind River Systems, Inc. # written by Ralf Baechle # -obj-y += malta-dt.o obj-y += malta-dtshim.o obj-y += malta-init.o obj-y += malta-int.o diff --git a/arch/mips/mti-malta/malta-dt.c b/arch/mips/mti-malta/malta-dt.c deleted file mode 100644 index d045c9149418..000000000000 --- a/arch/mips/mti-malta/malta-dt.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Paul Burton - */ - -#include -#include -#include -#include - -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index a1860f2138dc..129915616763 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -78,11 +78,6 @@ void __init prom_init(void) pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1); } -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} - static struct pic32_sdhci_platform_data sdhci_data = { .setup_dma = pic32_set_sdhci_adma_fifo_threshold, }; diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 35a87a2da10b..587c7b998769 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -48,11 +48,6 @@ __iomem void *plat_of_remap_node(const char *node) return ioremap(res.start, resource_size(&res)); } -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} - void __init plat_mem_setup(void) { void *dtb; From 588d08201c2d91c0b7408eb1deb73f6c8fce9ea3 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Mar 2022 14:50:12 +0800 Subject: [PATCH 348/380] MIPS: Fix wrong comments in asm/prom.h In arch/mips/include/asm/prom.h, it should use "!CONFIG_USE_OF" after #else and #endif. Signed-off-by: Tiezhu Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/prom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h index c42e07671934..2d74406089d7 100644 --- a/arch/mips/include/asm/prom.h +++ b/arch/mips/include/asm/prom.h @@ -20,9 +20,9 @@ struct boot_param_header; extern void __dt_setup_arch(void *bph); extern int __dt_register_buses(const char *bus0, const char *bus1); -#else /* CONFIG_OF */ +#else /* !CONFIG_USE_OF */ static inline void device_tree_init(void) { } -#endif /* CONFIG_OF */ +#endif /* !CONFIG_USE_OF */ extern char *mips_get_machine_name(void); extern void mips_set_machine_name(const char *name); From d64e3eab75a8e1e900c0fda2410a2df8893d8f85 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 12 Mar 2022 22:59:36 -0800 Subject: [PATCH 349/380] powerpc/xive: fix return value of __setup handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from xive_off() and xive_store_eoi_cmdline(). Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") Fixes: c21ee04f11ae ("powerpc/xive: Add a kernel parameter for StoreEOI") [lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru] Reported-by: Igor Zhbanov : Signed-off-by: Randy Dunlap Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220313065936.4363-1-rdunlap@infradead.org --- arch/powerpc/sysdev/xive/common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 32863b4daf72..bb5bda6b2357 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH 350/380] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs. There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from chkpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and don't change the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix, it's only used in the RI=0 section so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin Acked-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com --- arch/powerpc/kernel/tm.S | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1 From cf74ff52e352112be78c4c4c3637a37ec36a6608 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jan 2022 00:39:29 +1000 Subject: [PATCH 351/380] powerpc/time: Fix KVM host re-arming a timer beyond decrementer range If the next host timer is beyond decrementer range, timer_rearm_host_dec will leave decrementer not programmed. This will not cause a problem for the host it will just set the decrementer correctly when the decrementer interrupt hits, it seems safer not to leave the next host decrementer interrupt timing able to be influenced by a guest. This code is only used in the P9 KVM paths so it's unlikely to be hit practically unless large decrementer is force disabled in the host. Fixes: 25aa145856cd ("powerpc/time: add API for KVM to re-arm the host timer/decrementer") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124143930.3923442-2-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 958e2929776f..0bb80f0bd0a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -582,8 +582,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); From 35de589cb8793573ed56a915af9cb4b5f15ad7d7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jan 2022 00:39:30 +1000 Subject: [PATCH 352/380] powerpc/time: improve decrementer clockevent processing The stop/shutdown op should not use decrementer_set_next_event because that sets decrementers_next_tb to now + decrementer_max, which means a decrementer interrupt that occurs after that time will call the clockevent event handler unexpectedly. Set next_tb to ~0 here to prevent any clock event call. Init all clockevents to stopped. Then the decrementer clockevent device always has event_handler set and applicable because we know the clock event device was not stopped. So make this call unconditional to show that it is always called. next_tb need not be set to ~0 before the event handler is called because it will stop the clockevent device if there is no other timer. Finally, the timer broadcast interrupt should not modify next_tb because it is not involved with the local decrementer clockevent on this CPU. This doesn't fix a known bug, just tidies the code. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220124143930.3923442-3-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0bb80f0bd0a6..f5cbfe5efd25 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -107,7 +107,12 @@ struct clock_event_device decrementer_clockevent = { }; EXPORT_SYMBOL(decrementer_clockevent); -DEFINE_PER_CPU(u64, decrementers_next_tb); +/* + * This always puts next_tb beyond now, so the clock event will never fire + * with the usual comparison, no need for a separate test for stopped. + */ +#define DEC_CLOCKEVENT_STOPPED ~0ULL +DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED; EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); @@ -645,9 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) now = get_tb(); if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + evt->event_handler(evt); __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; @@ -666,9 +669,6 @@ EXPORT_SYMBOL(timer_interrupt); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void timer_broadcast_interrupt(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = ~(u64)0; tick_receive_broadcast(); __this_cpu_inc(irq_stat.broadcast_irqs_event); } @@ -894,7 +894,9 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(decrementer_max, dev); + __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED); + set_dec_or_work(decrementer_max); + return 0; } From f406f2d03e07afc199dd8cf501f361dde6be8a69 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 16 Mar 2022 02:04:17 -0700 Subject: [PATCH 353/380] xtensa: fix stop_machine_cpuslocked call in patch_text patch_text must invoke patch_text_stop_machine on all online CPUs, but it calls stop_machine_cpuslocked with NULL cpumask. As a result only one CPU runs patch_text_stop_machine potentially leaving stale icache entries on other CPUs. Fix that by calling stop_machine_cpuslocked with cpu_online_mask as the last argument. Cc: stable@vger.kernel.org Fixes: 64711f9a47d4 ("xtensa: implement jump_label support") Signed-off-by: Max Filippov --- arch/xtensa/kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 61cf6497a646..0dde21e0d3de 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz) .data = data, }; stop_machine_cpuslocked(patch_text_stop_machine, - &patch, NULL); + &patch, cpu_online_mask); } else { unsigned long flags; From e85d29ba4b24f68e7a78cb85c55e754362eeb2de Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 17 Mar 2022 02:49:41 -0700 Subject: [PATCH 354/380] xtensa: fix DTC warning unit_address_format DTC issues the following warnings when building xtfpga device trees: /soc/flash@00000000/partition@0x0: unit name should not have leading "0x" /soc/flash@00000000/partition@0x6000000: unit name should not have leading "0x" /soc/flash@00000000/partition@0x6800000: unit name should not have leading "0x" /soc/flash@00000000/partition@0x7fe0000: unit name should not have leading "0x" Drop leading 0x from flash partition unit names. Signed-off-by: Max Filippov --- arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi | 8 ++++---- arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi | 8 ++++---- arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi index 9bf8bad1dd18..c33932568aa7 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi @@ -8,19 +8,19 @@ reg = <0x00000000 0x08000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "data"; reg = <0x00000000 0x06000000>; }; - partition@0x6000000 { + partition@6000000 { label = "boot loader area"; reg = <0x06000000 0x00800000>; }; - partition@0x6800000 { + partition@6800000 { label = "kernel image"; reg = <0x06800000 0x017e0000>; }; - partition@0x7fe0000 { + partition@7fe0000 { label = "boot environment"; reg = <0x07fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi index 40c2f81f7cb6..7bde2ab2d6fb 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi @@ -8,19 +8,19 @@ reg = <0x08000000 0x01000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x00400000>; }; - partition@0x400000 { + partition@400000 { label = "kernel image"; reg = <0x00400000 0x00600000>; }; - partition@0xa00000 { + partition@a00000 { label = "data"; reg = <0x00a00000 0x005e0000>; }; - partition@0xfe0000 { + partition@fe0000 { label = "boot environment"; reg = <0x00fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi index fb8d3a9f33c2..0655b868749a 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi @@ -8,11 +8,11 @@ reg = <0x08000000 0x00400000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x003f0000>; }; - partition@0x3f0000 { + partition@3f0000 { label = "boot environment"; reg = <0x003f0000 0x00010000>; }; From 58004f266918912771ee71f46bfb92bf64ab9108 Mon Sep 17 00:00:00 2001 From: Tsukasa OI Date: Mon, 14 Mar 2022 13:38:40 -0700 Subject: [PATCH 355/380] RISC-V: Correctly print supported extensions This commit replaces BITS_PER_LONG with number of alphabet letters. Current ISA pretty-printing code expects extension 'a' (bit 0) through 'z' (bit 25). Although bit 26 and higher is not currently used (thus never cause an issue in practice), it will be an annoying problem if we start to use those in the future. This commit disables printing high bits for now. Reviewed-by: Anup Patel Tested-by: Heiko Stuebner Signed-off-by: Tsukasa OI Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index d959d207a40d..dd3d57eb4eea 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -13,6 +13,8 @@ #include #include +#define NUM_ALPHA_EXTS ('z' - 'a' + 1) + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -63,7 +65,7 @@ void __init riscv_fill_hwcap(void) { struct device_node *node; const char *isa; - char print_str[BITS_PER_LONG + 1]; + char print_str[NUM_ALPHA_EXTS + 1]; size_t i, j, isa_len; static unsigned long isa2hwcap[256] = {0}; @@ -133,13 +135,13 @@ void __init riscv_fill_hwcap(void) } memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) print_str[j++] = (char)('a' + i); pr_info("riscv: ISA extensions %s\n", print_str); memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (elf_hwcap & BIT_MASK(i)) print_str[j++] = (char)('a' + i); pr_info("riscv: ELF capabilities %s\n", print_str); From 2a31c54be097c74344b4fab20ea6104012d2cb8b Mon Sep 17 00:00:00 2001 From: Tsukasa OI Date: Mon, 14 Mar 2022 13:38:41 -0700 Subject: [PATCH 356/380] RISC-V: Minimal parser for "riscv, isa" strings Current hart ISA ("riscv,isa") parser don't correctly parse: 1. Multi-letter extensions 2. Version numbers All ISA extensions ratified recently has multi-letter extensions (except 'H'). The current "riscv,isa" parser that is easily confused by multi-letter extensions and "p" in version numbers can be a huge problem for adding new extensions through the device tree. Leaving it would create incompatible hacks and would make "riscv,isa" value unreliable. This commit implements minimal parser for "riscv,isa" strings. With this, we can safely ignore multi-letter extensions and version numbers. [Improved commit text and fixed a bug around 's' in base extension] Signed-off-by: Atish Patra [Fixed workaround for QEMU] Signed-off-by: Tsukasa OI Tested-by: Heiko Stuebner Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 72 ++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index dd3d57eb4eea..72c5f6ef56b5 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -66,7 +67,7 @@ void __init riscv_fill_hwcap(void) struct device_node *node; const char *isa; char print_str[NUM_ALPHA_EXTS + 1]; - size_t i, j, isa_len; + int i, j; static unsigned long isa2hwcap[256] = {0}; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; @@ -92,23 +93,72 @@ void __init riscv_fill_hwcap(void) continue; } - i = 0; - isa_len = strlen(isa); #if IS_ENABLED(CONFIG_32BIT) if (!strncmp(isa, "rv32", 4)) - i += 4; + isa += 4; #elif IS_ENABLED(CONFIG_64BIT) if (!strncmp(isa, "rv64", 4)) - i += 4; + isa += 4; #endif - for (; i < isa_len; ++i) { - this_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; + for (; *isa; ++isa) { + const char *ext = isa++; + const char *ext_end = isa; + bool ext_long = false, ext_err = false; + + switch (*ext) { + case 's': + /** + * Workaround for invalid single-letter 's' & 'u'(QEMU). + * No need to set the bit in riscv_isa as 's' & 'u' are + * not valid ISA extensions. It works until multi-letter + * extension starting with "Su" appears. + */ + if (ext[-1] != '_' && ext[1] == 'u') { + ++isa; + ext_err = true; + break; + } + fallthrough; + case 'x': + case 'z': + ext_long = true; + /* Multi-letter extension must be delimited */ + for (; *isa && *isa != '_'; ++isa) + if (!islower(*isa) && !isdigit(*isa)) + ext_err = true; + break; + default: + if (unlikely(!islower(*ext))) { + ext_err = true; + break; + } + /* Find next extension */ + if (!isdigit(*isa)) + break; + /* Skip the minor version */ + while (isdigit(*++isa)) + ; + if (*isa != 'p') + break; + if (!isdigit(*++isa)) { + --isa; + break; + } + /* Skip the major version */ + while (isdigit(*++isa)) + ; + break; + } + if (*isa != '_') + --isa; /* - * TODO: X, Y and Z extension parsing for Host ISA - * bitmap will be added in-future. + * TODO: Full version-aware handling including + * multi-letter extensions will be added in-future. */ - if ('a' <= isa[i] && isa[i] < 'x') - this_isa |= (1UL << (isa[i] - 'a')); + if (ext_err || ext_long) + continue; + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + this_isa |= (1UL << (*ext - 'a')); } /* From 40a4d0dfbcf00c8534ab8724041705b3db6c5ce2 Mon Sep 17 00:00:00 2001 From: Tsukasa OI Date: Mon, 14 Mar 2022 13:38:42 -0700 Subject: [PATCH 357/380] RISC-V: Extract multi-letter extension names from "riscv, isa" Currently, there is no usage for version numbers in extensions as any ratified non base ISA extension will always at v1.0. Extract the extension names in place for future parsing. Tested-by: Heiko Stuebner Reviewed-by: Anup Patel Signed-off-by: Tsukasa OI [Improved commit text and comments] Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 35 ++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 72c5f6ef56b5..b0df7eff47f7 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -124,8 +124,28 @@ void __init riscv_fill_hwcap(void) ext_long = true; /* Multi-letter extension must be delimited */ for (; *isa && *isa != '_'; ++isa) - if (!islower(*isa) && !isdigit(*isa)) + if (unlikely(!islower(*isa) + && !isdigit(*isa))) ext_err = true; + /* Parse backwards */ + ext_end = isa; + if (unlikely(ext_err)) + break; + if (!isdigit(ext_end[-1])) + break; + /* Skip the minor version */ + while (isdigit(*--ext_end)) + ; + if (ext_end[0] != 'p' + || !isdigit(ext_end[-1])) { + /* Advance it to offset the pre-decrement */ + ++ext_end; + break; + } + /* Skip the major version */ + while (isdigit(*--ext_end)) + ; + ++ext_end; break; default: if (unlikely(!islower(*ext))) { @@ -151,14 +171,13 @@ void __init riscv_fill_hwcap(void) } if (*isa != '_') --isa; - /* - * TODO: Full version-aware handling including - * multi-letter extensions will be added in-future. - */ - if (ext_err || ext_long) + + if (unlikely(ext_err)) continue; - this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; - this_isa |= (1UL << (*ext - 'a')); + if (!ext_long) { + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + this_isa |= (1UL << (*ext - 'a')); + } } /* From 02d52fbd940af7d7fe0a523e99938113b2addd35 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 14 Mar 2022 13:38:43 -0700 Subject: [PATCH 358/380] RISC-V: Implement multi-letter ISA extension probing framework Multi-letter extensions can be probed using exising riscv_isa_extension_available API now. It doesn't support versioning right now as there is no use case for it. Individual extension specific implementation will be added during each extension support. Tested-by: Heiko Stuebner Signed-off-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 18 ++++++++++++++++++ arch/riscv/kernel/cpufeature.c | 22 ++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 5ce50468aff1..170bd80da520 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -34,7 +34,25 @@ extern unsigned long elf_hwcap; #define RISCV_ISA_EXT_s ('s' - 'a') #define RISCV_ISA_EXT_u ('u' - 'a') +/* + * Increse this to higher value as kernel support more ISA extensions. + */ #define RISCV_ISA_EXT_MAX 64 +#define RISCV_ISA_EXT_NAME_LEN_MAX 32 + +/* The base ID for multi-letter ISA extensions */ +#define RISCV_ISA_EXT_BASE 26 + +/* + * This enum represent the logical ID for each multi-letter RISC-V ISA extension. + * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed + * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter + * extensions while all the multi-letter extensions should define the next + * available logical extension id. + */ +enum riscv_isa_ext_id { + RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, +}; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b0df7eff47f7..3455fdfd680e 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -83,7 +83,7 @@ void __init riscv_fill_hwcap(void) for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; - unsigned long this_isa = 0; + DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); if (riscv_of_processor_hartid(node) < 0) continue; @@ -100,6 +100,7 @@ void __init riscv_fill_hwcap(void) if (!strncmp(isa, "rv64", 4)) isa += 4; #endif + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); for (; *isa; ++isa) { const char *ext = isa++; const char *ext_end = isa; @@ -172,12 +173,20 @@ void __init riscv_fill_hwcap(void) if (*isa != '_') --isa; +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !memcmp(ext, name, sizeof(name) - 1)) \ + set_bit(bit, this_isa); \ + } while (false) \ + if (unlikely(ext_err)) continue; if (!ext_long) { this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; - this_isa |= (1UL << (*ext - 'a')); + set_bit(*ext - 'a', this_isa); } +#undef SET_ISA_EXT_MAP } /* @@ -190,10 +199,11 @@ void __init riscv_fill_hwcap(void) else elf_hwcap = this_hwcap; - if (riscv_isa[0]) - riscv_isa[0] &= this_isa; + if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX)) + bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else - riscv_isa[0] = this_isa; + bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + } /* We don't support systems with F but without D, so mask those out @@ -207,7 +217,7 @@ void __init riscv_fill_hwcap(void) for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) print_str[j++] = (char)('a' + i); - pr_info("riscv: ISA extensions %s\n", print_str); + pr_info("riscv: base ISA extensions %s\n", print_str); memset(print_str, 0, sizeof(print_str)); for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) From 3f96db125d68127ffef6fdeeb777d94ccf95c09f Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 14 Mar 2022 13:38:44 -0700 Subject: [PATCH 359/380] RISC-V: Do no continue isa string parsing without correct XLEN The isa string should begin with either rv64 or rv32. Otherwise, it is an incorrect isa string. Currently, the string parsing continues even if it doesnot begin with current XLEN. Fix this by checking if it found "rv64" or "rv32" in the beginning. Tested-by: Heiko Stuebner Signed-off-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3455fdfd680e..a43c08af5f4b 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -84,6 +84,7 @@ void __init riscv_fill_hwcap(void) for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); + const char *temp; if (riscv_of_processor_hartid(node) < 0) continue; @@ -93,6 +94,7 @@ void __init riscv_fill_hwcap(void) continue; } + temp = isa; #if IS_ENABLED(CONFIG_32BIT) if (!strncmp(isa, "rv32", 4)) isa += 4; @@ -100,6 +102,9 @@ void __init riscv_fill_hwcap(void) if (!strncmp(isa, "rv64", 4)) isa += 4; #endif + /* The riscv,isa DT property must start with rv64 or rv32 */ + if (temp == isa) + continue; bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); for (; *isa; ++isa) { const char *ext = isa++; From a9b202606c69312cdaa4db187837820ebf7213b2 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 14 Mar 2022 13:38:45 -0700 Subject: [PATCH 360/380] RISC-V: Improve /proc/cpuinfo output for ISA extensions Currently, the /proc/cpuinfo outputs the entire riscv,isa string which is not ideal when we have multiple ISA extensions present in the ISA string. Some of them may not be enabled in kernel as well. Same goes for the single letter extensions as well which prints the entire ISA string. Some of they may not be valid ISA extensions as well (e.g 'su') Parse only the valid & enabled ISA extension and print them. Tested-by: Heiko Stuebner Signed-off-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 7 ++++ arch/riscv/kernel/cpu.c | 65 ++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 170bd80da520..691fc9c8099b 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -54,6 +54,13 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, }; +struct riscv_isa_ext_data { + /* Name of the extension displayed to userspace via /proc/cpuinfo */ + char uprop[RISCV_ISA_EXT_NAME_LEN_MAX]; + /* The logical ISA extension ID */ + unsigned int isa_ext_id; +}; + unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); #define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index ad0a7e9f828b..fc115e307ef5 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -63,12 +64,72 @@ int riscv_of_parent_hartid(struct device_node *node) } #ifdef CONFIG_PROC_FS +#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ + { \ + .uprop = #UPROP, \ + .isa_ext_id = EXTID, \ + } +/** + * Here are the ordering rules of extension naming defined by RISC-V + * specification : + * 1. All extensions should be separated from other multi-letter extensions + * from other multi-letter extensions by an underscore. + * 2. The first letter following the 'Z' conventionally indicates the most + * closely related alphabetical extension category, IMAFDQLCBKJTPVH. + * If multiple 'Z' extensions are named, they should be ordered first + * by category, then alphabetically within a category. + * 3. Standard supervisor-level extensions (starts with 'S') should be + * listed after standard unprivileged extensions. If multiple + * supervisor-level extensions are listed, they should be ordered + * alphabetically. + * 4. Non-standard extensions (starts with 'X') must be listed after all + * standard extensions. They must be separated from other multi-letter + * extensions by an underscore. + */ +static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), +}; + +static void print_isa_ext(struct seq_file *f) +{ + struct riscv_isa_ext_data *edata; + int i = 0, arr_sz; + + arr_sz = ARRAY_SIZE(isa_ext_arr) - 1; + + /* No extension support available */ + if (arr_sz <= 0) + return; + + for (i = 0; i <= arr_sz; i++) { + edata = &isa_ext_arr[i]; + if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id)) + continue; + seq_printf(f, "_%s", edata->uprop); + } +} + +/** + * These are the only valid base (single letter) ISA extensions as per the spec. + * It also specifies the canonical order in which it appears in the spec. + * Some of the extension may just be a place holder for now (B, K, P, J). + * This should be updated once corresponding extensions are ratified. + */ +static const char base_riscv_exts[13] = "imafdqcbkjpvh"; static void print_isa(struct seq_file *f, const char *isa) { - /* Print the entire ISA as it is */ + int i; + seq_puts(f, "isa\t\t: "); - seq_write(f, isa, strlen(isa)); + /* Print the rv[64/32] part */ + seq_write(f, isa, 4); + for (i = 0; i < sizeof(base_riscv_exts); i++) { + if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a')) + /* Print only enabled the base ISA extensions */ + seq_write(f, &base_riscv_exts[i], 1); + } + print_isa_ext(f); seq_puts(f, "\n"); } From 8931ddd8d6a55fcefb20f44a38ba42bb746f0b62 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Mar 2022 12:52:59 +0100 Subject: [PATCH 361/380] MIPS: ingenic: correct unit node address Unit node addresses should not have leading 0x: Warning (unit_address_format): /nemc@13410000/efuse@d0/eth-mac-addr@0x22: unit name should not have leading "0x" Signed-off-by: Krzysztof Kozlowski Reviewed-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ingenic/jz4780.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index 3f9ea47a10cd..b998301f179c 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -510,7 +510,7 @@ #address-cells = <1>; #size-cells = <1>; - eth0_addr: eth-mac-addr@0x22 { + eth0_addr: eth-mac-addr@22 { reg = <0x22 0x6>; }; }; From e94dc6bbdf29787a5ddb01c143a074c31e427dc7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Mar 2022 11:06:30 -0700 Subject: [PATCH 362/380] xtensa: merge stack alignment definitions xtensa currently has two different definitions for stack alignment. Replace it with single definition usable in both C and assembly. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/asmmacro.h | 2 -- arch/xtensa/include/asm/core.h | 7 +++++++ arch/xtensa/include/asm/processor.h | 6 +----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 3190157950fa..e3474ca411ff 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -225,8 +225,6 @@ #endif .endm -#define XTENSA_STACK_ALIGNMENT 16 - #if defined(__XTENSA_WINDOWED_ABI__) /* Assembly instructions for windowed kernel ABI. */ diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 9138077e567d..f856d2bcb9f3 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -37,4 +37,11 @@ #endif #endif +/* Xtensa ABI requires stack alignment to be at least 16 */ +#if XCHAL_DATA_WIDTH > 16 +#define XTENSA_STACK_ALIGNMENT XCHAL_DATA_WIDTH +#else +#define XTENSA_STACK_ALIGNMENT 16 +#endif + #endif diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 37d3e9887fe7..ba62bbcea160 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -18,11 +18,7 @@ #include #include -/* Xtensa ABI requires stack alignment to be at least 16 */ - -#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) - -#define ARCH_SLAB_MINALIGN STACK_ALIGN +#define ARCH_SLAB_MINALIGN XTENSA_STACK_ALIGNMENT /* * User space process size: 1 GB. From e6d423aaaea13e6dc48e42472aeebc8607ae2574 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Mar 2022 05:58:21 -0700 Subject: [PATCH 363/380] xtensa: rearrange NMI exit path NMI exit path to userspace should neither check TIF_DB_DISABLED nor call check_tlb_sanity because NMI shouldn't touch anything related to userspace. Drop kernel/userspace check in NMI exit path. Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 77a7c8da3ff5..8d671898d582 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -488,9 +488,10 @@ KABI_W or a3, a3, a2 common_exception_return: #if XTENSA_FAKE_NMI - l32i a2, a1, PT_EXCCAUSE - movi a3, EXCCAUSE_MAPPED_NMI - beq a2, a3, .LNMIexit + l32i abi_tmp0, a1, PT_EXCCAUSE + movi abi_tmp1, EXCCAUSE_MAPPED_NMI + l32i abi_saved1, a1, PT_PS + beq abi_tmp0, abi_tmp1, 4f #endif 1: irq_save a2, a3 @@ -550,12 +551,6 @@ common_exception_return: j 4f #endif -#if XTENSA_FAKE_NMI -.LNMIexit: - l32i abi_saved1, a1, PT_PS - _bbci.l abi_saved1, PS_UM_BIT, 4f -#endif - 5: #ifdef CONFIG_HAVE_HW_BREAKPOINT _bbci.l a4, TIF_DB_DISABLED, 7f From e7e9614b6b3a4fb897d9766337858e3f5e1e1855 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Mar 2022 05:52:07 -0700 Subject: [PATCH 364/380] xtensa: clean up kernel exit assembly code Don't use numeric labels for complex branching logic. Mark each branch with named local label and use them. Rearrange exit back to kernel mode to avoid conditional label definition. Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 8d671898d582..6b6eff658795 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -491,9 +491,9 @@ common_exception_return: l32i abi_tmp0, a1, PT_EXCCAUSE movi abi_tmp1, EXCCAUSE_MAPPED_NMI l32i abi_saved1, a1, PT_PS - beq abi_tmp0, abi_tmp1, 4f + beq abi_tmp0, abi_tmp1, .Lrestore_state #endif -1: +.Ltif_loop: irq_save a2, a3 #ifdef CONFIG_TRACE_IRQFLAGS abi_call trace_hardirqs_off @@ -504,7 +504,7 @@ common_exception_return: l32i abi_saved1, a1, PT_PS GET_THREAD_INFO(a2, a1) l32i a4, a2, TI_FLAGS - _bbci.l abi_saved1, PS_UM_BIT, 6f + _bbci.l abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel /* Specific to a user exception exit: * We need to check some flags for signal handling and rescheduling, @@ -513,12 +513,12 @@ common_exception_return: * Note that we don't disable interrupts here. */ - _bbsi.l a4, TIF_NEED_RESCHED, 3f + _bbsi.l a4, TIF_NEED_RESCHED, .Lresched movi a2, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL - bnone a4, a2, 5f + bnone a4, a2, .Lexit_tif_loop_user -2: l32i a4, a1, PT_DEPC - bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f + l32i a4, a1, PT_DEPC + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state /* Call do_signal() */ @@ -528,42 +528,41 @@ common_exception_return: rsil a2, 0 mov abi_arg0, a1 abi_call do_notify_resume # int do_notify_resume(struct pt_regs*) - j 1b - -3: /* Reschedule */ + j .Ltif_loop +.Lresched: #ifdef CONFIG_TRACE_IRQFLAGS abi_call trace_hardirqs_on #endif rsil a2, 0 abi_call schedule # void schedule (void) - j 1b + j .Ltif_loop +.Lexit_tif_loop_kernel: #ifdef CONFIG_PREEMPTION -6: - _bbci.l a4, TIF_NEED_RESCHED, 4f + _bbci.l a4, TIF_NEED_RESCHED, .Lrestore_state /* Check current_thread_info->preempt_count */ l32i a4, a2, TI_PRE_COUNT - bnez a4, 4f + bnez a4, .Lrestore_state abi_call preempt_schedule_irq - j 4f #endif + j .Lrestore_state -5: +.Lexit_tif_loop_user: #ifdef CONFIG_HAVE_HW_BREAKPOINT - _bbci.l a4, TIF_DB_DISABLED, 7f + _bbci.l a4, TIF_DB_DISABLED, 1f abi_call restore_dbreak -7: +1: #endif #ifdef CONFIG_DEBUG_TLB_SANITY l32i a4, a1, PT_DEPC - bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state abi_call check_tlb_sanity #endif -6: -4: + +.Lrestore_state: #ifdef CONFIG_TRACE_IRQFLAGS extui a4, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH bgei a4, LOCKLEVEL, 1f @@ -601,7 +600,7 @@ user_exception_exit: rsr a1, depc # restore stack pointer l32i a2, a1, PT_WMASK # register frames saved (in bits 4...9) rotw -1 # we restore a4..a7 - _bltui a6, 16, 1f # only have to restore current window? + _bltui a6, 16, .Lclear_regs # only have to restore current window? /* The working registers are a0 and a3. We are restoring to * a4..a7. Be careful not to destroy what we have just restored. @@ -613,18 +612,19 @@ user_exception_exit: mov a2, a6 mov a3, a5 -2: rotw -1 # a0..a3 become a4..a7 +1: rotw -1 # a0..a3 become a4..a7 addi a3, a7, -4*4 # next iteration addi a2, a6, -16 # decrementing Y in WMASK l32i a4, a3, PT_AREG_END + 0 l32i a5, a3, PT_AREG_END + 4 l32i a6, a3, PT_AREG_END + 8 l32i a7, a3, PT_AREG_END + 12 - _bgeui a2, 16, 2b + _bgeui a2, 16, 1b /* Clear unrestored registers (don't leak anything to user-land */ -1: rsr a0, windowbase +.Lclear_regs: + rsr a0, windowbase rsr a3, sar sub a3, a0, a3 beqz a3, 2f From 7dc0eb0b6d9f3e2b6a560a04f86ef065a4531a9f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Mar 2022 05:49:24 -0700 Subject: [PATCH 365/380] xtensa: enable plugin support Compiler plugins can be built starting with xtensa gcc 12. Enable plugin support for xtensa when gcc-12 or newer is used. Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index a02c3388f521..ad91836ce7a8 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -34,6 +34,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS if GCC_VERSION >= 120000 select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI From a3d0245c58f962ee99d4440ea0eaf45fb7f5a5cc Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 20 Mar 2022 09:40:14 -0700 Subject: [PATCH 366/380] xtensa: fix xtensa_wsr always writing 0 The commit cad6fade6e78 ("xtensa: clean up WSR*/RSR*/get_sr/set_sr") replaced 'WSR' macro in the function xtensa_wsr with 'xtensa_set_sr', but variable 'v' in the xtensa_set_sr body shadowed the argument 'v' passed to it, resulting in wrong value written to debug registers. Fix that by removing intermediate variable from the xtensa_set_sr macro body. Cc: stable@vger.kernel.org Fixes: cad6fade6e78 ("xtensa: clean up WSR*/RSR*/get_sr/set_sr") Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index ba62bbcea160..67ccc3d48c8c 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -242,8 +242,8 @@ extern unsigned long __get_wchan(struct task_struct *p); #define xtensa_set_sr(x, sr) \ ({ \ - unsigned int v = (unsigned int)(x); \ - __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \ + __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \ + "a"((unsigned int)(x))); \ }) #define xtensa_get_sr(sr) \ From fe2640bd7a62f1f7c3f55fbda31084085075bc30 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 18 Mar 2022 14:42:19 +1100 Subject: [PATCH 367/380] powerpc/pseries: Fix use after free in remove_phb_dynamic() In remove_phb_dynamic() we use &phb->io_resource, after we've called device_unregister(&host_bridge->dev). But the unregister may have freed phb, because pcibios_free_controller_deferred() is the release function for the host_bridge. If there are no outstanding references when we call device_unregister() then phb will be freed out from under us. This has gone mainly unnoticed, but with slub_debug and page_poison enabled it can lead to a crash: PID: 7574 TASK: c0000000d492cb80 CPU: 13 COMMAND: "drmgr" #0 [c0000000e4f075a0] crash_kexec at c00000000027d7dc #1 [c0000000e4f075d0] oops_end at c000000000029608 #2 [c0000000e4f07650] __bad_page_fault at c0000000000904b4 #3 [c0000000e4f076c0] do_bad_slb_fault at c00000000009a5a8 #4 [c0000000e4f076f0] data_access_slb_common_virt at c000000000008b30 Data SLB Access [380] exception frame: R0: c000000000167250 R1: c0000000e4f07a00 R2: c000000002a46100 R3: c000000002b39ce8 R4: 00000000000000c0 R5: 00000000000000a9 R6: 3894674d000000c0 R7: 0000000000000000 R8: 00000000000000ff R9: 0000000000000100 R10: 6b6b6b6b6b6b6b6b R11: 0000000000008000 R12: c00000000023da80 R13: c0000009ffd38b00 R14: 0000000000000000 R15: 000000011c87f0f0 R16: 0000000000000006 R17: 0000000000000003 R18: 0000000000000002 R19: 0000000000000004 R20: 0000000000000005 R21: 000000011c87ede8 R22: 000000011c87c5a8 R23: 000000011c87d3a0 R24: 0000000000000000 R25: 0000000000000001 R26: c0000000e4f07cc8 R27: c00000004d1cc400 R28: c0080000031d00e8 R29: c00000004d23d800 R30: c00000004d1d2400 R31: c00000004d1d2540 NIP: c000000000167258 MSR: 8000000000009033 OR3: c000000000e9f474 CTR: 0000000000000000 LR: c000000000167250 XER: 0000000020040003 CCR: 0000000024088420 MQ: 0000000000000000 DAR: 6b6b6b6b6b6b6ba3 DSISR: c0000000e4f07920 Syscall Result: fffffffffffffff2 [NIP : release_resource+56] [LR : release_resource+48] #5 [c0000000e4f07a00] release_resource at c000000000167258 (unreliable) #6 [c0000000e4f07a30] remove_phb_dynamic at c000000000105648 #7 [c0000000e4f07ab0] dlpar_remove_slot at c0080000031a09e8 [rpadlpar_io] #8 [c0000000e4f07b50] remove_slot_store at c0080000031a0b9c [rpadlpar_io] #9 [c0000000e4f07be0] kobj_attr_store at c000000000817d8c #10 [c0000000e4f07c00] sysfs_kf_write at c00000000063e504 #11 [c0000000e4f07c20] kernfs_fop_write_iter at c00000000063d868 #12 [c0000000e4f07c70] new_sync_write at c00000000054339c #13 [c0000000e4f07d10] vfs_write at c000000000546624 #14 [c0000000e4f07d60] ksys_write at c0000000005469f4 #15 [c0000000e4f07db0] system_call_exception at c000000000030840 #16 [c0000000e4f07e10] system_call_vectored_common at c00000000000c168 To avoid it, we can take a reference to the host_bridge->dev until we're done using phb. Then when we drop the reference the phb will be freed. Fixes: 2dd9c11b9d4d ("powerpc/pseries: use pci_host_bridge.release_fn() to kfree(phb)") Reported-by: David Dai Signed-off-by: Michael Ellerman Tested-by: Sachin Sant Link: https://lore.kernel.org/r/20220318034219.1188008-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694..4ba824568119 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; } From 9dc6ce80213635cdca611d8b89f74bf010c1a8c6 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:52 -0800 Subject: [PATCH 368/380] RISC-V: Remove the current perf implementation The current perf implementation in RISC-V is not very useful as it can not count any events other than cycle/instructions. Moreover, perf record can not be used or the events can not be started or stopped. Remove the implementation now for a better platform driver in future that will implement most of the missing functionality. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 13 - arch/riscv/include/asm/perf_event.h | 72 ----- arch/riscv/kernel/Makefile | 1 - arch/riscv/kernel/perf_event.c | 485 ---------------------------- 4 files changed, 571 deletions(-) delete mode 100644 arch/riscv/kernel/perf_event.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 5adcbd9b5e88..00cc3d4c77d1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -333,19 +333,6 @@ config RISCV_ISA_C If you don't know what to do here, say Y. -menu "supported PMU type" - depends on PERF_EVENTS - -config RISCV_BASE_PMU - bool "Base Performance Monitoring Unit" - def_bool y - help - A base PMU that serves as a reference implementation and has limited - feature of perf. It can run on any RISC-V machines so serves as the - fallback, but this option can also be disable to reduce kernel size. - -endmenu - config FPU bool "FPU support" default y diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 062efd3a1d5d..d42c901f9a97 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -9,77 +9,5 @@ #define _ASM_RISCV_PERF_EVENT_H #include -#include -#include - -#ifdef CONFIG_RISCV_BASE_PMU -#define RISCV_BASE_COUNTERS 2 - -/* - * The RISCV_MAX_COUNTERS parameter should be specified. - */ - -#define RISCV_MAX_COUNTERS 2 - -/* - * These are the indexes of bits in counteren register *minus* 1, - * except for cycle. It would be coherent if it can directly mapped - * to counteren bit definition, but there is a *time* register at - * counteren[1]. Per-cpu structure is scarce resource here. - * - * According to the spec, an implementation can support counter up to - * mhpmcounter31, but many high-end processors has at most 6 general - * PMCs, we give the definition to MHPMCOUNTER8 here. - */ -#define RISCV_PMU_CYCLE 0 -#define RISCV_PMU_INSTRET 1 -#define RISCV_PMU_MHPMCOUNTER3 2 -#define RISCV_PMU_MHPMCOUNTER4 3 -#define RISCV_PMU_MHPMCOUNTER5 4 -#define RISCV_PMU_MHPMCOUNTER6 5 -#define RISCV_PMU_MHPMCOUNTER7 6 -#define RISCV_PMU_MHPMCOUNTER8 7 - -#define RISCV_OP_UNSUPP (-EOPNOTSUPP) - -struct cpu_hw_events { - /* # currently enabled events*/ - int n_events; - /* currently enabled events */ - struct perf_event *events[RISCV_MAX_COUNTERS]; - /* vendor-defined PMU data */ - void *platform; -}; - -struct riscv_pmu { - struct pmu *pmu; - - /* generic hw/cache events table */ - const int *hw_events; - const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - /* method used to map hw/cache events */ - int (*map_hw_event)(u64 config); - int (*map_cache_event)(u64 config); - - /* max generic hw events in map */ - int max_events; - /* number total counters, 2(base) + x(general) */ - int num_counters; - /* the width of the counter */ - int counter_width; - - /* vendor-defined PMU features */ - void *platform; - - irqreturn_t (*handle_irq)(int irq_num, void *dev); - int irq; -}; - -#endif -#ifdef CONFIG_PERF_EVENTS #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs -#endif - #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 612556faa527..fb63b462ff85 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -51,7 +51,6 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c deleted file mode 100644 index c835f0362d94..000000000000 --- a/arch/riscv/kernel/perf_event.c +++ /dev/null @@ -1,485 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * Copyright 2014 Tilera Corporation. All Rights Reserved. - * Copyright (C) 2018 Andes Technology Corporation - * - * Perf_events support for RISC-V platforms. - * - * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough - * functionality for perf event to fully work, this file provides - * the very basic framework only. - * - * For platform portings, please check Documentations/riscv/pmu.txt. - * - * The Copyright line includes x86 and tile ones. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct riscv_pmu *riscv_pmu __read_mostly; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* - * Hardware & cache maps and their methods - */ - -static const int riscv_hw_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE, - [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET, - [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] -[PERF_COUNT_HW_CACHE_OP_MAX] -[PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, -}; - -static int riscv_map_hw_event(u64 config) -{ - if (config >= riscv_pmu->max_events) - return -EINVAL; - - return riscv_pmu->hw_events[config]; -} - -static int riscv_map_cache_decode(u64 config, unsigned int *type, - unsigned int *op, unsigned int *result) -{ - return -ENOENT; -} - -static int riscv_map_cache_event(u64 config) -{ - unsigned int type, op, result; - int err = -ENOENT; - int code; - - err = riscv_map_cache_decode(config, &type, &op, &result); - if (!riscv_pmu->cache_events || err) - return err; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - code = (*riscv_pmu->cache_events)[type][op][result]; - if (code == RISCV_OP_UNSUPP) - return -EINVAL; - - return code; -} - -/* - * Low-level functions: reading/writing counters - */ - -static inline u64 read_counter(int idx) -{ - u64 val = 0; - - switch (idx) { - case RISCV_PMU_CYCLE: - val = csr_read(CSR_CYCLE); - break; - case RISCV_PMU_INSTRET: - val = csr_read(CSR_INSTRET); - break; - default: - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); - return -EINVAL; - } - - return val; -} - -static inline void write_counter(int idx, u64 value) -{ - /* currently not supported */ - WARN_ON_ONCE(1); -} - -/* - * pmu->read: read and update the counter - * - * Other architectures' implementation often have a xxx_perf_event_update - * routine, which can return counter values when called in the IRQ, but - * return void when being called by the pmu->read method. - */ -static void riscv_pmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 prev_raw_count, new_raw_count; - u64 oldval; - int idx = hwc->idx; - u64 delta; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_counter(idx); - - oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count); - } while (oldval != prev_raw_count); - - /* - * delta is the value to update the counter we maintain in the kernel. - */ - delta = (new_raw_count - prev_raw_count) & - ((1ULL << riscv_pmu->counter_width) - 1); - local64_add(delta, &event->count); - /* - * Something like local64_sub(delta, &hwc->period_left) here is - * needed if there is an interrupt for perf. - */ -} - -/* - * State transition functions: - * - * stop()/start() & add()/del() - */ - -/* - * pmu->stop: stop the counter - */ -static void riscv_pmu_stop(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - riscv_pmu->pmu->read(event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -/* - * pmu->start: start the event. - */ -static void riscv_pmu_start(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - if (flags & PERF_EF_RELOAD) { - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - - /* - * Set the counter to the period to the next interrupt here, - * if you have any. - */ - } - - hwc->state = 0; - perf_event_update_userpage(event); - - /* - * Since we cannot write to counters, this serves as an initialization - * to the delta-mechanism in pmu->read(); otherwise, the delta would be - * wrong when pmu->read is called for the first time. - */ - local64_set(&hwc->prev_count, read_counter(hwc->idx)); -} - -/* - * pmu->add: add the event to PMU. - */ -static int riscv_pmu_add(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (cpuc->n_events == riscv_pmu->num_counters) - return -ENOSPC; - - /* - * We don't have general conunters, so no binding-event-to-counter - * process here. - * - * Indexing using hwc->config generally not works, since config may - * contain extra information, but here the only info we have in - * hwc->config is the event index. - */ - hwc->idx = hwc->config; - cpuc->events[hwc->idx] = event; - cpuc->n_events++; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (flags & PERF_EF_START) - riscv_pmu->pmu->start(event, PERF_EF_RELOAD); - - return 0; -} - -/* - * pmu->del: delete the event from PMU. - */ -static void riscv_pmu_del(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - cpuc->events[hwc->idx] = NULL; - cpuc->n_events--; - riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); - perf_event_update_userpage(event); -} - -/* - * Interrupt: a skeletion for reference. - */ - -static DEFINE_MUTEX(pmc_reserve_mutex); - -static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) -{ - return IRQ_NONE; -} - -static int reserve_pmc_hardware(void) -{ - int err = 0; - - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) { - err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq, - IRQF_PERCPU, "riscv-base-perf", NULL); - } - mutex_unlock(&pmc_reserve_mutex); - - return err; -} - -static void release_pmc_hardware(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0) - free_irq(riscv_pmu->irq, NULL); - mutex_unlock(&pmc_reserve_mutex); -} - -/* - * Event Initialization/Finalization - */ - -static atomic_t riscv_active_events = ATOMIC_INIT(0); - -static void riscv_event_destroy(struct perf_event *event) -{ - if (atomic_dec_return(&riscv_active_events) == 0) - release_pmc_hardware(); -} - -static int riscv_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - int err; - int code; - - if (atomic_inc_return(&riscv_active_events) == 1) { - err = reserve_pmc_hardware(); - - if (err) { - pr_warn("PMC hardware not available\n"); - atomic_dec(&riscv_active_events); - return -EBUSY; - } - } - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - code = riscv_pmu->map_hw_event(attr->config); - break; - case PERF_TYPE_HW_CACHE: - code = riscv_pmu->map_cache_event(attr->config); - break; - case PERF_TYPE_RAW: - return -EOPNOTSUPP; - default: - return -ENOENT; - } - - event->destroy = riscv_event_destroy; - if (code < 0) { - event->destroy(event); - return code; - } - - /* - * idx is set to -1 because the index of a general event should not be - * decided until binding to some counter in pmu->add(). - * - * But since we don't have such support, later in pmu->add(), we just - * use hwc->config as the index instead. - */ - hwc->config = code; - hwc->idx = -1; - - return 0; -} - -/* - * Initialization - */ - -static struct pmu min_pmu = { - .name = "riscv-base", - .event_init = riscv_event_init, - .add = riscv_pmu_add, - .del = riscv_pmu_del, - .start = riscv_pmu_start, - .stop = riscv_pmu_stop, - .read = riscv_pmu_read, -}; - -static const struct riscv_pmu riscv_base_pmu = { - .pmu = &min_pmu, - .max_events = ARRAY_SIZE(riscv_hw_event_map), - .map_hw_event = riscv_map_hw_event, - .hw_events = riscv_hw_event_map, - .map_cache_event = riscv_map_cache_event, - .cache_events = &riscv_cache_event_map, - .counter_width = 63, - .num_counters = RISCV_BASE_COUNTERS + 0, - .handle_irq = &riscv_base_pmu_handle_irq, - - /* This means this PMU has no IRQ. */ - .irq = -1, -}; - -static const struct of_device_id riscv_pmu_of_ids[] = { - {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu}, - { /* sentinel value */ } -}; - -static int __init init_hw_perf_events(void) -{ - struct device_node *node = of_find_node_by_type(NULL, "pmu"); - const struct of_device_id *of_id; - - riscv_pmu = &riscv_base_pmu; - - if (node) { - of_id = of_match_node(riscv_pmu_of_ids, node); - - if (of_id) - riscv_pmu = of_id->data; - of_node_put(node); - } - - perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW); - return 0; -} -arch_initcall(init_hw_perf_events); From c631121dd16ee57ceb0d6a48e1357382b98350fd Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:53 -0800 Subject: [PATCH 369/380] RISC-V: Add CSR encodings for all HPMCOUNTERS Linux kernel can directly read these counters as the HPMCOUNTERS CSRs are accessible in S-mode. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index ae711692eec9..ce493df11177 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -150,9 +150,67 @@ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 #define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f #define CSR_CYCLEH 0xc80 #define CSR_TIMEH 0xc81 #define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 From f5bfa23f576fdcc8e0b7fbff44cf70bd69ff9bdb Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:54 -0800 Subject: [PATCH 370/380] RISC-V: Add a perf core library for pmu drivers Implement a perf core library that can support all the essential perf features in future. It can also accommodate any type of PMU implementation in future. Currently, both SBI based perf driver and legacy driver implemented uses the library. Most of the common perf functionalities are kept in this core library wile PMU specific driver can implement PMU specific features. For example, the SBI specific functionality will be implemented in the SBI specific driver. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- drivers/perf/Kconfig | 10 + drivers/perf/Makefile | 1 + drivers/perf/riscv_pmu.c | 322 +++++++++++++++++++++++++++++++++ include/linux/perf/riscv_pmu.h | 65 +++++++ 4 files changed, 398 insertions(+) create mode 100644 drivers/perf/riscv_pmu.c create mode 100644 include/linux/perf/riscv_pmu.h diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc686..dbc0e3f98be9 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -56,6 +56,16 @@ config ARM_PMU Say y if you want to use CPU performance monitors on ARM-based systems. +config RISCV_PMU + depends on RISCV + bool "RISC-V PMU framework" + default y + help + Say y if you want to use CPU performance monitors on RISCV-based + systems. This provides the core PMU framework that abstracts common + PMU functionalities in a core library so that different PMU drivers + can reuse it. + config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2db5418d5b0a..09082dea154b 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o +obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c new file mode 100644 index 000000000000..590a5789c128 --- /dev/null +++ b/drivers/perf/riscv_pmu.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This implementation is based on old RISC-V perf and ARM perf event code + * which are in turn based on sparc64 and x86 code. + */ + +#include +#include +#include +#include +#include +#include + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + switchcase_csr_read_32(CSR_CYCLEH, ret) + default : + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +/* + * Read the CSR of a corresponding counter. + */ +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr) +{ + if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H || + (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) { + pr_err("Invalid performance counter csr %lx\n", csr); + return -EINVAL; + } + + return csr_read_num(csr); +} + +u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) +{ + int cwidth; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (!rvpmu->ctr_get_width) + /** + * If the pmu driver doesn't support counter width, set it to default + * maximum allowed by the specification. + */ + cwidth = 63; + else { + if (hwc->idx == -1) + /* Handle init case where idx is not initialized yet */ + cwidth = rvpmu->ctr_get_width(0); + else + cwidth = rvpmu->ctr_get_width(hwc->idx); + } + + return GENMASK_ULL(cwidth, 0); +} + +u64 riscv_pmu_event_update(struct perf_event *event) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + unsigned long cmask; + u64 oldval, delta; + + if (!rvpmu->ctr_read) + return 0; + + cmask = riscv_pmu_ctr_get_width_mask(event); + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = rvpmu->ctr_read(event); + oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count); + } while (oldval != prev_raw_count); + + delta = (new_raw_count - prev_raw_count) & cmask; + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return delta; +} + +static void riscv_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + + if (!(hwc->state & PERF_HES_STOPPED)) { + if (rvpmu->ctr_stop) { + rvpmu->ctr_stop(event, 0); + hwc->state |= PERF_HES_STOPPED; + } + riscv_pmu_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +int riscv_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int overflow = 0; + uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + /* + * Limit the maximum period to prevent the counter value + * from overtaking the one we are about to program. In + * effect we are reducing max_period to account for + * interrupt latency (and we are being very conservative). + */ + if (left > (max_period >> 1)) + left = (max_period >> 1); + + local64_set(&hwc->prev_count, (u64)-left); + perf_event_update_userpage(event); + + return overflow; +} + +static void riscv_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); + u64 init_val; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + hwc->state = 0; + riscv_pmu_event_set_period(event); + init_val = local64_read(&hwc->prev_count) & max_period; + rvpmu->ctr_start(event, init_val); + perf_event_update_userpage(event); +} + +static int riscv_pmu_add(struct perf_event *event, int flags) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = rvpmu->ctr_get_idx(event); + if (idx < 0) + return idx; + + hwc->idx = idx; + cpuc->events[idx] = event; + cpuc->n_events++; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + riscv_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void riscv_pmu_del(struct perf_event *event, int flags) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + + riscv_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[hwc->idx] = NULL; + /* The firmware need to reset the counter mapping */ + if (rvpmu->ctr_stop) + rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET); + cpuc->n_events--; + if (rvpmu->ctr_clear_idx) + rvpmu->ctr_clear_idx(event); + perf_event_update_userpage(event); + hwc->idx = -1; +} + +static void riscv_pmu_read(struct perf_event *event) +{ + riscv_pmu_event_update(event); +} + +static int riscv_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + int mapped_event; + u64 event_config = 0; + uint64_t cmask; + + hwc->flags = 0; + mapped_event = rvpmu->event_map(event, &event_config); + if (mapped_event < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapped_event; + } + + /* + * idx is set to -1 because the index of a general event should not be + * decided until binding to some counter in pmu->add(). + * config will contain the information about counter CSR + * the idx will contain the counter index + */ + hwc->config = event_config; + hwc->idx = -1; + hwc->event_base = mapped_event; + + if (!is_sampling_event(event)) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + cmask = riscv_pmu_ctr_get_width_mask(event); + hwc->sample_period = cmask >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +struct riscv_pmu *riscv_pmu_alloc(void) +{ + struct riscv_pmu *pmu; + int cpuid, i; + struct cpu_hw_events *cpuc; + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + goto out; + + pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL); + if (!pmu->hw_events) { + pr_info("failed to allocate per-cpu PMU data.\n"); + goto out_free_pmu; + } + + for_each_possible_cpu(cpuid) { + cpuc = per_cpu_ptr(pmu->hw_events, cpuid); + cpuc->n_events = 0; + for (i = 0; i < RISCV_MAX_COUNTERS; i++) + cpuc->events[i] = NULL; + } + pmu->pmu = (struct pmu) { + .event_init = riscv_pmu_event_init, + .add = riscv_pmu_add, + .del = riscv_pmu_del, + .start = riscv_pmu_start, + .stop = riscv_pmu_stop, + .read = riscv_pmu_read, + }; + + return pmu; + +out_free_pmu: + kfree(pmu); +out: + return NULL; +} diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h new file mode 100644 index 000000000000..0d8979765d79 --- /dev/null +++ b/include/linux/perf/riscv_pmu.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 SiFive + * Copyright (C) 2018 Andes Technology Corporation + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + */ + +#ifndef _ASM_RISCV_PERF_EVENT_H +#define _ASM_RISCV_PERF_EVENT_H + +#include +#include +#include + +#ifdef CONFIG_RISCV_PMU + +/* + * The RISCV_MAX_COUNTERS parameter should be specified. + */ + +#define RISCV_MAX_COUNTERS 64 +#define RISCV_OP_UNSUPP (-EOPNOTSUPP) +#define RISCV_PMU_PDEV_NAME "riscv-pmu" + +#define RISCV_PMU_STOP_FLAG_RESET 1 + +struct cpu_hw_events { + /* currently enabled events */ + int n_events; + /* currently enabled events */ + struct perf_event *events[RISCV_MAX_COUNTERS]; + /* currently enabled counters */ + DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS); +}; + +struct riscv_pmu { + struct pmu pmu; + char *name; + + irqreturn_t (*handle_irq)(int irq_num, void *dev); + + int num_counters; + u64 (*ctr_read)(struct perf_event *event); + int (*ctr_get_idx)(struct perf_event *event); + int (*ctr_get_width)(int idx); + void (*ctr_clear_idx)(struct perf_event *event); + void (*ctr_start)(struct perf_event *event, u64 init_val); + void (*ctr_stop)(struct perf_event *event, unsigned long flag); + int (*event_map)(struct perf_event *event, u64 *config); + + struct cpu_hw_events __percpu *hw_events; + struct hlist_node node; +}; + +#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); +int riscv_pmu_event_set_period(struct perf_event *event); +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); +u64 riscv_pmu_event_update(struct perf_event *event); +struct riscv_pmu *riscv_pmu_alloc(void); + +#endif /* CONFIG_RISCV_PMU */ + +#endif /* _ASM_RISCV_PERF_EVENT_H */ From 9b3e150e310ee71d7bae1e31c38a300cfa5e951b Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:55 -0800 Subject: [PATCH 371/380] RISC-V: Add a simple platform driver for RISC-V legacy perf The old RISC-V perf implementation allowed counting of only cycle/instruction counters using perf. Restore that feature by implementing a simple platform driver under a separate config to provide backward compatibility. Any existing software stack will continue to work as it is. However, it provides an easy way out in future where we can remove the legacy driver. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- drivers/perf/Kconfig | 10 +++ drivers/perf/Makefile | 1 + drivers/perf/riscv_pmu_legacy.c | 142 ++++++++++++++++++++++++++++++++ include/linux/perf/riscv_pmu.h | 6 ++ 4 files changed, 159 insertions(+) create mode 100644 drivers/perf/riscv_pmu_legacy.c diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index dbc0e3f98be9..386162ad858a 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -66,6 +66,16 @@ config RISCV_PMU PMU functionalities in a core library so that different PMU drivers can reuse it. +config RISCV_PMU_LEGACY + depends on RISCV_PMU + bool "RISC-V legacy PMU implementation" + default y + help + Say y if you want to use the legacy CPU performance monitor + implementation on RISC-V based systems. This only allows counting + of cycle/instruction counter and doesn't support counter overflow, + or programmable counters. It will be removed in future. + config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 09082dea154b..c3d3268d495b 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o +obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c new file mode 100644 index 000000000000..342778782359 --- /dev/null +++ b/drivers/perf/riscv_pmu_legacy.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This implementation is based on old RISC-V perf and ARM perf event code + * which are in turn based on sparc64 and x86 code. + */ + +#include +#include +#include + +#define RISCV_PMU_LEGACY_CYCLE 0 +#define RISCV_PMU_LEGACY_INSTRET 1 +#define RISCV_PMU_LEGACY_NUM_CTR 2 + +static bool pmu_init_done; + +static int pmu_legacy_ctr_get_idx(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (event->attr.type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + if (attr->config == PERF_COUNT_HW_CPU_CYCLES) + return RISCV_PMU_LEGACY_CYCLE; + else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS) + return RISCV_PMU_LEGACY_INSTRET; + else + return -EOPNOTSUPP; +} + +/* For legacy config & counter index are same */ +static int pmu_legacy_event_map(struct perf_event *event, u64 *config) +{ + return pmu_legacy_ctr_get_idx(event); +} + +static u64 pmu_legacy_read_ctr(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 val; + + if (idx == RISCV_PMU_LEGACY_CYCLE) { + val = riscv_pmu_ctr_read_csr(CSR_CYCLE); + if (IS_ENABLED(CONFIG_32BIT)) + val = (u64)riscv_pmu_ctr_read_csr(CSR_CYCLEH) << 32 | val; + } else if (idx == RISCV_PMU_LEGACY_INSTRET) { + val = riscv_pmu_ctr_read_csr(CSR_INSTRET); + if (IS_ENABLED(CONFIG_32BIT)) + val = ((u64)riscv_pmu_ctr_read_csr(CSR_INSTRETH)) << 32 | val; + } else + return 0; + + return val; +} + +static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival) +{ + struct hw_perf_event *hwc = &event->hw; + u64 initial_val = pmu_legacy_read_ctr(event); + + /** + * The legacy method doesn't really have a start/stop method. + * It also can not update the counter with a initial value. + * But we still need to set the prev_count so that read() can compute + * the delta. Just use the current counter value to set the prev_count. + */ + local64_set(&hwc->prev_count, initial_val); +} + +/** + * This is just a simple implementation to allow legacy implementations + * compatible with new RISC-V PMU driver framework. + * This driver only allows reading two counters i.e CYCLE & INSTRET. + * However, it can not start or stop the counter. Thus, it is not very useful + * will be removed in future. + */ +static void pmu_legacy_init(struct riscv_pmu *pmu) +{ + pr_info("Legacy PMU implementation is available\n"); + + pmu->num_counters = RISCV_PMU_LEGACY_NUM_CTR; + pmu->ctr_start = pmu_legacy_ctr_start; + pmu->ctr_stop = NULL; + pmu->event_map = pmu_legacy_event_map; + pmu->ctr_get_idx = pmu_legacy_ctr_get_idx; + pmu->ctr_get_width = NULL; + pmu->ctr_clear_idx = NULL; + pmu->ctr_read = pmu_legacy_read_ctr; + + perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); +} + +static int pmu_legacy_device_probe(struct platform_device *pdev) +{ + struct riscv_pmu *pmu = NULL; + + pmu = riscv_pmu_alloc(); + if (!pmu) + return -ENOMEM; + pmu_legacy_init(pmu); + + return 0; +} + +static struct platform_driver pmu_legacy_driver = { + .probe = pmu_legacy_device_probe, + .driver = { + .name = RISCV_PMU_LEGACY_PDEV_NAME, + }, +}; + +static int __init riscv_pmu_legacy_devinit(void) +{ + int ret; + struct platform_device *pdev; + + if (likely(pmu_init_done)) + return 0; + + ret = platform_driver_register(&pmu_legacy_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple(RISCV_PMU_LEGACY_PDEV_NAME, -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&pmu_legacy_driver); + return PTR_ERR(pdev); + } + + return ret; +} +late_initcall(riscv_pmu_legacy_devinit); + +void riscv_pmu_legacy_skip_init(void) +{ + pmu_init_done = true; +} diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 0d8979765d79..9140c491fc54 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -22,6 +22,7 @@ #define RISCV_MAX_COUNTERS 64 #define RISCV_OP_UNSUPP (-EOPNOTSUPP) #define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" #define RISCV_PMU_STOP_FLAG_RESET 1 @@ -58,6 +59,11 @@ unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); u64 riscv_pmu_event_update(struct perf_event *event); +#ifdef CONFIG_RISCV_PMU_LEGACY +void riscv_pmu_legacy_skip_init(void); +#else +static inline void riscv_pmu_legacy_skip_init(void) {}; +#endif struct riscv_pmu *riscv_pmu_alloc(void); #endif /* CONFIG_RISCV_PMU */ From 90beae5185c260074db409241247630036cf93a0 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:56 -0800 Subject: [PATCH 372/380] RISC-V: Add RISC-V SBI PMU extension definitions This patch adds all the definitions defined by the SBI PMU extension. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 95 ++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index d1c37479d828..4a430ae60eaa 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -29,6 +29,7 @@ enum sbi_ext_id { SBI_EXT_RFENCE = 0x52464E43, SBI_EXT_HSM = 0x48534D, SBI_EXT_SRST = 0x53525354, + SBI_EXT_PMU = 0x504D55, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -95,6 +96,98 @@ enum sbi_srst_reset_reason { SBI_SRST_RESET_REASON_SYS_FAILURE, }; +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, +}; + +#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0) +#define RISCV_PMU_RAW_EVENT_IDX 0x20000 + +/** General pmu event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, +}; + +/** + * Special "firmware" events provided by the firmware, even if the hardware + * does not support performance events. These events are encoded as a raw + * event type in Linux kernel perf framework. + */ +enum sbi_pmu_fw_generic_events_t { + SBI_PMU_FW_MISALIGNED_LOAD = 0, + SBI_PMU_FW_MISALIGNED_STORE = 1, + SBI_PMU_FW_ACCESS_LOAD = 2, + SBI_PMU_FW_ACCESS_STORE = 3, + SBI_PMU_FW_ILLEGAL_INSN = 4, + SBI_PMU_FW_SET_TIMER = 5, + SBI_PMU_FW_IPI_SENT = 6, + SBI_PMU_FW_IPI_RECVD = 7, + SBI_PMU_FW_FENCE_I_SENT = 8, + SBI_PMU_FW_FENCE_I_RECVD = 9, + SBI_PMU_FW_SFENCE_VMA_SENT = 10, + SBI_PMU_FW_SFENCE_VMA_RCVD = 11, + SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, + SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13, + + SBI_PMU_FW_HFENCE_GVMA_SENT = 14, + SBI_PMU_FW_HFENCE_GVMA_RCVD = 15, + SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16, + SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17, + + SBI_PMU_FW_HFENCE_VVMA_SENT = 18, + SBI_PMU_FW_HFENCE_VVMA_RCVD = 19, + SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20, + SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21, + SBI_PMU_FW_MAX, +}; + +/* SBI PMU event types */ +enum sbi_pmu_event_type { + SBI_PMU_EVENT_TYPE_HW = 0x0, + SBI_PMU_EVENT_TYPE_CACHE = 0x1, + SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_FW = 0xf, +}; + +/* SBI PMU event types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) +#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) +#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3) +#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4) +#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) +#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) +#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET (1 << 0) + #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f @@ -108,6 +201,8 @@ enum sbi_srst_reset_reason { #define SBI_ERR_DENIED -4 #define SBI_ERR_INVALID_ADDRESS -5 #define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 extern unsigned long sbi_spec_version; struct sbiret { From e9991434596f5373dfd75857b445eb92a9253c56 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:57 -0800 Subject: [PATCH 373/380] RISC-V: Add perf platform driver based on SBI PMU extension RISC-V SBI specification added a PMU extension that allows to configure start/stop any pmu counter. The RISC-V perf can use most of the generic perf features except interrupt overflow and event filtering based on privilege mode which will be added in future. It also allows to monitor a handful of firmware counters that can provide insights into firmware activity during a performance analysis. Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- drivers/perf/Kconfig | 10 + drivers/perf/Makefile | 1 + drivers/perf/riscv_pmu.c | 2 + drivers/perf/riscv_pmu_sbi.c | 578 +++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + include/linux/perf/riscv_pmu.h | 6 +- 6 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 drivers/perf/riscv_pmu_sbi.c diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 386162ad858a..5645b5615c14 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -76,6 +76,16 @@ config RISCV_PMU_LEGACY of cycle/instruction counter and doesn't support counter overflow, or programmable counters. It will be removed in future. +config RISCV_PMU_SBI + depends on RISCV_PMU && RISCV_SBI + bool "RISC-V PMU based on SBI PMU extension" + default y + help + Say y if you want to use the CPU performance monitor + using SBI PMU extension on RISC-V based systems. This option provides + full perf feature support i.e. counter overflow, privilege mode + filtering, counter configuration. + config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index c3d3268d495b..f149735166e1 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o +obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 590a5789c128..b2b8d2074ed0 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -15,6 +15,8 @@ #include #include +#include + static unsigned long csr_read_num(int csr_num) { #define switchcase_csr_read(__csr_num, __val) {\ diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c new file mode 100644 index 000000000000..815d5c509c64 --- /dev/null +++ b/drivers/perf/riscv_pmu_sbi.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This code is based on ARM perf event code which is in turn based on + * sparc64 and x86 code. + */ + +#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt + +#include +#include +#include + +#include + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +/** + * RISC-V doesn't have hetergenous harts yet. This need to be part of + * per_cpu in case of harts with different pmu counters + */ +static union sbi_pmu_ctr_info *pmu_ctr_list; + +struct sbi_pmu_event_data { + union { + union { + struct hw_gen_event { + uint32_t event_code:16; + uint32_t event_type:4; + uint32_t reserved:12; + } hw_gen_event; + struct hw_cache_event { + uint32_t result_id:1; + uint32_t op_id:2; + uint32_t cache_id:13; + uint32_t event_type:4; + uint32_t reserved:12; + } hw_cache_event; + }; + uint32_t event_idx; + }; +}; + +static const struct sbi_pmu_event_data pmu_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_CPU_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = { + SBI_PMU_HW_INSTRUCTIONS, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = { + SBI_PMU_HW_CACHE_REFERENCES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = { + SBI_PMU_HW_CACHE_MISSES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = { + SBI_PMU_HW_BRANCH_INSTRUCTIONS, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = { + SBI_PMU_HW_BRANCH_MISSES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_BUS_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = { + SBI_PMU_HW_STALLED_CYCLES_FRONTEND, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {.hw_gen_event = { + SBI_PMU_HW_STALLED_CYCLES_BACKEND, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_REF_CPU_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x +static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +[PERF_COUNT_HW_CACHE_OP_MAX] +[PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ), + C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, +}; + +static int pmu_sbi_ctr_get_width(int idx) +{ + return pmu_ctr_list[idx].width; +} + +static bool pmu_sbi_ctr_is_fw(int cidx) +{ + union sbi_pmu_ctr_info *info; + + info = &pmu_ctr_list[cidx]; + if (!info) + return false; + + return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; +} + +static int pmu_sbi_ctr_get_idx(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct sbiret ret; + int idx; + uint64_t cbase = 0; + uint64_t cmask = GENMASK_ULL(rvpmu->num_counters - 1, 0); + unsigned long cflags = 0; + + if (event->attr.exclude_kernel) + cflags |= SBI_PMU_CFG_FLAG_SET_SINH; + if (event->attr.exclude_user) + cflags |= SBI_PMU_CFG_FLAG_SET_UINH; + + /* retrieve the available counter index */ + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, hwc->event_base, hwc->config, 0); + if (ret.error) { + pr_debug("Not able to find a counter for event %lx config %llx\n", + hwc->event_base, hwc->config); + return sbi_err_map_linux_errno(ret.error); + } + + idx = ret.value; + if (idx >= rvpmu->num_counters || !pmu_ctr_list[idx].value) + return -ENOENT; + + /* Additional sanity check for the counter id */ + if (pmu_sbi_ctr_is_fw(idx)) { + if (!test_and_set_bit(idx, cpuc->used_fw_ctrs)) + return idx; + } else { + if (!test_and_set_bit(idx, cpuc->used_hw_ctrs)) + return idx; + } + + return -ENOENT; +} + +static void pmu_sbi_ctr_clear_idx(struct perf_event *event) +{ + + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + int idx = hwc->idx; + + if (pmu_sbi_ctr_is_fw(idx)) + clear_bit(idx, cpuc->used_fw_ctrs); + else + clear_bit(idx, cpuc->used_hw_ctrs); +} + +static int pmu_event_find_cache(u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx; + + return ret; +} + +static bool pmu_sbi_is_fw_event(struct perf_event *event) +{ + u32 type = event->attr.type; + u64 config = event->attr.config; + + if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1)) + return true; + else + return false; +} + +static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) +{ + u32 type = event->attr.type; + u64 config = event->attr.config; + int bSoftware; + u64 raw_config_val; + int ret; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[event->attr.config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI specification, the upper 16 bits must be unused for + * a raw event. Use the MSB (63b) to distinguish between hardware + * raw event and firmware events. + */ + bSoftware = config >> 63; + raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK; + if (bSoftware) { + if (raw_config_val < SBI_PMU_FW_MAX) + ret = (raw_config_val & 0xFFFF) | + (SBI_PMU_EVENT_TYPE_FW << 16); + else + return -EINVAL; + } else { + ret = RISCV_PMU_RAW_EVENT_IDX; + *econfig = raw_config_val; + } + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static u64 pmu_sbi_ctr_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct sbiret ret; + union sbi_pmu_ctr_info info; + u64 val = 0; + + if (pmu_sbi_is_fw_event(event)) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, + hwc->idx, 0, 0, 0, 0, 0); + if (!ret.error) + val = ret.value; + } else { + info = pmu_ctr_list[idx]; + val = riscv_pmu_ctr_read_csr(info.csr); + if (IS_ENABLED(CONFIG_32BIT)) + val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val; + } + + return val; +} + +static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) +{ + struct sbiret ret; + struct hw_perf_event *hwc = &event->hw; + unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, + 1, flag, ival, ival >> 32, 0); + if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED)) + pr_err("Starting counter idx %d failed with error %d\n", + hwc->idx, sbi_err_map_linux_errno(ret.error)); +} + +static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) +{ + struct sbiret ret; + struct hw_perf_event *hwc = &event->hw; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); + if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && + flag != SBI_PMU_STOP_FLAG_RESET) + pr_err("Stopping counter idx %d failed with error %d\n", + hwc->idx, sbi_err_map_linux_errno(ret.error)); +} + +static int pmu_sbi_find_num_ctrs(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} + +static int pmu_sbi_get_ctrinfo(int nctr) +{ + struct sbiret ret; + int i, num_hw_ctr = 0, num_fw_ctr = 0; + union sbi_pmu_ctr_info cinfo; + + pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL); + if (!pmu_ctr_list) + return -ENOMEM; + + for (i = 0; i <= nctr; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + if (ret.error) + /* The logical counter ids are not expected to be contiguous */ + continue; + cinfo.value = ret.value; + if (cinfo.type == SBI_PMU_CTR_TYPE_FW) + num_fw_ctr++; + else + num_hw_ctr++; + pmu_ctr_list[i].value = cinfo.value; + } + + pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr); + + return 0; +} + +static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node); + + /* Enable the access for TIME csr only from the user mode now */ + csr_write(CSR_SCOUNTEREN, 0x2); + + /* Stop all the counters so that they can be enabled from perf */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0); + + return 0; +} + +static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) +{ + /* Disable all counters access for user mode now */ + csr_write(CSR_SCOUNTEREN, 0x0); + + return 0; +} + +static int pmu_sbi_device_probe(struct platform_device *pdev) +{ + struct riscv_pmu *pmu = NULL; + int num_counters; + int ret; + + pr_info("SBI PMU extension is available\n"); + pmu = riscv_pmu_alloc(); + if (!pmu) + return -ENOMEM; + + num_counters = pmu_sbi_find_num_ctrs(); + if (num_counters < 0) { + pr_err("SBI PMU extension doesn't provide any counters\n"); + return -ENODEV; + } + + /* cache all the information about counters now */ + if (pmu_sbi_get_ctrinfo(num_counters)) + return -ENODEV; + + pmu->num_counters = num_counters; + pmu->ctr_start = pmu_sbi_ctr_start; + pmu->ctr_stop = pmu_sbi_ctr_stop; + pmu->event_map = pmu_sbi_event_map; + pmu->ctr_get_idx = pmu_sbi_ctr_get_idx; + pmu->ctr_get_width = pmu_sbi_ctr_get_width; + pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx; + pmu->ctr_read = pmu_sbi_ctr_read; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); + if (ret) + return ret; + + ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); + if (ret) { + cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); + return ret; + } + + return 0; +} + +static struct platform_driver pmu_sbi_driver = { + .probe = pmu_sbi_device_probe, + .driver = { + .name = RISCV_PMU_PDEV_NAME, + }, +}; + +static int __init pmu_sbi_devinit(void) +{ + int ret; + struct platform_device *pdev; + + if (sbi_spec_version < sbi_mk_version(0, 3) || + sbi_probe_extension(SBI_EXT_PMU) <= 0) { + return 0; + } + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, + "perf/riscv/pmu:starting", + pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); + if (ret) { + pr_err("CPU hotplug notifier could not be registered: %d\n", + ret); + return ret; + } + + ret = platform_driver_register(&pmu_sbi_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&pmu_sbi_driver); + return PTR_ERR(pdev); + } + + /* Notify legacy implementation that SBI pmu is available*/ + riscv_pmu_legacy_skip_init(); + + return ret; +} +device_initcall(pmu_sbi_devinit) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..51c7e4929df7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -165,6 +165,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, + CPUHP_AP_PERF_RISCV_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 9140c491fc54..0f226948c0ca 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -31,8 +31,10 @@ struct cpu_hw_events { int n_events; /* currently enabled events */ struct perf_event *events[RISCV_MAX_COUNTERS]; - /* currently enabled counters */ - DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled hardware counters */ + DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled firmware counters */ + DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS); }; struct riscv_pmu { From 4905ec2fb7e6421c14c9fb7276f5aa92f60f2b98 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:58 -0800 Subject: [PATCH 374/380] RISC-V: Add sscofpmf extension support The sscofpmf extension allows counter overflow and filtering for programmable counters. Enable the perf driver to handle the overflow interrupt. The overflow interrupt is a hart local interrupt. Thus, per cpu overflow interrupts are setup as a child under the root INTC irq domain. Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 8 +- arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/kernel/cpu.c | 1 + arch/riscv/kernel/cpufeature.c | 2 + drivers/perf/riscv_pmu_sbi.c | 222 ++++++++++++++++++++++++++++++++- include/linux/perf/riscv_pmu.h | 2 + 6 files changed, 230 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index ce493df11177..8b2e48077731 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -65,6 +65,7 @@ #define IRQ_S_EXT 9 #define IRQ_VS_EXT 10 #define IRQ_M_EXT 11 +#define IRQ_PMU_OVF 13 /* Exception causes */ #define EXC_INST_MISALIGNED 0 @@ -212,6 +213,8 @@ #define CSR_HPMCOUNTER30H 0xc9e #define CSR_HPMCOUNTER31H 0xc9f +#define CSR_SSCOUNTOVF 0xda0 + #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 #define CSR_STVEC 0x105 @@ -298,7 +301,10 @@ # define RV_IRQ_SOFT IRQ_S_SOFT # define RV_IRQ_TIMER IRQ_S_TIMER # define RV_IRQ_EXT IRQ_S_EXT -#endif /* CONFIG_RISCV_M_MODE */ +# define RV_IRQ_PMU IRQ_PMU_OVF +# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF) + +#endif /* !CONFIG_RISCV_M_MODE */ /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */ #define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 691fc9c8099b..0734e42f74f2 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -51,6 +51,7 @@ extern unsigned long elf_hwcap; * available logical extension id. */ enum riscv_isa_ext_id { + RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE, RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index fc115e307ef5..c11cb576394d 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -87,6 +87,7 @@ int riscv_of_parent_hartid(struct device_node *node) * extensions by an underscore. */ static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), }; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a43c08af5f4b..1b2d42d7f589 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -190,6 +190,8 @@ void __init riscv_fill_hwcap(void) if (!ext_long) { this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; set_bit(*ext - 'a', this_isa); + } else { + SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); } #undef SET_ISA_EXT_MAP } diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 815d5c509c64..a1317a483512 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -13,8 +13,13 @@ #include #include #include +#include +#include +#include +#include #include +#include union sbi_pmu_ctr_info { unsigned long value; @@ -35,6 +40,7 @@ union sbi_pmu_ctr_info { * per_cpu in case of harts with different pmu counters */ static union sbi_pmu_ctr_info *pmu_ctr_list; +static unsigned int riscv_pmu_irq; struct sbi_pmu_event_data { union { @@ -469,33 +475,229 @@ static int pmu_sbi_get_ctrinfo(int nctr) return 0; } +static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) +{ + /** + * No need to check the error because we are disabling all the counters + * which may include counters that are not enabled yet. + */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0); +} + +static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) +{ + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + + /* No need to check the error here as we can't do anything about the error */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, + cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0); +} + +/** + * This function starts all the used counters in two step approach. + * Any counter that did not overflow can be start in a single step + * while the overflowed counters need to be started with updated initialization + * value. + */ +static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, + unsigned long ctr_ovf_mask) +{ + int idx = 0; + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + struct perf_event *event; + unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; + unsigned long ctr_start_mask = 0; + uint64_t max_period; + struct hw_perf_event *hwc; + u64 init_val = 0; + + ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask; + + /* Start all the counters that did not overflow in a single shot */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask, + 0, 0, 0, 0); + + /* Reinitialize and start all the counter that overflowed */ + while (ctr_ovf_mask) { + if (ctr_ovf_mask & 0x01) { + event = cpu_hw_evt->events[idx]; + hwc = &event->hw; + max_period = riscv_pmu_ctr_get_width_mask(event); + init_val = local64_read(&hwc->prev_count) & max_period; + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, + flag, init_val, 0, 0); + } + ctr_ovf_mask = ctr_ovf_mask >> 1; + idx++; + } +} + +static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) +{ + struct perf_sample_data data; + struct pt_regs *regs; + struct hw_perf_event *hw_evt; + union sbi_pmu_ctr_info *info; + int lidx, hidx, fidx; + struct riscv_pmu *pmu; + struct perf_event *event; + unsigned long overflow; + unsigned long overflowed_ctrs = 0; + struct cpu_hw_events *cpu_hw_evt = dev; + + if (WARN_ON_ONCE(!cpu_hw_evt)) + return IRQ_NONE; + + /* Firmware counter don't support overflow yet */ + fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + event = cpu_hw_evt->events[fidx]; + if (!event) { + csr_clear(CSR_SIP, SIP_LCOFIP); + return IRQ_NONE; + } + + pmu = to_riscv_pmu(event->pmu); + pmu_sbi_stop_hw_ctrs(pmu); + + /* Overflow status register should only be read after counter are stopped */ + overflow = csr_read(CSR_SSCOUNTOVF); + + /** + * Overflow interrupt pending bit should only be cleared after stopping + * all the counters to avoid any race condition. + */ + csr_clear(CSR_SIP, SIP_LCOFIP); + + /* No overflow bit is set */ + if (!overflow) + return IRQ_NONE; + + regs = get_irq_regs(); + + for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) { + struct perf_event *event = cpu_hw_evt->events[lidx]; + + /* Skip if invalid event or user did not request a sampling */ + if (!event || !is_sampling_event(event)) + continue; + + info = &pmu_ctr_list[lidx]; + /* Do a sanity check */ + if (!info || info->type != SBI_PMU_CTR_TYPE_HW) + continue; + + /* compute hardware counter index */ + hidx = info->csr - CSR_CYCLE; + /* check if the corresponding bit is set in sscountovf */ + if (!(overflow & (1 << hidx))) + continue; + + /* + * Keep a track of overflowed counters so that they can be started + * with updated initial value. + */ + overflowed_ctrs |= 1 << lidx; + hw_evt = &event->hw; + riscv_pmu_event_update(event); + perf_sample_data_init(&data, 0, hw_evt->last_period); + if (riscv_pmu_event_set_period(event)) { + /* + * Unlike other ISAs, RISC-V don't have to disable interrupts + * to avoid throttling here. As per the specification, the + * interrupt remains disabled until the OF bit is set. + * Interrupts are enabled again only during the start. + * TODO: We will need to stop the guest counters once + * virtualization support is added. + */ + perf_event_overflow(event, &data, regs); + } + } + pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs); + + return IRQ_HANDLED; +} + static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) { struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node); + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); /* Enable the access for TIME csr only from the user mode now */ csr_write(CSR_SCOUNTEREN, 0x2); /* Stop all the counters so that they can be enabled from perf */ - sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0); + pmu_sbi_stop_all(pmu); + + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + cpu_hw_evt->irq = riscv_pmu_irq; + csr_clear(CSR_IP, BIT(RV_IRQ_PMU)); + csr_set(CSR_IE, BIT(RV_IRQ_PMU)); + enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); + } return 0; } static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) { + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + disable_percpu_irq(riscv_pmu_irq); + csr_clear(CSR_IE, BIT(RV_IRQ_PMU)); + } + /* Disable all counters access for user mode now */ csr_write(CSR_SCOUNTEREN, 0x0); return 0; } +static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev) +{ + int ret; + struct cpu_hw_events __percpu *hw_events = pmu->hw_events; + struct device_node *cpu, *child; + struct irq_domain *domain = NULL; + + if (!riscv_isa_extension_available(NULL, SSCOFPMF)) + return -EOPNOTSUPP; + + for_each_of_cpu_node(cpu) { + child = of_get_compatible_child(cpu, "riscv,cpu-intc"); + if (!child) { + pr_err("Failed to find INTC node\n"); + return -ENODEV; + } + domain = irq_find_host(child); + of_node_put(child); + if (domain) + break; + } + if (!domain) { + pr_err("Failed to find INTC IRQ root domain\n"); + return -ENODEV; + } + + riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU); + if (!riscv_pmu_irq) { + pr_err("Failed to map PMU interrupt for node\n"); + return -ENODEV; + } + + ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events); + if (ret) { + pr_err("registering percpu irq failed [%d]\n", ret); + return ret; + } + + return 0; +} + static int pmu_sbi_device_probe(struct platform_device *pdev) { struct riscv_pmu *pmu = NULL; int num_counters; - int ret; + int ret = -ENODEV; pr_info("SBI PMU extension is available\n"); pmu = riscv_pmu_alloc(); @@ -505,13 +707,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) num_counters = pmu_sbi_find_num_ctrs(); if (num_counters < 0) { pr_err("SBI PMU extension doesn't provide any counters\n"); - return -ENODEV; + goto out_free; } /* cache all the information about counters now */ if (pmu_sbi_get_ctrinfo(num_counters)) - return -ENODEV; + goto out_free; + ret = pmu_sbi_setup_irqs(pmu, pdev); + if (ret < 0) { + pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n"); + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; + } pmu->num_counters = num_counters; pmu->ctr_start = pmu_sbi_ctr_start; pmu->ctr_stop = pmu_sbi_ctr_stop; @@ -532,6 +740,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) } return 0; + +out_free: + kfree(pmu); + return ret; } static struct platform_driver pmu_sbi_driver = { diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 0f226948c0ca..46f9b6fe306e 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -29,6 +29,8 @@ struct cpu_hw_events { /* currently enabled events */ int n_events; + /* Counter overflow interrupt */ + int irq; /* currently enabled events */ struct perf_event *events[RISCV_MAX_COUNTERS]; /* currently enabled hardware counters */ From 23b1f18326ec3f6966ac8851b20b6d3d22380520 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:46:59 -0800 Subject: [PATCH 375/380] Documentation: riscv: Remove the old documentation The existing pmu documentation describes the limitation of perf infrastructure in RISC-V ISA and limited feature set of perf in RISC-V. However, SBI PMU extension and sscofpmf extension(ISA extension) allows to implement most of the required features of perf. Remove the old documentation which is not accurate anymore. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- Documentation/riscv/pmu.rst | 255 ------------------------------------ 1 file changed, 255 deletions(-) delete mode 100644 Documentation/riscv/pmu.rst diff --git a/Documentation/riscv/pmu.rst b/Documentation/riscv/pmu.rst deleted file mode 100644 index acb216b99c26..000000000000 --- a/Documentation/riscv/pmu.rst +++ /dev/null @@ -1,255 +0,0 @@ -=================================== -Supporting PMUs on RISC-V platforms -=================================== - -Alan Kao , Mar 2018 - -Introduction ------------- - -As of this writing, perf_event-related features mentioned in The RISC-V ISA -Privileged Version 1.10 are as follows: -(please check the manual for more details) - -* [m|s]counteren -* mcycle[h], cycle[h] -* minstret[h], instret[h] -* mhpeventx, mhpcounterx[h] - -With such function set only, porting perf would require a lot of work, due to -the lack of the following general architectural performance monitoring features: - -* Enabling/Disabling counters - Counters are just free-running all the time in our case. -* Interrupt caused by counter overflow - No such feature in the spec. -* Interrupt indicator - It is not possible to have many interrupt ports for all counters, so an - interrupt indicator is required for software to tell which counter has - just overflowed. -* Writing to counters - There will be an SBI to support this since the kernel cannot modify the - counters [1]. Alternatively, some vendor considers to implement - hardware-extension for M-S-U model machines to write counters directly. - -This document aims to provide developers a quick guide on supporting their -PMUs in the kernel. The following sections briefly explain perf' mechanism -and todos. - -You may check previous discussions here [1][2]. Also, it might be helpful -to check the appendix for related kernel structures. - - -1. Initialization ------------------ - -*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains -various methods according to perf's internal convention and PMU-specific -parameters. One should declare such instance to represent the PMU. By default, -*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very -basic support to a baseline QEMU model. - -Then he/she can either assign the instance's pointer to *riscv_pmu* so that -the minimal and already-implemented logic can be leveraged, or invent his/her -own *riscv_init_platform_pmu* implementation. - -In other words, existing sources of *riscv_base_pmu* merely provide a -reference implementation. Developers can flexibly decide how many parts they -can leverage, and in the most extreme case, they can customize every function -according to their needs. - - -2. Event Initialization ------------------------ - -When a user launches a perf command to monitor some events, it is first -interpreted by the userspace perf tool into multiple *perf_event_open* -system calls, and then each of them calls to the body of *event_init* -member function that was assigned in the previous step. In *riscv_base_pmu*'s -case, it is *riscv_event_init*. - -The main purpose of this function is to translate the event provided by user -into bitmap, so that HW-related control registers or counters can directly be -manipulated. The translation is based on the mappings and methods provided in -*riscv_pmu*. - -Note that some features can be done in this stage as well: - -(1) interrupt setting, which is stated in the next section; -(2) privilege level setting (user space only, kernel space only, both); -(3) destructor setting. Normally it is sufficient to apply *riscv_destroy_event*; -(4) tweaks for non-sampling events, which will be utilized by functions such as - *perf_adjust_period*, usually something like the follows:: - - if (!is_sampling_event(event)) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - -In the case of *riscv_base_pmu*, only (3) is provided for now. - - -3. Interrupt ------------- - -3.1. Interrupt Initialization - -This often occurs at the beginning of the *event_init* method. In common -practice, this should be a code segment like:: - - int x86_reserve_hardware(void) - { - int err = 0; - - if (!atomic_inc_not_zero(&pmc_refcount)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&pmc_refcount); - mutex_unlock(&pmc_reserve_mutex); - } - - return err; - } - -And the magic is in *reserve_pmc_hardware*, which usually does atomic -operations to make implemented IRQ accessible from some global function pointer. -*release_pmc_hardware* serves the opposite purpose, and it is used in event -destructors mentioned in previous section. - -(Note: From the implementations in all the architectures, the *reserve/release* -pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading. -It does NOT deal with the binding between an event and a physical counter, -which will be introduced in the next section.) - -3.2. IRQ Structure - -Basically, a IRQ runs the following pseudo code:: - - for each hardware counter that triggered this overflow - - get the event of this counter - - // following two steps are defined as *read()*, - // check the section Reading/Writing Counters for details. - count the delta value since previous interrupt - update the event->count (# event occurs) by adding delta, and - event->hw.period_left by subtracting delta - - if the event overflows - sample data - set the counter appropriately for the next overflow - - if the event overflows again - too frequently, throttle this event - fi - fi - - end for - -However as of this writing, none of the RISC-V implementations have designed an -interrupt for perf, so the details are to be completed in the future. - -4. Reading/Writing Counters ---------------------------- - -They seem symmetric but perf treats them quite differently. For reading, there -is a *read* interface in *struct pmu*, but it serves more than just reading. -According to the context, the *read* function not only reads the content of the -counter (event->count), but also updates the left period to the next interrupt -(event->hw.period_left). - -But the core of perf does not need direct write to counters. Writing counters -is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one -has to set the counter to a good value for the next interrupt; 2) inside the IRQ -it should set the counter to the same resonable value. - -Reading is not a problem in RISC-V but writing would need some effort, since -counters are not allowed to be written by S-mode. - - -5. add()/del()/start()/stop() ------------------------------ - -Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop() -starts/stop the counter of some event in the PMU. All of them take the same -arguments: *struct perf_event *event* and *int flag*. - -Consider perf as a state machine, then you will find that these functions serve -as the state transition process between those states. -Three states (event->hw.state) are defined: - -* PERF_HES_STOPPED: the counter is stopped -* PERF_HES_UPTODATE: the event->count is up-to-date -* PERF_HES_ARCH: arch-dependent usage ... we don't need this for now - -A normal flow of these state transitions are as follows: - -* A user launches a perf event, resulting in calling to *event_init*. -* When being context-switched in, *add* is called by the perf core, with a flag - PERF_EF_START, which means that the event should be started after it is added. - At this stage, a general event is bound to a physical counter, if any. - The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now - stopped, and the (software) event count does not need updating. - - - *start* is then called, and the counter is enabled. - With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check - previous section for detail). - Nothing is written if the flag does not contain PERF_EF_RELOAD. - The state now is reset to none, because it is neither stopped nor updated - (the counting already started) - -* When being context-switched out, *del* is called. It then checks out all the - events in the PMU and calls *stop* to update their counts. - - - *stop* is called by *del* - and the perf core with flag PERF_EF_UPDATE, and it often shares the same - subroutine as *read* with the same logic. - The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again. - - - Life cycle of these two pairs: *add* and *del* are called repeatedly as - tasks switch in-and-out; *start* and *stop* is also called when the perf core - needs a quick stop-and-start, for instance, when the interrupt period is being - adjusted. - -Current implementation is sufficient for now and can be easily extended to -features in the future. - -A. Related Structures ---------------------- - -* struct pmu: include/linux/perf_event.h -* struct riscv_pmu: arch/riscv/include/asm/perf_event.h - - Both structures are designed to be read-only. - - *struct pmu* defines some function pointer interfaces, and most of them take - *struct perf_event* as a main argument, dealing with perf events according to - perf's internal state machine (check kernel/events/core.c for details). - - *struct riscv_pmu* defines PMU-specific parameters. The naming follows the - convention of all other architectures. - -* struct perf_event: include/linux/perf_event.h -* struct hw_perf_event - - The generic structure that represents perf events, and the hardware-related - details. - -* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h - - The structure that holds the status of events, has two fixed members: - the number of events and the array of the events. - -References ----------- - -[1] https://github.com/riscv/riscv-linux/pull/124 - -[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA From 33363c336516e4beb9dd7e8265b369ff96d07dcb Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Feb 2022 16:47:00 -0800 Subject: [PATCH 376/380] MAINTAINERS: Add entry for RISC-V PMU drivers Add myself and Anup as maintainer for RISC-V PMU drivers. Signed-off-by: Atish Patra Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..6f2f7fea1e6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16561,6 +16561,15 @@ S: Maintained F: drivers/mtd/nand/raw/r852.c F: drivers/mtd/nand/raw/r852.h +RISC-V PMU DRIVERS +M: Atish Patra +R: Anup Patel +L: linux-riscv@lists.infradead.org +S: Supported +F: drivers/perf/riscv_pmu.c +F: drivers/perf/riscv_pmu_legacy.c +F: drivers/perf/riscv_pmu_sbi.c + RISC-V ARCHITECTURE M: Paul Walmsley M: Palmer Dabbelt From 1c4664faa38923330d478f046dc743a00c1e2dec Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 3 Jan 2022 12:08:31 -0800 Subject: [PATCH 377/380] xtensa: define update_mmu_tlb function Before the commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") there was a call to update_mmu_cache in alloc_set_pte that used to invalidate TLB entry caching invalid PTE that caused a page fault. That commit removed that call so now invalid TLB entry survives causing repetitive page faults on the CPU that took the initial fault until that TLB entry is occasionally evicted. This issue is spotted by the xtensa TLB sanity checker. Fix this issue by defining update_mmu_tlb function that flushes TLB entry for the faulting address. Cc: stable@vger.kernel.org # 5.12+ Signed-off-by: Max Filippov --- arch/xtensa/include/asm/pgtable.h | 4 ++++ arch/xtensa/mm/tlb.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index bd5aeb795567..a63eca126657 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -411,6 +411,10 @@ extern void update_mmu_cache(struct vm_area_struct * vma, typedef pte_t *pte_addr_t; +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); +#define __HAVE_ARCH_UPDATE_MMU_TLB + #endif /* !defined (__ASSEMBLY__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index f436cf2efd8b..27a477dae232 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -162,6 +162,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + local_flush_tlb_page(vma, address); +} + #ifdef CONFIG_DEBUG_TLB_SANITY static unsigned get_pte_for_vaddr(unsigned vaddr) From 93917ad50972e6298885d81b37b6a8602eb0b188 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 8 Mar 2022 16:32:52 +0800 Subject: [PATCH 378/380] RISC-V: Add support for restartable sequence Add calls to rseq_signal_deliver() and rseq_syscall() to introduce RSEQ support. 1. Call the rseq_signal_deliver() function to fixup on the pre-signal frame when a signal is delivered on top of a restartable sequence critical section. 2. Check that system calls are not invoked from within rseq critical sections by invoking rseq_signal() from ret_from_syscall(). With CONFIG_DEBUG_RSEQ, such behavior results in termination of the process with SIGSEGV. Signed-off-by: Vincent Chen Reviewed-by: Mathieu Desnoyers Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/entry.S | 4 ++++ arch/riscv/kernel/signal.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 5adcbd9b5e88..67a671b099b6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -101,6 +101,7 @@ config RISCV select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HAVE_RSEQ select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..56ada2a583fa 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -225,6 +225,10 @@ ret_from_syscall: * (If it was configured with SECCOMP_RET_ERRNO/TRACE) */ ret_from_syscall_rejected: +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + call rseq_syscall +#endif /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..16da3c3b53a1 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) } } + rseq_signal_deliver(ksig, regs); + /* Set up the stack frame */ ret = setup_rt_frame(ksig, oldset, regs); From 6d1a6f464efd596779d1b272b3dc8170c5fa189f Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 8 Mar 2022 16:32:53 +0800 Subject: [PATCH 379/380] rseq/selftests: Add support for RISC-V Add support for RISC-V in the rseq selftests, which covers both 64-bit and 32-bit ISA with little endian mode. Signed-off-by: Vincent Chen Tested-by: Eric Lin Reviewed-by: Mathieu Desnoyers Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/rseq/param_test.c | 23 + tools/testing/selftests/rseq/rseq-riscv.h | 677 ++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.h | 2 + 3 files changed, 702 insertions(+) create mode 100644 tools/testing/selftests/rseq/rseq-riscv.h diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 699ad5f93c34..0a6b8eafd444 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -207,6 +207,29 @@ unsigned int yield_mod_cnt, nr_abort; "addiu " INJECT_ASM_REG ", -1\n\t" \ "bnez " INJECT_ASM_REG ", 222b\n\t" \ "333:\n\t" +#elif defined(__riscv) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "t1" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "beqz " INJECT_ASM_REG ", 333f\n\t" \ + "222:\n\t" \ + "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ + "bnez " INJECT_ASM_REG ", 222b\n\t" \ + "333:\n\t" + #else #error unsupported target diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h new file mode 100644 index 000000000000..b86642f90d7f --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -0,0 +1,677 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike + * other architectures, the ebreak instruction has no immediate field for + * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG. + * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it + * is an uncommon instruction and will raise an illegal instruction + * exception when executed in all modes. + */ +#include + +#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) +#define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */ +#else +#error "Currently, RSEQ only supports Little-Endian version" +#endif + +#if __riscv_xlen == 64 +#define __REG_SEL(a, b) a +#elif __riscv_xlen == 32 +#define __REG_SEL(a, b) b +#endif + +#define REG_L __REG_SEL("ld ", "lw ") +#define REG_S __REG_SEL("sd ", "sw ") + +#define RISCV_FENCE(p, s) \ + __asm__ __volatile__ ("fence " #p "," #s : : : "memory") +#define rseq_smp_mb() RISCV_FENCE(rw, rw) +#define rseq_smp_rmb() RISCV_FENCE(r, r) +#define rseq_smp_wmb() RISCV_FENCE(w, w) +#define RSEQ_ASM_TMP_REG_1 "t6" +#define RSEQ_ASM_TMP_REG_2 "t5" +#define RSEQ_ASM_TMP_REG_3 "t4" +#define RSEQ_ASM_TMP_REG_4 "t3" + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + RISCV_FENCE(r, rw) \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + RISCV_FENCE(rw, w); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_cs, \"aw\"\n" \ + ".balign 32\n" \ + __rseq_str(label) ":\n" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ + ".quad " __rseq_str(start_ip) ", " \ + __rseq_str(post_commit_offset) ", " \ + __rseq_str(abort_ip) "\n" \ + ".popsection\n\t" \ + ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \ + ".quad " __rseq_str(label) "b\n" \ + ".popsection\n" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + ((post_commit_ip) - (start_ip)), abort_ip) + +/* + * Exit points of a rseq critical section consist of all instructions outside + * of the critical section where a critical section can either branch to or + * reach through the normal course of its execution. The abort IP and the + * post-commit IP are already part of the __rseq_cs section and should not be + * explicitly defined as additional exit points. Knowing all exit points is + * useful to assist debuggers stepping over the critical section. + */ +#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ + ".pushsection __rseq_exit_point_array, \"aw\"\n" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \ + ".popsection\n" + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ + __rseq_str(label) ":\n" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + "j 222f\n" \ + ".balign 4\n" \ + ".long " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + "j %l[" __rseq_str(abort_label) "]\n" \ + "222:\n" + +#define RSEQ_ASM_OP_STORE(value, var) \ + REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ + "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPNE(var, expect, label) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label) + +#define RSEQ_ASM_OP_R_LOAD(var) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_STORE(var) \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ + RSEQ_ASM_TMP_REG_1 "\n" \ + REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" + +#define RSEQ_ASM_OP_R_ADD(count) \ + "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + ", %[" __rseq_str(count) "]\n" + +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + "fence rw, w\n" \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \ + "beqz %[" __rseq_str(len) "], 333f\n" \ + "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \ + "222:\n" \ + "lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \ + "sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \ + "addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \ + "addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \ + "addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \ + "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ + "333:\n" + +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ + "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ + RSEQ_ASM_OP_R_ADD(off) \ + REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ + RSEQ_ASM_OP_R_ADD(inc) \ + __rseq_str(post_commit_label) ":\n" + +static inline __always_inline +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expectnot] "r" (expectnot), + [load] "m" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]") + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [v2] "m" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __always_inline +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [ptr] "r" (ptr), + [off] "er" (off), + [inc] "er" (inc) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 3f63eb362b92..72efb6d3d84e 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -79,6 +79,8 @@ extern int __rseq_handled; #include #elif defined(__s390__) #include +#elif defined(__riscv) +#include #else #error unsupported target #endif From f8f9f21c7848e63133c16c899f3d84aa54eb79fe Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Sat, 19 Mar 2022 17:40:02 +0800 Subject: [PATCH 380/380] MIPS: Fix build error for loongson64 and sgi-ip27 Select HAVE_ARCH_NODEDATA_EXTENSION for loongson64 to fix build error when CONFIG_NUMA=y: mips64el-unknown-linux-gnu-ld: mm/page_alloc.o: in function `free_area_init': (.init.text+0x1714): undefined reference to `node_data' mips64el-unknown-linux-gnu-ld: (.init.text+0x1730): undefined reference to `node_data' Also, select HAVE_ARCH_NODEDATA_EXTENSION for sgi-ip27 to fix build error: mips64el-unknown-linux-gnu-ld: mm/page_alloc.o: in function `free_area_init': page_alloc.c:(.init.text+0x1ba8): undefined reference to `node_data' mips64el-unknown-linux-gnu-ld: page_alloc.c:(.init.text+0x1bcc): undefined reference to `node_data' mips64el-unknown-linux-gnu-ld: page_alloc.c:(.init.text+0x1be4): undefined reference to `node_data' mips64el-unknown-linux-gnu-ld: page_alloc.c:(.init.text+0x1bf4): undefined reference to `node_data' Signed-off-by: Feiyang Chen Reviewed-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 5 +++++ arch/mips/loongson64/numa.c | 10 ++++++++++ arch/mips/sgi-ip27/ip27-memory.c | 10 ++++++++++ 3 files changed, 25 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 0dae5f1e61cc..de3b32a507d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -513,6 +513,7 @@ config MACH_LOONGSON64 select USE_OF select BUILTIN_DTB select PCI_HOST_GENERIC + select HAVE_ARCH_NODEDATA_EXTENSION if NUMA help This enables the support of Loongson-2/3 family of machines. @@ -709,6 +710,7 @@ config SGI_IP27 select WAR_R10000_LLSC select MIPS_L1_CACHE_SHIFT_7 select NUMA + select HAVE_ARCH_NODEDATA_EXTENSION help This are the SGI Origin 200, Origin 2000 and Onyx 2 Graphics workstations. To compile a Linux kernel that runs on these, say Y @@ -2708,6 +2710,9 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config HAVE_ARCH_NODEDATA_EXTENSION + bool + config RELOCATABLE bool "Relocatable kernel" depends on SYS_SUPPORTS_RELOCATABLE diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index e8e3e48c5333..69a533148efd 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -197,3 +197,13 @@ void __init prom_init_numa_memory(void) prom_meminit(); } EXPORT_SYMBOL(prom_init_numa_memory); + +pg_data_t * __init arch_alloc_nodedata(int nid) +{ + return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); +} + +void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + __node_data[nid] = pgdat; +} diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index adc2faeecf7c..f79c48393716 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -422,3 +422,13 @@ void __init mem_init(void) memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ } + +pg_data_t * __init arch_alloc_nodedata(int nid) +{ + return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); +} + +void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + __node_data[nid] = (struct node_data *)pgdat; +}