diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 95254cec92bf..4ac0673901e7 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -61,3 +61,15 @@ Description: * "CchRHCnt" : Cache Read Hit Count * "CchWHCnt" : Cache Write Hit Count * "FastWCnt" : Fast Write Count + +What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject +Date: Jan, 2022 +KernelVersion: v5.17 +Contact: linuxppc-dev , nvdimm@lists.linux.dev, +Description: + (RO) Reports the health bitmap inject bitmap that is applied to + bitmap received from PowerVM via the H_SCM_HEALTH. This is used + to forcibly set specific bits returned from Hcall. These are then + used to simulate various health or shutdown states for an nvdimm + and are set by user-space tools like ndctl by issuing a PAPR DSM. + diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info new file mode 100644 index 000000000000..141a6b371469 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info @@ -0,0 +1,29 @@ +What: /sys/firmware/papr/energy_scale_info +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Directory hosting a set of platform attributes like + energy/frequency on Linux running as a PAPR guest. + + Each file in a directory contains a platform + attribute hierarchy pertaining to performance/ + energy-savings mode and processor frequency. 
+ +What: /sys/firmware/papr/energy_scale_info/ +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Energy, frequency attributes directory for POWERVM servers + +What: /sys/firmware/papr/energy_scale_info//desc +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: String description of the energy attribute of + +What: /sys/firmware/papr/energy_scale_info//value +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: Numeric value of the energy attribute of + +What: /sys/firmware/papr/energy_scale_info//value_desc +Date: February 2022 +Contact: Linux for PowerPC mailing list +Description: String value of the energy attribute of diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml new file mode 100644 index 000000000000..0c15afa2214c --- /dev/null +++ b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/microchip,mpfs.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PolarFire Clock Control Module Binding + +maintainers: + - Daire McNamara + +description: | + Microchip PolarFire clock control (CLKCFG) is an integrated clock controller, + which gates and enables all peripheral clocks. + + This device tree binding describes 33 gate clocks. Clocks are referenced by + user nodes by the CLKCFG node phandle and the clock index in the group, from + 0 to 32. + +properties: + compatible: + const: microchip,mpfs-clkcfg + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + '#clock-cells': + const: 1 + description: | + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. See include/dt-bindings/clock/microchip,mpfs-clock.h + for the full list of PolarFire clock IDs. 
+ +required: + - compatible + - reg + - clocks + - '#clock-cells' + +additionalProperties: false + +examples: + # Clock Config node: + - | + #include + soc { + #address-cells = <2>; + #size-cells = <2>; + clkcfg: clock-controller@20002000 { + compatible = "microchip,mpfs-clkcfg"; + reg = <0x0 0x20002000 0x0 0x1000>; + clocks = <&ref>; + #clock-cells = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml new file mode 100644 index 000000000000..110651eafa70 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/microchip,mpfs-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip MPFS GPIO Controller Device Tree Bindings + +maintainers: + - Conor Dooley + +properties: + compatible: + items: + - enum: + - microchip,mpfs-gpio + + reg: + maxItems: 1 + + interrupts: + description: + Interrupt mapping, one per GPIO. Maximum 32 GPIOs. + minItems: 1 + maxItems: 32 + + interrupt-controller: true + + clocks: + maxItems: 1 + + "#gpio-cells": + const: 2 + + "#interrupt-cells": + const: 1 + + ngpios: + description: + The number of GPIOs available. 
+ minimum: 1 + maximum: 32 + default: 32 + + gpio-controller: true + +required: + - compatible + - reg + - interrupts + - "#interrupt-cells" + - interrupt-controller + - "#gpio-cells" + - gpio-controller + - clocks + +additionalProperties: false + +examples: + - | + gpio@20122000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x20122000 0x1000>; + clocks = <&clkcfg 25>; + interrupt-parent = <&plic>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <1>; + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + }; +... diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml index ce0c715205c6..5159a87f3fa7 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml @@ -44,6 +44,10 @@ properties: - renesas,ipmmu-r8a77990 # R-Car E3 - renesas,ipmmu-r8a77995 # R-Car D3 - renesas,ipmmu-r8a779a0 # R-Car V3U + - items: + - enum: + - renesas,ipmmu-r8a779f0 # R-Car S4-8 + - const: renesas,rcar-gen4-ipmmu-vmsa # R-Car Gen4 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml similarity index 82% rename from Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml rename to Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml index bbb173ea483c..082d397d3e89 100644 --- a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: 
"http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#" +$id: "http://devicetree.org/schemas/mailbox/microchip,mpfs-mailbox.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller @@ -11,7 +11,7 @@ maintainers: properties: compatible: - const: microchip,polarfire-soc-mailbox + const: microchip,mpfs-mailbox reg: items: @@ -38,7 +38,7 @@ examples: #address-cells = <2>; #size-cells = <2>; mbox: mailbox@37020000 { - compatible = "microchip,polarfire-soc-mailbox"; + compatible = "microchip,mpfs-mailbox"; reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>; interrupt-parent = <&L1>; interrupts = <96>; diff --git a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml new file mode 100644 index 000000000000..a7fae1772a81 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip IP corePWM controller bindings + +maintainers: + - Conor Dooley + +description: | + corePWM is an 16 channel pulse width modulator FPGA IP + + https://www.microsemi.com/existing-parts/parts/152118 + +allOf: + - $ref: pwm.yaml# + +properties: + compatible: + items: + - const: microchip,corepwm-rtl-v4 + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + "#pwm-cells": + const: 2 + + microchip,sync-update-mask: + description: | + Depending on how the IP is instantiated, there are two modes of operation. + In synchronous mode, all channels are updated at the beginning of the PWM period, + and in asynchronous mode updates happen as the control registers are written. 
+ A 16 bit wide "SHADOW_REG_EN" parameter of the IP core controls whether synchronous + mode is possible for each channel, and is set by the bitstream programmed to the + FPGA. If the IP core is instantiated with SHADOW_REG_ENx=1, both registers that + control the duty cycle for channel x have a second "shadow"/buffer reg synthesised. + At runtime a bit wide register exposed to APB can be used to toggle on/off + synchronised mode for all channels it has been synthesised for. + Each bit of "microchip,sync-update-mask" corresponds to a PWM channel & represents + whether synchronous mode is possible for the PWM channel. + + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + + microchip,dac-mode-mask: + description: | + Optional, per-channel Low Ripple DAC mode is possible on this IP core. It creates + a minimum period pulse train whose High/Low average is that of the chosen duty + cycle. This "DAC" will have far better bandwidth and ripple performance than the + standard PWM algorithm can achieve. A 16 bit DAC_MODE module parameter of the IP + core, set at instantiation and by the bitstream programmed to the FPGA, determines + whether a given channel operates in regular PWM or DAC mode. + Each bit corresponds to a PWM channel & represents whether DAC mode is enabled + for that channel. 
+ + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + +required: + - compatible + - reg + - clocks + +additionalProperties: false + +examples: + - | + pwm@41000000 { + compatible = "microchip,corepwm-rtl-v4"; + microchip,sync-update-mask = /bits/ 32 <0>; + clocks = <&clkcfg 30>; + reg = <0x41000000 0xF0>; + #pwm-cells = <2>; + }; diff --git a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml new file mode 100644 index 000000000000..a2e984ea3553 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/microchip,mfps-rtc.yaml# + +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PolarFire SoC (MPFS) RTC Device Tree Bindings + +allOf: + - $ref: rtc.yaml# + +maintainers: + - Daire McNamara + - Lewis Hanly + +properties: + compatible: + enum: + - microchip,mpfs-rtc + + reg: + maxItems: 1 + + interrupts: + items: + - description: | + RTC_WAKEUP interrupt + - description: | + RTC_MATCH, asserted when the content of the Alarm register is equal + to that of the RTC's count register. + + clocks: + maxItems: 1 + + clock-names: + items: + - const: rtc + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + rtc@20124000 { + compatible = "microchip,mpfs-rtc"; + reg = <0x20124000 0x1000>; + clocks = <&clkcfg 21>; + clock-names = "rtc"; + interrupts = <80>, <81>; + }; +... 
diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml new file mode 100644 index 000000000000..b0dae51e1d42 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/soc/microchip/microchip,mpfs-sys-controller.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller + +maintainers: + - Conor Dooley + +description: | + PolarFire SoC devices include a microcontroller acting as the system controller, + which provides "services" to the main processor and to the FPGA fabric. These + services include hardware rng, reprogramming of the FPGA and verification of the + eNVM contents etc. More information on these services can be found online, at + https://onlinedocs.microchip.com/pr/GUID-1409CF11-8EF9-4C24-A94E-70979A688632-en-US-1/index.html + + Communication with the system controller is done via a mailbox, of which the client + portion is documented here. 
+ +properties: + mboxes: + maxItems: 1 + + compatible: + const: microchip,mpfs-sys-controller + +required: + - compatible + - mboxes + +additionalProperties: false + +examples: + - | + syscontroller { + compatible = "microchip,mpfs-sys-controller"; + mboxes = <&mbox 0>; + }; diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml deleted file mode 100644 index 2cd3bc6bd8d6..000000000000 --- a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#" -$schema: "http://devicetree.org/meta-schemas/core.yaml#" - -title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller - -maintainers: - - Conor Dooley - -description: | - The PolarFire SoC system controller is communicated with via a mailbox. - This document describes the bindings for the client portion of that mailbox. 
- - -properties: - mboxes: - maxItems: 1 - - compatible: - const: microchip,polarfire-soc-sys-controller - -required: - - compatible - - mboxes - -additionalProperties: false - -examples: - - | - syscontroller: syscontroller { - compatible = "microchip,polarfire-soc-sys-controller"; - mboxes = <&mbox 0>; - }; diff --git a/Documentation/riscv/pmu.rst b/Documentation/riscv/pmu.rst deleted file mode 100644 index acb216b99c26..000000000000 --- a/Documentation/riscv/pmu.rst +++ /dev/null @@ -1,255 +0,0 @@ -=================================== -Supporting PMUs on RISC-V platforms -=================================== - -Alan Kao , Mar 2018 - -Introduction ------------- - -As of this writing, perf_event-related features mentioned in The RISC-V ISA -Privileged Version 1.10 are as follows: -(please check the manual for more details) - -* [m|s]counteren -* mcycle[h], cycle[h] -* minstret[h], instret[h] -* mhpeventx, mhpcounterx[h] - -With such function set only, porting perf would require a lot of work, due to -the lack of the following general architectural performance monitoring features: - -* Enabling/Disabling counters - Counters are just free-running all the time in our case. -* Interrupt caused by counter overflow - No such feature in the spec. -* Interrupt indicator - It is not possible to have many interrupt ports for all counters, so an - interrupt indicator is required for software to tell which counter has - just overflowed. -* Writing to counters - There will be an SBI to support this since the kernel cannot modify the - counters [1]. Alternatively, some vendor considers to implement - hardware-extension for M-S-U model machines to write counters directly. - -This document aims to provide developers a quick guide on supporting their -PMUs in the kernel. The following sections briefly explain perf' mechanism -and todos. - -You may check previous discussions here [1][2]. Also, it might be helpful -to check the appendix for related kernel structures. - - -1. 
Initialization ------------------ - -*riscv_pmu* is a global pointer of type *struct riscv_pmu*, which contains -various methods according to perf's internal convention and PMU-specific -parameters. One should declare such instance to represent the PMU. By default, -*riscv_pmu* points to a constant structure *riscv_base_pmu*, which has very -basic support to a baseline QEMU model. - -Then he/she can either assign the instance's pointer to *riscv_pmu* so that -the minimal and already-implemented logic can be leveraged, or invent his/her -own *riscv_init_platform_pmu* implementation. - -In other words, existing sources of *riscv_base_pmu* merely provide a -reference implementation. Developers can flexibly decide how many parts they -can leverage, and in the most extreme case, they can customize every function -according to their needs. - - -2. Event Initialization ------------------------ - -When a user launches a perf command to monitor some events, it is first -interpreted by the userspace perf tool into multiple *perf_event_open* -system calls, and then each of them calls to the body of *event_init* -member function that was assigned in the previous step. In *riscv_base_pmu*'s -case, it is *riscv_event_init*. - -The main purpose of this function is to translate the event provided by user -into bitmap, so that HW-related control registers or counters can directly be -manipulated. The translation is based on the mappings and methods provided in -*riscv_pmu*. - -Note that some features can be done in this stage as well: - -(1) interrupt setting, which is stated in the next section; -(2) privilege level setting (user space only, kernel space only, both); -(3) destructor setting. 
Normally it is sufficient to apply *riscv_destroy_event*; -(4) tweaks for non-sampling events, which will be utilized by functions such as - *perf_adjust_period*, usually something like the follows:: - - if (!is_sampling_event(event)) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - -In the case of *riscv_base_pmu*, only (3) is provided for now. - - -3. Interrupt ------------- - -3.1. Interrupt Initialization - -This often occurs at the beginning of the *event_init* method. In common -practice, this should be a code segment like:: - - int x86_reserve_hardware(void) - { - int err = 0; - - if (!atomic_inc_not_zero(&pmc_refcount)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&pmc_refcount); - mutex_unlock(&pmc_reserve_mutex); - } - - return err; - } - -And the magic is in *reserve_pmc_hardware*, which usually does atomic -operations to make implemented IRQ accessible from some global function pointer. -*release_pmc_hardware* serves the opposite purpose, and it is used in event -destructors mentioned in previous section. - -(Note: From the implementations in all the architectures, the *reserve/release* -pair are always IRQ settings, so the *pmc_hardware* seems somehow misleading. -It does NOT deal with the binding between an event and a physical counter, -which will be introduced in the next section.) - -3.2. IRQ Structure - -Basically, a IRQ runs the following pseudo code:: - - for each hardware counter that triggered this overflow - - get the event of this counter - - // following two steps are defined as *read()*, - // check the section Reading/Writing Counters for details. 
- count the delta value since previous interrupt - update the event->count (# event occurs) by adding delta, and - event->hw.period_left by subtracting delta - - if the event overflows - sample data - set the counter appropriately for the next overflow - - if the event overflows again - too frequently, throttle this event - fi - fi - - end for - -However as of this writing, none of the RISC-V implementations have designed an -interrupt for perf, so the details are to be completed in the future. - -4. Reading/Writing Counters ---------------------------- - -They seem symmetric but perf treats them quite differently. For reading, there -is a *read* interface in *struct pmu*, but it serves more than just reading. -According to the context, the *read* function not only reads the content of the -counter (event->count), but also updates the left period to the next interrupt -(event->hw.period_left). - -But the core of perf does not need direct write to counters. Writing counters -is hidden behind the abstraction of 1) *pmu->start*, literally start counting so one -has to set the counter to a good value for the next interrupt; 2) inside the IRQ -it should set the counter to the same resonable value. - -Reading is not a problem in RISC-V but writing would need some effort, since -counters are not allowed to be written by S-mode. - - -5. add()/del()/start()/stop() ------------------------------ - -Basic idea: add()/del() adds/deletes events to/from a PMU, and start()/stop() -starts/stop the counter of some event in the PMU. All of them take the same -arguments: *struct perf_event *event* and *int flag*. - -Consider perf as a state machine, then you will find that these functions serve -as the state transition process between those states. -Three states (event->hw.state) are defined: - -* PERF_HES_STOPPED: the counter is stopped -* PERF_HES_UPTODATE: the event->count is up-to-date -* PERF_HES_ARCH: arch-dependent usage ... 
we don't need this for now - -A normal flow of these state transitions are as follows: - -* A user launches a perf event, resulting in calling to *event_init*. -* When being context-switched in, *add* is called by the perf core, with a flag - PERF_EF_START, which means that the event should be started after it is added. - At this stage, a general event is bound to a physical counter, if any. - The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, because it is now - stopped, and the (software) event count does not need updating. - - - *start* is then called, and the counter is enabled. - With flag PERF_EF_RELOAD, it writes an appropriate value to the counter (check - previous section for detail). - Nothing is written if the flag does not contain PERF_EF_RELOAD. - The state now is reset to none, because it is neither stopped nor updated - (the counting already started) - -* When being context-switched out, *del* is called. It then checks out all the - events in the PMU and calls *stop* to update their counts. - - - *stop* is called by *del* - and the perf core with flag PERF_EF_UPDATE, and it often shares the same - subroutine as *read* with the same logic. - The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again. - - - Life cycle of these two pairs: *add* and *del* are called repeatedly as - tasks switch in-and-out; *start* and *stop* is also called when the perf core - needs a quick stop-and-start, for instance, when the interrupt period is being - adjusted. - -Current implementation is sufficient for now and can be easily extended to -features in the future. - -A. Related Structures ---------------------- - -* struct pmu: include/linux/perf_event.h -* struct riscv_pmu: arch/riscv/include/asm/perf_event.h - - Both structures are designed to be read-only. 
- - *struct pmu* defines some function pointer interfaces, and most of them take - *struct perf_event* as a main argument, dealing with perf events according to - perf's internal state machine (check kernel/events/core.c for details). - - *struct riscv_pmu* defines PMU-specific parameters. The naming follows the - convention of all other architectures. - -* struct perf_event: include/linux/perf_event.h -* struct hw_perf_event - - The generic structure that represents perf events, and the hardware-related - details. - -* struct riscv_hw_events: arch/riscv/include/asm/perf_event.h - - The structure that holds the status of events, has two fixed members: - the number of events and the array of the events. - -References ----------- - -[1] https://github.com/riscv/riscv-linux/pull/124 - -[2] https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/f19TmCNP6yA diff --git a/MAINTAINERS b/MAINTAINERS index ada123bed41e..5fcde9581539 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16714,6 +16714,15 @@ S: Maintained F: drivers/mtd/nand/raw/r852.c F: drivers/mtd/nand/raw/r852.h +RISC-V PMU DRIVERS +M: Atish Patra +R: Anup Patel +L: linux-riscv@lists.infradead.org +S: Supported +F: drivers/perf/riscv_pmu.c +F: drivers/perf/riscv_pmu_legacy.c +F: drivers/perf/riscv_pmu_sbi.c + RISC-V ARCHITECTURE M: Paul Walmsley M: Palmer Dabbelt @@ -16728,8 +16737,10 @@ K: riscv RISC-V/MICROCHIP POLARFIRE SOC SUPPORT M: Lewis Hanly +M: Conor Dooley L: linux-riscv@lists.infradead.org S: Supported +F: arch/riscv/boot/dts/microchip/ F: drivers/mailbox/mailbox-mpfs.c F: drivers/soc/microchip/ F: include/soc/microchip/mpfs.h @@ -17026,9 +17037,7 @@ L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: Documentation/s390/vfio-ap.rst -F: drivers/s390/crypto/vfio_ap_drv.c -F: drivers/s390/crypto/vfio_ap_ops.c -F: drivers/s390/crypto/vfio_ap_private.h +F: drivers/s390/crypto/vfio_ap* S390 VFIO-CCW DRIVER M: Eric Farman diff --git a/arch/Kconfig 
b/arch/Kconfig index 9af3a597cc67..84bc1de02720 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -202,6 +202,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config HAVE_FUNCTION_DESCRIPTORS + bool + config TRACE_IRQFLAGS_SUPPORT bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index b2317324827f..cb93769a9f2a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -36,6 +36,7 @@ config IA64 select HAVE_SETUP_PER_CPU_AREA select TTY select HAVE_ARCH_TRACEHOOK + select HAVE_FUNCTION_DESCRIPTORS select HAVE_VIRT_CPU_ACCOUNTING select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE select VIRT_TO_BUS diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 6629301a2620..2ef5f9966ad1 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -226,7 +226,7 @@ struct got_entry { * Layout of the Function Descriptor */ struct fdesc { - uint64_t ip; + uint64_t addr; uint64_t gp; }; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3a033d2008b3..8e0875cf6071 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -9,6 +9,9 @@ #include #include + +typedef struct fdesc func_desc_t; + #include extern char __phys_per_cpu_start[]; @@ -27,25 +30,4 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->ip)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - return dereference_function_descriptor(ptr); -} - #endif /* 
_ASM_IA64_SECTIONS_H */ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 360f36b0eb3f..8f62cf97f691 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -602,15 +602,15 @@ get_fdesc (struct module *mod, uint64_t value, int *okp) return value; /* Look for existing function descriptor. */ - while (fdesc->ip) { - if (fdesc->ip == value) + while (fdesc->addr) { + if (fdesc->addr == value) return (uint64_t)fdesc; if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) BUG(); } /* Create new one */ - fdesc->ip = value; + fdesc->addr = value; fdesc->gp = mod->arch.gp; return (uint64_t) fdesc; } diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 30193bcf9caa..1bc4282af064 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -32,7 +32,6 @@ platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ platform-$(CONFIG_SNI_RM) += sni/ -platform-$(CONFIG_MACH_TX39XX) += txx9/ platform-$(CONFIG_MACH_TX49XX) += txx9/ platform-$(CONFIG_MACH_VR41XX) += vr41xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 058446f01487..de3b32a507d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT + select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000 select ARCH_HAS_DEBUG_VIRTUAL if !64BIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV @@ -101,6 +102,7 @@ config MIPS select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT + select HAVE_ARCH_KCSAN if 64BIT config MIPS_FIXUP_BIGPHYS_ADDR bool @@ -511,6 +513,7 @@ config MACH_LOONGSON64 select USE_OF select BUILTIN_DTB select PCI_HOST_GENERIC + select HAVE_ARCH_NODEDATA_EXTENSION if NUMA help This enables the support of Loongson-2/3 family of machines. 
@@ -707,6 +710,7 @@ config SGI_IP27 select WAR_R10000_LLSC select MIPS_L1_CACHE_SHIFT_7 select NUMA + select HAVE_ARCH_NODEDATA_EXTENSION help This are the SGI Origin 200, Origin 2000 and Onyx 2 Graphics workstations. To compile a Linux kernel that runs on these, say Y @@ -926,9 +930,6 @@ config SNI_RM Technology and now in turn merged with Fujitsu. Say Y here to support this machine type. -config MACH_TX39XX - bool "Toshiba TX39 series based machines" - config MACH_TX49XX bool "Toshiba TX49 series based machines" select WAR_TX49XX_ICACHE_INDEX_INV @@ -1343,19 +1344,14 @@ config LOONGSON3_ENHANCEMENT new Loongson-3 machines only, please say 'Y' here. config CPU_LOONGSON3_WORKAROUNDS - bool "Old Loongson-3 LLSC Workarounds" + bool "Loongson-3 LLSC Workarounds" default y if SMP depends on CPU_LOONGSON64 help Loongson-3 processors have the llsc issues which require workarounds. Without workarounds the system may hang unexpectedly. - Newer Loongson-3 will fix these issues and no workarounds are needed. - The workarounds have no significant side effect on them but may - decrease the performance of the system so this option should be - disabled unless the kernel is intended to be run on old systems. - - If unsure, please say Y. + Say Y, unless you know what you are doing. config CPU_LOONGSON3_CPUCFG_EMULATION bool "Emulate the CPUCFG instruction on older Loongson cores" @@ -1583,12 +1579,6 @@ config CPU_R3000 might be a safe bet. If the resulting kernel does not work, try to recompile with R3000. 
-config CPU_TX39XX - bool "R39XX" - depends on SYS_HAS_CPU_TX39XX - select CPU_SUPPORTS_32BIT_KERNEL - select CPU_R3K_TLB - config CPU_VR41XX bool "R41xx" depends on SYS_HAS_CPU_VR41XX @@ -1915,9 +1905,6 @@ config SYS_HAS_CPU_P5600 config SYS_HAS_CPU_R3000 bool -config SYS_HAS_CPU_TX39XX - bool - config SYS_HAS_CPU_VR41XX bool @@ -2148,7 +2135,7 @@ config PAGE_SIZE_8KB config PAGE_SIZE_16KB bool "16kB" - depends on !CPU_R3000 && !CPU_TX39XX + depends on !CPU_R3000 help Using 16kB page size will result in higher performance kernel at the price of higher memory consumption. This option is available on @@ -2167,7 +2154,7 @@ config PAGE_SIZE_32KB config PAGE_SIZE_64KB bool "64kB" - depends on !CPU_R3000 && !CPU_TX39XX + depends on !CPU_R3000 help Using 64kB page size will result in higher performance kernel at the price of higher memory consumption. This option is available on @@ -2235,7 +2222,7 @@ config CPU_HAS_PREFETCH config CPU_GENERIC_DUMP_TLB bool - default y if !(CPU_R3000 || CPU_TX39XX) + default y if !CPU_R3000 config MIPS_FP_SUPPORT bool "Floating Point support" if EXPERT @@ -2255,7 +2242,7 @@ config MIPS_FP_SUPPORT config CPU_R2300_FPU bool depends on MIPS_FP_SUPPORT - default y if CPU_R3000 || CPU_TX39XX + default y if CPU_R3000 config CPU_R3K_TLB bool @@ -2520,13 +2507,51 @@ config CPU_HAS_SYNC # # CPU non-features # + +# Work around the "daddi" and "daddiu" CPU errata: +# +# - The `daddi' instruction fails to trap on overflow. +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #23 +# +# - The `daddiu' instruction can produce an incorrect result. 
+# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #41 +# "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum +# #15 +# "MIPS R4400PC/SC Errata, Processor Revision 1.0", erratum #7 +# "MIPS R4400MC Errata, Processor Revision 1.0", erratum #5 config CPU_DADDI_WORKAROUNDS bool +# Work around certain R4000 CPU errata (as implemented by GCC): +# +# - A double-word or a variable shift may give an incorrect result +# if executed immediately after starting an integer division: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #28 +# "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum +# #19 +# +# - A double-word or a variable shift may give an incorrect result +# if executed while an integer multiplication is in progress: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# errata #16 & #28 +# +# - An integer division may give an incorrect result if started in +# a delay slot of a taken branch or a jump: +# "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", +# erratum #52 config CPU_R4000_WORKAROUNDS bool select CPU_R4400_WORKAROUNDS +# Work around certain R4400 CPU errata (as implemented by GCC): +# +# - A double-word or a variable shift may give an incorrect result +# if executed immediately after starting an integer division: +# "MIPS R4400MC Errata, Processor Revision 1.0", erratum #10 +# "MIPS R4400MC Errata, Processor Revision 2.0 & 3.0", erratum #4 config CPU_R4400_WORKAROUNDS bool @@ -2536,13 +2561,13 @@ config CPU_R4X00_BUGS64 config MIPS_ASID_SHIFT int - default 6 if CPU_R3000 || CPU_TX39XX + default 6 if CPU_R3000 default 0 config MIPS_ASID_BITS int default 0 if MIPS_ASID_BITS_VARIABLE - default 6 if CPU_R3000 || CPU_TX39XX + default 6 if CPU_R3000 default 8 config MIPS_ASID_BITS_VARIABLE @@ -2685,6 +2710,9 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config HAVE_ARCH_NODEDATA_EXTENSION + bool + config RELOCATABLE bool "Relocatable kernel" depends on SYS_SUPPORTS_RELOCATABLE @@ 
-3202,6 +3230,10 @@ config MIPS32_N32 If unsure, say N. +config CC_HAS_MNO_BRANCH_LIKELY + def_bool y + depends on $(cc-option,-mno-branch-likely) + menu "Power management options" config ARCH_HIBERNATION_POSSIBLE diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e036fc025ccc..bb236de13133 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -158,7 +158,6 @@ cflags-y += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) # CPU-dependent compiler/assembler options for optimization. # cflags-$(CONFIG_CPU_R3000) += -march=r3000 -cflags-$(CONFIG_CPU_TX39XX) += -march=r3900 cflags-$(CONFIG_CPU_R4300) += -march=r4300 -Wa,--trap cflags-$(CONFIG_CPU_VR41XX) += -march=r4100 -Wa,--trap cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap @@ -340,14 +339,12 @@ drivers-$(CONFIG_PM) += arch/mips/power/ boot-y := vmlinux.bin boot-y += vmlinux.ecoff boot-y += vmlinux.srec -ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0) boot-y += uImage boot-y += uImage.bin boot-y += uImage.bz2 boot-y += uImage.gz boot-y += uImage.lzma boot-y += uImage.lzo -endif boot-y += vmlinux.itb boot-y += vmlinux.gz.itb boot-y += vmlinux.bz2.itb @@ -359,9 +356,7 @@ bootz-y := vmlinuz bootz-y += vmlinuz.bin bootz-y += vmlinuz.ecoff bootz-y += vmlinuz.srec -ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin -endif bootz-y += vmlinuz.itb # diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 76e43a73ba1b..8ccf167c167e 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -112,7 +112,7 @@ static int ar2315_misc_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar2315_misc_irq_domain_ops = { +static const struct irq_domain_ops ar2315_misc_irq_domain_ops = { .map = ar2315_misc_irq_map, }; diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index 822b639dbd1e..cfa103518113 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -116,7 
+116,7 @@ static int ar5312_misc_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar5312_misc_irq_domain_ops = { +static const struct irq_domain_ops ar5312_misc_irq_domain_ops = { .map = ar5312_misc_irq_map, }; diff --git a/arch/mips/ath79/early_printk.c b/arch/mips/ath79/early_printk.c index 782732cd1a2b..8751d067f98f 100644 --- a/arch/mips/ath79/early_printk.c +++ b/arch/mips/ath79/early_printk.c @@ -121,6 +121,7 @@ static void prom_putchar_init(void) case REV_ID_MAJOR_QCA9558: case REV_ID_MAJOR_TP9343: case REV_ID_MAJOR_QCA956X: + case REV_ID_MAJOR_QCN550X: _prom_putchar = prom_putchar_ar71xx; break; diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 0ac435fe2dc9..4e18cdcf65a0 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -168,6 +168,12 @@ static void __init ath79_detect_sys_type(void) rev = id & QCA956X_REV_ID_REVISION_MASK; break; + case REV_ID_MAJOR_QCN550X: + ath79_soc = ATH79_SOC_QCA956X; + chip = "550X"; + rev = id & QCA956X_REV_ID_REVISION_MASK; + break; + case REV_ID_MAJOR_TP9343: ath79_soc = ATH79_SOC_TP9343; chip = "9343"; @@ -263,8 +269,3 @@ void __init arch_init_irq(void) { irqchip_init(); } - -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 5a15d51e8884..6cc28173bee8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -38,6 +38,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KCSAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index aae1346a509a..5b38a802e101 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -26,7 
+26,7 @@ unsigned long free_mem_ptr; unsigned long free_mem_end_ptr; /* The linker tells us where the image is. */ -extern unsigned char __image_begin, __image_end; +extern unsigned char __image_begin[], __image_end[]; /* debug interfaces */ #ifdef CONFIG_DEBUG_ZBOOT @@ -91,9 +91,9 @@ void decompress_kernel(unsigned long boot_heap_start) { unsigned long zimage_start, zimage_size; - zimage_start = (unsigned long)(&__image_begin); - zimage_size = (unsigned long)(&__image_end) - - (unsigned long)(&__image_begin); + zimage_start = (unsigned long)(__image_begin); + zimage_size = (unsigned long)(__image_end) - + (unsigned long)(__image_begin); puts("zimage at: "); puthex(zimage_start); @@ -121,7 +121,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = get_unaligned_le32((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)__image_end - 4); /* The device tree's address must be properly aligned */ image_size = ALIGN(image_size, STRUCT_ALIGNMENT); diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index 3f9ea47a10cd..b998301f179c 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -510,7 +510,7 @@ #address-cells = <1>; #size-cells = <1>; - eth0_addr: eth-mac-addr@0x22 { + eth0_addr: eth-mac-addr@22 { reg = <0x22 0x6>; }; }; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 844f882096e6..07d7ff5a981d 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1274,13 +1274,13 @@ static int octeon_irq_gpio_map(struct irq_domain *d, return r; } -static struct irq_domain_ops octeon_irq_domain_ciu_ops = { +static const struct irq_domain_ops octeon_irq_domain_ciu_ops = { .map = octeon_irq_ciu_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu_xlat, }; 
-static struct irq_domain_ops octeon_irq_domain_gpio_ops = { +static const struct irq_domain_ops octeon_irq_domain_gpio_ops = { .map = octeon_irq_gpio_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_gpio_xlat, @@ -1974,7 +1974,7 @@ static int octeon_irq_ciu2_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops octeon_irq_domain_ciu2_ops = { +static const struct irq_domain_ops octeon_irq_domain_ciu2_ops = { .map = octeon_irq_ciu2_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu2_xlat, @@ -2226,7 +2226,7 @@ static int octeon_irq_cib_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops octeon_irq_domain_cib_ops = { +static const struct irq_domain_ops octeon_irq_domain_cib_ops = { .map = octeon_irq_cib_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_cib_xlat, @@ -2578,7 +2578,7 @@ static int octeon_irq_ciu3_map(struct irq_domain *d, return octeon_irq_ciu3_mapx(d, virq, hw, &octeon_irq_chip_ciu3); } -static struct irq_domain_ops octeon_dflt_domain_ciu3_ops = { +static const struct irq_domain_ops octeon_dflt_domain_ciu3_ops = { .map = octeon_irq_ciu3_map, .unmap = octeon_irq_free_cd, .xlate = octeon_irq_ciu3_xlat, diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig deleted file mode 100644 index 24b96faf9b4e..000000000000 --- a/arch/mips/configs/jmr3927_defconfig +++ /dev/null @@ -1,50 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_MACH_TX39XX=y -CONFIG_TOSHIBA_JMR3927=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CFI=y -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP=y 
-CONFIG_NETDEVICES=y -CONFIG_TC35815=y -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_UNIX98_PTYS is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_TXX9_CONSOLE=y -CONFIG_SERIAL_TXX9_STDSERIAL=y -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_TXX9_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS1742=y -CONFIG_PROC_KCORE=y -# CONFIG_MISC_FILESYSTEMS is not set -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 1c220d152a50..7a5bdd236a2a 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -4,6 +4,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_NAMESPACES=y CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index ea5b5a83f1e1..011d1d678840 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! 
@@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42..2bad87551203 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index a8a30bb1dee8..82b00e45ce50 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -746,7 +746,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 3d71081afc55..de8cb2ccb781 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -120,9 +120,6 @@ #ifndef cpu_has_4k_cache #define cpu_has_4k_cache __isa_ge_or_opt(1, MIPS_CPU_4K_CACHE) #endif -#ifndef cpu_has_tx39_cache -#define cpu_has_tx39_cache __opt(MIPS_CPU_TX39_CACHE) -#endif #ifndef cpu_has_octeon_cache #define cpu_has_octeon_cache 0 #endif diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 5efe8c8b854e..5582ff0c247e 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -105,12 +105,6 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_R3081E: #endif -#ifdef CONFIG_SYS_HAS_CPU_TX39XX - case CPU_TX3912: - case 
CPU_TX3922: - case CPU_TX3927: -#endif - #ifdef CONFIG_SYS_HAS_CPU_VR41XX case CPU_VR41XX: case CPU_VR4111: diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 5c2f8d9cb7cf..00a3fc7d778d 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -309,11 +309,6 @@ enum cpu_type_enum { CPU_VR4122, CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000, CPU_SR71000, CPU_TX49XX, - /* - * TX3900 class processors - */ - CPU_TX3912, CPU_TX3922, CPU_TX3927, - /* * MIPS32 class processors */ @@ -367,7 +362,6 @@ enum cpu_type_enum { #define MIPS_CPU_4KEX BIT_ULL( 1) /* "R4K" exception model */ #define MIPS_CPU_3K_CACHE BIT_ULL( 2) /* R3000-style caches */ #define MIPS_CPU_4K_CACHE BIT_ULL( 3) /* R4000-style caches */ -#define MIPS_CPU_TX39_CACHE BIT_ULL( 4) /* TX3900-style caches */ #define MIPS_CPU_FPU BIT_ULL( 5) /* CPU has FPU */ #define MIPS_CPU_32FPR BIT_ULL( 6) /* 32 dbl. prec. FP registers */ #define MIPS_CPU_COUNTER BIT_ULL( 7) /* Cycle count/compare */ diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06..1e1247add1cf 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. 
*/ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index 8612a7e42d78..05832eb240fa 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -17,7 +17,6 @@ #include #include #include -#include #define arch_futex_atomic_op_inuser arch_futex_atomic_op_inuser #define futex_atomic_cmpxchg_inatomic futex_atomic_cmpxchg_inatomic diff --git a/arch/mips/include/asm/isadep.h b/arch/mips/include/asm/isadep.h index d1683202399b..8fc1e3ae8d0c 100644 --- a/arch/mips/include/asm/isadep.h +++ b/arch/mips/include/asm/isadep.h @@ -10,7 +10,7 @@ #ifndef __ASM_ISADEP_H #define __ASM_ISADEP_H -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) /* * R2000 or R3000 */ diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h index a54f20d956a2..ec3604c44ef2 100644 --- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h @@ -18,7 +18,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index 1f9e571af67c..5f837060724e 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -862,6 +862,7 @@ #define REV_ID_MAJOR_QCA9558 0x1130 #define REV_ID_MAJOR_TP9343 0x0150 #define REV_ID_MAJOR_QCA956X 0x1150 +#define REV_ID_MAJOR_QCN550X 0x2170 #define AR71XX_REV_ID_MINOR_MASK 0x3 #define AR71XX_REV_ID_MINOR_AR7130 0x0 diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h 
b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index 79ab3ad9fee8..44fd44a5fc42 100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h index e6e527224a15..3c200303ae55 100644 --- a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h @@ -21,7 +21,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h index b23ff47ea475..69899c1e122d 100644 --- a/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h @@ -6,7 +6,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 513270c8adb9..9151dcd9d0d5 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -21,7 +21,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 0 -#define cpu_has_tx39_cache 0 #define cpu_has_counter 1 #define cpu_has_watch 1 #define cpu_has_divec 1 diff --git 
a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h index 291fe90aafa5..03192458471d 100644 --- a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h @@ -13,7 +13,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 #define cpu_has_watch 0 diff --git a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h index 1896e88f6000..3ddc4b4dca26 100644 --- a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h @@ -17,7 +17,6 @@ #define cpu_has_rixiex 0 #define cpu_has_maar 0 #define cpu_has_rw_llb 0 -#define cpu_has_tx39_cache 0 #define cpu_has_divec 0 #define cpu_has_prefetch 0 #define cpu_has_mcheck 0 diff --git a/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h index 7c5e576f9d96..7ace50127f5a 100644 --- a/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ingenic/cpu-feature-overrides.h @@ -11,7 +11,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_counter 0 #define cpu_has_watch 1 #define cpu_has_divec 1 diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 58f829c9b6c7..c8385c4e8664 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -25,7 +25,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 diff --git 
a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h index 49a93e82c252..8ad0c424a9af 100644 --- a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -28,7 +28,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h index 10226976f7b7..22607e61e57b 100644 --- a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h @@ -15,7 +15,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h index b2ee859ca0b7..eb0d1cfb9f3b 100644 --- a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h @@ -34,7 +34,6 @@ #define cpu_has_mipsmt 0 #define cpu_has_smartmips 0 #define cpu_has_tlb 1 -#define cpu_has_tx39_cache 0 #define cpu_has_vce 0 #define cpu_has_veic 0 #define cpu_has_vint 0 diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index eb181224eb4c..ebace9e4bdc1 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -36,7 +36,6 @@ #define cpu_has_mipsmt 0 #define cpu_has_smartmips 0 #define cpu_has_tlb 1 -#define cpu_has_tx39_cache 0 #define cpu_has_vce 0 #define cpu_has_veic 0 
#define cpu_has_vint 0 diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h index c4579f1705c2..85a62c99a52a 100644 --- a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h index 168359a0a58d..3c19a94f5432 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h @@ -17,7 +17,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h index fdaf8c9182bc..a850c1e46134 100644 --- a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h index 7a385fe784a6..2d75264a9166 100644 --- a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h @@ -16,7 +16,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 
#define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h index 0a61910f6521..accf2a325343 100644 --- a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h @@ -15,7 +15,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h index 8539ccfb69b7..36d45c9cf09c 100644 --- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h @@ -18,7 +18,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_tx39_cache 0 #define cpu_has_sb1_cache 0 #define cpu_has_fpu 0 #define cpu_has_32fpr 0 diff --git a/arch/mips/include/asm/mach-tx39xx/ioremap.h b/arch/mips/include/asm/mach-tx39xx/ioremap.h deleted file mode 100644 index 157a7292397e..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/ioremap.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-mips/mach-tx39xx/ioremap.h - */ -#ifndef __ASM_MACH_TX39XX_IOREMAP_H -#define __ASM_MACH_TX39XX_IOREMAP_H - -#include - -static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ -#define TXX9_DIRECTMAP_BASE 0xff000000ul - if (offset >= TXX9_DIRECTMAP_BASE && - offset < TXX9_DIRECTMAP_BASE + 0xff0000) - return (void __iomem *)offset; - return NULL; -} - -static inline int plat_iounmap(const volatile void __iomem *addr) -{ - return (unsigned long)addr >= TXX9_DIRECTMAP_BASE; 
-} - -#endif /* __ASM_MACH_TX39XX_IOREMAP_H */ diff --git a/arch/mips/include/asm/mach-tx39xx/mangle-port.h b/arch/mips/include/asm/mach-tx39xx/mangle-port.h deleted file mode 100644 index 95be459950f7..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/mangle-port.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_TX39XX_MANGLE_PORT_H -#define __ASM_MACH_TX39XX_MANGLE_PORT_H - -#if defined(CONFIG_TOSHIBA_JMR3927) -extern unsigned long (*__swizzle_addr_b)(unsigned long port); -#define NEEDS_TXX9_SWIZZLE_ADDR_B -#else -#define __swizzle_addr_b(port) (port) -#endif -#define __swizzle_addr_w(port) (port) -#define __swizzle_addr_l(port) (port) -#define __swizzle_addr_q(port) (port) - -#define ioswabb(a, x) (x) -#define __mem_ioswabb(a, x) (x) -#define ioswabw(a, x) le16_to_cpu((__force __le16)(x)) -#define __mem_ioswabw(a, x) (x) -#define ioswabl(a, x) le32_to_cpu((__force __le32)(x)) -#define __mem_ioswabl(a, x) (x) -#define ioswabq(a, x) le64_to_cpu((__force __le64)(x)) -#define __mem_ioswabq(a, x) (x) - -#endif /* __ASM_MACH_TX39XX_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-tx39xx/spaces.h b/arch/mips/include/asm/mach-tx39xx/spaces.h deleted file mode 100644 index 151fe7a1cf1d..000000000000 --- a/arch/mips/include/asm/mach-tx39xx/spaces.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. 
- */ -#ifndef _ASM_TX39XX_SPACES_H -#define _ASM_TX39XX_SPACES_H - -#define FIXADDR_TOP ((unsigned long)(long)(int)0xfefe0000) - -#include - -#endif /* __ASM_TX39XX_SPACES_H */ diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index be4cf9d477be..a8d67c2f4f7b 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -9,7 +9,6 @@ #define _ASM_MIPSMTREGS_H #include -#include #ifndef __ASSEMBLY__ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 2616353b940c..305651af15b3 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -17,7 +17,6 @@ #include #include #include -#include /* * The following macros are especially useful for __asm__ diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index c7925d0e9874..867e9c3db76e 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -15,6 +15,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE #include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -48,6 +49,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(unsigned long page); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGD_ORDER); +} + #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_pte_page_dtor(pte); \ diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h index c42e07671934..2d74406089d7 100644 --- a/arch/mips/include/asm/prom.h +++ b/arch/mips/include/asm/prom.h @@ -20,9 +20,9 @@ struct boot_param_header; extern void __dt_setup_arch(void *bph); extern int __dt_register_buses(const char *bus0, const char *bus1); -#else /* CONFIG_OF */ +#else /* !CONFIG_USE_OF */ static inline void device_tree_init(void) { } -#endif /* 
CONFIG_OF */ +#endif /* !CONFIG_USE_OF */ extern char *mips_get_machine_name(void); extern void mips_set_machine_name(const char *name); diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203d..8c56b862fd9c 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index aa430a6c68b2..a8705aef47e1 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -42,7 +42,7 @@ cfi_restore \reg \offset \docfi .endm -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) #define STATMASK 0x3f #else #define STATMASK 0x1f @@ -349,7 +349,7 @@ cfi_ld sp, PT_R29, \docfi .endm -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) .macro RESTORE_SOME docfi=0 .set push @@ -478,7 +478,7 @@ .macro KMODE mfc0 t0, CP0_STATUS li t1, ST0_KERNEL_CUMASK | (STATMASK & ~1) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) andi t2, t0, ST0_IEP srl t2, 2 or t0, t2 diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 0b17aaa9e012..ecae7470faa4 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -69,6 +69,10 @@ static inline struct thread_info *current_thread_info(void) return __current_thread_info; } +#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER +register unsigned long current_stack_pointer __asm__("sp"); +#endif 
+ #endif /* !__ASSEMBLY__ */ /* thread information allocation */ diff --git a/arch/mips/include/asm/txx9/boards.h b/arch/mips/include/asm/txx9/boards.h index 70284e90dc53..6897ca4366d5 100644 --- a/arch/mips/include/asm/txx9/boards.h +++ b/arch/mips/include/asm/txx9/boards.h @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_TOSHIBA_JMR3927 -BOARD_VEC(jmr3927_vec) -#endif #ifdef CONFIG_TOSHIBA_RBTX4927 BOARD_VEC(rbtx4927_vec) BOARD_VEC(rbtx4937_vec) diff --git a/arch/mips/include/asm/txx9/jmr3927.h b/arch/mips/include/asm/txx9/jmr3927.h deleted file mode 100644 index aab959dc30ba..000000000000 --- a/arch/mips/include/asm/txx9/jmr3927.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Defines for the TJSYS JMR-TX3927 - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000-2001 Toshiba Corporation - */ -#ifndef __ASM_TXX9_JMR3927_H -#define __ASM_TXX9_JMR3927_H - -#include -#include -#include - -/* CS */ -#define JMR3927_ROMCE0 0x1fc00000 /* 4M */ -#define JMR3927_ROMCE1 0x1e000000 /* 4M */ -#define JMR3927_ROMCE2 0x14000000 /* 16M */ -#define JMR3927_ROMCE3 0x10000000 /* 64M */ -#define JMR3927_ROMCE5 0x1d000000 /* 4M */ -#define JMR3927_SDCS0 0x00000000 /* 32M */ -#define JMR3927_SDCS1 0x02000000 /* 32M */ -/* PCI Direct Mappings */ - -#define JMR3927_PCIMEM 0x08000000 -#define JMR3927_PCIMEM_SIZE 0x08000000 /* 128M */ -#define JMR3927_PCIIO 0x15000000 -#define JMR3927_PCIIO_SIZE 0x01000000 /* 16M */ - -#define JMR3927_SDRAM_SIZE 0x02000000 /* 32M */ -#define JMR3927_PORT_BASE KSEG1 - -/* Address map (virtual address) */ -#define JMR3927_ROM0_BASE (KSEG1 + JMR3927_ROMCE0) -#define JMR3927_ROM1_BASE (KSEG1 + JMR3927_ROMCE1) -#define JMR3927_IOC_BASE (KSEG1 + JMR3927_ROMCE2) -#define JMR3927_PCIMEM_BASE (KSEG1 + JMR3927_PCIMEM) -#define JMR3927_PCIIO_BASE (KSEG1 + JMR3927_PCIIO) - -#define JMR3927_IOC_REV_ADDR 
(JMR3927_IOC_BASE + 0x00000000) -#define JMR3927_IOC_NVRAMB_ADDR (JMR3927_IOC_BASE + 0x00010000) -#define JMR3927_IOC_LED_ADDR (JMR3927_IOC_BASE + 0x00020000) -#define JMR3927_IOC_DIPSW_ADDR (JMR3927_IOC_BASE + 0x00030000) -#define JMR3927_IOC_BREV_ADDR (JMR3927_IOC_BASE + 0x00040000) -#define JMR3927_IOC_DTR_ADDR (JMR3927_IOC_BASE + 0x00050000) -#define JMR3927_IOC_INTS1_ADDR (JMR3927_IOC_BASE + 0x00080000) -#define JMR3927_IOC_INTS2_ADDR (JMR3927_IOC_BASE + 0x00090000) -#define JMR3927_IOC_INTM_ADDR (JMR3927_IOC_BASE + 0x000a0000) -#define JMR3927_IOC_INTP_ADDR (JMR3927_IOC_BASE + 0x000b0000) -#define JMR3927_IOC_RESET_ADDR (JMR3927_IOC_BASE + 0x000f0000) - -/* Flash ROM */ -#define JMR3927_FLASH_BASE (JMR3927_ROM0_BASE) -#define JMR3927_FLASH_SIZE 0x00400000 - -/* bits for IOC_REV/IOC_BREV (high byte) */ -#define JMR3927_IDT_MASK 0xfc -#define JMR3927_REV_MASK 0x03 -#define JMR3927_IOC_IDT 0xe0 - -/* bits for IOC_INTS1/IOC_INTS2/IOC_INTM/IOC_INTP (high byte) */ -#define JMR3927_IOC_INTB_PCIA 0 -#define JMR3927_IOC_INTB_PCIB 1 -#define JMR3927_IOC_INTB_PCIC 2 -#define JMR3927_IOC_INTB_PCID 3 -#define JMR3927_IOC_INTB_MODEM 4 -#define JMR3927_IOC_INTB_INT6 5 -#define JMR3927_IOC_INTB_INT7 6 -#define JMR3927_IOC_INTB_SOFT 7 -#define JMR3927_IOC_INTF_PCIA (1 << JMR3927_IOC_INTF_PCIA) -#define JMR3927_IOC_INTF_PCIB (1 << JMR3927_IOC_INTB_PCIB) -#define JMR3927_IOC_INTF_PCIC (1 << JMR3927_IOC_INTB_PCIC) -#define JMR3927_IOC_INTF_PCID (1 << JMR3927_IOC_INTB_PCID) -#define JMR3927_IOC_INTF_MODEM (1 << JMR3927_IOC_INTB_MODEM) -#define JMR3927_IOC_INTF_INT6 (1 << JMR3927_IOC_INTB_INT6) -#define JMR3927_IOC_INTF_INT7 (1 << JMR3927_IOC_INTB_INT7) -#define JMR3927_IOC_INTF_SOFT (1 << JMR3927_IOC_INTB_SOFT) - -/* bits for IOC_RESET (high byte) */ -#define JMR3927_IOC_RESET_CPU 1 -#define JMR3927_IOC_RESET_PCI 2 - -#if defined(__BIG_ENDIAN) -#define jmr3927_ioc_reg_out(d, a) ((*(volatile unsigned char *)(a)) = (d)) -#define jmr3927_ioc_reg_in(a) (*(volatile unsigned char 
*)(a)) -#elif defined(__LITTLE_ENDIAN) -#define jmr3927_ioc_reg_out(d, a) ((*(volatile unsigned char *)((a)^1)) = (d)) -#define jmr3927_ioc_reg_in(a) (*(volatile unsigned char *)((a)^1)) -#else -#error "No Endian" -#endif - -/* LED macro */ -#define jmr3927_led_set(n/*0-16*/) jmr3927_ioc_reg_out(~(n), JMR3927_IOC_LED_ADDR) - -#define jmr3927_led_and_set(n/*0-16*/) jmr3927_ioc_reg_out((~(n)) & jmr3927_ioc_reg_in(JMR3927_IOC_LED_ADDR), JMR3927_IOC_LED_ADDR) - -/* DIPSW4 macro */ -#define jmr3927_dipsw1() (gpio_get_value(11) == 0) -#define jmr3927_dipsw2() (gpio_get_value(10) == 0) -#define jmr3927_dipsw3() ((jmr3927_ioc_reg_in(JMR3927_IOC_DIPSW_ADDR) & 2) == 0) -#define jmr3927_dipsw4() ((jmr3927_ioc_reg_in(JMR3927_IOC_DIPSW_ADDR) & 1) == 0) - -/* - * IRQ mappings - */ - -/* These are the virtual IRQ numbers, we divide all IRQ's into - * 'spaces', the 'space' determines where and how to enable/disable - * that particular IRQ on an JMR machine. Add new 'spaces' as new - * IRQ hardware is supported. 
- */ -#define JMR3927_NR_IRQ_IRC 16 /* On-Chip IRC */ -#define JMR3927_NR_IRQ_IOC 8 /* PCI/MODEM/INT[6:7] */ - -#define JMR3927_IRQ_IRC TXX9_IRQ_BASE -#define JMR3927_IRQ_IOC (JMR3927_IRQ_IRC + JMR3927_NR_IRQ_IRC) -#define JMR3927_IRQ_END (JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC) - -#define JMR3927_IRQ_IRC_INT0 (JMR3927_IRQ_IRC + TX3927_IR_INT0) -#define JMR3927_IRQ_IRC_INT1 (JMR3927_IRQ_IRC + TX3927_IR_INT1) -#define JMR3927_IRQ_IRC_INT2 (JMR3927_IRQ_IRC + TX3927_IR_INT2) -#define JMR3927_IRQ_IRC_INT3 (JMR3927_IRQ_IRC + TX3927_IR_INT3) -#define JMR3927_IRQ_IRC_INT4 (JMR3927_IRQ_IRC + TX3927_IR_INT4) -#define JMR3927_IRQ_IRC_INT5 (JMR3927_IRQ_IRC + TX3927_IR_INT5) -#define JMR3927_IRQ_IRC_SIO0 (JMR3927_IRQ_IRC + TX3927_IR_SIO0) -#define JMR3927_IRQ_IRC_SIO1 (JMR3927_IRQ_IRC + TX3927_IR_SIO1) -#define JMR3927_IRQ_IRC_SIO(ch) (JMR3927_IRQ_IRC + TX3927_IR_SIO(ch)) -#define JMR3927_IRQ_IRC_DMA (JMR3927_IRQ_IRC + TX3927_IR_DMA) -#define JMR3927_IRQ_IRC_PIO (JMR3927_IRQ_IRC + TX3927_IR_PIO) -#define JMR3927_IRQ_IRC_PCI (JMR3927_IRQ_IRC + TX3927_IR_PCI) -#define JMR3927_IRQ_IRC_TMR(ch) (JMR3927_IRQ_IRC + TX3927_IR_TMR(ch)) -#define JMR3927_IRQ_IOC_PCIA (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIA) -#define JMR3927_IRQ_IOC_PCIB (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIB) -#define JMR3927_IRQ_IOC_PCIC (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCIC) -#define JMR3927_IRQ_IOC_PCID (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_PCID) -#define JMR3927_IRQ_IOC_MODEM (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_MODEM) -#define JMR3927_IRQ_IOC_INT6 (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_INT6) -#define JMR3927_IRQ_IOC_INT7 (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_INT7) -#define JMR3927_IRQ_IOC_SOFT (JMR3927_IRQ_IOC + JMR3927_IOC_INTB_SOFT) - -/* IOC (PCI, MODEM) */ -#define JMR3927_IRQ_IOCINT JMR3927_IRQ_IRC_INT1 -/* TC35815 100M Ether (JMR-TX3912:JPW4:2-3 Short) */ -#define JMR3927_IRQ_ETHER0 JMR3927_IRQ_IRC_INT3 - -/* Clocks */ -#define JMR3927_CORECLK 132710400 /* 132.7MHz */ - -/* - * TX3927 Pin Configuration: - * - * 
PCFG bits Avail Dead - * SELSIO[1:0]:11 RXD[1:0], TXD[1:0] PIO[6:3] - * SELSIOC[0]:1 CTS[0], RTS[0] INT[5:4] - * SELSIOC[1]:0,SELDSF:0, GSDAO[0],GPCST[3] CTS[1], RTS[1],DSF, - * GDBGE* PIO[2:1] - * SELDMA[2]:1 DMAREQ[2],DMAACK[2] PIO[13:12] - * SELTMR[2:0]:000 TIMER[1:0] - * SELCS:0,SELDMA[1]:0 PIO[11;10] SDCS_CE[7:6], - * DMAREQ[1],DMAACK[1] - * SELDMA[0]:1 DMAREQ[0],DMAACK[0] PIO[9:8] - * SELDMA[3]:1 DMAREQ[3],DMAACK[3] PIO[15:14] - * SELDONE:1 DMADONE PIO[7] - * - * Usable pins are: - * RXD[1;0],TXD[1:0],CTS[0],RTS[0], - * DMAREQ[0,2,3],DMAACK[0,2,3],DMADONE,PIO[0,10,11] - * INT[3:0] - */ - -void jmr3927_prom_init(void); -void jmr3927_irq_setup(void); -struct pci_dev; -int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); - -#endif /* __ASM_TXX9_JMR3927_H */ diff --git a/arch/mips/include/asm/txx9/tx3927.h b/arch/mips/include/asm/txx9/tx3927.h deleted file mode 100644 index 149fab4f8327..000000000000 --- a/arch/mips/include/asm/txx9/tx3927.h +++ /dev/null @@ -1,341 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2000 Toshiba Corporation - */ -#ifndef __ASM_TXX9_TX3927_H -#define __ASM_TXX9_TX3927_H - -#define TX3927_REG_BASE 0xfffe0000UL -#define TX3927_REG_SIZE 0x00010000 -#define TX3927_SDRAMC_REG (TX3927_REG_BASE + 0x8000) -#define TX3927_ROMC_REG (TX3927_REG_BASE + 0x9000) -#define TX3927_DMA_REG (TX3927_REG_BASE + 0xb000) -#define TX3927_IRC_REG (TX3927_REG_BASE + 0xc000) -#define TX3927_PCIC_REG (TX3927_REG_BASE + 0xd000) -#define TX3927_CCFG_REG (TX3927_REG_BASE + 0xe000) -#define TX3927_NR_TMR 3 -#define TX3927_TMR_REG(ch) (TX3927_REG_BASE + 0xf000 + (ch) * 0x100) -#define TX3927_NR_SIO 2 -#define TX3927_SIO_REG(ch) (TX3927_REG_BASE + 0xf300 + (ch) * 0x100) -#define TX3927_PIO_REG (TX3927_REG_BASE + 0xf500) - -struct tx3927_sdramc_reg { - volatile unsigned long cr[8]; - volatile unsigned long tr[3]; - volatile unsigned long cmd; - volatile unsigned long smrs[2]; -}; - -struct tx3927_romc_reg { - volatile unsigned long cr[8]; -}; - -struct tx3927_dma_reg { - struct tx3927_dma_ch_reg { - volatile unsigned long cha; - volatile unsigned long sar; - volatile unsigned long dar; - volatile unsigned long cntr; - volatile unsigned long sair; - volatile unsigned long dair; - volatile unsigned long ccr; - volatile unsigned long csr; - } ch[4]; - volatile unsigned long dbr[8]; - volatile unsigned long tdhr; - volatile unsigned long mcr; - volatile unsigned long unused0; -}; - -#include - -#ifdef __BIG_ENDIAN -#define endian_def_s2(e1, e2) \ - volatile unsigned short e1, e2 -#define endian_def_sb2(e1, e2, e3) \ - volatile unsigned short e1;volatile unsigned char e2, e3 -#define endian_def_b2s(e1, e2, e3) \ - volatile unsigned char e1, e2;volatile unsigned short e3 -#define endian_def_b4(e1, e2, e3, e4) \ - volatile unsigned char e1, e2, e3, e4 -#else -#define endian_def_s2(e1, e2) \ - volatile unsigned short e2, e1 -#define endian_def_sb2(e1, e2, e3) \ - volatile unsigned char e3, e2;volatile unsigned short e1 -#define endian_def_b2s(e1, e2, e3) \ - 
volatile unsigned short e3;volatile unsigned char e2, e1 -#define endian_def_b4(e1, e2, e3, e4) \ - volatile unsigned char e4, e3, e2, e1 -#endif - -struct tx3927_pcic_reg { - endian_def_s2(did, vid); - endian_def_s2(pcistat, pcicmd); - endian_def_b4(cc, scc, rpli, rid); - endian_def_b4(unused0, ht, mlt, cls); - volatile unsigned long ioba; /* +10 */ - volatile unsigned long mba; - volatile unsigned long unused1[5]; - endian_def_s2(svid, ssvid); - volatile unsigned long unused2; /* +30 */ - endian_def_sb2(unused3, unused4, capptr); - volatile unsigned long unused5; - endian_def_b4(ml, mg, ip, il); - volatile unsigned long unused6; /* +40 */ - volatile unsigned long istat; - volatile unsigned long iim; - volatile unsigned long rrt; - volatile unsigned long unused7[3]; /* +50 */ - volatile unsigned long ipbmma; - volatile unsigned long ipbioma; /* +60 */ - volatile unsigned long ilbmma; - volatile unsigned long ilbioma; - volatile unsigned long unused8[9]; - volatile unsigned long tc; /* +90 */ - volatile unsigned long tstat; - volatile unsigned long tim; - volatile unsigned long tccmd; - volatile unsigned long pcirrt; /* +a0 */ - volatile unsigned long pcirrt_cmd; - volatile unsigned long pcirrdt; - volatile unsigned long unused9[3]; - volatile unsigned long tlboap; - volatile unsigned long tlbiap; - volatile unsigned long tlbmma; /* +c0 */ - volatile unsigned long tlbioma; - volatile unsigned long sc_msg; - volatile unsigned long sc_be; - volatile unsigned long tbl; /* +d0 */ - volatile unsigned long unused10[3]; - volatile unsigned long pwmng; /* +e0 */ - volatile unsigned long pwmngs; - volatile unsigned long unused11[6]; - volatile unsigned long req_trace; /* +100 */ - volatile unsigned long pbapmc; - volatile unsigned long pbapms; - volatile unsigned long pbapmim; - volatile unsigned long bm; /* +110 */ - volatile unsigned long cpcibrs; - volatile unsigned long cpcibgs; - volatile unsigned long pbacs; - volatile unsigned long iobas; /* +120 */ - volatile 
unsigned long mbas; - volatile unsigned long lbc; - volatile unsigned long lbstat; - volatile unsigned long lbim; /* +130 */ - volatile unsigned long pcistatim; - volatile unsigned long ica; - volatile unsigned long icd; - volatile unsigned long iiadp; /* +140 */ - volatile unsigned long iscdp; - volatile unsigned long mmas; - volatile unsigned long iomas; - volatile unsigned long ipciaddr; /* +150 */ - volatile unsigned long ipcidata; - volatile unsigned long ipcibe; -}; - -struct tx3927_ccfg_reg { - volatile unsigned long ccfg; - volatile unsigned long crir; - volatile unsigned long pcfg; - volatile unsigned long tear; - volatile unsigned long pdcr; -}; - -/* - * SDRAMC - */ - -/* - * ROMC - */ - -/* - * DMA - */ -/* bits for MCR */ -#define TX3927_DMA_MCR_EIS(ch) (0x10000000<<(ch)) -#define TX3927_DMA_MCR_DIS(ch) (0x01000000<<(ch)) -#define TX3927_DMA_MCR_RSFIF 0x00000080 -#define TX3927_DMA_MCR_FIFUM(ch) (0x00000008<<(ch)) -#define TX3927_DMA_MCR_LE 0x00000004 -#define TX3927_DMA_MCR_RPRT 0x00000002 -#define TX3927_DMA_MCR_MSTEN 0x00000001 - -/* bits for CCRn */ -#define TX3927_DMA_CCR_DBINH 0x04000000 -#define TX3927_DMA_CCR_SBINH 0x02000000 -#define TX3927_DMA_CCR_CHRST 0x01000000 -#define TX3927_DMA_CCR_RVBYTE 0x00800000 -#define TX3927_DMA_CCR_ACKPOL 0x00400000 -#define TX3927_DMA_CCR_REQPL 0x00200000 -#define TX3927_DMA_CCR_EGREQ 0x00100000 -#define TX3927_DMA_CCR_CHDN 0x00080000 -#define TX3927_DMA_CCR_DNCTL 0x00060000 -#define TX3927_DMA_CCR_EXTRQ 0x00010000 -#define TX3927_DMA_CCR_INTRQD 0x0000e000 -#define TX3927_DMA_CCR_INTENE 0x00001000 -#define TX3927_DMA_CCR_INTENC 0x00000800 -#define TX3927_DMA_CCR_INTENT 0x00000400 -#define TX3927_DMA_CCR_CHNEN 0x00000200 -#define TX3927_DMA_CCR_XFACT 0x00000100 -#define TX3927_DMA_CCR_SNOP 0x00000080 -#define TX3927_DMA_CCR_DSTINC 0x00000040 -#define TX3927_DMA_CCR_SRCINC 0x00000020 -#define TX3927_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c) -#define TX3927_DMA_CCR_XFSZ_1W TX3927_DMA_CCR_XFSZ(2) -#define 
TX3927_DMA_CCR_XFSZ_4W TX3927_DMA_CCR_XFSZ(4) -#define TX3927_DMA_CCR_XFSZ_8W TX3927_DMA_CCR_XFSZ(5) -#define TX3927_DMA_CCR_XFSZ_16W TX3927_DMA_CCR_XFSZ(6) -#define TX3927_DMA_CCR_XFSZ_32W TX3927_DMA_CCR_XFSZ(7) -#define TX3927_DMA_CCR_MEMIO 0x00000002 -#define TX3927_DMA_CCR_ONEAD 0x00000001 - -/* bits for CSRn */ -#define TX3927_DMA_CSR_CHNACT 0x00000100 -#define TX3927_DMA_CSR_ABCHC 0x00000080 -#define TX3927_DMA_CSR_NCHNC 0x00000040 -#define TX3927_DMA_CSR_NTRNFC 0x00000020 -#define TX3927_DMA_CSR_EXTDN 0x00000010 -#define TX3927_DMA_CSR_CFERR 0x00000008 -#define TX3927_DMA_CSR_CHERR 0x00000004 -#define TX3927_DMA_CSR_DESERR 0x00000002 -#define TX3927_DMA_CSR_SORERR 0x00000001 - -/* - * IRC - */ -#define TX3927_IR_INT0 0 -#define TX3927_IR_INT1 1 -#define TX3927_IR_INT2 2 -#define TX3927_IR_INT3 3 -#define TX3927_IR_INT4 4 -#define TX3927_IR_INT5 5 -#define TX3927_IR_SIO0 6 -#define TX3927_IR_SIO1 7 -#define TX3927_IR_SIO(ch) (6 + (ch)) -#define TX3927_IR_DMA 8 -#define TX3927_IR_PIO 9 -#define TX3927_IR_PCI 10 -#define TX3927_IR_TMR(ch) (13 + (ch)) -#define TX3927_NUM_IR 16 - -/* - * PCIC - */ -/* bits for PCICMD */ -/* see PCI_COMMAND_XXX in linux/pci.h */ - -/* bits for PCISTAT */ -/* see PCI_STATUS_XXX in linux/pci.h */ -#define PCI_STATUS_NEW_CAP 0x0010 - -/* bits for ISTAT/IIM */ -#define TX3927_PCIC_IIM_ALL 0x00001600 - -/* bits for TC */ -#define TX3927_PCIC_TC_OF16E 0x00000020 -#define TX3927_PCIC_TC_IF8E 0x00000010 -#define TX3927_PCIC_TC_OF8E 0x00000008 - -/* bits for TSTAT/TIM */ -#define TX3927_PCIC_TIM_ALL 0x0003ffff - -/* bits for IOBA/MBA */ -/* see PCI_BASE_ADDRESS_XXX in linux/pci.h */ - -/* bits for PBAPMC */ -#define TX3927_PCIC_PBAPMC_RPBA 0x00000004 -#define TX3927_PCIC_PBAPMC_PBAEN 0x00000002 -#define TX3927_PCIC_PBAPMC_BMCEN 0x00000001 - -/* bits for LBSTAT/LBIM */ -#define TX3927_PCIC_LBIM_ALL 0x0000003e - -/* bits for PCISTATIM (see also PCI_STATUS_XXX in linux/pci.h */ -#define TX3927_PCIC_PCISTATIM_ALL 0x0000f900 - -/* bits for LBC 
*/ -#define TX3927_PCIC_LBC_IBSE 0x00004000 -#define TX3927_PCIC_LBC_TIBSE 0x00002000 -#define TX3927_PCIC_LBC_TMFBSE 0x00001000 -#define TX3927_PCIC_LBC_HRST 0x00000800 -#define TX3927_PCIC_LBC_SRST 0x00000400 -#define TX3927_PCIC_LBC_EPCAD 0x00000200 -#define TX3927_PCIC_LBC_MSDSE 0x00000100 -#define TX3927_PCIC_LBC_CRR 0x00000080 -#define TX3927_PCIC_LBC_ILMDE 0x00000040 -#define TX3927_PCIC_LBC_ILIDE 0x00000020 - -#define TX3927_PCIC_IDSEL_AD_TO_SLOT(ad) ((ad) - 11) -#define TX3927_PCIC_MAX_DEVNU TX3927_PCIC_IDSEL_AD_TO_SLOT(32) - -/* - * CCFG - */ -/* CCFG : Chip Configuration */ -#define TX3927_CCFG_TLBOFF 0x00020000 -#define TX3927_CCFG_BEOW 0x00010000 -#define TX3927_CCFG_WR 0x00008000 -#define TX3927_CCFG_TOE 0x00004000 -#define TX3927_CCFG_PCIXARB 0x00002000 -#define TX3927_CCFG_PCI3 0x00001000 -#define TX3927_CCFG_PSNP 0x00000800 -#define TX3927_CCFG_PPRI 0x00000400 -#define TX3927_CCFG_PLLM 0x00000030 -#define TX3927_CCFG_ENDIAN 0x00000004 -#define TX3927_CCFG_HALT 0x00000002 -#define TX3927_CCFG_ACEHOLD 0x00000001 - -/* PCFG : Pin Configuration */ -#define TX3927_PCFG_SYSCLKEN 0x08000000 -#define TX3927_PCFG_SDRCLKEN_ALL 0x07c00000 -#define TX3927_PCFG_SDRCLKEN(ch) (0x00400000<<(ch)) -#define TX3927_PCFG_PCICLKEN_ALL 0x003c0000 -#define TX3927_PCFG_PCICLKEN(ch) (0x00040000<<(ch)) -#define TX3927_PCFG_SELALL 0x0003ffff -#define TX3927_PCFG_SELCS 0x00020000 -#define TX3927_PCFG_SELDSF 0x00010000 -#define TX3927_PCFG_SELSIOC_ALL 0x0000c000 -#define TX3927_PCFG_SELSIOC(ch) (0x00004000<<(ch)) -#define TX3927_PCFG_SELSIO_ALL 0x00003000 -#define TX3927_PCFG_SELSIO(ch) (0x00001000<<(ch)) -#define TX3927_PCFG_SELTMR_ALL 0x00000e00 -#define TX3927_PCFG_SELTMR(ch) (0x00000200<<(ch)) -#define TX3927_PCFG_SELDONE 0x00000100 -#define TX3927_PCFG_INTDMA_ALL 0x000000f0 -#define TX3927_PCFG_INTDMA(ch) (0x00000010<<(ch)) -#define TX3927_PCFG_SELDMA_ALL 0x0000000f -#define TX3927_PCFG_SELDMA(ch) (0x00000001<<(ch)) - -#define tx3927_sdramcptr ((struct tx3927_sdramc_reg 
*)TX3927_SDRAMC_REG) -#define tx3927_romcptr ((struct tx3927_romc_reg *)TX3927_ROMC_REG) -#define tx3927_dmaptr ((struct tx3927_dma_reg *)TX3927_DMA_REG) -#define tx3927_pcicptr ((struct tx3927_pcic_reg *)TX3927_PCIC_REG) -#define tx3927_ccfgptr ((struct tx3927_ccfg_reg *)TX3927_CCFG_REG) -#define tx3927_sioptr(ch) ((struct txx927_sio_reg *)TX3927_SIO_REG(ch)) -#define tx3927_pioptr ((struct txx9_pio_reg __iomem *)TX3927_PIO_REG) - -#define TX3927_REV_PCODE() (tx3927_ccfgptr->crir >> 16) -#define TX3927_ROMC_BA(ch) (tx3927_romcptr->cr[(ch)] & 0xfff00000) -#define TX3927_ROMC_SIZE(ch) \ - (0x00100000 << ((tx3927_romcptr->cr[(ch)] >> 8) & 0xf)) -#define TX3927_ROMC_WIDTH(ch) (32 >> ((tx3927_romcptr->cr[(ch)] >> 7) & 0x1)) - -void tx3927_wdt_init(void); -void tx3927_setup(void); -void tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr); -void tx3927_sio_init(unsigned int sclk, unsigned int cts_mask); -struct pci_controller; -void tx3927_pcic_setup(struct pci_controller *channel, - unsigned long sdram_size, int extarb); -void tx3927_setup_pcierr_irq(void); -void tx3927_irq_init(void); -void tx3927_mtd_init(int ch); - -#endif /* __ASM_TXX9_TX3927_H */ diff --git a/arch/mips/include/asm/txx9irq.h b/arch/mips/include/asm/txx9irq.h index 68a6650a4025..3875243bb56b 100644 --- a/arch/mips/include/asm/txx9irq.h +++ b/arch/mips/include/asm/txx9irq.h @@ -21,11 +21,7 @@ #endif #endif -#ifdef CONFIG_CPU_TX39XX -#define TXx9_MAX_IR 16 -#else #define TXx9_MAX_IR 32 -#endif void txx9_irq_init(unsigned long baseaddr); int txx9_irq(void); diff --git a/arch/mips/include/asm/txx9tmr.h b/arch/mips/include/asm/txx9tmr.h index 466a3def3866..a051b411368e 100644 --- a/arch/mips/include/asm/txx9tmr.h +++ b/arch/mips/include/asm/txx9tmr.h @@ -58,10 +58,6 @@ void txx9_clockevent_init(unsigned long baseaddr, int irq, unsigned int imbusclk); void txx9_tmr_init(unsigned long baseaddr); -#ifdef CONFIG_CPU_TX39XX -#define TXX9_TIMER_BITS 24 -#else #define TXX9_TIMER_BITS 32 -#endif 
#endif /* __ASM_TXX9TMR_H */ diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h index 0904de0b5e09..1c33922eb945 100644 --- a/arch/mips/include/asm/vermagic.h +++ b/arch/mips/include/asm/vermagic.h @@ -22,8 +22,6 @@ #define MODULE_PROC_FAMILY "MIPS64_R6 " #elif defined CONFIG_CPU_R3000 #define MODULE_PROC_FAMILY "R3000 " -#elif defined CONFIG_CPU_TX39XX -#define MODULE_PROC_FAMILY "TX39XX " #elif defined CONFIG_CPU_VR41XX #define MODULE_PROC_FAMILY "VR41XX " #elif defined CONFIG_CPU_R4300 diff --git a/arch/mips/include/asm/war.h b/arch/mips/include/asm/war.h deleted file mode 100644 index 21443f096238..000000000000 --- a/arch/mips/include/asm/war.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002, 2004, 2007 by Ralf Baechle - * Copyright (C) 2007 Maciej W. Rozycki - */ -#ifndef _ASM_WAR_H -#define _ASM_WAR_H - -/* - * Work around certain R4000 CPU errata (as implemented by GCC): - * - * - A double-word or a variable shift may give an incorrect result - * if executed immediately after starting an integer division: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #28 - * "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum - * #19 - * - * - A double-word or a variable shift may give an incorrect result - * if executed while an integer multiplication is in progress: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * errata #16 & #28 - * - * - An integer division may give an incorrect result if started in - * a delay slot of a taken branch or a jump: - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #52 - */ -#ifdef CONFIG_CPU_R4000_WORKAROUNDS -#define R4000_WAR 1 -#else -#define R4000_WAR 0 -#endif - -/* - * Work around certain R4400 CPU errata (as implemented by GCC): - * - * - A 
double-word or a variable shift may give an incorrect result - * if executed immediately after starting an integer division: - * "MIPS R4400MC Errata, Processor Revision 1.0", erratum #10 - * "MIPS R4400MC Errata, Processor Revision 2.0 & 3.0", erratum #4 - */ -#ifdef CONFIG_CPU_R4400_WORKAROUNDS -#define R4400_WAR 1 -#else -#define R4400_WAR 0 -#endif - -/* - * Work around the "daddi" and "daddiu" CPU errata: - * - * - The `daddi' instruction fails to trap on overflow. - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #23 - * - * - The `daddiu' instruction can produce an incorrect result. - * "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", - * erratum #41 - * "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0", erratum - * #15 - * "MIPS R4400PC/SC Errata, Processor Revision 1.0", erratum #7 - * "MIPS R4400MC Errata, Processor Revision 1.0", erratum #5 - */ -#ifdef CONFIG_CPU_DADDI_WORKAROUNDS -#define DADDI_WAR 1 -#else -#define DADDI_WAR 0 -#endif - -#endif /* _ASM_WAR_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 814b3da30501..7c96282bff2e 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o sw-y := r4k_switch.o sw-$(CONFIG_CPU_R3000) := r2300_switch.o -sw-$(CONFIG_CPU_TX39XX) := r2300_switch.o sw-$(CONFIG_CPU_CAVIUM_OCTEON) := octeon_switch.o obj-y += $(sw-y) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 24a529c6c4be..f0ea92937546 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1189,29 +1189,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 48; break; #endif - case PRID_IMP_TX39: - c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS; - c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE; - - if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) { - c->cputype = CPU_TX3927; - __cpu_name[cpu] = "TX3927"; - 
c->tlbsize = 64; - } else { - switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_TX3912: - c->cputype = CPU_TX3912; - __cpu_name[cpu] = "TX3912"; - c->tlbsize = 32; - break; - case PRID_REV_TX3922: - c->cputype = CPU_TX3922; - __cpu_name[cpu] = "TX3922"; - c->tlbsize = 64; - break; - } - } - break; case PRID_IMP_R4700: c->cputype = CPU_R4700; __cpu_name[cpu] = "R4700"; diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index af654771918c..be93469c0e0e 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c @@ -118,28 +118,6 @@ void cpu_probe(void) c->options |= MIPS_CPU_FPU; c->tlbsize = 64; break; - case PRID_COMP_LEGACY | PRID_IMP_TX39: - c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE; - - if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) { - c->cputype = CPU_TX3927; - __cpu_name[cpu] = "TX3927"; - c->tlbsize = 64; - } else { - switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_TX3912: - c->cputype = CPU_TX3912; - __cpu_name[cpu] = "TX3912"; - c->tlbsize = 32; - break; - case PRID_REV_TX3922: - c->cputype = CPU_TX3922; - __cpu_name[cpu] = "TX3922"; - c->tlbsize = 64; - break; - } - } - break; } BUG_ON(!__cpu_name[cpu]); diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 7b045d2a0b51..5582a4ca1e9e 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -328,16 +328,10 @@ void mips_set_personality_nan(struct arch_elf_state *state) int mips_elf_read_implies_exec(void *elf_ex, int exstack) { - if (exstack != EXSTACK_DISABLE_X) { - /* The binary doesn't request a non-executable stack */ - return 1; - } - - if (!cpu_has_rixi) { - /* The CPU doesn't support non-executable memory */ - return 1; - } - - return 0; + /* + * Set READ_IMPLIES_EXEC only on non-NX systems that + * do not request a specific state via PT_GNU_STACK. 
+ */ + return (!cpu_has_rixi && exstack == EXSTACK_DEFAULT); } EXPORT_SYMBOL(mips_elf_read_implies_exec); diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 4b896f5023ff..891393626dc6 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -17,7 +17,6 @@ #include #include #include -#include #ifndef CONFIG_PREEMPTION #define resume_kernel restore_all @@ -101,7 +100,7 @@ restore_partial: # restore partial frame SAVE_AT SAVE_TEMP LONG_L v0, PT_STATUS(sp) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) and v0, ST0_IEP #else and v0, ST0_IE diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 743d75927b71..3425df6019c0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -19,7 +19,6 @@ #include #include #include -#include #include __INIT @@ -163,7 +162,7 @@ NESTED(handle_int, PT_SIZE, sp) .set push .set noat mfc0 k0, CP0_STATUS -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) and k0, ST0_IEP bnez k0, 1f @@ -645,7 +644,7 @@ isrdhwr: get_saved_sp /* k1 := current_thread_info */ .set noreorder MFC0 k0, CP0_EPC -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) ori k1, _THREAD_MASK xori k1, _THREAD_MASK LONG_L v1, TI_TP_VALUE(k1) diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index c81b3a039470..146d9fa77f75 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -36,13 +36,6 @@ static void __cpuidle r3081_wait(void) raw_local_irq_enable(); } -static void __cpuidle r39xx_wait(void) -{ - if (!need_resched()) - write_c0_conf(read_c0_conf() | TX39_CONF_HALT); - raw_local_irq_enable(); -} - void __cpuidle r4k_wait(void) { raw_local_irq_enable(); @@ -147,9 +140,6 @@ void __init check_wait(void) case CPU_R3081E: cpu_wait = r3081_wait; break; - case CPU_TX3927: - cpu_wait = r39xx_wait; - break; case CPU_R4200: /* case CPU_R4300: */ case CPU_R4600: diff 
--git a/arch/mips/kernel/irq_txx9.c b/arch/mips/kernel/irq_txx9.c index ab00e490482f..af3ef4c9f7de 100644 --- a/arch/mips/kernel/irq_txx9.c +++ b/arch/mips/kernel/irq_txx9.c @@ -72,11 +72,6 @@ static void txx9_irq_unmask(struct irq_data *d) __raw_writel((__raw_readl(ilrp) & ~(0xff << ofs)) | (txx9irq[irq_nr].level << ofs), ilrp); -#ifdef CONFIG_CPU_TX39XX - /* update IRCSR */ - __raw_writel(0, &txx9_ircptr->imr); - __raw_writel(irc_elevel, &txx9_ircptr->imr); -#endif } static inline void txx9_irq_mask(struct irq_data *d) @@ -88,15 +83,7 @@ static inline void txx9_irq_mask(struct irq_data *d) __raw_writel((__raw_readl(ilrp) & ~(0xff << ofs)) | (irc_dlevel << ofs), ilrp); -#ifdef CONFIG_CPU_TX39XX - /* update IRCSR */ - __raw_writel(0, &txx9_ircptr->imr); - __raw_writel(irc_elevel, &txx9_ircptr->imr); - /* flush write buffer */ - __raw_readl(&txx9_ircptr->ssr); -#else mmiowb(); -#endif } static void txx9_irq_mask_ack(struct irq_data *d) diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 9f47a889b047..bb43bf850314 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -181,8 +181,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, " 3k_cache"); if (cpu_has_4k_cache) seq_puts(m, " 4k_cache"); - if (cpu_has_tx39_cache) - seq_puts(m, " tx39_cache"); if (cpu_has_octeon_cache) seq_puts(m, " octeon_cache"); if (raw_cpu_has_fpu) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index cbff1b974f88..c2d5f4bfe1f3 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -128,7 +128,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg17 = kthread_arg; p->thread.reg29 = childksp; p->thread.reg31 = (unsigned long) ret_from_kernel_thread; -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +#if defined(CONFIG_CPU_R3000) status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) | ((status & (ST0_KUC | ST0_IEC)) << 2); #else diff --git a/arch/mips/kernel/prom.c 
b/arch/mips/kernel/prom.c index 6abebd57b218..7db6ff9aed7d 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -64,4 +64,9 @@ int __init __dt_register_buses(const char *bus0, const char *bus1) return 0; } +void __weak __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); +} + #endif diff --git a/arch/mips/kernel/r4k-bugs64.c b/arch/mips/kernel/r4k-bugs64.c index 35729c9e6cfa..6ffefb2c6971 100644 --- a/arch/mips/kernel/r4k-bugs64.c +++ b/arch/mips/kernel/r4k-bugs64.c @@ -163,7 +163,8 @@ static __always_inline __init void check_mult_sh(void) } pr_cont("no.\n"); - panic(bug64hit, !R4000_WAR ? r4kwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS) ? nowar : r4kwar); } static volatile int daddi_ov; @@ -239,7 +240,8 @@ static __init void check_daddi(void) } pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) ? nowar : daddiwar); } int daddiu_bug = -1; @@ -307,7 +309,8 @@ static __init void check_daddiu(void) } pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); + panic(bug64hit, + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) ? 
nowar : daddiwar); } void __init check_bugs64_early(void) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 9bfce5f75f60..18dc9b345056 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -19,7 +19,6 @@ #include #include #include -#include #include .align 5 diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5f6ed4b4c399..e6264aa62e45 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -18,7 +18,6 @@ #include #include #include -#include #ifndef CONFIG_MIPS32_COMPAT /* Neither O32 nor N32, so define handle_sys here */ diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5bce782e694c..71e309be86a2 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 7bd00fad61af..cfc77b69420a 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "signal-common.h" diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index a486486b2355..246c6a6b0261 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2091,19 +2091,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? 
+ rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2311,7 +2311,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index bc9f58fcbdf9..c731082a0c42 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -84,11 +84,6 @@ void __init plat_mem_setup(void) __dt_setup_arch(dtb); } -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} - void __init prom_init(void) { /* call the soc specific detetcion code and get it to fill soc_info */ diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 479f50559c83..5d5b993cbc2b 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -13,7 +13,6 @@ lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o -obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o # libgcc-style stuff needed in the kernel obj-y += bswapsi.o bswapdi.o multi3.o diff --git 
a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c index 2e8dfc1d59c8..ccdb1fc1e4bf 100644 --- a/arch/mips/lib/delay.c +++ b/arch/mips/lib/delay.c @@ -16,7 +16,6 @@ #include #include -#include #ifndef CONFIG_CPU_DADDI_WORKAROUNDS #define GCC_DADDI_IMM_ASM() "I" diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 10b4bf7f70a3..fcf594af0002 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -14,15 +14,11 @@ #include #include -extern int r3k_have_wired_reg; - void dump_tlb_regs(void) { pr_info("Index : %0x\n", read_c0_index()); pr_info("EntryHi : %0lx\n", read_c0_entryhi()); pr_info("EntryLo : %0lx\n", read_c0_entrylo0()); - if (r3k_have_wired_reg) - pr_info("Wired : %0x\n", read_c0_wired()); } static void dump_tlb(int first, int last) diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 50e659aca543..eebabf9df6ac 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -41,6 +41,7 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely +cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef +cflags-$(CONFIG_CC_HAS_MNO_BRANCH_LIKELY) += -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 3e660d6d3c2b..473404cae1c4 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -5,24 +5,9 @@ cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap -# -# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a -# as MIPS64 R2; older versions as just R1. This leaves the possibility open -# that GCC might generate R2 code for -march=loongson3a which then is rejected -# by GAS. 
The cc-option can't probe for this behaviour so -march=loongson3a -# can't easily be used safely within the kbuild framework. -# -ifeq ($(call cc-ifversion, -ge, 0409, y), y) - ifeq ($(call ld-ifversion, -ge, 22500, y), y) - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) - else - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) - endif -else - cflags-$(CONFIG_CPU_LOONGSON64) += \ - $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) +ifdef CONFIG_CPU_LOONGSON64 +cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a +cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2 endif # Some -march= flags enable MMI instructions, and GCC complains about that @@ -33,5 +18,6 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely +cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 +cflags-$(CONFIG_CC_HAS_MNO_BRANCH_LIKELY) += -mno-branch-likely load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index e8e3e48c5333..69a533148efd 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -197,3 +197,13 @@ void __init prom_init_numa_memory(void) prom_meminit(); } EXPORT_SYMBOL(prom_init_numa_memory); + +pg_data_t * __init arch_alloc_nodedata(int nid) +{ + return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); +} + +void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + __node_data[nid] = pgdat; +} diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c index 6fe3ffffcaa6..3cd11c2b308b 100644 --- a/arch/mips/loongson64/setup.c +++ b/arch/mips/loongson64/setup.c @@ -36,11 +36,3 @@ void __init plat_mem_setup(void) if (loongson_fdt_blob) 
__dt_setup_arch(loongson_fdt_blob); } - -void __init device_tree_init(void) -{ - if (!initial_boot_params) - return; - - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4acc4f3d31f8..304692391519 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -36,7 +36,6 @@ obj-$(CONFIG_CPU_R3K_TLB) += tlb-r3k.o obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o obj-$(CONFIG_CPU_R3000) += c-r3k.o obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o -obj-$(CONFIG_CPU_TX39XX) += c-tx39.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 737870d8fd94..c7ed589de882 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -23,7 +23,6 @@ #include #include #include -#include #include diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 50261fd8eb21..ccb9e47322b0 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -33,7 +33,6 @@ #include #include #include -#include #include /* for run_uncached() */ #include #include diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c deleted file mode 100644 index 03dfbb40ec73..000000000000 --- a/arch/mips/mm/c-tx39.c +++ /dev/null @@ -1,414 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * r2300.c: R2000 and R3000 specific mmu/cache code. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * - * with a lot of changes to make this thing work for R3000s - * Tx39XX R4k style caches added. 
HK - * Copyright (C) 1998, 1999, 2000 Harald Koerfgen - * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* For R3000 cores with R4000 style caches */ -static unsigned long icache_size, dcache_size; /* Size in bytes */ - -#include - -/* This sequence is required to ensure icache is disabled immediately */ -#define TX39_STOP_STREAMING() \ -__asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - "b 1f\n\t" \ - "nop\n\t" \ - "1:\n\t" \ - ".set pop" \ - ) - -/* TX39H-style cache flush routines. */ -static void tx39h_flush_icache_all(void) -{ - unsigned long flags, config; - - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - /* Catch bad driver code */ - BUG_ON(size == 0); - - iob(); - blast_inv_dcache_range(addr, addr + size); -} - - -/* TX39H2,TX39H3 */ -static inline void tx39_blast_dcache_page(unsigned long addr) -{ - if (current_cpu_type() != CPU_TX3912) - blast_dcache16_page(addr); -} - -static inline void tx39_blast_dcache_page_indexed(unsigned long addr) -{ - blast_dcache16_page_indexed(addr); -} - -static inline void tx39_blast_dcache(void) -{ - blast_dcache16(); -} - -static inline void tx39_blast_icache_page(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache_page_indexed(unsigned long addr) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - 
local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16_page_indexed(addr); - write_c0_conf(config); - local_irq_restore(flags); -} - -static inline void tx39_blast_icache(void) -{ - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - blast_icache16(); - write_c0_conf(config); - local_irq_restore(flags); -} - -static void tx39__flush_cache_vmap(void) -{ - tx39_blast_dcache(); -} - -static void tx39__flush_cache_vunmap(void) -{ - tx39_blast_dcache(); -} - -static inline void tx39_flush_cache_all(void) -{ - if (!cpu_has_dc_aliases) - return; - - tx39_blast_dcache(); -} - -static inline void tx39___flush_cache_all(void) -{ - tx39_blast_dcache(); - tx39_blast_icache(); -} - -static void tx39_flush_cache_mm(struct mm_struct *mm) -{ - if (!cpu_has_dc_aliases) - return; - - if (cpu_context(smp_processor_id(), mm) != 0) - tx39_blast_dcache(); -} - -static void tx39_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (!cpu_has_dc_aliases) - return; - if (!(cpu_context(smp_processor_id(), vma->vm_mm))) - return; - - tx39_blast_dcache(); -} - -static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) -{ - int exec = vma->vm_flags & VM_EXEC; - struct mm_struct *mm = vma->vm_mm; - pmd_t *pmdp; - pte_t *ptep; - - /* - * If ownes no valid ASID yet, cannot possibly have gotten - * this page into the cache. - */ - if (cpu_context(smp_processor_id(), mm) == 0) - return; - - page &= PAGE_MASK; - pmdp = pmd_off(mm, page); - ptep = pte_offset_kernel(pmdp, page); - - /* - * If the page isn't marked valid, the page cannot possibly be - * in the cache. 
- */ - if (!(pte_val(*ptep) & _PAGE_PRESENT)) - return; - - /* - * Doing flushes for another ASID than the current one is - * too difficult since stupid R4k caches do a TLB translation - * for every cache flush operation. So we do indexed flushes - * in that case, which doesn't overly flush the cache too much. - */ - if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page(page); - if (exec) - tx39_blast_icache_page(page); - - return; - } - - /* - * Do indexed flush, too much work to get the (possible) TLB refills - * to work correctly. - */ - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache_page_indexed(page); - if (exec) - tx39_blast_icache_page_indexed(page); -} - -static void local_tx39_flush_data_cache_page(void * addr) -{ - tx39_blast_dcache_page((unsigned long)addr); -} - -static void tx39_flush_data_cache_page(unsigned long addr) -{ - tx39_blast_dcache_page(addr); -} - -static void tx39_flush_icache_range(unsigned long start, unsigned long end) -{ - if (end - start > dcache_size) - tx39_blast_dcache(); - else - protected_blast_dcache_range(start, end); - - if (end - start > icache_size) - tx39_blast_icache(); - else { - unsigned long flags, config; - /* disable icache (set ICE#) */ - local_irq_save(flags); - config = read_c0_conf(); - write_c0_conf(config & ~TX39_CONF_ICE); - TX39_STOP_STREAMING(); - protected_blast_icache_range(start, end); - write_c0_conf(config); - local_irq_restore(flags); - } -} - -static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size) -{ - BUG(); -} - -static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_dcache_range(addr, addr + size); - } -} - -static void 
tx39_dma_cache_inv(unsigned long addr, unsigned long size) -{ - unsigned long end; - - if (((size | addr) & (PAGE_SIZE - 1)) == 0) { - end = addr + size; - do { - tx39_blast_dcache_page(addr); - addr += PAGE_SIZE; - } while(addr != end); - } else if (size > dcache_size) { - tx39_blast_dcache(); - } else { - blast_inv_dcache_range(addr, addr + size); - } -} - -static __init void tx39_probe_cache(void) -{ - unsigned long config; - - config = read_c0_conf(); - - icache_size = 1 << (10 + ((config & TX39_CONF_ICS_MASK) >> - TX39_CONF_ICS_SHIFT)); - dcache_size = 1 << (10 + ((config & TX39_CONF_DCS_MASK) >> - TX39_CONF_DCS_SHIFT)); - - current_cpu_data.icache.linesz = 16; - switch (current_cpu_type()) { - case CPU_TX3912: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 4; - break; - - case CPU_TX3927: - current_cpu_data.icache.ways = 2; - current_cpu_data.dcache.ways = 2; - current_cpu_data.dcache.linesz = 16; - break; - - case CPU_TX3922: - default: - current_cpu_data.icache.ways = 1; - current_cpu_data.dcache.ways = 1; - current_cpu_data.dcache.linesz = 16; - break; - } -} - -void tx39_cache_init(void) -{ - extern void build_clear_page(void); - extern void build_copy_page(void); - unsigned long config; - - config = read_c0_conf(); - config &= ~TX39_CONF_WBON; - write_c0_conf(config); - - tx39_probe_cache(); - - switch (current_cpu_type()) { - case CPU_TX3912: - /* TX39/H core (writethru direct-map cache) */ - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - flush_cache_all = tx39h_flush_icache_all; - __flush_cache_all = tx39h_flush_icache_all; - flush_cache_mm = (void *) tx39h_flush_icache_all; - flush_cache_range = (void *) tx39h_flush_icache_all; - flush_cache_page = (void *) tx39h_flush_icache_all; - flush_icache_range = (void *) tx39h_flush_icache_all; - local_flush_icache_range = (void *) tx39h_flush_icache_all; - - local_flush_data_cache_page = (void *) 
tx39h_flush_icache_all; - flush_data_cache_page = (void *) tx39h_flush_icache_all; - - _dma_cache_wback_inv = tx39h_dma_cache_wback_inv; - - shm_align_mask = PAGE_SIZE - 1; - - break; - - case CPU_TX3922: - case CPU_TX3927: - default: - /* TX39/H2,H3 core (writeback 2way-set-associative cache) */ - /* board-dependent init code may set WBON */ - - __flush_cache_vmap = tx39__flush_cache_vmap; - __flush_cache_vunmap = tx39__flush_cache_vunmap; - - flush_cache_all = tx39_flush_cache_all; - __flush_cache_all = tx39___flush_cache_all; - flush_cache_mm = tx39_flush_cache_mm; - flush_cache_range = tx39_flush_cache_range; - flush_cache_page = tx39_flush_cache_page; - flush_icache_range = tx39_flush_icache_range; - local_flush_icache_range = tx39_flush_icache_range; - - __flush_kernel_vmap_range = tx39_flush_kernel_vmap_range; - - local_flush_data_cache_page = local_tx39_flush_data_cache_page; - flush_data_cache_page = tx39_flush_data_cache_page; - - _dma_cache_wback_inv = tx39_dma_cache_wback_inv; - _dma_cache_wback = tx39_dma_cache_wback_inv; - _dma_cache_inv = tx39_dma_cache_inv; - - shm_align_mask = max_t(unsigned long, - (dcache_size / current_cpu_data.dcache.ways) - 1, - PAGE_SIZE - 1); - - break; - } - - __flush_icache_user_range = flush_icache_range; - __local_flush_icache_user_range = local_flush_icache_range; - - current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways; - current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways; - - current_cpu_data.icache.sets = - current_cpu_data.icache.waysize / current_cpu_data.icache.linesz; - current_cpu_data.dcache.sets = - current_cpu_data.dcache.waysize / current_cpu_data.dcache.linesz; - - if (current_cpu_data.dcache.waysize > PAGE_SIZE) - current_cpu_data.dcache.flags |= MIPS_CACHE_ALIASES; - - current_cpu_data.icache.waybit = 0; - current_cpu_data.dcache.waybit = 0; - - pr_info("Primary instruction cache %ldkB, linesize %d bytes\n", - icache_size >> 10, 
current_cpu_data.icache.linesz); - pr_info("Primary data cache %ldkB, linesize %d bytes\n", - dcache_size >> 10, current_cpu_data.dcache.linesz); - - build_clear_page(); - build_copy_page(); - tx39h_flush_icache_all(); -} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 830ab91e574f..7be7240f7703 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -195,11 +195,6 @@ void cpu_cache_init(void) r4k_cache_init(); } - if (cpu_has_tx39_cache) { - extern void __weak tx39_cache_init(void); - - tx39_cache_init(); - } if (cpu_has_octeon_cache) { extern void __weak octeon_cache_init(void); diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 504bc4047c4c..d3b4459d0fe8 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -25,7 +25,6 @@ #include #include #include -#include #ifdef CONFIG_SIBYTE_DMA_PAGEOPS #include @@ -103,7 +102,9 @@ static int cache_line_size; static inline void pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) { - if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { + if (cpu_has_64bit_gp_regs && + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) && + r4k_daddiu_bug()) { if (off > 0x7fff) { uasm_i_lui(buf, T9, uasm_rel_hi(off)); uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index a36622ebea55..53dfa2b9316b 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -36,8 +36,6 @@ extern void build_tlb_refill_handler(void); "nop\n\t" \ ".set pop\n\t") -int r3k_have_wired_reg; /* Should be in cpu_data? */ - /* TLB operations. */ static void local_flush_tlb_from(int entry) { @@ -62,7 +60,7 @@ void local_flush_tlb_all(void) printk("[tlball]"); #endif local_irq_save(flags); - local_flush_tlb_from(r3k_have_wired_reg ? 
read_c0_wired() : 8); + local_flush_tlb_from(8); local_irq_restore(flags); } @@ -224,34 +222,7 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long old_ctx; static unsigned long wired = 0; - if (r3k_have_wired_reg) { /* TX39XX */ - unsigned long old_pagemask; - unsigned long w; - -#ifdef DEBUG_TLB - printk("[tlbwired]\n", - entrylo0, entryhi, pagemask); -#endif - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi() & asid_mask; - old_pagemask = read_c0_pagemask(); - w = read_c0_wired(); - write_c0_wired(w + 1); - write_c0_index(w << 8); - write_c0_pagemask(pagemask); - write_c0_entryhi(entryhi); - write_c0_entrylo0(entrylo0); - BARRIER; - tlb_write_indexed(); - - write_c0_entryhi(old_ctx); - write_c0_pagemask(old_pagemask); - local_flush_tlb_all(); - local_irq_restore(flags); - - } else if (wired < 8) { + if (wired < 8) { #ifdef DEBUG_TLB printk("[tlbwired]\n", entrylo0, entryhi); @@ -272,13 +243,6 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, void tlb_init(void) { - switch (current_cpu_type()) { - case CPU_TX3922: - case CPU_TX3927: - r3k_have_wired_reg = 1; - write_c0_wired(0); /* Set to 8 on reset... */ - break; - } local_flush_tlb_from(0); build_tlb_refill_handler(); } diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index b131e6a77383..8dbbd99fc7e8 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -2160,16 +2159,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - fallthrough; - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. 
*/ @@ -2236,15 +2233,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. */ diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index 94c11f5eac74..13bbd12bfa65 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -6,7 +6,6 @@ # Copyright (C) 2008 Wind River Systems, Inc. # written by Ralf Baechle # -obj-y += malta-dt.o obj-y += malta-dtshim.o obj-y += malta-init.o obj-y += malta-int.o diff --git a/arch/mips/mti-malta/malta-dt.c b/arch/mips/mti-malta/malta-dt.c deleted file mode 100644 index d045c9149418..000000000000 --- a/arch/mips/mti-malta/malta-dt.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Paul Burton - */ - -#include -#include -#include -#include - -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 9a6bc702608c..ed0388485a15 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o obj-$(CONFIG_PCI_GT64XXX_PCI0) += ops-gt64xxx_pci0.o obj-$(CONFIG_MIPS_MSC) += ops-msc.o -obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o @@ -46,7 +45,6 @@ obj-$(CONFIG_SOC_RT3883) += pci-rt3883.o obj-$(CONFIG_TANBAC_TB0219) += fixup-tb0219.o obj-$(CONFIG_TANBAC_TB0226) += fixup-tb0226.o obj-$(CONFIG_TANBAC_TB0287) += fixup-tb0287.o -obj-$(CONFIG_TOSHIBA_JMR3927) += 
fixup-jmr3927.o obj-$(CONFIG_SOC_TX4927) += pci-tx4927.o obj-$(CONFIG_SOC_TX4938) += pci-tx4938.o obj-$(CONFIG_TOSHIBA_RBTX4927) += fixup-rbtx4927.o diff --git a/arch/mips/pci/fixup-jmr3927.c b/arch/mips/pci/fixup-jmr3927.c deleted file mode 100644 index d3102eeea898..000000000000 --- a/arch/mips/pci/fixup-jmr3927.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * Board specific pci fixups. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#include -#include -#include - -int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - unsigned char irq = pin; - - /* IRQ rotation (PICMG) */ - irq--; /* 0-3 */ - if (slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(23)) { - /* PCI CardSlot (IDSEL=A23, DevNu=12) */ - /* PCIA => PCIC (IDSEL=A23) */ - /* NOTE: JMR3927 JP1 must be set to OPEN */ - irq = (irq + 2) % 4; - } else if (slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(22)) { - /* PCI CardSlot (IDSEL=A22, DevNu=11) */ - /* PCIA => PCIA (IDSEL=A22) */ - /* NOTE: JMR3927 JP1 must be set to OPEN */ - irq = (irq + 0) % 4; - } else { - /* PCI Backplane */ - if (txx9_pci_option & TXX9_PCI_OPT_PICMG) - irq = (irq + 33 - slot) % 4; - else - irq = (irq + 3 + slot) % 4; - } - irq++; /* 1-4 */ - - switch (irq) { - case 1: - irq = JMR3927_IRQ_IOC_PCIA; - break; - case 2: - irq = JMR3927_IRQ_IOC_PCIB; - break; - case 3: - irq = JMR3927_IRQ_IOC_PCIC; - break; - case 4: - irq = JMR3927_IRQ_IOC_PCID; - break; - } - - /* Check OnBoard Ethernet (IDSEL=A24, DevNu=13) */ - if (dev->bus->parent == NULL && - slot == TX3927_PCIC_IDSEL_AD_TO_SLOT(24)) - irq = JMR3927_IRQ_ETHER0; - return irq; -} diff --git a/arch/mips/pci/ops-tx3927.c b/arch/mips/pci/ops-tx3927.c deleted file mode 100644 index d35dc9c9ab9d..000000000000 --- a/arch/mips/pci/ops-tx3927.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2004 by Ralf Baechle (ralf@linux-mips.org) - * - * Based on arch/mips/ddb5xxx/ddb5477/pci_ops.c - * - * Define the pci_ops for TX3927. 
- * - * Much of the code is derived from the original DDB5074 port by - * Geert Uytterhoeven - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static int mkaddr(struct pci_bus *bus, unsigned char devfn, unsigned char where) -{ - if (bus->parent == NULL && - devfn >= PCI_DEVFN(TX3927_PCIC_MAX_DEVNU, 0)) - return -1; - tx3927_pcicptr->ica = - ((bus->number & 0xff) << 0x10) | - ((devfn & 0xff) << 0x08) | - (where & 0xfc) | (bus->parent ? 1 : 0); - - /* clear M_ABORT and Disable M_ABORT Int. 
*/ - tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; - tx3927_pcicptr->pcistatim &= ~PCI_STATUS_REC_MASTER_ABORT; - return 0; -} - -static inline int check_abort(void) -{ - if (tx3927_pcicptr->pcistat & PCI_STATUS_REC_MASTER_ABORT) { - tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; - tx3927_pcicptr->pcistatim |= PCI_STATUS_REC_MASTER_ABORT; - /* flush write buffer */ - iob(); - return PCIBIOS_DEVICE_NOT_FOUND; - } - return PCIBIOS_SUCCESSFUL; -} - -static int tx3927_pci_read_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 * val) -{ - if (mkaddr(bus, devfn, where)) { - *val = 0xffffffff; - return PCIBIOS_DEVICE_NOT_FOUND; - } - - switch (size) { - case 1: - *val = *(volatile u8 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3)); - break; - - case 2: - *val = le16_to_cpu(*(volatile u16 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3))); - break; - - case 4: - *val = le32_to_cpu(tx3927_pcicptr->icd); - break; - } - - return check_abort(); -} - -static int tx3927_pci_write_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - if (mkaddr(bus, devfn, where)) - return PCIBIOS_DEVICE_NOT_FOUND; - - switch (size) { - case 1: - *(volatile u8 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 3)) = val; - break; - - case 2: - *(volatile u16 *) ((unsigned long) & tx3927_pcicptr->icd | (where & 2)) = - cpu_to_le16(val); - break; - - case 4: - tx3927_pcicptr->icd = cpu_to_le32(val); - } - - return check_abort(); -} - -static struct pci_ops tx3927_pci_ops = { - .read = tx3927_pci_read_config, - .write = tx3927_pci_write_config, -}; - -void __init tx3927_pcic_setup(struct pci_controller *channel, - unsigned long sdram_size, int extarb) -{ - unsigned long flags; - unsigned long io_base = - channel->io_resource->start + mips_io_port_base - IO_BASE; - unsigned long io_size = - channel->io_resource->end - channel->io_resource->start; - unsigned long io_pciaddr = - channel->io_resource->start - 
channel->io_offset; - unsigned long mem_base = - channel->mem_resource->start; - unsigned long mem_size = - channel->mem_resource->end - channel->mem_resource->start; - unsigned long mem_pciaddr = - channel->mem_resource->start - channel->mem_offset; - - printk(KERN_INFO "TX3927 PCIC -- DID:%04x VID:%04x RID:%02x Arbiter:%s", - tx3927_pcicptr->did, tx3927_pcicptr->vid, - tx3927_pcicptr->rid, - extarb ? "External" : "Internal"); - channel->pci_ops = &tx3927_pci_ops; - - local_irq_save(flags); - /* Disable External PCI Config. Access */ - tx3927_pcicptr->lbc = TX3927_PCIC_LBC_EPCAD; -#ifdef __BIG_ENDIAN - tx3927_pcicptr->lbc |= TX3927_PCIC_LBC_IBSE | - TX3927_PCIC_LBC_TIBSE | - TX3927_PCIC_LBC_TMFBSE | TX3927_PCIC_LBC_MSDSE; -#endif - /* LB->PCI mappings */ - tx3927_pcicptr->iomas = ~(io_size - 1); - tx3927_pcicptr->ilbioma = io_base; - tx3927_pcicptr->ipbioma = io_pciaddr; - tx3927_pcicptr->mmas = ~(mem_size - 1); - tx3927_pcicptr->ilbmma = mem_base; - tx3927_pcicptr->ipbmma = mem_pciaddr; - /* PCI->LB mappings */ - tx3927_pcicptr->iobas = 0xffffffff; - tx3927_pcicptr->ioba = 0; - tx3927_pcicptr->tlbioma = 0; - tx3927_pcicptr->mbas = ~(sdram_size - 1); - tx3927_pcicptr->mba = 0; - tx3927_pcicptr->tlbmma = 0; - /* Enable Direct mapping Address Space Decoder */ - tx3927_pcicptr->lbc |= TX3927_PCIC_LBC_ILMDE | TX3927_PCIC_LBC_ILIDE; - - /* Clear All Local Bus Status */ - tx3927_pcicptr->lbstat = TX3927_PCIC_LBIM_ALL; - /* Enable All Local Bus Interrupts */ - tx3927_pcicptr->lbim = TX3927_PCIC_LBIM_ALL; - /* Clear All PCI Status Error */ - tx3927_pcicptr->pcistat = TX3927_PCIC_PCISTATIM_ALL; - /* Enable All PCI Status Error Interrupts */ - tx3927_pcicptr->pcistatim = TX3927_PCIC_PCISTATIM_ALL; - - /* PCIC Int => IRC IRQ10 */ - tx3927_pcicptr->il = TX3927_IR_PCI; - /* Target Control (per errata) */ - tx3927_pcicptr->tc = TX3927_PCIC_TC_OF8E | TX3927_PCIC_TC_IF8E; - - /* Enable Bus Arbiter */ - if (!extarb) - tx3927_pcicptr->pbapmc = TX3927_PCIC_PBAPMC_PBAEN; - - 
tx3927_pcicptr->pcicmd = PCI_COMMAND_MASTER | - PCI_COMMAND_MEMORY | - PCI_COMMAND_IO | - PCI_COMMAND_PARITY | PCI_COMMAND_SERR; - local_irq_restore(flags); -} - -static irqreturn_t tx3927_pcierr_interrupt(int irq, void *dev_id) -{ - struct pt_regs *regs = get_irq_regs(); - - if (txx9_pci_err_action != TXX9_PCI_ERR_IGNORE) { - printk(KERN_WARNING "PCI error interrupt at 0x%08lx.\n", - regs->cp0_epc); - printk(KERN_WARNING "pcistat:%02x, lbstat:%04lx\n", - tx3927_pcicptr->pcistat, tx3927_pcicptr->lbstat); - } - if (txx9_pci_err_action != TXX9_PCI_ERR_PANIC) { - /* clear all pci errors */ - tx3927_pcicptr->pcistat |= TX3927_PCIC_PCISTATIM_ALL; - tx3927_pcicptr->istat = TX3927_PCIC_IIM_ALL; - tx3927_pcicptr->tstat = TX3927_PCIC_TIM_ALL; - tx3927_pcicptr->lbstat = TX3927_PCIC_LBIM_ALL; - return IRQ_HANDLED; - } - console_verbose(); - panic("PCI error."); -} - -void __init tx3927_setup_pcierr_irq(void) -{ - if (request_irq(TXX9_IRQ_BASE + TX3927_IR_PCI, - tx3927_pcierr_interrupt, - 0, "PCI error", - (void *)TX3927_PCIC_REG)) - printk(KERN_WARNING "Failed to request irq for PCIERR\n"); -} diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 9a4bfb4e63e3..30e0922f4cea 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -384,7 +384,7 @@ static int ar2315_pci_irq_map(struct irq_domain *d, unsigned irq, return 0; } -static struct irq_domain_ops ar2315_pci_irq_domain_ops = { +static const struct irq_domain_ops ar2315_pci_irq_domain_ops = { .map = ar2315_pci_irq_map, }; diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 764f2d022fae..129915616763 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -78,14 +78,6 @@ void __init prom_init(void) pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1); } -void __init device_tree_init(void) -{ - if (!initial_boot_params) - return; - - unflatten_and_copy_device_tree(); -} - static struct pic32_sdhci_platform_data sdhci_data 
= { .setup_dma = pic32_set_sdhci_adma_fifo_threshold, }; diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 115a69fc20ca..f395ae218470 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 35a87a2da10b..587c7b998769 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -48,11 +48,6 @@ __iomem void *plat_of_remap_node(const char *node) return ioremap(res.start, resource_size(&res)); } -void __init device_tree_init(void) -{ - unflatten_and_copy_device_tree(); -} - void __init plat_mem_setup(void) { void *dtb; diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 04684990e28e..b7f6f782d9a1 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -301,11 +301,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index adc2faeecf7c..f79c48393716 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -422,3 +422,13 @@ void __init mem_init(void) memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ } + +pg_data_t * __init arch_alloc_nodedata(int nid) +{ + return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); +} + +void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + __node_data[nid] = (struct node_data *)pgdat; +} diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c index 
f80d7a710333..bc47681e825a 100644 --- a/arch/mips/sibyte/common/sb_tbprof.c +++ b/arch/mips/sibyte/common/sb_tbprof.c @@ -437,13 +437,13 @@ static int sbprof_tb_release(struct inode *inode, struct file *filp) return 0; } -static ssize_t sbprof_tb_read(struct file *filp, char *buf, +static ssize_t sbprof_tb_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { int cur_sample, sample_off, cur_count, sample_left; char *src; int count = 0; - char *dest = buf; + char __user *dest = buf; long cur_off = *offp; if (!access_ok(buf, size)) @@ -512,7 +512,7 @@ static long sbprof_tb_ioctl(struct file *filp, if (err) break; - err = put_user(TB_FULL, (int *) arg); + err = put_user(TB_FULL, (int __user *) arg); break; } diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 6c61feee6dd3..7335efa4d528 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -config MACH_TX39XX - bool - select MACH_TXX9 - select SYS_HAS_CPU_TX39XX - config MACH_TX49XX bool select BOOT_ELF32 @@ -24,11 +19,6 @@ config MACH_TXX9 select SYS_SUPPORTS_BIG_ENDIAN select COMMON_CLK -config TOSHIBA_JMR3927 - bool "Toshiba JMR-TX3927 board" - depends on MACH_TX39XX - select SOC_TX3927 - config TOSHIBA_RBTX4927 bool "Toshiba RBTX49[23]7 board" depends on MACH_TX49XX @@ -39,14 +29,6 @@ config TOSHIBA_RBTX4927 This Toshiba board is based on the TX4927 processor. 
Say Y here to support this machine type -config SOC_TX3927 - bool - select CEVT_TXX9 - imply HAS_TXX9_SERIAL - select HAVE_PCI - select IRQ_TXX9 - select GPIO_TXX9 - config SOC_TX4927 bool select CEVT_TXX9 diff --git a/arch/mips/txx9/Makefile b/arch/mips/txx9/Makefile index 53269910a48b..14c91f2678a3 100644 --- a/arch/mips/txx9/Makefile +++ b/arch/mips/txx9/Makefile @@ -2,14 +2,8 @@ # # Common TXx9 # -obj-$(CONFIG_MACH_TX39XX) += generic/ obj-$(CONFIG_MACH_TX49XX) += generic/ -# -# Toshiba JMR-TX3927 board -# -obj-$(CONFIG_TOSHIBA_JMR3927) += jmr3927/ - # # Toshiba RBTX49XX boards # diff --git a/arch/mips/txx9/Platform b/arch/mips/txx9/Platform index 7f4429ba22eb..e5a295068b3e 100644 --- a/arch/mips/txx9/Platform +++ b/arch/mips/txx9/Platform @@ -1,7 +1,4 @@ -cflags-$(CONFIG_MACH_TX39XX) += \ - -I$(srctree)/arch/mips/include/asm/mach-tx39xx cflags-$(CONFIG_MACH_TX49XX) += \ -I$(srctree)/arch/mips/include/asm/mach-tx49xx -load-$(CONFIG_MACH_TX39XX) += 0xffffffff80050000 load-$(CONFIG_MACH_TX49XX) += 0xffffffff80100000 diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index be5af9fe7c11..3c155c7e2be8 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -5,7 +5,6 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_SOC_TX3927) += setup_tx3927.o irq_tx3927.o obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o setup_tx4927.o irq_tx4927.o obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o setup_tx4938.o irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o diff --git a/arch/mips/txx9/generic/irq_tx3927.c b/arch/mips/txx9/generic/irq_tx3927.c deleted file mode 100644 index c683f593eda2..000000000000 --- a/arch/mips/txx9/generic/irq_tx3927.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Common tx3927 irq handler - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright 2001 MontaVista Software Inc. - * Copyright (C) 2000-2001 Toshiba Corporation - */ -#include -#include -#include - -void __init tx3927_irq_init(void) -{ - int i; - - txx9_irq_init(TX3927_IRC_REG); - /* raise priority for timers, sio */ - for (i = 0; i < TX3927_NR_TMR; i++) - txx9_irq_set_pri(TX3927_IR_TMR(i), 6); - for (i = 0; i < TX3927_NR_SIO; i++) - txx9_irq_set_pri(TX3927_IR_SIO(i), 7); -} diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 39cd1edf9d80..b098a3c76ae9 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -78,12 +78,7 @@ unsigned int txx9_master_clock; unsigned int txx9_cpu_clock; unsigned int txx9_gbus_clock; -#ifdef CONFIG_CPU_TX39XX -/* don't enable by default - see errata */ -int txx9_ccfg_toeon __initdata; -#else int txx9_ccfg_toeon __initdata = 1; -#endif #define BOARD_VEC(board) extern struct txx9_board_vec board; #include @@ -194,53 +189,6 @@ static void __init txx9_cache_fixup(void) if (conf & TX49_CONF_DC) pr_info("TX49XX D-Cache disabled.\n"); } -#elif defined(CONFIG_CPU_TX39XX) -/* flush all cache on very early stage (before tx39_cache_init) */ -static void __init early_flush_dcache(void) -{ - unsigned int conf = read_c0_config(); - unsigned int dc_size = 1 << (10 + ((conf & TX39_CONF_DCS_MASK) >> - TX39_CONF_DCS_SHIFT)); - unsigned int linesz = 16; - unsigned long addr, end; - - end = INDEX_BASE + dc_size / 2; - /* 2way, waybit=0 */ - for (addr = INDEX_BASE; addr < end; addr += linesz) { - cache_op(Index_Writeback_Inv_D, addr | 0); - cache_op(Index_Writeback_Inv_D, addr | 1); - } -} - -static void __init txx9_cache_fixup(void) -{ - unsigned int conf; - - conf = read_c0_config(); - /* flush and disable */ - if (txx9_ic_disable) { - conf &= ~TX39_CONF_ICE; - write_c0_config(conf); - } - if (txx9_dc_disable) { - early_flush_dcache(); - conf &= ~TX39_CONF_DCE; - write_c0_config(conf); - } - - /* enable cache */ - conf = read_c0_config(); - if 
(!txx9_ic_disable) - conf |= TX39_CONF_ICE; - if (!txx9_dc_disable) - conf |= TX39_CONF_DCE; - write_c0_config(conf); - - if (!(conf & TX39_CONF_ICE)) - pr_info("TX39XX I-Cache disabled.\n"); - if (!(conf & TX39_CONF_DCE)) - pr_info("TX39XX D-Cache disabled.\n"); -} #else static inline void txx9_cache_fixup(void) { @@ -302,9 +250,6 @@ static void __init select_board(void) } /* select "default" board */ -#ifdef CONFIG_TOSHIBA_JMR3927 - txx9_board_vec = &jmr3927_vec; -#endif #ifdef CONFIG_CPU_TX49XX switch (TX4938_REV_PCODE()) { #ifdef CONFIG_TOSHIBA_RBTX4927 diff --git a/arch/mips/txx9/generic/setup_tx3927.c b/arch/mips/txx9/generic/setup_tx3927.c deleted file mode 100644 index 33f7a7253963..000000000000 --- a/arch/mips/txx9/generic/setup_tx3927.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * TX3927 setup routines - * Based on linux/arch/mips/txx9/jmr3927/setup.c - * - * Copyright 2001 MontaVista Software Inc. - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -void __init tx3927_wdt_init(void) -{ - txx9_wdt_init(TX3927_TMR_REG(2)); -} - -void __init tx3927_setup(void) -{ - int i; - unsigned int conf; - - txx9_reg_res_init(TX3927_REV_PCODE(), TX3927_REG_BASE, - TX3927_REG_SIZE); - - /* SDRAMC,ROMC are configured by PROM */ - for (i = 0; i < 8; i++) { - if (!(tx3927_romcptr->cr[i] & 0x8)) - continue; /* disabled */ - txx9_ce_res[i].start = (unsigned long)TX3927_ROMC_BA(i); - txx9_ce_res[i].end = - txx9_ce_res[i].start + TX3927_ROMC_SIZE(i) - 1; - request_resource(&iomem_resource, &txx9_ce_res[i]); - } - - /* clocks */ - txx9_gbus_clock = txx9_cpu_clock / 2; - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - /* CCFG */ - /* enable Timeout BusError */ - if (txx9_ccfg_toeon) - tx3927_ccfgptr->ccfg |= TX3927_CCFG_TOE; - - /* clear BusErrorOnWrite flag */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_BEOW; - if (read_c0_conf() & TX39_CONF_WBON) - /* Disable PCI snoop */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_PSNP; - else - /* Enable PCI SNOOP - with write through only */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_PSNP; - /* do reset on watchdog */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_WR; - - pr_info("TX3927 -- CRIR:%08lx CCFG:%08lx PCFG:%08lx\n", - tx3927_ccfgptr->crir, tx3927_ccfgptr->ccfg, - tx3927_ccfgptr->pcfg); - - /* TMR */ - for (i = 0; i < TX3927_NR_TMR; i++) - txx9_tmr_init(TX3927_TMR_REG(i)); - - /* DMA */ - tx3927_dmaptr->mcr = 0; - for (i = 0; i < ARRAY_SIZE(tx3927_dmaptr->ch); i++) { - /* reset channel */ - tx3927_dmaptr->ch[i].ccr = TX3927_DMA_CCR_CHRST; - tx3927_dmaptr->ch[i].ccr = 0; - } - /* enable DMA */ -#ifdef __BIG_ENDIAN - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN; -#else - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN | TX3927_DMA_MCR_LE; -#endif - - /* PIO */ - __raw_writel(0, &tx3927_pioptr->maskcpu); - __raw_writel(0, 
&tx3927_pioptr->maskext); - - conf = read_c0_conf(); - if (conf & TX39_CONF_DCE) { - if (!(conf & TX39_CONF_WBON)) - pr_info("TX3927 D-Cache WriteThrough.\n"); - else if (!(conf & TX39_CONF_CWFON)) - pr_info("TX3927 D-Cache WriteBack.\n"); - else - pr_info("TX3927 D-Cache WriteBack (CWF) .\n"); - } -} - -void __init tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr) -{ - txx9_clockevent_init(TX3927_TMR_REG(evt_tmrnr), - TXX9_IRQ_BASE + TX3927_IR_TMR(evt_tmrnr), - TXX9_IMCLK); - txx9_clocksource_init(TX3927_TMR_REG(src_tmrnr), TXX9_IMCLK); -} - -void __init tx3927_sio_init(unsigned int sclk, unsigned int cts_mask) -{ - int i; - - for (i = 0; i < 2; i++) - txx9_sio_init(TX3927_SIO_REG(i), - TXX9_IRQ_BASE + TX3927_IR_SIO(i), - i, sclk, (1 << i) & cts_mask); -} - -void __init tx3927_mtd_init(int ch) -{ - struct physmap_flash_data pdata = { - .width = TX3927_ROMC_WIDTH(ch) / 8, - }; - unsigned long start = txx9_ce_res[ch].start; - unsigned long size = txx9_ce_res[ch].end - start + 1; - - if (!(tx3927_romcptr->cr[ch] & 0x8)) - return; /* disabled */ - txx9_physmap_flash_init(ch, start, size, &pdata); -} diff --git a/arch/mips/txx9/jmr3927/Makefile b/arch/mips/txx9/jmr3927/Makefile deleted file mode 100644 index 4bda0615d27e..000000000000 --- a/arch/mips/txx9/jmr3927/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for TOSHIBA JMR-TX3927 board -# - -obj-y += prom.o irq.o setup.o diff --git a/arch/mips/txx9/jmr3927/irq.c b/arch/mips/txx9/jmr3927/irq.c deleted file mode 100644 index c22c859a2c49..000000000000 --- a/arch/mips/txx9/jmr3927/irq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2000-2001 Toshiba Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include - -#include -#include -#include -#include - -#if JMR3927_IRQ_END > NR_IRQS -#error JMR3927_IRQ_END > NR_IRQS -#endif - -/* - * CP0_STATUS is a thread's resource (saved/restored on context switch). - * So disable_irq/enable_irq MUST handle IOC/IRC registers. 
- */ -static void mask_irq_ioc(struct irq_data *d) -{ - /* 0: mask */ - unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC; - unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR); - unsigned int bit = 1 << irq_nr; - jmr3927_ioc_reg_out(imask & ~bit, JMR3927_IOC_INTM_ADDR); - /* flush write buffer */ - (void)jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR); -} -static void unmask_irq_ioc(struct irq_data *d) -{ - /* 0: mask */ - unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC; - unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR); - unsigned int bit = 1 << irq_nr; - jmr3927_ioc_reg_out(imask | bit, JMR3927_IOC_INTM_ADDR); - /* flush write buffer */ - (void)jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR); -} - -static int jmr3927_ioc_irqroute(void) -{ - unsigned char istat = jmr3927_ioc_reg_in(JMR3927_IOC_INTS2_ADDR); - int i; - - for (i = 0; i < JMR3927_NR_IRQ_IOC; i++) { - if (istat & (1 << i)) - return JMR3927_IRQ_IOC + i; - } - return -1; -} - -static int jmr3927_irq_dispatch(int pending) -{ - int irq; - - if ((pending & CAUSEF_IP7) == 0) - return -1; - irq = (pending >> CAUSEB_IP2) & 0x0f; - irq += JMR3927_IRQ_IRC; - if (irq == JMR3927_IRQ_IOCINT) - irq = jmr3927_ioc_irqroute(); - return irq; -} - -static struct irq_chip jmr3927_irq_ioc = { - .name = "jmr3927_ioc", - .irq_mask = mask_irq_ioc, - .irq_unmask = unmask_irq_ioc, -}; - -void __init jmr3927_irq_setup(void) -{ - int i; - - txx9_irq_dispatch = jmr3927_irq_dispatch; - /* Now, interrupt control disabled, */ - /* all IRC interrupts are masked, */ - /* all IRC interrupt mode are Low Active. 
*/ - - /* mask all IOC interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_INTM_ADDR); - /* setup IOC interrupt mode (SOFT:High Active, Others:Low Active) */ - jmr3927_ioc_reg_out(JMR3927_IOC_INTF_SOFT, JMR3927_IOC_INTP_ADDR); - - /* clear PCI Soft interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_INTS1_ADDR); - /* clear PCI Reset interrupts */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - - tx3927_irq_init(); - for (i = JMR3927_IRQ_IOC; i < JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC; i++) - irq_set_chip_and_handler(i, &jmr3927_irq_ioc, - handle_level_irq); - - /* setup IOC interrupt 1 (PCI, MODEM) */ - irq_set_chained_handler(JMR3927_IRQ_IOCINT, handle_simple_irq); -} diff --git a/arch/mips/txx9/jmr3927/prom.c b/arch/mips/txx9/jmr3927/prom.c deleted file mode 100644 index 53c68de54d30..000000000000 --- a/arch/mips/txx9/jmr3927/prom.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * PROM library initialisation code, assuming a version of - * pmon is the boot code. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Based on arch/mips/au1000/common/prom.c - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/xx files. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include -#include -#include -#include -#include - -void __init jmr3927_prom_init(void) -{ - /* CCFG */ - if ((tx3927_ccfgptr->ccfg & TX3927_CCFG_TLBOFF) == 0) - pr_err("TX3927 TLB off\n"); - - memblock_add(0, JMR3927_SDRAM_SIZE); - txx9_sio_putchar_init(TX3927_SIO_REG(1)); -} diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c deleted file mode 100644 index 613943886e34..000000000000 --- a/arch/mips/txx9/jmr3927/setup.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com - * - * Copyright (C) 2000-2001 Toshiba Corporation - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void jmr3927_machine_restart(char *command) -{ - local_irq_disable(); -#if 1 /* Resetting PCI bus */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_PCI, JMR3927_IOC_RESET_ADDR); - (void)jmr3927_ioc_reg_in(JMR3927_IOC_RESET_ADDR); /* flush WB */ - mdelay(1); - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); -#endif - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_CPU, JMR3927_IOC_RESET_ADDR); - /* fallback */ - (*_machine_halt)(); -} - -static void __init jmr3927_time_init(void) -{ - tx3927_time_init(0, 1); -} - -#define DO_WRITE_THROUGH - -static void jmr3927_board_init(void); - -static void __init jmr3927_mem_setup(void) -{ - set_io_port_base(JMR3927_PORT_BASE + JMR3927_PCIIO); - - _machine_restart = jmr3927_machine_restart; - - /* cache setup */ - { - unsigned int conf; -#ifdef DO_WRITE_THROUGH - int mips_config_cwfon = 0; - int mips_config_wbon = 0; -#else - int 
mips_config_cwfon = 1; - int mips_config_wbon = 1; -#endif - - conf = read_c0_conf(); - conf &= ~(TX39_CONF_WBON | TX39_CONF_CWFON); - conf |= mips_config_wbon ? TX39_CONF_WBON : 0; - conf |= mips_config_cwfon ? TX39_CONF_CWFON : 0; - - write_c0_conf(conf); - write_c0_cache(0); - } - - /* initialize board */ - jmr3927_board_init(); - - tx3927_sio_init(0, 1 << 1); /* ch1: noCTS */ -} - -static void __init jmr3927_pci_setup(void) -{ -#ifdef CONFIG_PCI - int extarb = !(tx3927_ccfgptr->ccfg & TX3927_CCFG_PCIXARB); - struct pci_controller *c; - - c = txx9_alloc_pci_controller(&txx9_primary_pcic, - JMR3927_PCIMEM, JMR3927_PCIMEM_SIZE, - JMR3927_PCIIO, JMR3927_PCIIO_SIZE); - register_pci_controller(c); - if (!extarb) { - /* Reset PCI Bus */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - udelay(100); - jmr3927_ioc_reg_out(JMR3927_IOC_RESET_PCI, - JMR3927_IOC_RESET_ADDR); - udelay(100); - jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - } - tx3927_pcic_setup(c, JMR3927_SDRAM_SIZE, extarb); - tx3927_setup_pcierr_irq(); -#endif /* CONFIG_PCI */ -} - -static void __init jmr3927_board_init(void) -{ - txx9_cpu_clock = JMR3927_CORECLK; - /* SDRAMC are configured by PROM */ - - /* ROMC */ - tx3927_romcptr->cr[1] = JMR3927_ROMCE1 | 0x00030048; - tx3927_romcptr->cr[2] = JMR3927_ROMCE2 | 0x000064c8; - tx3927_romcptr->cr[3] = JMR3927_ROMCE3 | 0x0003f698; - tx3927_romcptr->cr[5] = JMR3927_ROMCE5 | 0x0000f218; - - /* Pin selection */ - tx3927_ccfgptr->pcfg &= ~TX3927_PCFG_SELALL; - tx3927_ccfgptr->pcfg |= - TX3927_PCFG_SELSIOC(0) | TX3927_PCFG_SELSIO_ALL | - (TX3927_PCFG_SELDMA_ALL & ~TX3927_PCFG_SELDMA(1)); - - tx3927_setup(); - - /* PIO[15:12] connected to LEDs */ - __raw_writel(0x0000f000, &tx3927_pioptr->dir); - - jmr3927_pci_setup(); - - /* SIO0 DTR on */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_DTR_ADDR); - - jmr3927_led_set(0); - - pr_info("JMR-TX3927 (Rev %d) --- IOC(Rev %d) DIPSW:%d,%d,%d,%d\n", - jmr3927_ioc_reg_in(JMR3927_IOC_BREV_ADDR) & JMR3927_REV_MASK, - 
jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR) & JMR3927_REV_MASK, - jmr3927_dipsw1(), jmr3927_dipsw2(), - jmr3927_dipsw3(), jmr3927_dipsw4()); -} - -/* This trick makes rtc-ds1742 driver usable as is. */ -static unsigned long jmr3927_swizzle_addr_b(unsigned long port) -{ - if ((port & 0xffff0000) != JMR3927_IOC_NVRAMB_ADDR) - return port; - port = (port & 0xffff0000) | (port & 0x7fff << 1); -#ifdef __BIG_ENDIAN - return port; -#else - return port | 1; -#endif -} - -static void __init jmr3927_rtc_init(void) -{ - static struct resource __initdata res = { - .start = JMR3927_IOC_NVRAMB_ADDR - IO_BASE, - .end = JMR3927_IOC_NVRAMB_ADDR - IO_BASE + 0x800 - 1, - .flags = IORESOURCE_MEM, - }; - platform_device_register_simple("rtc-ds1742", -1, &res, 1); -} - -static void __init jmr3927_mtd_init(void) -{ - int i; - - for (i = 0; i < 2; i++) - tx3927_mtd_init(i); -} - -static void __init jmr3927_device_init(void) -{ - unsigned long iocled_base = JMR3927_IOC_LED_ADDR - IO_BASE; -#ifdef __LITTLE_ENDIAN - iocled_base |= 1; -#endif - __swizzle_addr_b = jmr3927_swizzle_addr_b; - jmr3927_rtc_init(); - tx3927_wdt_init(); - jmr3927_mtd_init(); - txx9_iocled_init(iocled_base, -1, 8, 1, "green", NULL); -} - -static void __init jmr3927_arch_init(void) -{ - txx9_gpio_init(TX3927_PIO_REG, 0, 16); - - gpio_request(11, "dipsw1"); - gpio_request(10, "dipsw2"); -} - -struct txx9_board_vec jmr3927_vec __initdata = { - .system = "Toshiba JMR_TX3927", - .prom_init = jmr3927_prom_init, - .mem_setup = jmr3927_mem_setup, - .irq_setup = jmr3927_irq_setup, - .time_init = jmr3927_time_init, - .device_init = jmr3927_device_init, - .arch_init = jmr3927_arch_init, -#ifdef CONFIG_PCI - .pci_map_irq = jmr3927_pci_map_irq, -#endif -}; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d65f55f67e19..f72658b3a53f 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. 
+# Sanitizer runtimes are unavailable and cannot be linked here. + KCSAN_SANITIZE := n + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 00cb889bd9a6..90fc95bd55ca 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -72,6 +72,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select TRACE_IRQFLAGS_SUPPORT + select HAVE_FUNCTION_DESCRIPTORS if 64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index bb52aea0cb21..33df42b5cc6d 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -2,20 +2,14 @@ #ifndef _PARISC_SECTIONS_H #define _PARISC_SECTIONS_H +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#include +typedef Elf64_Fdesc func_desc_t; +#endif + /* nothing to see, move along */ #include extern char __alt_instructions[], __alt_instructions_end[]; -#ifdef CONFIG_64BIT - -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -void *dereference_function_descriptor(void *); - -#undef dereference_kernel_function_descriptor -void *dereference_kernel_function_descriptor(void *); -#endif - #endif diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea3d83b6fb62..2030c77592d3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,27 +263,6 @@ __get_wchan(struct task_struct *p) return 0; } -#ifdef CONFIG_64BIT -void *dereference_function_descriptor(void *ptr) -{ - Elf64_Fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || - ptr >= (void *)__end_opd) - return 
ptr; - - return dereference_function_descriptor(ptr); -} -#endif - static inline unsigned long brk_rnd(void) { return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7e7387bd7d53..174edabb74fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -9,6 +9,10 @@ config 64BIT bool default y if PPC64 +config LIVEPATCH_64 + def_bool PPC64 + depends on LIVEPATCH + config MMU bool default y @@ -132,7 +136,7 @@ config PPC select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE - select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -198,11 +202,13 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_DESCRIPTORS if PPC64 && !CPU_LITTLE_ENDIAN select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -222,7 +228,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..eb541e730d3c 100644 --- 
a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -213,7 +213,7 @@ CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN CHECKFLAGS += -D__BIG_ENDIAN__ else -CHECKFLAGS += -D__LITTLE_ENDIAN__ +CHECKFLAGS += -D__LITTLE_ENDIAN__ -D_CALL_ELF=2 endif ifdef CONFIG_476FPE_ERR46 @@ -421,9 +421,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h) $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h) endif archprepare: checkbin diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1eee61b82341..a4716d138cfc 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -16,6 +16,7 @@ kernel-vmlinux.strip.c kernel-vmlinux.strip.gz mktree otheros.bld +otheros-too-big.bld uImage cuImage.* dtbImage.* diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 000000000000..73f8c998c64d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. 
+ * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f5613..b6733e7e6580 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c0..bfe1ed5be337 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts index 840ea84bbb59..74b346f2d43c 100644 --- a/arch/powerpc/boot/dts/xpedite5200.dts +++ b/arch/powerpc/boot/dts/xpedite5200.dts @@ -132,7 +132,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git 
a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts index 449fc1b5dc23..d491c7a8f979 100644 --- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts +++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts @@ -136,7 +136,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2b736d9fbb1b..2bc53c646ccd 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -21,6 +21,7 @@ #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) +#define PPC_SRL stringify_in_c(srd) #define PPC_LR_STKOFF 16 #define PPC_MIN_STKFRM 112 @@ -54,6 +55,7 @@ #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) +#define PPC_SRL stringify_in_c(srw) #define PPC_LR_STKOFF 4 #define PPC_MIN_STKFRM 16 diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 41b8a1e1144a..d995c65d18ab 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -19,22 +19,6 @@ #include -/* SMP */ -extern struct task_struct *current_set[NR_CPUS]; -extern struct task_struct *secondary_current; -void start_secondary(void *unused); - -/* kexec */ -struct paca_struct; -struct kimage; -extern struct paca_struct kexec_paca; -void kexec_copy_flush(struct kimage *image); - -/* pseries hcall tracing */ -extern struct static_key hcall_tracepoint_key; -void __trace_hcall_entry(unsigned long opcode, unsigned long *args); -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); - /* Ultravisor */ #if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM) long ucall_norets(unsigned long opcode, ...); @@ -50,49 +34,12 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, 
int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* VMX copying */ -int enter_vmx_usercopy(void); -int exit_vmx_usercopy(void); -int enter_vmx_ops(void); -void *exit_vmx_ops(void *dest); - -/* signals, syscalls and interrupts */ -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size); -#ifdef CONFIG_PPC32 -long sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg); -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, - struct __kernel_old_timeval __user *tvp); -unsigned long __init early_init(unsigned long dt_ptr); -void __init machine_init(u64 dt_ptr); -#endif -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); -#ifdef CONFIG_PPC64 -unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); -unsigned long interrupt_exit_user_restart(struct pt_regs *regs); -unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); -#endif - -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low); -long sys_switch_endian(void); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, unsigned long r6, unsigned long r7, unsigned long kbase); -/* setup */ -void __init early_setup(unsigned long dt_ptr); -void early_setup_secondary(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); @@ -103,11 +50,6 @@ extern int __ucmpdi2(u64, u64); /* tracing */ void _mcount(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long 
sp); - -void pnv_power9_force_smt4_catch(void); -void pnv_power9_force_smt4_release(void); /* Transaction memory related */ void tm_enable(void); diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ea5d27dda8cf..344fba3b16eb 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -287,7 +287,7 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) * fls: find last (most-significant) bit set. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int fls(unsigned int x) +static __always_inline int fls(unsigned int x) { int lz; @@ -305,7 +305,7 @@ static inline int fls(unsigned int x) * 32-bit fls calls. */ #ifdef CONFIG_PPC64 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { int lz; diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 95e06f2a8e23..40041ac713d9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,28 +298,35 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p unsigned long clr, unsigned long set, int huge) { pte_basic_t old; - unsigned long tmp; - __asm__ __volatile__( + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long tmp; + + asm volatile( #ifndef CONFIG_PTE_64BIT -"1: lwarx %0, 0, %3\n" -" andc %1, %0, %4\n" + "1: lwarx %0, 0, %3\n" + " andc %1, %0, %4\n" #else -"1: lwarx %L0, 0, %3\n" -" lwz %0, -4(%3)\n" -" andc %1, %L0, %4\n" + "1: lwarx %L0, 0, %3\n" + " lwz %0, -4(%3)\n" + " andc %1, %L0, %4\n" #endif -" or %1, %1, %5\n" -" stwcx. %1, 0, %3\n" -" bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) + " or %1, %1, %5\n" + " stwcx. 
%1, 0, %3\n" + " bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) #ifndef CONFIG_PTE_64BIT - : "r" (p), + : "r" (p), #else - : "b" ((unsigned long)(p) + 4), + : "b" ((unsigned long)(p) + 4), #endif - "r" (clr), "r" (set), "m" (*p) - : "cc" ); + "r" (clr), "r" (set), "m" (*p) + : "cc" ); + } else { + old = pte_val(*p); + + *p = __pte((old & ~(pte_basic_t)clr) | set); + } return old; } diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 69fcf63eec94..54cf46808157 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -328,7 +328,7 @@ static inline unsigned long get_kuap(void) return mfspr(SPRN_AMR); } -static inline void set_kuap(unsigned long value) +static __always_inline void set_kuap(unsigned long value) { if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; @@ -398,7 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 02c08d1492f8..ecbae1832de3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -11,7 +11,7 @@ #ifdef __ASSEMBLY__ #include #ifdef CONFIG_DEBUG_BUGVERBOSE -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags @@ -22,7 +22,7 @@ .previous .endm #else -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b .short \flags @@ -33,7 +33,14 @@ .macro EMIT_WARN_ENTRY addr,file,line,flags EX_TABLE(\addr,\addr+4) - EMIT_BUG_ENTRY \addr,\file,\line,\flags + __EMIT_BUG_ENTRY 
\addr,\file,\line,\flags +.endm + +.macro EMIT_BUG_ENTRY addr,file,line,flags + .if \flags & 1 /* BUGFLAG_WARNING */ + .err /* Use EMIT_WARN_ENTRY for warnings */ + .endif + __EMIT_BUG_ENTRY \addr,\file,\line,\flags .endm #else /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index e26080539c31..409483b2d0ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -118,7 +118,7 @@ static inline unsigned long ppc_function_entry(void *func) * function's descriptor. The first entry in the descriptor is the * address of the function text. */ - return ((func_descr_t *)func)->entry; + return ((struct func_desc *)func)->addr; #else return (unsigned long)func; #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index b8425e3cfd81..971589a21bc0 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -176,4 +176,10 @@ do { \ /* Relocate the kernel image to @final_address */ void relocate(unsigned long final_address); +struct func_desc { + unsigned long addr; + unsigned long toc; + unsigned long env; +}; + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c99ba08a408d..cdf3c6df5123 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -65,7 +65,7 @@ * but the gcc inline assembly syntax does not allow us to specify registers * on the clobber list that are also on the input/output list. Therefore, * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. + * parameters ("+r" and "=r") passed to the hypercall. * * Each assembly block should use one of the HCALL_CLOBBERSx macros. 
As a * general rule, 'x' is the number of parameters passed to the assembly diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 9b702d2b80fb..8dddd34b8ecf 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -54,6 +54,7 @@ #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) +#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) #ifndef __ASSEMBLY__ @@ -74,7 +75,8 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | + FW_FEATURE_ENERGY_SCALE_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index debe8c4f7062..d83758acd1c7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,44 +10,7 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR -#ifdef __ASSEMBLY__ - -/* Based off of objdump output from glibc */ - -#define MCOUNT_SAVE_FRAME \ - stwu r1,-48(r1); \ - stw r3, 12(r1); \ - stw r4, 16(r1); \ - stw r5, 20(r1); \ - stw r6, 24(r1); \ - mflr r3; \ - lwz r4, 52(r1); \ - mfcr r5; \ - stw r7, 28(r1); \ - stw r8, 32(r1); \ - stw r9, 36(r1); \ - stw r10,40(r1); \ - stw r3, 44(r1); \ - stw r5, 8(r1) - -#define MCOUNT_RESTORE_FRAME \ - lwz r6, 8(r1); \ - lwz r0, 44(r1); \ - lwz r3, 12(r1); \ - mtctr r0; \ - lwz r4, 16(r1); \ - mtcr r6; \ - lwz r5, 20(r1); \ - lwz r6, 24(r1); \ - lwz r0, 52(r1); \ - lwz r7, 28(r1); \ - lwz r8, 32(r1); \ - mtlr r0; \ - lwz r9, 36(r1); \ - lwz r10,40(r1); \ - addi r1, r1, 48 - -#else /* !__ASSEMBLY__ */ 
+#ifndef __ASSEMBLY__ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -56,9 +19,36 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp); + struct dyn_arch_ftrace { struct module *mod; }; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* We clear regs.msr in ftrace_call */ + return fregs->regs.msr ? &fregs->regs : NULL; +} + +static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, + unsigned long ip) +{ + regs_set_return_ip(&fregs->regs, ip); +} + +struct ftrace_ops; + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 962708fa1017..6a1a1ac5743b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void __init hugetlbpage_init_default(void); +void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); @@ -76,6 +76,9 @@ static inline void __init gigantic_hugetlb_cma_reserve(void) { } +static inline void __init hugetlbpage_init_defaultsize(void) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9bcf345cb208..d92a20a85395 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -323,7 +323,8 @@ #define H_SCM_PERFORMANCE_STATS 0x418 #define 
H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C -#define MAX_HCALL_OPCODE H_SCM_FLUSH +#define H_GET_ENERGY_SCALE_INFO 0x450 +#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -500,6 +501,11 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +/* pseries hcall tracing */ +extern struct static_key hcall_tracepoint_key; +void __trace_hcall_entry(unsigned long opcode, unsigned long *args); +void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); + struct hvcall_mpp_data { unsigned long entitled_mem; unsigned long mapped_mem; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index fc28f46d2f9d..f964ef5c57d8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -123,9 +123,6 @@ static inline void nap_adjust_return(struct pt_regs *regs) #endif } -struct interrupt_state { -}; - static inline void booke_restore_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -138,7 +135,7 @@ static inline void booke_restore_dbcr0(void) #endif } -static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) @@ -228,17 +225,17 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * However interrupt_nmi_exit_prepare does return directly to regs, because * NMIs do not do "exit work" or replay soft-masked interrupts. 
*/ -static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_exit_prepare(struct pt_regs *regs) { } -static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC64 /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; #endif - interrupt_enter_prepare(regs, state); + interrupt_enter_prepare(regs); #ifdef CONFIG_PPC_BOOK3S_64 /* * RI=1 is set by interrupt_enter_prepare, so this thread flags access @@ -251,7 +248,7 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in irq_enter(); } -static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_exit_prepare(struct pt_regs *regs) { /* * Adjust at exit so the main handler sees the true NIA. 
This must @@ -262,7 +259,7 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int nap_adjust_return(regs); irq_exit(); - interrupt_exit_prepare(regs, state); + interrupt_exit_prepare(regs); } struct interrupt_nmi_state { @@ -447,13 +444,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -482,14 +477,13 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ interrupt_handler long func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ long ret; \ \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ret = ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ \ return ret; \ } \ @@ -518,13 +512,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_async_enter_prepare(regs, &state); \ + interrupt_async_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_async_exit_prepare(regs, &state); \ + interrupt_async_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -612,7 +604,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ -DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); +DECLARE_INTERRUPT_HANDLER(do_hash_fault); /* fault.c */ DECLARE_INTERRUPT_HANDLER(do_page_fault); @@ -644,6 +636,17 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs); +notrace unsigned 
long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff9..fee979d3a1aa 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void 
__iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 8ebdd23d987c..2aefe14e1442 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -96,6 +96,8 @@ static inline bool kdump_in_progress(void) void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; +void kexec_copy_flush(struct kimage *image); + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9bf60bf0816..faf301d0dec0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ 
b/arch/powerpc/include/asm/kvm_host.h @@ -26,6 +26,8 @@ #include #include +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -295,7 +297,6 @@ struct kvm_arch { bool dawr1_enabled; pgd_t *pgtable; u64 process_table; - struct dentry *debugfs_dir; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -673,7 +674,6 @@ struct kvm_vcpu_arch { u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_last_exit; - struct dentry *debugfs_exit_timing; #endif #ifdef CONFIG_PPC_BOOK3S @@ -831,8 +831,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ - - struct dentry *debugfs_dir; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a14dbcd1b8ce..c583d0c37f31 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,8 @@ struct kvmppc_ops { int (*svm_off)(struct kvm *kvm); int (*enable_dawr1)(struct kvm *kvm); bool (*hash_v3_possible)(void); + int (*create_vm_debugfs)(struct kvm *kvm); + int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..6f10de6af6e3 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -14,21 +14,21 @@ #ifdef CONFIG_LIVEPATCH static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs_set_return_ip(regs, ip); + ftrace_instruction_pointer_set(fregs, ip); } 
#define klp_get_ftrace_location klp_get_ftrace_location static inline unsigned long klp_get_ftrace_location(unsigned long faddr) { /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. + * Live patch works on PPC32 and only with -mprofile-kernel on PPC64. In + * both cases, the ftrace location is always within the first 16 bytes. */ return ftrace_location_range(faddr, faddr + 16); } +#endif /* CONFIG_LIVEPATCH */ +#ifdef CONFIG_LIVEPATCH_64 static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ @@ -36,6 +36,6 @@ static inline void klp_init_thread_info(struct task_struct *p) } #else static inline void klp_init_thread_info(struct task_struct *p) { } -#endif /* CONFIG_LIVEPATCH */ +#endif #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e821037f74f0..358d171ae8e0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,11 +10,6 @@ #include -/* We export this macro for external modules like Alsa to know if - * ppc_md.feature_call is implemented or not - */ -#define CONFIG_PPC_HAS_FEATURE_CALLS - struct pt_regs; struct pci_bus; struct device_node; @@ -99,6 +94,8 @@ struct machdep_calls { /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); + void (*machine_check_log_err)(void); + /* Motherboard/chipset features. This is a kind of general purpose * hook used to control some machine specific features (like reset * lines, chip power control, etc...). @@ -235,21 +232,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -#ifdef CONFIG_PPC_PMAC -/* - * Power macintoshes have either a CUDA, PMU or SMU controlling - * system reset, power, NVRAM, RTC. 
- */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, - SYS_CTRLER_SMU = 3, -} sys_ctrler_t; -extern sys_ctrler_t sys_ctrler; - -#endif /* CONFIG_PPC_PMAC */ - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 331d944280b8..c9f0936bd3c9 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -235,8 +235,21 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +void mce_irq_work_queue(void); int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_run_irq_context_handlers(void); +#else +static inline void mce_run_irq_context_handlers(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void set_mce_pending_irq_work(void); +void clear_mce_pending_irq_work(void); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); void flush_erat(void); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index dcc9b338e042..9091e4904a6b 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -338,9 +338,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & _PAGE_ACCESSED; } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. 
The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 78888b0c30f6..57083f95e82b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -282,9 +282,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 295573a82c66..8330968ca346 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -288,6 +288,7 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_BOOK3S_64 struct mce_info *mce_info; + u8 mce_pending_irq_work; #endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index e08e829261b6..2495866f2e97 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -401,5 +401,17 @@ extern u32 __iomem *uninorth_base; */ extern int pmac_get_uninorth_variant(void); +/* + * Power macintoshes have either a CUDA, PMU or SMU controlling + * system reset, power, NVRAM, RTC. 
+ */ +typedef enum sys_ctrler_kind { + SYS_CTRLER_UNKNOWN = 0, + SYS_CTRLER_CUDA = 1, + SYS_CTRLER_PMU = 2, + SYS_CTRLER_SMU = 3, +} sys_ctrler_t; +extern sys_ctrler_t sys_ctrler; + #endif /* __ASM_POWERPC_PMAC_FEATURE_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 9675303b724e..82f1f0041c6f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,6 +262,8 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 +#define PPC_INST_PASTE 0x7c20070d +#define PPC_INST_PASTE_MASK 0xfc2007ff #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f21e6bde17a1..4dea2d963738 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -203,12 +203,7 @@ GLUE(.,name): #else /* 32-bit */ -#define _ENTRY(n) \ - .globl n; \ -n: - #define _GLOBAL(n) \ - .stabs __stringify(n:F-1),N_FUN,0,0,n;\ .globl n; \ n: @@ -697,12 +692,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define evr30 30 #define evr31 31 -/* some stab codes */ -#define N_FUN 36 -#define N_RSYM 64 -#define N_SLINE 68 -#define N_SO 100 - #define RFSCV .long 0x4c0000a4 /* diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2c8686d9e964..39c25021030f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -411,6 +411,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); +void pnv_power9_force_smt4_catch(void); +void pnv_power9_force_smt4_release(void); extern int fix_alignment(struct 
pt_regs *); @@ -427,6 +429,12 @@ extern int fix_alignment(struct pt_regs *); int do_mathemu(struct pt_regs *regs); +/* VMX copying */ +int enter_vmx_usercopy(void); +int exit_vmx_usercopy(void); +int enter_vmx_ops(void); +void *exit_vmx_ops(void *dest); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 82e5b055fa2a..00531af17ce0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -274,7 +274,6 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); -extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 38f79e42bf3c..8be2c491c733 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,6 +6,10 @@ #include #include +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +typedef struct func_desc func_desc_t; +#endif + #include extern char __head_end[]; @@ -54,31 +58,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } -#ifdef PPC64_ELF_ABI_v1 - -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct ppc64_opd_entry *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif /* PPC64_ELF_ABI_v1 */ - #endif 
#endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f7920..7ebc807aa8cc 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..049ca26893e6 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -76,6 +76,13 @@ static inline void setup_spectre_v2(void) {} #endif void __init do_btb_flush_fixups(void); +#ifdef CONFIG_PPC32 +unsigned long __init early_init(unsigned long dt_ptr); +void __init machine_init(u64 dt_ptr); +#endif +void __init early_setup(unsigned long dt_ptr); +void early_setup_secondary(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 007332a4a732..60ab739a5e3b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -60,6 +60,9 @@ struct smp_ops_t { #endif }; +extern struct task_struct *secondary_current; + +void start_secondary(void *unused); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int 
smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 8a2d11ba0dae..96ad4510c895 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -249,8 +249,8 @@ void unregister_spu_syscalls(struct spufs_calls *calls); int spu_add_dev_attr(struct device_attribute *attr); void spu_remove_dev_attr(struct device_attribute *attr); -int spu_add_dev_attr_group(struct attribute_group *attrs); -void spu_remove_dev_attr_group(struct attribute_group *attrs); +int spu_add_dev_attr_group(const struct attribute_group *attrs); +void spu_remove_dev_attr_group(const struct attribute_group *attrs); extern void notify_spus_active(void); extern void do_notify_spus_active(void); diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 7ee66ae5444d..a2b13e55254f 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -18,6 +18,10 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, unsigned long fd, unsigned long pgoff); asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct __kernel_old_timeval __user *tvp); +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); #ifdef CONFIG_COMPAT unsigned long compat_sys_mmap2(unsigned long addr, size_t len, diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d6e649b3c70b..125328d1b980 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { unsigned int cpu; #endif unsigned long local_flags; /* private flags for thread */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 
unsigned long *livepatch_sp; #endif #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index f1630c553efe..84078c28c1a2 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -13,9 +13,9 @@ #ifdef __powerpc64__ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define PPC64_ELF_ABI_v2 +#define PPC64_ELF_ABI_v2 1 #else -#define PPC64_ELF_ABI_v1 +#define PPC64_ELF_ABI_v1 1 #endif #endif /* __powerpc64__ */ @@ -23,12 +23,6 @@ typedef __vector128 vector128; -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_TYPES_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2e83217f52de..9b82b38ff867 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -116,8 +116,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e09..83afcb6c194b 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,16 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL 2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * VAS window Linux status bits + */ +#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ + /* vas mmap() */ +/* Window is closed in the hypervisor due to lost credit */ +#define VAS_WIN_NO_CRED_CLOSE 0x00000001 +/* Window is closed due to migration */ +#define VAS_WIN_MIGRATE_CLOSE 0x00000002 + /* * Get/Set bit fields */ @@ -59,6 +69,9 @@ struct vas_user_win_ref { struct pid *pid; /* PID of owner */ struct pid *tgid; /* Thread group ID 
of owner */ struct mm_struct *mm; /* Linux process mm_struct */ + struct mutex mmap_mutex; /* protects paste address mmap() */ + /* with DLPAR close/open windows */ + struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */ }; /* @@ -67,6 +80,7 @@ struct vas_user_win_ref { struct vas_window { u32 winid; u32 wcreds_max; /* Window credits */ + u32 status; /* Window status used in OS */ enum vas_cop_type cop; struct vas_user_win_ref task_ref; char *dbgname; diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 1faff0be1111..f0a4cf01e85c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,74 +2,9 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#ifndef __ASSEMBLY__ + #include - -#ifdef __ASSEMBLY__ - -#include - -/* - * The macros sets two stack frames, one for the caller and one for the callee - * because there are no requirement for the caller to set a stack frame when - * calling VDSO so it may have omitted to set one, especially on PPC64 - */ - -.macro cvdso_call funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r5 - addi r5, r5, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - cmpwi r3, 0 - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - crclr so - beqlr+ - crset so - neg r3, r3 - blr - .cfi_endproc -.endm - -.macro cvdso_call_time funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) 
-#endif - get_datapage r4 - addi r4, r4, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - crclr so - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - blr - .cfi_endproc -.endm - -#else - #include #include #include diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 860c59291bfc..308857123a08 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; /* Keep this the last entry. */ #define R_PPC64_NUM 253 -/* There's actually a third entry here, but it's unused */ -struct ppc64_opd_entry -{ - unsigned long funcaddr; - unsigned long r2; -}; - - #endif /* _UAPI_ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 82488b1e7276..17439925045c 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -116,6 +116,22 @@ struct nd_papr_pdsm_health { }; }; +/* Flags for injecting specific smart errors */ +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) + +struct nd_papr_pdsm_smart_inject { + union { + struct { + /* One or more of PDSM_SMART_INJECT_ */ + __u32 flags; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. 
These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct @@ -123,12 +139,14 @@ struct nd_papr_pdsm_health { enum papr_pdsm { PAPR_PDSM_MIN = 0x0, PAPR_PDSM_HEALTH, + PAPR_PDSM_SMART_INJECT, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { struct nd_papr_pdsm_health health; + struct nd_papr_pdsm_smart_inject smart_inject; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4d7829399570..4ddd161aef32 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -194,8 +194,8 @@ targets += prom_init_check clean-files := vmlinux.lds # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg -$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg +$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg +$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg # for cleaning -subdir- += vdso32 vdso64 +subdir- += vdso diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7582f3e3a330..eec536aef83a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) OFFSET(TASK_CPU, task_struct, thread_info.cpu); #endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index ef2ad4945904..03f1135ef64f 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -8,7 +8,6 @@ #include #include #include -#include /* * We're called here very early in the boot. 
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 4fdb7c77fda1..65562c4a0a69 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1643,9 +1643,11 @@ int __init setup_fadump(void) if (fw_dump.ops->fadump_process(&fw_dump) < 0) fadump_invalidate_release_mem(); } - /* Initialize the kernel dump memory structure for FAD registration. */ - else if (fw_dump.reserve_dump_area_size) + /* Initialize the kernel dump memory structure and register with f/w */ + else if (fw_dump.reserve_dump_area_size) { fw_dump.ops->fadump_init_mem_struct(&fw_dump); + register_fadump(); + } /* * In case of panic, fadump is triggered via ppc_panic_event() @@ -1657,7 +1659,12 @@ int __init setup_fadump(void) return 1; } -subsys_initcall(setup_fadump); +/* + * Use subsys_initcall_sync() here because there is dependency with + * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization + * is done before registering with f/w. + */ +subsys_initcall_sync(setup_fadump); #else /* !CONFIG_PRESERVE_FA_DUMP */ /* Scan the Firmware Assisted dump configuration details. */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b6c6d1de5fd5..088f500896c7 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -53,8 +53,8 @@ * This is all going to change RSN when we add bi_recs....... -- Dan */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); mr r31,r3 /* save device tree ptr */ @@ -82,19 +82,19 @@ turn_on_mmu: */ .
= 0xc0 crit_save: -_ENTRY(crit_r10) +_GLOBAL(crit_r10) .space 4 -_ENTRY(crit_r11) +_GLOBAL(crit_r11) .space 4 -_ENTRY(crit_srr0) +_GLOBAL(crit_srr0) .space 4 -_ENTRY(crit_srr1) +_GLOBAL(crit_srr1) .space 4 -_ENTRY(crit_r1) +_GLOBAL(crit_r1) .space 4 -_ENTRY(crit_dear) +_GLOBAL(crit_dear) .space 4 -_ENTRY(crit_esr) +_GLOBAL(crit_esr) .space 4 /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b73a56466903..f15cb9fdb692 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -52,8 +52,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0d073b9fd52c..0b05f2be66b9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -53,8 +53,8 @@ #define PAGE_SHIFT_8M 23 __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* MPC8xx * This port was done on an MBX board with an 860. Right now I only diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index b876ef8c70a7..6c739beb938c 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,16 +50,13 @@ mtspr SPRN_DBAT##n##L,RB __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_book3s_32.S",N_SO,0,0,0f -0: -_ENTRY(_stext); +_GLOBAL(_stext); /* * _start is defined this way because the XCOFF loader in the OpenFirmware * on the powermac expects the entry point to be a procedure descriptor. */ -_ENTRY(_start); +_GLOBAL(_start); /* * These are here for legacy reasons, the kernel used to * need to look like a coff function entry for the pmac @@ -504,14 +501,12 @@ DataLoadTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. 
r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION @@ -586,10 +581,6 @@ DataStoreTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ @@ -784,7 +775,7 @@ relocate_kernel: * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. 
*/ -_ENTRY(copy_and_flush) +_GLOBAL(copy_and_flush) addi r5,r5,-4 addi r6,r6,-4 4: li r0,L1_CACHE_BYTES/4 @@ -1082,7 +1073,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -_ENTRY(update_bats) +_GLOBAL(update_bats) lis r4, 1f@h ori r4, r4, 1f@l tophys(r4, r4) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ac2b4dcf5fd3..f0db4f52bc00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -54,8 +54,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs @@ -154,7 +154,7 @@ _ENTRY(_start); * if needed */ -_ENTRY(__early_start) +_GLOBAL(__early_start) LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) lwz r20,0(r20) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7cd6ce3ec423..784ea3289c84 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -5,7 +5,6 @@ #include #include /* for show_regs */ -#include #include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2cf31a97126c..752fb182eacb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 2503dd4713b9..18173199b79d 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -24,23 +24,12 @@ #include #include #include -#include #include "setup.h" -static void machine_check_process_queued_event(struct irq_work *work); -static void machine_check_ue_irq_work(struct irq_work *work); static void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); -static struct irq_work mce_event_process_work = { - .func = machine_check_process_queued_event, -}; - 
-static struct irq_work mce_ue_event_irq_work = { - .func = machine_check_ue_irq_work, -}; - static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -89,6 +78,13 @@ static void mce_set_error_info(struct machine_check_event *mce, } } +void mce_irq_work_queue(void) +{ + /* Raise decrementer interrupt */ + arch_irq_work_raise(); + set_mce_pending_irq_work(); +} + /* * Decode and save high level MCE information into per cpu buffer which * is an array of machine_check_event structure. @@ -217,7 +213,7 @@ void release_mce_event(void) get_mce_event(NULL, true); } -static void machine_check_ue_irq_work(struct irq_work *work) +static void machine_check_ue_work(void) { schedule_work(&mce_ue_event_work); } @@ -239,7 +235,7 @@ static void machine_check_ue_event(struct machine_check_event *evt) evt, sizeof(*evt)); /* Queue work to process this event later. */ - irq_work_queue(&mce_ue_event_irq_work); + mce_irq_work_queue(); } /* @@ -249,7 +245,6 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; - unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -263,20 +258,7 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* - * Queue irq work to process this event later. Before - * queuing the work enable translation for non radix LPAR, - * as irq_work_queue may try to access memory outside RMO - * region. - */ - if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - irq_work_queue(&mce_event_process_work); - mtmsr(msr); - } else { - irq_work_queue(&mce_event_process_work); - } + mce_irq_work_queue(); } void mce_common_process_ue(struct pt_regs *regs, @@ -338,7 +320,7 @@ static void machine_process_ue_event(struct work_struct *work) * process pending MCE event from the mce event queue. This function will be * called during syscall exit. 
*/ -static void machine_check_process_queued_event(struct irq_work *work) +static void machine_check_process_queued_event(void) { int index; struct machine_check_event *evt; @@ -363,6 +345,27 @@ static void machine_check_process_queued_event(struct irq_work *work) } } +void set_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 1; +} + +void clear_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 0; +} + +void mce_run_irq_context_handlers(void) +{ + if (unlikely(local_paca->mce_pending_irq_work)) { + if (ppc_md.machine_check_log_err) + ppc_md.machine_check_log_err(); + machine_check_process_queued_event(); + machine_check_ue_work(); + clear_mce_pending_irq_work(); + } +} + void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { @@ -401,14 +404,14 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_ra_types[] = { "Indeterminate", "Instruction fetch (bad)", - "Instruction fetch (foreign)", + "Instruction fetch (foreign/control memory)", "Page table walk ifetch (bad)", - "Page table walk ifetch (foreign)", + "Page table walk ifetch (foreign/control memory)", "Load (bad)", "Store (bad)", "Page table walk Load/Store (bad)", - "Page table walk Load/Store (foreign)", - "Load/Store (foreign)", + "Page table walk Load/Store (foreign/control memory)", + "Load/Store (foreign/control memory)", }; static const char *mc_link_types[] = { "Indeterminate", diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index a491ad481d85..a0432ef46967 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,6 +18,7 @@ #include #include #include +#include /* Count how many different relocations (different symbol, different addend) */ @@ -174,15 +175,25 @@ static uint32_t do_plt_call(void *location, entry++; } - entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); - entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); 
- entry->jump[2] = PPC_RAW_MTCTR(_R12); - entry->jump[3] = PPC_RAW_BCTR(); + if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val))))) + return 0; + if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))))) + return 0; + if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12)))) + return 0; + if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR()))) + return 0; pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } +static int patch_location_16(uint32_t *loc, u16 value) +{ + loc = PTR_ALIGN_DOWN(loc, sizeof(u32)); + return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value)); +} + int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -216,37 +227,42 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_PPC_ADDR16_LO: /* Low half of the symbol */ - *(uint16_t *)location = value; + if (patch_location_16(location, PPC_LO(value))) + return -EFAULT; break; case R_PPC_ADDR16_HI: /* Higher half of the symbol */ - *(uint16_t *)location = (value >> 16); + if (patch_location_16(location, PPC_HI(value))) + return -EFAULT; break; case R_PPC_ADDR16_HA: - /* Sign-adjusted lower 16 bits: PPC ELF ABI says: - (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. - This is the same, only sane. - */ - *(uint16_t *)location = (value + 0x8000) >> 16; + if (patch_location_16(location, PPC_HA(value))) + return -EFAULT; break; case R_PPC_REL24: if ((int)(value - (uint32_t)location) < -0x02000000 - || (int)(value - (uint32_t)location) >= 0x02000000) + || (int)(value - (uint32_t)location) >= 0x02000000) { value = do_plt_call(location, value, sechdrs, module); + if (!value) + return -EFAULT; + } /* Only replace bits 2 through 26 */ pr_debug("REL24 value = %08X. 
location = %08X\n", value, (uint32_t)location); pr_debug("Location before: %08X.\n", *(uint32_t *)location); - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); + + if (patch_instruction(location, ppc_inst(value))) + return -EFAULT; + pr_debug("Location after: %08X.\n", *(uint32_t *)location); pr_debug("ie. jump to %08X+%08X = %08X\n", diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..794720530442 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -32,20 +33,13 @@ #ifdef PPC64_ELF_ABI_v2 -/* An address is simply the address of the function. */ -typedef unsigned long func_desc_t; - static func_desc_t func_desc(unsigned long addr) { - return addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func; + func_desc_t desc = { + .addr = addr, + }; + + return desc; } /* PowerPC64 specific values for the Elf64_Sym st_other field. */ @@ -63,20 +57,9 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #else -/* An address is address of the OPD entry, which contains address of fn. 
*/ -typedef struct ppc64_opd_entry func_desc_t; - static func_desc_t func_desc(unsigned long addr) { - return *(struct ppc64_opd_entry *)addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return func_desc(addr).funcaddr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func.funcaddr; + return *(struct func_desc *)addr; } static unsigned int local_entry_offset(const Elf64_Sym *sym) { @@ -93,6 +76,16 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) } #endif +static unsigned long func_addr(unsigned long addr) +{ + return func_desc(addr).addr; +} + +static unsigned long stub_func_addr(func_desc_t func) +{ + return func.addr; +} + #define STUB_MAGIC 0x73747562 /* stub */ /* Like PPC32, we need little trampolines to do > 24-bit jumps (into @@ -187,7 +180,7 @@ static int relacmp(const void *_x, const void *_y) static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) { - /* One extra reloc so it's always 0-funcaddr terminated */ + /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; unsigned i; @@ -277,6 +270,12 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, return NULL; } +bool module_init_section(const char *name) +{ + /* We don't handle .init for the moment: always return false. 
*/ + return false; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -286,7 +285,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* Find .toc and .stubs sections, symtab and strtab */ for (i = 1; i < hdr->e_shnum; i++) { - char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { @@ -298,10 +296,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); - /* We don't handle .init for the moment: rename to _init */ - while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) - p[0] = '_'; - if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), @@ -428,7 +422,7 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3d30d40a0e9c..86c4f009563d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -352,6 +352,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; + // Pass the boot CPU's hard CPU id back to our caller + *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: @@ -388,9 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - allocate_paca(boot_cpuid); #endif - set_hard_smp_processor_id(found, 
be32_to_cpu(intserv[found_thread])); return 0; } @@ -714,6 +715,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { + u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -790,8 +792,6 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_paca_ptrs(); - DBG("Scanning CPUs ...\n"); dt_cpu_ftrs_scan(); @@ -799,7 +799,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -816,6 +816,11 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + // NB. paca is not installed until later in early_setup() + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index b8be1d6668b5..f15bc78caf71 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -841,7 +841,7 @@ static const struct user_regset_view user_ppc_compat_view = { const struct user_regset_view *task_user_regset_view(struct task_struct *task) { - if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) + if (IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task)) return &user_ppc_compat_view; return &user_ppc_native_view; } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c43f77e2ac31..55742ef1f991 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ 
b/arch/powerpc/kernel/ptrace/ptrace.c @@ -22,7 +22,6 @@ #include #include -#include #include #define CREATE_TRACE_POINTS @@ -445,4 +444,10 @@ void __init pt_regs_check(void) * real registers. */ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); + +#ifdef PPC64_ELF_ABI_v1 + BUILD_BUG_ON(!IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#else + BUILD_BUG_ON(IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#endif } diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 02d4719bf43a..232e4549defe 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -8,8 +8,10 @@ #include RELA = 7 -RELACOUNT = 0x6ffffff9 +RELASZ = 8 +RELAENT = 9 R_PPC64_RELATIVE = 22 +R_PPC64_UADDR64 = 43 /* * r3 = desired final address of kernel @@ -25,29 +27,38 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ + ld r13,(p_sym - 0b)(r12) + add r13,r13,r12 /* r13 has runtime addr of .dynsym */ /* - * Scan the dynamic section for the RELA and RELACOUNT entries. + * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. 
*/ li r7,0 li r8,0 -1: ld r6,0(r11) /* get tag */ +.Ltags: + ld r6,0(r11) /* get tag */ cmpdi r6,0 - beq 4f /* end of list */ + beq .Lend_of_list /* end of list */ cmpdi r6,RELA bne 2f ld r7,8(r11) /* get RELA pointer in r7 */ - b 3f -2: addis r6,r6,(-RELACOUNT)@ha - cmpdi r6,RELACOUNT@l + b 4f +2: cmpdi r6,RELASZ bne 3f - ld r8,8(r11) /* get RELACOUNT value in r8 */ -3: addi r11,r11,16 - b 1b -4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + ld r8,8(r11) /* get RELASZ value in r8 */ + b 4f +3: cmpdi r6,RELAENT + bne 4f + ld r12,8(r11) /* get RELAENT value in r12 */ +4: addi r11,r11,16 + b .Ltags +.Lend_of_list: + cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */ cmpdi cr1,r8,0 - beq 6f - beq cr1,6f + beq .Lout + beq cr1,.Lout + cmpdi r12,0 + beq .Lout /* * Work out linktime address of _stext and hence the @@ -62,23 +73,39 @@ _GLOBAL(relocate) /* * Run through the list of relocations and process the - * R_PPC64_RELATIVE ones. + * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones. */ + divd r8,r8,r12 /* RELASZ / RELAENT */ mtctr r8 -5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */ +.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ cmpdi r0,R_PPC64_RELATIVE - bne 6f + bne .Luaddr64 ld r6,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ + b .Lstore +.Luaddr64: + srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + clrldi r0,r0,32 + cmpdi r0,R_PPC64_UADDR64 + bne .Lnext + ld r6,0(r9) + ld r0,16(r9) + mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ + add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ + ld r14,8(r14) + add r0,r0,r14 +.Lstore: add r0,r0,r3 stdx r0,r7,r6 - addi r9,r9,24 - bdnz 5b - -6: blr +.Lnext: + add r9,r9,r12 + bdnz .Lrels +.Lout: + blr .balign 8 p_dyn: .8byte __dynamic_start - 0b p_rela: .8byte __rela_dyn_start - 0b +p_sym: .8byte __dynamic_symtab - 0b p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 733e6ef36758..1f42aabbbab3 100644 --- a/arch/powerpc/kernel/rtas.c +++ 
b/arch/powerpc/kernel/rtas.c @@ -1313,6 +1313,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e159d4093d98..d96fd14bd7c9 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -747,14 +747,29 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } +static int link_stack_flush_get(void *data, u64 *val) +{ + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, arch_debugfs_dir, NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("link_stack_flush", 0600, + arch_debugfs_dir, NULL, + &fops_link_stack_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083..1ee4640a2641 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; 
+ } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f8da937df918..518ae5aa9410 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -456,8 +456,8 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) { - DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", - nthreads); + DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n", + (len / sizeof(int))); } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); intserv = of_get_property(dn, "reg", &len); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..e547066a06aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,7 +67,6 @@ #include #include #include -#include #include "setup.h" diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d1e1fc0acbea..73d483b07ff3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -936,11 +936,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, * descriptor is the entry address of signal and the second * entry is the TOC value we need to use. 
*/ - func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ksig->ka.sa.sa_handler; + struct func_desc __user *ptr = + (struct func_desc __user *)ksig->ka.sa.sa_handler; - err |= get_user(regs->ctr, &funct_desc_ptr->entry); - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + err |= get_user(regs->ctr, &ptr->addr); + err |= get_user(regs->gpr[2], &ptr->toc); } /* enter the signal handler in native-endian mode */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b7fd6a72aa76..de0f6f09a5dd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include @@ -716,7 +715,7 @@ void smp_send_stop(void) } #endif /* CONFIG_NMI_IPI */ -struct task_struct *current_set[NR_CPUS]; +static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 825931e400df..c4f5b4ce926f 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -35,7 +35,6 @@ #include #include #include -#include static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8e83d19fe8fa..828d0f4106d2 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "setup.h" diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecdd..f5cbfe5efd25 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include /* powerpc clocksource/clockevent code */ @@ -107,7 +107,12 @@ struct clock_event_device decrementer_clockevent = { }; EXPORT_SYMBOL(decrementer_clockevent); -DEFINE_PER_CPU(u64, decrementers_next_tb); +/* + * This always puts next_tb beyond now, so the clock 
event will never fire + * with the usual comparison, no need for a separate test for stopped. + */ +#define DEC_CLOCKEVENT_STOPPED ~0ULL +DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED; EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); @@ -582,8 +587,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); @@ -638,14 +644,13 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) if (test_irq_work_pending()) { clear_irq_work_pending(); + mce_run_irq_context_handlers(); irq_work_run(); } now = get_tb(); if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + evt->event_handler(evt); __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; @@ -664,9 +669,6 @@ EXPORT_SYMBOL(timer_interrupt); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void timer_broadcast_interrupt(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = ~(u64)0; tick_receive_broadcast(); __this_cpu_inc(irq_stat.broadcast_irqs_event); } @@ -892,7 +894,9 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(decrementer_max, dev); + __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED); + set_dec_or_work(decrementer_max); + return 0; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR 
and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. 
EE is still off */ li r4, MSR_RI mtmsrd r4, 1 diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 858503775c58..542aa7a8b2b4 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -8,13 +8,13 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o else obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o endif +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 80b6285769f2..4ee04aacf9f1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -910,30 +909,30 @@ int __init ftrace_dyn_arch_init(void) extern void ftrace_graph_call(void); extern void ftrace_graph_stub(void); -int ftrace_enable_ftrace_graph_caller(void) +static int ftrace_modify_ftrace_graph_caller(bool enable) { unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); ppc_inst_t old, new; - old = ftrace_call_replace(ip, stub, 0); - new = ftrace_call_replace(ip, addr, 0); + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? 
addr : stub, 0); return ftrace_modify_code(ip, old, new); } +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(true); +} + int ftrace_disable_ftrace_graph_caller(void) { - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - old = ftrace_call_replace(ip, addr, 0); - new = ftrace_call_replace(ip, stub, 0); - - return ftrace_modify_code(ip, old, new); + return ftrace_modify_ftrace_graph_caller(false); } /* @@ -944,6 +943,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) { unsigned long return_hooker; + int bit; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -951,13 +951,27 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; + bit = ftrace_test_recursion_trylock(ip, parent); + if (bit < 0) + goto out; + return_hooker = ppc_function_entry(return_to_handler); if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) parent = return_hooker; + + ftrace_test_recursion_unlock(bit); out: return parent; } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + fregs->regs.link = prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); +} +#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef PPC64_ELF_ABI_v1 diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S deleted file mode 100644 index 0a02c0cb12d9..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ /dev/null @@ -1,187 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from entry_32.S - */ - -#include -#include -#include -#include -#include -#include -#include - -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is
required that _mcount on PPC32 must preserve the - * link register. But we have r12 to play with. We use r12 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. - */ - mflr r12 - mtctr r12 - mtlr r0 - bctr -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ - subi r3, r3, MCOUNT_INSN_SIZE - lis r5,function_trace_op@ha - lwz r5,function_trace_op@l(r5) - li r6, 0 -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - MCOUNT_RESTORE_FRAME -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - /* old link register ends up in ctr reg */ - bctr - - -_GLOBAL(ftrace_stub) - blr - -_GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - stw r0,LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stwu r1,-INT_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - stw r0, GPR0(r1) - stmw r2, GPR2(r1) - - /* Save previous stack pointer (r1) */ - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - stw r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - stw r0, _LINK(r1) - - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - stw r8, _MSR(r1) - stw r9, _CTR(r1) - stw r10, _XER(r1) - stw r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - - /* Load ctr 
with the possibly modified NIP */ - lwz r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - lmw r2, GPR2(r1) - - /* Restore possibly modified LR */ - lwz r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, INT_FRAME_SIZE - - b ftrace_caller_common - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stwu r1,-48(r1) - stw r3, 12(r1) - stw r4, 16(r1) - stw r5, 20(r1) - stw r6, 24(r1) - stw r7, 28(r1) - stw r8, 32(r1) - stw r9, 36(r1) - stw r10,40(r1) - - addi r5, r1, 48 - mfctr r4 /* ftrace_caller has moved local addr here */ - stw r4, 44(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - stw r3,52(r1) - mtlr r3 - lwz r0,44(r1) - mtctr r0 - - lwz r3, 12(r1) - lwz r4, 16(r1) - lwz r5, 20(r1) - lwz r6, 24(r1) - lwz r7, 28(r1) - lwz r8, 32(r1) - lwz r9, 36(r1) - lwz r10,40(r1) - - addi r1, r1, 48 - - bctr - -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_low.S similarity index 85% rename from arch/powerpc/kernel/trace/ftrace_64.S rename to arch/powerpc/kernel/trace/ftrace_low.S index 25e5b9e47c06..0bddf1fa6636 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -10,6 +10,7 @@ #include #include +#ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: @@ -21,6 +22,7 @@ ftrace_tramp_text: 
ftrace_tramp_init: .space 64 .popsection +#endif _GLOBAL(mcount) _GLOBAL(_mcount) @@ -33,6 +35,7 @@ EXPORT_SYMBOL(_mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ +#ifdef CONFIG_PPC64 std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -46,6 +49,11 @@ _GLOBAL(return_to_handler) * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif bl ftrace_return_to_handler nop @@ -53,11 +61,17 @@ _GLOBAL(return_to_handler) /* return value has real return address */ mtlr r3 +#ifdef CONFIG_PPC64 ld r1, 0(r1) ld r4, -32(r1) ld r3, -24(r1) ld r2, -16(r1) ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif /* Jump back to real return address */ blr diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S similarity index 62% rename from arch/powerpc/kernel/trace/ftrace_64_mprofile.S rename to arch/powerpc/kernel/trace/ftrace_mprofile.S index d636fc755f60..4fa23e260cab 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -32,52 +32,71 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. */ -_GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) - +.macro ftrace_regs_entry allregs /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_GPRS(2, 11, r1) + SAVE_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 + /* Save the original return address in A's stack frame */ + std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Ok to continue? 
*/ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif - SAVE_GPRS(12, 31, r1) + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(11, 31, r1) + .else +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) +#endif + .endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE - std r8, GPR1(r1) + PPC_STL r8, GPR1(r1) + .if \allregs == 1 /* Load special regs for save below */ mfmsr r8 mfctr r9 mfxer r10 mfcr r11 + .else + /* Clear MSR to flag as ftrace_caller versus ftrace_regs_caller */ + li r8, 0 + .endif /* Get the _mcount() call site out of LR */ mflr r7 /* Save it as pt_regs->nip */ - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) /* Save the read LR in pt_regs->link */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) + std r2, STK_GOT(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ addis r3,r2,function_trace_op@toc@ha addi r3,r3,function_trace_op@toc@l ld r5,0(r3) - -#ifdef CONFIG_LIVEPATCH - mr r14,r7 /* remember old NIP */ +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) #endif + +#ifdef CONFIG_LIVEPATCH_64 + mr r14, r7 /* remember old NIP */ +#endif + /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -85,59 +104,77 @@ _GLOBAL(ftrace_regs_caller) mr r4, r0 /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) + PPC_STL r8, _MSR(r1) + .if \allregs == 1 + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + PPC_STL r11, _CCR(r1) + .endif /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD + addi r6, r1, STACK_FRAME_OVERHEAD +.endm +.macro ftrace_regs_exit allregs + /* Load ctr with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + mtctr r3 + +#ifdef CONFIG_LIVEPATCH_64 + cmpd r14, r3 /* has NIP been altered? 
*/ +#endif + + /* Restore gprs */ + .if \allregs == 1 + REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#ifdef CONFIG_LIVEPATCH_64 + REST_GPR(14, r1) +#endif + .endif + + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + mtlr r0 + +#ifdef CONFIG_PPC64 + /* Restore callee's TOC */ + ld r2, STK_GOT(r1) +#endif + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + +#ifdef CONFIG_LIVEPATCH_64 + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + bctr /* jump after _mcount site */ +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_regs_call ftrace_regs_call: bl ftrace_stub nop + ftrace_regs_exit 1 - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 -#ifdef CONFIG_LIVEPATCH - cmpd r14, r3 /* has NIP been altered? */ -#endif - - /* Restore gprs */ - REST_GPR(0, r1) - REST_GPRS(2, 31, r1) - - /* Restore possibly modified LR */ - ld r0, _LINK(r1) - mtlr r0 - - /* Restore callee's TOC */ - ld r2, 24(r1) - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - -#ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - - bctr /* jump after _mcount site */ +_GLOBAL(ftrace_caller) + ftrace_regs_entry 0 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + ftrace_regs_exit 0 _GLOBAL(ftrace_stub) blr +#ifdef CONFIG_PPC64 ftrace_no_trace: mflr r3 mtctr r3 @@ -145,68 +182,9 @@ ftrace_no_trace: addi r1, r1, SWITCH_FRAME_SIZE mtlr r0 bctr +#endif -_GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ - std r0, LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stdu r1, -SWITCH_FRAME_SIZE(r1) - - /* Save all 
gprs to pt_regs */ - SAVE_GPRS(3, 10, r1) - - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace - - /* Get the _mcount() call site out of LR */ - mflr r7 - std r7, _NIP(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3, r2, function_trace_op@toc@ha - addi r3, r3, function_trace_op@toc@l - ld r5, 0(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Set pt_regs to NULL */ - li r6, 0 - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - ld r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - REST_GPRS(3, 10, r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - - /* Reload original LR */ - ld r0, LRSAVE(r1) - mtlr r0 - - /* Handle function_graph or go back */ - b ftrace_caller_common - -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. 
As * such it can only clobber registers which are volatile and used in @@ -273,55 +251,3 @@ livepatch_handler: /* Return to original caller of live patched function */ blr #endif /* CONFIG_LIVEPATCH */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stdu r1, -112(r1) - /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addi r5, r1, 112 - mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR to this. - */ - mtlr r3 - - ld r0, 40(r1) - mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - addi r1, r1, 112 - mflr r0 - std r0, LRSAVE(r1) - bctr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso/.gitignore similarity index 72% rename from arch/powerpc/kernel/vdso32/.gitignore rename to arch/powerpc/kernel/vdso/.gitignore index 824b863ec6bd..dd9bdd67758b 100644 --- a/arch/powerpc/kernel/vdso32/.gitignore +++ b/arch/powerpc/kernel/vdso/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only vdso32.lds vdso32.so.dbg +vdso64.lds +vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile new file mode 100644 index 000000000000..954974287ee7 --- /dev/null +++ b/arch/powerpc/kernel/vdso/Makefile @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-2.0 + +# List of files in the vdso, has to be asm only 
for now + +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 +include $(srctree)/lib/vdso/Makefile + +obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o +obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. 
+ CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) +endif + +# Build rules + +ifdef CROSS32_COMPILE + VDSOCC := $(CROSS32_COMPILE)gcc +else + VDSOCC := $(CC) +endif + +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n + +ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both + +CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 +AS32FLAGS := -D__VDSO32__ -s + +CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ -s + +targets += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -Upowerpc +targets += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE + $(call if_changed,vdso32ld_and_check) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE + $(call if_changed,vdso64ld_and_check) + +# assembly rules for the .S files +$(obj-vdso32): %-32.o: %.S FORCE + $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) +$(obj-vdso64): %-64.o: %.S FORCE + $(call if_changed_dep,vdso64as) +$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) + +# Generate VDSO offsets using helper script +gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +quiet_cmd_vdso32sym = VDSO32SYM $@ + cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ +gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +quiet_cmd_vdso64sym = VDSO64SYM $@ + cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdso32sym) +include/generated/vdso64-offsets.h: 
$(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdso64sym) + +# actual build commands +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S similarity index 95% rename from arch/powerpc/kernel/vdso32/cacheflush.S rename to arch/powerpc/kernel/vdso/cacheflush.S index f340e82d1981..d4e43ab2d5df 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) add r8,r8,r5 /* ensure we get enough */ #ifdef CONFIG_PPC64 lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ #else srwi. r8, r8, L1_CACHE_SHIFT mr r7, r6 @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? 
*/ #endif diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S similarity index 91% rename from arch/powerpc/kernel/vdso32/datapage.S rename to arch/powerpc/kernel/vdso/datapage.S index 65244416ab94..db8e167f0166 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -30,11 +30,15 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mr. r4,r3 get_datapage r3 mtlr r12 +#ifdef __powerpc64__ + addi r3,r3,CFG_SYSCALL_MAP64 +#else addi r3,r3,CFG_SYSCALL_MAP32 +#endif + crclr cr0*4+so beqlr li r0,NR_syscalls stw r0,0(r4) - crclr cr0*4+so blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) @@ -49,8 +53,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) mflr r12 .cfi_register lr,r12 get_datapage r3 +#ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) - lwz r3,CFG_TB_TICKS_PER_SEC(r3) +#endif + PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S similarity index 95% rename from arch/powerpc/kernel/vdso32/getcpu.S rename to arch/powerpc/kernel/vdso/getcpu.S index ff5e214fec41..8e08ccf19062 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso/getcpu.S @@ -19,8 +19,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpwi cr0,r3,0 - cmpwi cr1,r4,0 + PPC_LCMPI cr0,r3,0 + PPC_LCMPI cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git 
a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S similarity index 60% rename from arch/powerpc/kernel/vdso32/gettimeofday.S rename to arch/powerpc/kernel/vdso/gettimeofday.S index d21d08140a5e..eb9c81e1c218 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO + * Userland implementation of gettimeofday() for processes + * for use in the vDSO * * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, * IBM Corp. @@ -12,7 +12,49 @@ #include #include #include -#include + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there is no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct call_time=0 + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + .ifeq \call_time + addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + .ifeq \call_time + cmpwi r3, 0 + .endif + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + .ifeq \call_time + beqlr+ + crset so + neg r3, r3 + .endif + blr + .cfi_endproc +.endm .text /* @@ -41,9 +83,11 @@ V_FUNCTION_END(__kernel_clock_gettime) * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); * */ +#ifndef __powerpc64__ V_FUNCTION_BEGIN(__kernel_clock_gettime64) cvdso_call __c_kernel_clock_gettime64 
V_FUNCTION_END(__kernel_clock_gettime64) +#endif /* * Exact prototype of clock_getres() @@ -63,12 +107,13 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time + cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ /* function, just beyond the end of the integer restore area. */ +#ifndef __powerpc64__ _GLOBAL(_restgpr_31_x) _GLOBAL(_rest32gpr_31_x) lwz r0,4(r11) @@ -76,3 +121,4 @@ _GLOBAL(_rest32gpr_31_x) mtlr r0 mr r1,r11 blr +#endif diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S similarity index 100% rename from arch/powerpc/kernel/vdso32/note.S rename to arch/powerpc/kernel/vdso/note.S diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp32.S similarity index 100% rename from arch/powerpc/kernel/vdso32/sigtramp.S rename to arch/powerpc/kernel/vdso/sigtramp32.S diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S similarity index 100% rename from arch/powerpc/kernel/vdso64/sigtramp.S rename to arch/powerpc/kernel/vdso/sigtramp64.S diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso32/vdso32.lds.S rename to arch/powerpc/kernel/vdso/vdso32.lds.S diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso64/vdso64.lds.S rename to arch/powerpc/kernel/vdso/vdso64.lds.S diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c similarity index 70% rename from arch/powerpc/kernel/vdso32/vgettimeofday.c rename to arch/powerpc/kernel/vdso/vgettimeofday.c index 65fb03fb1731..55a287c9a736 100644 --- 
a/arch/powerpc/kernel/vdso32/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -2,8 +2,22 @@ /* * Powerpc userspace implementations of gettimeofday() and similar. */ +#include #include +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} +#else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, const struct vdso_data *vd) { @@ -16,18 +30,19 @@ int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, return __cvdso_clock_gettime_data(vd, clock, ts); } +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} +#endif + int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, const struct vdso_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_time32_data(vd, clock_id, res); -} - __kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) { return __cvdso_time_data(vd, time); diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile deleted file mode 100644 index 7d9a6fee0e3d..000000000000 --- a/arch/powerpc/kernel/vdso32/Makefile +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 
-include $(srctree)/lib/vdso/Makefile - -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -ifdef CROSS32_COMPILE - VDSOCC := $(CROSS32_COMPILE)gcc -else - VDSOCC := $(CC) -endif - -CC32FLAGS := -ifdef CONFIG_PPC64 -CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) -endif - -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both -asflags-y := -D__VDSO32__ -s - -obj-y += vdso32_wrapper.o -targets += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -Upowerpc - -# link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso32ld_and_check) - -# assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE - $(call if_changed_dep,vdso32as) -$(obj)/vgettimeofday.o: %.o: %.c FORCE - $(call if_changed_dep,vdso32cc) - -# Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) 
$(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< -quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..10f92f265d51 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -7,7 +7,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg" .balign PAGE_SIZE vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile deleted file mode 100644 index 3c5baaa6f1e7..000000000000 --- a/arch/powerpc/kernel/vdso64/Makefile +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile - -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common 
-fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both - -# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true -# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is -# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code -# generation is minimal, it will just use r29 instead. -ccflags-y += $(call cc-option, -ffixed-r30) - -asflags-y := -D__VDSO64__ -s - -targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso64ld_and_check) - -# Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S deleted file mode 100644 index 76c3c8cf8ece..000000000000 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * vDSO provided cache flush routines - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include -#include -#include -#include -#include - - .text - -/* - * Default "generic" version of __kernel_sync_dicache. 
- * - * void __kernel_sync_dicache(unsigned long start, unsigned long end) - * - * Flushes the data cache & invalidate the instruction cache for the - * provided range [start, end[ - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache) - .cfi_startproc -BEGIN_FTR_SECTION - b 3f -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - mflr r12 - .cfi_register lr,r12 - get_datapage r10 - mtlr r12 - .cfi_restore lr - - lwz r7,CFG_DCACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,CFG_ICACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - li r3,0 - blr -3: - crclr cr0*4+so - sync - isync - li r3,0 - blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S deleted file mode 100644 index 00760dc69d68..000000000000 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Access to the shared data page by the vDSO & syscall map - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. - */ - -#include -#include -#include -#include -#include -#include - - .text - -/* - * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; - * - * returns a pointer to the syscall map. 
the map is agnostic to the - * size of "long", unlike kernel bitops, it stores bits from top to - * bottom so that memory actually contains a linear bitmap - * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of - * 32 bits int at N >> 5. - */ -V_FUNCTION_BEGIN(__kernel_get_syscall_map) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - mr r4,r3 - get_datapage r3 - mtlr r12 - addi r3,r3,CFG_SYSCALL_MAP64 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - li r0,NR_syscalls - stw r0,0(r4) - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_syscall_map) - - -/* - * void unsigned long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - get_datapage r3 - ld r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S deleted file mode 100644 index 12bbf236cdc4..000000000000 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * - * Copyright (C) IBM Corporation, 2012 - * - * Author: Anton Blanchard - */ -#include -#include - - .text -/* - * Exact prototype of getcpu - * - * int __kernel_getcpu(unsigned *cpu, unsigned *node); - * - */ -V_FUNCTION_BEGIN(__kernel_getcpu) - .cfi_startproc - mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 - clrlwi r6,r5,16 - rlwinm r7,r5,16,31-15,31-0 - beq cr0,1f - stw r6,0(r3) -1: beq cr1,2f - stw r7,0(r4) -2: crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S deleted file mode 100644 index d7a7bfb51081..000000000000 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* 
- * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include -#include -#include -#include -#include -#include -#include - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - cvdso_call __c_kernel_gettimeofday -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - cvdso_call __c_kernel_clock_gettime -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - cvdso_call __c_kernel_clock_getres -V_FUNCTION_END(__kernel_clock_getres) - -/* - * Exact prototype of time() - * - * time_t time(time *t); - * - */ -V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time -V_FUNCTION_END(__kernel_time) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S deleted file mode 100644 index dc2a509f7e8a..000000000000 --- a/arch/powerpc/kernel/vdso64/note.S +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c deleted file mode 100644 index 5b5500058344..000000000000 --- a/arch/powerpc/kernel/vdso64/vgettimeofday.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Powerpc userspace implementations of gettimeofday() and similar. 
- */ -#include -#include - -int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) -{ - return __cvdso_clock_gettime_data(vd, clock, ts); -} - -int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) -{ - return __cvdso_gettimeofday_data(vd, tv, tz); -} - -int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_data(vd, clock_id, res); -} - -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) -{ - return __cvdso_time_data(vd, time); -} diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..839d1a61411d 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -7,7 +7,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg" .balign PAGE_SIZE vdso64_end: diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2bcca818136a..fe22d940412f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,9 +281,7 @@ SECTIONS . = ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_PPC32 __dynamic_symtab = .; -#endif *(.dynsym) } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 8b68d9f91a03..abf5897ae88c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -134,11 +134,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. 
+ * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 635b5fc30b53..6cc7793b8420 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -291,7 +290,7 @@ static union thread_union kexec_stack __init_task_data = * For similar reasons to the stack above, the kexecing CPU needs to be on a * static PACA; we switch to kexec_paca. 
*/ -struct paca_struct kexec_paca; +static struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index c3e31fef0be1..1ae09992c9ea 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -228,7 +228,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u16 sid_map_mask; - static int backwards_map = 0; + static int backwards_map; if (kvmppc_get_msr(vcpu) & MSR_PR) gvsid |= VSID_PR; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 213232914367..0aeb51738ca9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm, &debugfs_htab_fops); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 8cebe5542256..e4ce2a35483f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1454,7 +1454,7 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, &debugfs_radix_fops); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9a..c886557638a1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2767,20 +2767,17 @@ static const struct file_operations debugfs_timings_ops = { }; /* Create a debugfs 
directory for the vcpu */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - char buf[16]; - struct kvm *kvm = vcpu->kvm; - - snprintf(buf, sizeof(buf), "vcpu%u", id); - vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, &debugfs_timings_ops); + return 0; } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { + return 0; } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ @@ -2903,8 +2900,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); - debugfs_vcpu_init(vcpu, id); - return 0; } @@ -5223,7 +5218,6 @@ void kvmppc_free_host_rm_ops(void) static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; - char buf[32]; int ret; mutex_init(&kvm->arch.uvmem_lock); @@ -5356,15 +5350,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.smt_mode = 1; kvm->arch.emul_smt_mode = 1; - /* - * Create a debugfs directory for the VM - */ - snprintf(buf, sizeof(buf), "vm%d", current->pid); - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); + return 0; +} + +static int kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm) +{ kvmppc_mmu_debugfs_init(kvm); if (radix_enabled()) kvmhv_radix_debugfs_init(kvm); - return 0; } @@ -5379,8 +5372,6 @@ static void kvmppc_free_vcores(struct kvm *kvm) static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { - debugfs_remove_recursive(kvm->arch.debugfs_dir); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_deactivated(); @@ -6042,6 +6033,8 @@ static struct kvmppc_ops kvm_ops_hv = { .svm_off = 
kvmhv_svm_off, .enable_dawr1 = kvmhv_enable_dawr1, .hash_v3_possible = kvmppc_hash_v3_possible, + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv, + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv, }; static int kvm_init_subcore_bitmap(void) @@ -6103,7 +6096,7 @@ static int kvmppc_book3s_init_hv(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { r = kvm_init_subcore_bitmap(); if (r) - return r; + goto err; } /* @@ -6119,30 +6112,42 @@ static int kvmppc_book3s_init_hv(void) np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); if (!np) { pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); - return -ENODEV; + r = -ENODEV; + goto err; } /* presence of intc confirmed - node can be dropped again */ of_node_put(np); } #endif - kvm_ops_hv.owner = THIS_MODULE; - kvmppc_hv_ops = &kvm_ops_hv; - init_default_hcalls(); init_vcore_lists(); r = kvmppc_mmu_hv_init(); if (r) - return r; + goto err; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + goto err; + } r = kvmppc_uvmem_init(); - if (r < 0) + if (r < 0) { pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); + return r; + } + + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + + return 0; + +err: + kvmhv_nested_exit(); + kvmppc_radix_exit(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7d6d91338c3f..7e52d0beee77 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index 6f18632e30e9..dd9880731bd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9cc466006e8b..ab6d37d78c62 
100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1016,19 +1016,10 @@ DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry, xics, &xics_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, @@ -1440,7 +1431,7 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) static void kvmppc_xics_init(struct kvm_device *dev) { - struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + struct kvmppc_xics *xics = dev->private; xics_debugfs_init(xics); } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e216c068075d..c0ce5531d9bc 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2354,24 +2354,15 @@ DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry, xive, &xive_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_debugfs_init(xive); diff --git a/arch/powerpc/kvm/book3s_xive_native.c 
b/arch/powerpc/kvm/book3s_xive_native.c index 561a5bfe0468..f81ba6f84e72 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1259,24 +1259,15 @@ DEFINE_SHOW_ATTRIBUTE(xive_native_debug); static void xive_native_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry, xive, &xive_native_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_native_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_native_debugfs_init(xive); diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 7e8b69015d20..c8b2b4478545 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500_init(void) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1c189b5aadcc..fa0d8dbbe484 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500mc_init(void) diff --git a/arch/powerpc/kvm/emulate_loadstore.c 
b/arch/powerpc/kvm/emulate_loadstore.c index 48272a9b9c30..cfc9114b87d0 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_FAIL; - int advance = 1; struct instruction_op op; /* this default type might be overwritten by subcategories */ @@ -98,6 +97,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); + vcpu->mmio_is_write = OP_IS_STORE(type); + switch (type) { case LOAD: { int instr_byte_swap = op.type & BYTEREV; @@ -355,15 +356,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } } - if (emulated == EMULATE_FAIL) { - advance = 0; - kvmppc_core_queue_program(vcpu, 0); - } - trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); /* Advance past emulated instruction. */ - if (advance) + if (emulated != EMULATE_FAIL) kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5..9772b176e406 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -307,9 +307,31 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) u32 last_inst; kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); - /* XXX Deliver Program interrupt to guest. */ - pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); - r = RESUME_HOST; + kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", + last_inst); + + /* + * Injecting a Data Storage here is a bit more + * accurate since the instruction that caused the + * access could still be a valid one. 
+ */ + if (!IS_ENABLED(CONFIG_BOOKE)) { + ulong dsisr = DSISR_BADACCESS; + + if (vcpu->mmio_is_write) + dsisr |= DSISR_ISSTORE; + + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.vaddr_accessed, dsisr); + } else { + /* + * BookE does not send a SIGBUS on a bad + * fault, so use a Program interrupt instead + * to avoid a fault loop. + */ + kvmppc_core_queue_program(vcpu, 0); + } + + r = RESUME_GUEST; break; } default: @@ -431,6 +453,8 @@ int kvm_arch_check_processor_compat(void *opaque) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { struct kvmppc_ops *kvm_ops = NULL; + int r; + /* * if we have both HV and PR enabled, default is HV */ @@ -452,11 +476,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else goto err_out; - if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + if (!try_module_get(kvm_ops->owner)) return -ENOENT; kvm->arch.kvm_ops = kvm_ops; - return kvmppc_core_init_vm(kvm); + r = kvmppc_core_init_vm(kvm); + if (r) + module_put(kvm_ops->owner); + return r; err_out: return -EINVAL; } @@ -755,7 +782,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) rcuwait_init(&vcpu->arch.wait); vcpu->arch.waitp = &vcpu->arch.wait; - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; out_vcpu_uninit: @@ -772,8 +798,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - kvmppc_remove_vcpu_debugfs(vcpu); - switch (vcpu->arch.irq_type) { case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); @@ -1114,10 +1138,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u64 gpr; - if (run->mmio.len > sizeof(gpr)) { - printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + if (run->mmio.len > sizeof(gpr)) return; - } if (!vcpu->arch.mmio_host_swabbed) { switch (run->mmio.len) { @@ -1236,10 +1258,8 @@ static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, host_swabbed = 
!is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1325,10 +1345,8 @@ int kvmppc_handle_store(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1499,7 +1517,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1614,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; @@ -1841,6 +1859,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #ifdef CONFIG_ALTIVEC out: #endif + + /* + * We're already returning to userspace, don't pass the + * RESUME_HOST flags along. 
+ */ + if (r > 0) + r = 0; + vcpu_put(vcpu); return r; } @@ -2497,3 +2523,16 @@ int kvm_arch_init(void *opaque) } EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); + +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) +{ + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs) + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry); +} + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + if (kvm->arch.kvm_ops->create_vm_debugfs) + kvm->arch.kvm_ops->create_vm_debugfs(kvm); + return 0; +} diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index ba56a5cbba97..25071331f8c1 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -204,21 +204,10 @@ static const struct file_operations kvmppc_exit_timing_fops = { .release = single_release, }; -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) { - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, - vcpu, &kvmppc_exit_timing_fops); - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; + debugfs_create_file("timing", 0666, debugfs_dentry, + vcpu, &kvmppc_exit_timing_fops); + return 0; } diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index feef7885ba82..45817ab82bb4 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -14,8 +14,8 @@ #ifdef CONFIG_KVM_EXIT_TIMING void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct 
kvm_vcpu *vcpu); +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry); static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) { @@ -26,9 +26,11 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) /* if exit timing is not configured there is no need to build the c file */ static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) +{ + return 0; +} static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} #endif /* CONFIG_KVM_EXIT_TIMING */ diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 830a126e095d..38cd0ed0a617 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -115,6 +115,7 @@ {H_VASI_STATE, "H_VASI_STATE"}, \ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_GET_ENERGY_SCALE_INFO, "H_GET_ENERGY_SCALE_INFO"}, \ {H_SET_MPP, "H_SET_MPP"}, \ {H_GET_MPP, "H_GET_MPP"}, \ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 27d9070617df..4541e8e29467 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -116,9 +116,6 @@ EXPORT_SYMBOL(__csum_partial) EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 906d43463366..00c68e7fb11e 100644 --- a/arch/powerpc/lib/code-patching.c +++ 
b/arch/powerpc/lib/code-patching.c @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + this_cpu_write(text_poke_area, area); return 0; diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3bcf4786e4a..3e9c27c46331 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5ed88fbee0db..6f79bde6d6c2 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -75,10 +75,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -1065,15 +1063,9 @@ Efault: int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long size; + unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; -#endif + ea = 
truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ -1097,7 +1089,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1110,7 +1105,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -1139,10 +1137,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -1173,12 +1169,8 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } -#endif + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; @@ -3389,7 +3381,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..f76a50291fd7 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include #include #include -#include int enter_vmx_usercopy(void) { diff --git 
a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7abf82a698d3..985cabdd7f67 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1621,8 +1621,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER(__do_hash_fault) +DEFINE_INTERRUPT_HANDLER(do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1681,35 +1680,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault) } } -/* - * The _RAW interrupt entry checks for the in_nmi() case before - * running the full handler. - */ -DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) -{ - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - do_bad_page_fault_segv(regs); - return 0; - } - - __do_hash_fault(regs); - - return 0; -} - #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { @@ -1776,26 +1746,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* - * __hash_page_* must run with interrupts off, as it sets the - * H_PAGE_BUSY bit. 
It's possible for perf interrupts to hit at any - * time and may take a hash fault reading the user stack, see - * read_user_stack_slow() in the powerpc/perf code. + * __hash_page_* must run with interrupts off, including PMI interrupts + * off, as it sets the H_PAGE_BUSY bit. * - * If that takes a hash fault on the same page as we lock here, it - * will bail out when seeing H_PAGE_BUSY set, and retry the access - * leading to an infinite loop. - * - * Disabling interrupts here does not prevent perf interrupts, but it - * will prevent them taking hash faults (see the NMI test in - * do_hash_page), then read_user_stack's copy_from_user_nofault will - * fail and perf will fall back to read_user_stack_slow(), which - * walks the Linux page tables. + * It's otherwise possible for perf interrupts to hit at any time and + * may take a hash fault reading the user stack, which could take a + * hash miss and deadlock on the same H_PAGE_BUSY bit. * * Interrupts must also be off for the duration of the * mm_is_thread_local test and update, to prevent preempt running the * mm on another CPU (XXX: this may be racy vs kthread_use_mm). */ - local_irq_save(flags); + powerpc_local_irq_pmu_save(flags); /* Is that local to this CPU ? */ if (mm_is_thread_local(mm)) @@ -1820,7 +1782,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, mm_ctx_user_psize(&mm->context), pte_val(*ptep)); - local_irq_restore(flags); + powerpc_local_irq_pmu_restore(flags); } /* diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index ea8f83afb0ae..3bc0eb21b2a0 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -150,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void __init hugetlbpage_init_default(void) +void __init hugetlbpage_init_defaultsize(void) { /* Set default large page size. 
Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 31f4cef3adac..81091b9587f6 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -9,7 +9,6 @@ * Copyright (C) 2002 Anton Blanchard , IBM */ -#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a9..d53fed4eccbd 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,7 +35,6 @@ #include #include -#include #include #include #include @@ -567,18 +566,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? "write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index ddead41e2194..b642a5a8668f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -664,10 +664,7 @@ static int __init hugetlbpage_init(void) configured = true; } - if (configured) { - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); - } else + if (!configured) pr_info("Failed to initialize. 
Disabling HugeTLB"); return 0; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 35f46bf54281..83c0ee9fbf05 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -513,6 +514,9 @@ void __init mmu_early_init_devtree(void) } else hash__early_init_devtree(); + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_defaultsize(); + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) panic("kernel does not support any MMU type offered by platform"); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692..f3e4d069e0ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c20..b9b7fefbb64b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index edea388e9d3f..85753e32a4de 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -15,12 +15,14 @@ #include +static pte_basic_t 
pte_update_delta(pte_t *ptep, unsigned long addr, + unsigned long old, unsigned long new) +{ + return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0); +} + /* - * Updates the attributes of a page in three steps: - * - * 1. take the page_table_lock - * 2. install the new entry with the updated attributes - * 3. flush the TLB + * Updates the attributes of a page atomically. * * This sequence is safe against concurrent updates, and also allows updating the * attributes of a page currently being executed or accessed. @@ -28,41 +30,39 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { long action = (long)data; - pte_t pte; - spin_lock(&init_mm.page_table_lock); - - pte = ptep_get(ptep); - - /* modify the PTE bits as desired, then apply */ + /* modify the PTE bits as desired */ switch (action) { case SET_MEMORY_RO: - pte = pte_wrprotect(pte); + /* Don't clear DIRTY bit */ + pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO); break; case SET_MEMORY_RW: - pte = pte_mkwrite(pte_mkdirty(pte)); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW); break; case SET_MEMORY_NX: - pte = pte_exprotect(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO); break; case SET_MEMORY_X: - pte = pte_mkexec(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); + break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); break; default: WARN_ON_ONCE(1); break; } - pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); - /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - spin_unlock(&init_mm.page_table_lock); - return 0; } @@ -96,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, 
start, size, change_page_attr, (void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. - */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2..a56ade39dc68 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -146,24 +148,23 @@ void mark_rodata_ro(void) { unsigned long numpages; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) + pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; } - numpages = PFN_UP((unsigned 
long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +180,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index c7f824d294b2..9a601587836b 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -238,7 +238,10 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { - struct hash_pte ptes[4]; + struct { + unsigned long v; + unsigned long r; + } ptes[4]; unsigned long vsid, vpn, hash, hpte_group, want_v; int i, j, ssize = mmu_kernel_ssize; long lpar_rc = 0; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b20a2a83a6e7..979701d360da 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -27,21 +27,21 @@ #define PPC_JMP(dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \ 
pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* blr; (unconditional 'branch' with link) to absolute address */ -#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ - (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) +/* bl (unconditional 'branch' with link) */ +#define PPC_BL(dest) EMIT(PPC_INST_BL | (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) + /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_cond_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ @@ -59,10 +59,7 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) -#ifdef CONFIG_PPC32 -#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) -#endif - +#ifdef CONFIG_PPC64 #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -85,11 +82,6 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) - -#ifdef CONFIG_PPC64 -#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -#else -#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif /* @@ -127,15 +119,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ -#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ - -#ifdef CONFIG_PPC64 -extern const int b2p[MAX_BPF_JIT_REG + 2]; -#else -extern const int b2p[MAX_BPF_JIT_REG + 1]; -#endif - struct codegen_context { /* * This is used to track register usage as well @@ -149,10 +132,13 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; - int b2p[ARRAY_SIZE(b2p)]; + int 
b2p[MAX_BPF_JIT_REG + 2]; unsigned int exentry_idx; + unsigned int alt_exit_addr; }; +#define bpf_to_ppc(r) (ctx->b2p[r]) + #ifdef CONFIG_PPC32 #define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ #else @@ -180,12 +166,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +void bpf_jit_init_reg_mapping(struct codegen_context *ctx); +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg); diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h deleted file mode 100644 index b63b35e45e55..000000000000 --- a/arch/powerpc/net/bpf_jit64.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit64.h: BPF JIT compiler for PPC64 - * - * Copyright 2016 Naveen N. Rao - * IBM Corporation - */ -#ifndef _BPF_JIT64_H -#define _BPF_JIT64_H - -#include "bpf_jit.h" - -/* - * Stack layout: - * Ensure the top half (upto local_tmp_var) stays consistent - * with our redzone usage. 
- * - * [ prev sp ] <------------- - * [ nv gpr save area ] 5*8 | - * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 16 | - * fp (r31) --> [ ebpf stack space ] upto 512 | - * [ frame header ] 32/112 | - * sp (r1) ---> [ stack pointer ] -------------- - */ - -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (5*8) -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 24 -/* stack frame excluding BPF stack, ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) - -#ifndef __ASSEMBLY__ - -/* BPF register usage */ -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { - /* function return value */ - [BPF_REG_0] = 8, - /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, - /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 2, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 -}; - -/* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 - -/* - * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, - * so ensure that it isn't in use already. 
- */ -#define PPC_BPF_LL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_LDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_LD(r, base, i)); \ - } while(0) -#define PPC_BPF_STL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_STDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_STD(r, base, i)); \ - } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index a4f4d347e6bd..427185256216 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -59,7 +59,9 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, */ tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; /* * Restore ctx->idx here. 
This is safe as the length @@ -70,13 +72,13 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; #ifdef CONFIG_PPC32 - PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); - PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); for (j = ctx->idx - addrs[i] / 4; j < 4; j++) EMIT(PPC_RAW_NOP()); #else func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); - PPC_LI64(b2p[insn[i].dst_reg], func_addr); + PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); /* overwrite rest with nops */ for (j = ctx->idx - addrs[i] / 4; j < 5; j++) EMIT(PPC_RAW_NOP()); @@ -89,6 +91,22 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, return 0; } +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) +{ + if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { + PPC_JMP(exit_addr); + } else if (ctx->alt_exit_addr) { + if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4)))) + return -1; + PPC_JMP(ctx->alt_exit_addr); + } else { + ctx->alt_exit_addr = ctx->idx * 4; + bpf_jit_build_epilogue(image, ctx); + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -161,7 +179,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); - memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + bpf_jit_init_reg_mapping(&cgctx); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); @@ -177,8 +195,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * If we have seen a tail call, we need a second pass. * This is because bpf_jit_emit_common_epilogue() is called * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. 
+ * We also need a second pass if we ended up with too large + * a program so as to ensure BPF_EXIT branches are in range. */ - if (cgctx.seen & SEEN_TAILCALL) { + if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; @@ -193,6 +213,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * calculate total size from idx. */ bpf_jit_build_prologue(0, &cgctx); + addrs[fp->len] = cgctx.idx * 4; bpf_jit_build_epilogue(0, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; @@ -233,6 +254,7 @@ skip_init_ctx: for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; + cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index cf8dd8aea386..e46ed1e8c6ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -33,39 +33,37 @@ /* stack frame, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) -/* BPF register usage */ -#define TMP_REG (MAX_BPF_JIT_REG + 0) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 1] = { - /* function return value */ - [BPF_REG_0] = 12, - /* function arguments */ - [BPF_REG_1] = 4, - [BPF_REG_2] = 6, - [BPF_REG_3] = 8, - [BPF_REG_4] = 10, - [BPF_REG_5] = 22, - /* non volatile registers */ - [BPF_REG_6] = 24, - [BPF_REG_7] = 26, - [BPF_REG_8] = 28, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 18, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 20, - [TMP_REG] = 31, /* 32 bits */ -}; - -static int bpf_to_ppc(struct codegen_context *ctx, int reg) -{ - return ctx->b2p[reg]; -} +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), 
(i) < 0 ? -1 : 0)) /* PPC NVR range -- update this if we ever use NVRs below r17 */ -#define BPF_PPC_NVR_MIN 17 -#define BPF_PPC_TC 16 +#define BPF_PPC_NVR_MIN _R17 +#define BPF_PPC_TC _R16 + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R12; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R4; + ctx->b2p[BPF_REG_2] = _R6; + ctx->b2p[BPF_REG_3] = _R8; + ctx->b2p[BPF_REG_4] = _R10; + ctx->b2p[BPF_REG_5] = _R22; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R24; + ctx->b2p[BPF_REG_7] = _R26; + ctx->b2p[BPF_REG_8] = _R28; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R18; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R20; + ctx->b2p[TMP_REG] = _R31; /* 32 bits */ +} static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { @@ -77,14 +75,22 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ +#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ + void bpf_jit_realloc_regs(struct codegen_context *ctx) { - if (ctx->seen & SEEN_FUNC) - return; + unsigned int nvreg_mask; - while (ctx->seen & SEEN_NVREG_MASK && + if (ctx->seen & SEEN_FUNC) + nvreg_mask = SEEN_NVREG_TEMP_MASK; + else + nvreg_mask = SEEN_NVREG_FULL_MASK; + + while (ctx->seen & nvreg_mask && (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { - int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); int i; @@ -108,8 +114,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) 
int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* @@ -118,7 +124,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); else EMIT(PPC_RAW_NOP()); @@ -140,15 +146,15 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -168,7 +174,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, 
bpf_to_ppc(BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); @@ -185,12 +191,12 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { s32 rel = (s32)func - (s32)(image + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL_ABS(func); + PPC_BL(func); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -201,6 +207,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_MTCTR(_R0)); EMIT(PPC_RAW_BCTRL()); } + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -211,8 +219,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r5-r6/BPF_REG_2 - pointer to bpf_array * r7-r8/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); - int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); /* * if (index >= array->map.max_entries) @@ -221,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -230,7 +238,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); @@ -243,7 +251,7 @@ static int 
bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLWI(_R3, 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); @@ -258,7 +266,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -282,11 +290,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; - u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; - u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; @@ -834,7 +842,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (BPF_MODE(code) == BPF_PROBE_MEM) { PPC_LI32(_R0, TASK_SIZE - off); EMIT(PPC_RAW_CMPLW(src_reg, _R0)); - PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * For BPF_DW case, "li reg_h,0" would be needed when @@ -929,8 +937,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. 
*/ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -945,15 +956,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (ret < 0) return ret; - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e1e8c934308a..585f257da045 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,7 +17,60 @@ #include #include -#include "bpf_jit64.h" +#include "bpf_jit.h" + +/* + * Stack layout: + * Ensure the top half (upto local_tmp_var) stays consistent + * with our redzone usage. 
+ * + * [ prev sp ] <------------- + * [ nv gpr save area ] 5*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 16 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers BPF_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (5*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 24 +/* stack frame excluding BPF stack, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + +/* BPF register usage */ +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R8; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R3; + ctx->b2p[BPF_REG_2] = _R4; + ctx->b2p[BPF_REG_3] = _R5; + ctx->b2p[BPF_REG_4] = _R6; + ctx->b2p[BPF_REG_5] = _R7; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R27; + ctx->b2p[BPF_REG_7] = _R28; + ctx->b2p[BPF_REG_8] = _R29; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R31; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R12; + ctx->b2p[TMP_REG_1] = _R9; + ctx->b2p[TMP_REG_2] = _R10; +} + +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN _R27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { @@ -27,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); } /* @@ -73,22 +126,23 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int
i; + if (__is_defined(PPC64_ELF_ABI_v2)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + /* * Initialize tail_call_cnt if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); } -#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to @@ -96,10 +150,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -108,12 +162,12 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -123,15 +177,15 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - 
if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); + EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); } } } @@ -141,42 +195,45 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); } -static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, - u64 func) +static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to CTR */ - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTCTR(12)); -#endif + unsigned long func_addr = func ? 
ppc_function_entry((void *)func) : 0; + long reladdr; + + if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + return -EINVAL; + + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); + return -ERANGE; + } + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; + if (WARN_ON_ONCE(func && is_module_text_address(func))) + return -EINVAL; + + /* skip past descriptor if elf v1 */ + func += FUNCTION_DESCR_SIZE; + /* Load function address into r12 */ - PPC_LI64(12, func); + PPC_LI64(_R12, func); /* For bpf-to-bpf function calls, the callee's address is unknown * until the last extra pass. As seen above, we use PPC_LI64() to @@ -191,20 +248,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); -#ifdef PPC64_ELF_ABI_v1 - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. 
- */ - PPC_BPF_LL(2, 12, 8); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(12, 12, 0); -#endif - - EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -215,54 +262,53 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r4/BPF_REG_2 - pointer to bpf_array * r5/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = b2p[BPF_REG_2]; - int b2p_index = b2p[BPF_REG_3]; + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); + int bpf_tailcall_prologue_size = 8; + + if (__is_defined(PPC64_ELF_ABI_v2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); - EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); - PPC_BCC(COND_GE, out); + EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GE, out); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ - 
EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); - PPC_BCC(COND_EQ, out); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); -#ifdef PPC64_ELF_ABI_v1 - /* skip past the function descriptor */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); -#else - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); -#endif - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); /* tear down stack, restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); @@ -309,9 +355,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = b2p[insn[i].dst_reg]; - u32 src_reg = b2p[insn[i].src_reg]; + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 size = BPF_SIZE(code); + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -362,8 +410,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -373,8 +421,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm > -32768 && imm <= 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -389,32 +437,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_ALU) - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | 
BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; @@ -433,35 +477,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } } - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg)); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, 
dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg)); break; } goto bpf_alu32_trunc; @@ -483,8 +515,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ @@ -495,8 +527,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); @@ -512,8 +544,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); @@ -614,11 +646,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 
24, 31)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 32: /* @@ -626,28 +658,28 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 64: /* Store the value to stack and then use byte-reverse loads */ - PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg)); } else { - EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); - EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp2_reg, 4)); + EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg)); } break; } @@ -686,15 +718,14 @@ emit_clear: break; case 
STF_BARRIER_SYNC_ORI: EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: - EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); - PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(12)); + ctx->seen |= SEEN_FUNC; + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); break; case STF_BARRIER_NONE: break; @@ -707,34 +738,39 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; + } + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + 
EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); } - PPC_BPF_STL(src_reg, dst_reg, off); break; /* @@ -751,14 +787,14 @@ emit_clear: /* *(u32 *)(dst + off) += src */ /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -771,11 +807,11 @@ emit_clear: } /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -801,18 +837,17 @@ emit_clear: * set dst_reg=0 and move on. 
*/ if (BPF_MODE(code) == BPF_PROBE_MEM) { - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off)); if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) - PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + PPC_LI64(tmp2_reg, 0x8000000000000000ul); else /* BOOK3S_64 */ - PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); - EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); - PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + PPC_LI64(tmp2_reg, PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg)); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* - * Check if 'off' is word aligned because PPC_BPF_LL() - * (BPF_DW case) generates two instructions if 'off' is not - * word-aligned and one instruction otherwise. + * Check if 'off' is word aligned for BPF_DW, because + * we might generate two instructions. */ if (BPF_SIZE(code) == BPF_DW && (off & 3)) PPC_JMP((ctx->idx + 3) * 4); @@ -831,7 +866,12 @@ emit_clear: EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); break; case BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } break; } @@ -871,8 +911,11 @@ emit_clear: * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. 
*/ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -888,11 +931,15 @@ emit_clear: return ret; if (func_addr_fixed) - bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + if (ret) + return ret; + /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3)); break; /* @@ -998,14 +1045,10 @@ cond_branch: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg)); } else { - int tmp_reg = b2p[TMP_REG_1]; - - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -1034,14 +1077,12 @@ cond_branch: EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); /* ... 
but unsigned comparison */ if (is_jmp32) - EMIT(PPC_RAW_CMPLW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPLD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg)); } break; } @@ -1066,13 +1107,11 @@ cond_branch: else EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (is_jmp32) - EMIT(PPC_RAW_CMPW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg)); } break; } @@ -1081,19 +1120,16 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); + EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm)); else { - int tmp_reg = b2p[TMP_REG_1]; - - PPC_LI32(tmp_reg, imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, - tmp_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, + tmp1_reg)); } else { - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, - tmp_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, + 0, 0, 31)); } } break; diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f..19a8d051ddf1 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,6 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, size_t size) { unsigned long addr = (unsigned long)ptr; - int rc; if (addr > 
TASK_SIZE - size || (addr & (size - 1))) return -EFAULT; - rc = copy_from_user_nofault(ret, ptr, size); - - if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) - return read_user_stack_slow(ptr, ret, size); - - return rc; + return copy_from_user_nofault(ret, ptr, size); } #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 8d0df4226328..488e8a21a11e 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -18,33 +18,6 @@ #include "callchain.h" -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. Radix - * has no need for this so it doesn't use read_user_stack_slow. - */ -int read_user_stack_slow(const void __user *ptr, void *buf, int nb) -{ - - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - struct page *page; - void *kaddr; - - if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { - kaddr = page_address(page); - - /* align address to page boundary */ - offset = addr & ~PAGE_MASK; - - memcpy(buf, kaddr + offset, nb); - put_page(page); - return 0; - } - return -EFAULT; -} - static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { return __read_user_stack(ptr, ret, sizeof(*ret)); diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index b6e25f75109d..f3db88aee4dd 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -130,7 +130,7 @@ static struct attribute *generic_compat_events_attr[] = { NULL }; -static struct attribute_group generic_compat_pmu_events_group = { +static const struct attribute_group generic_compat_pmu_events_group = { .name = "events", .attrs = generic_compat_events_attr, }; @@ -146,7 +146,7 @@ static struct attribute *generic_compat_pmu_format_attr[] 
= { NULL, }; -static struct attribute_group generic_compat_pmu_format_group = { +static const struct attribute_group generic_compat_pmu_format_group = { .name = "format", .attrs = generic_compat_pmu_format_attr, }; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1e8aa934e37e..12c1777187fc 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -204,7 +204,7 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; @@ -1148,7 +1148,7 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; @@ -1162,7 +1162,7 @@ static struct attribute *if_attrs[] = { NULL, }; -static struct attribute_group if_group = { +static const struct attribute_group if_group = { .name = "interface", .bin_attrs = if_bin_attrs, .attrs = if_attrs, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c756228a081f..5eb60ed5b5e8 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -65,12 +65,12 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; -static struct attribute_group event_group = { +static const struct attribute_group event_group = { .name = "events", .attrs = hv_gpci_event_attrs, }; @@ -126,11 +126,11 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; -static struct attribute_group interface_group = { +static const struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, }; diff --git 
a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c3..526d4b767534 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -71,7 +71,7 @@ static struct attribute *imc_format_attrs[] = { NULL, }; -static struct attribute_group imc_format_group = { +static const struct attribute_group imc_format_group = { .name = "format", .attrs = imc_format_attrs, }; @@ -90,7 +90,7 @@ static struct attribute *trace_imc_format_attrs[] = { NULL, }; -static struct attribute_group trace_imc_format_group = { +static const struct attribute_group trace_imc_format_group = { .name = "format", .attrs = trace_imc_format_attrs, }; @@ -125,7 +125,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group imc_pmu_cpumask_attr_group = { +static const struct attribute_group imc_pmu_cpumask_attr_group = { .attrs = imc_pmu_cpumask_attrs, }; @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. 
+ */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4037ea652522..a74d382ecbb7 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -37,7 +37,7 @@ static struct attribute *isa207_pmu_format_attr[] = { NULL, }; -struct attribute_group isa207_pmu_format_group = { +const struct attribute_group isa207_pmu_format_group = { .name = "format", .attrs = isa207_pmu_format_attr, }; diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 51d31b65e423..350dccb0143c 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -134,12 +134,10 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { -#ifdef CONFIG_PPC64 - if (!test_tsk_thread_flag(task, TIF_32BIT)) - return PERF_SAMPLE_REGS_ABI_64; + if (is_tsk_32bit_task(task)) + return PERF_SAMPLE_REGS_ABI_32; else -#endif - return PERF_SAMPLE_REGS_ABI_32; + return PERF_SAMPLE_REGS_ABI_64; } void perf_get_regs_user(struct perf_regs *regs_user, diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 0975ad0b42c4..d3398100a60f 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -200,12 +200,12 @@ static struct attribute *power10_events_attr[] = { NULL }; -static struct attribute_group power10_pmu_events_group_dd1 = { +static const struct attribute_group power10_pmu_events_group_dd1 = { .name = "events", .attrs = power10_events_attr_dd1, }; -static struct attribute_group power10_pmu_events_group = { +static const struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power10_pmu_format_attr[] = { NULL, }; -static struct attribute_group power10_pmu_format_group = { +static const struct attribute_group power10_pmu_format_group = { .name = 
"format", .attrs = power10_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 99b5ba314ea7..a74211410b8d 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -405,7 +405,7 @@ static struct attribute *power7_events_attr[] = { NULL }; -static struct attribute_group power7_pmu_events_group = { +static const struct attribute_group power7_pmu_events_group = { .name = "events", .attrs = power7_events_attr, }; @@ -417,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = { NULL, }; -static struct attribute_group power7_pmu_format_group = { +static const struct attribute_group power7_pmu_format_group = { .name = "format", .attrs = power7_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f21194b5604a..e37b1e714d2b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -92,7 +92,7 @@ enum { */ /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -182,7 +182,7 @@ static struct attribute *power8_events_attr[] = { NULL }; -static struct attribute_group power8_pmu_events_group = { +static const struct attribute_group power8_pmu_events_group = { .name = "events", .attrs = power8_events_attr, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 4b7c17e36100..c9eb5232e68b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -96,7 +96,7 @@ extern u64 PERF_REG_EXTENDED_MASK; #define PVR_POWER9_CUMULUS 0x00002000 /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; int p9_dd21_bl_ev[] = { PM_MRK_ST_DONE_L2, @@ -217,7 
+217,7 @@ static struct attribute *power9_events_attr[] = { NULL }; -static struct attribute_group power9_pmu_events_group = { +static const struct attribute_group power9_pmu_events_group = { .name = "events", .attrs = power9_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power9_pmu_format_attr[] = { NULL, }; -static struct attribute_group power9_pmu_format_group = { +static const struct attribute_group power9_pmu_format_group = { .name = "format", .attrs = power9_pmu_format_attr, }; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 8d6029099848..17ae75d62518 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -37,7 +37,7 @@ void __init corenet_gen_pic_init(void) unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET; - if (ppc_md.get_irq == mpic_get_coreint_irq) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE)) flags |= MPIC_ENABLE_COREINT; mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index a4127b0b161f..4c4d577effd9 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -67,4 +67,9 @@ define_machine(qemu_e500) { .get_irq = mpic_get_coreint_irq, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif }; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index b697918b727d..a6b8ffcbf01a 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -95,12 +95,6 @@ static int __init mpc86xx_hpcn_probe(void) if (of_machine_is_compatible("fsl,mpc8641hpcn")) return 1; /* Looks good */ - /* Be nice and don't give silent boot death. 
Delete this in 2.6.27 */ - if (of_machine_is_compatible("mpc86xx")) { - pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); - return 1; - } - return 0; } diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d6..04a6abf14c29 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87bc1929ee5a..e2e1fec91c6e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -107,6 +107,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" + select PPC_FSL_BOOK3E select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -295,7 +296,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - select FSL_EMB_PERFMON + imply FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4d82c92ddd52..f9a1615b74da 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return PTR_ERR(txwin); } + mutex_init(&txwin->task_ref.mmap_mutex); cp_inst->txwin = txwin; return 0; @@ -350,6 +351,124 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance NIP, and return 0. Else return error code. 
+ */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* + * If we couldn't translate the instruction, the driver should + * return success without handling the fault, it will be retried + * or the instruction fetch will fault. + */ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* + * Not a paste instruction, driver may fail the fault. + */ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4); /* Emulate the paste */ + + return 0; +} + +/* + * This fault handler is invoked when the core generates page fault on + * the paste address. Happens if the kernel closes window in hypervisor + * (on pseries) due to lost credit or the paste address is not mapped. + */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + vm_fault_t fault; + u64 paste_addr; + int ret; + + /* + * window is not opened. Shouldn't expect this error. + */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("%s(): Unexpected fault on paste address with TX window closed\n", + __func__); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* + * When the LPAR lost credits due to core removal or during + * migration, invalidate the existing mapping for the current + * paste addresses and set windows in-active (zap_page_range in + * reconfig_close_windows()). + * New mapping will be done later after migration or new credits + * available. So continue to receive faults if the user space + * issue NX request. 
+ */ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("%s(): No previous mapping with paste address\n", + __func__); + return VM_FAULT_SIGBUS; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + /* + * The window may be inactive due to lost credit (Ex: core + * removal with DLPAR). If the window is active again when + * the credit is available, map the new paste address at the + * window virtual address. + */ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + fault = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(&txwin->task_ref.mmap_mutex); + return fault; + } + } + mutex_unlock(&txwin->task_ref.mmap_mutex); + + /* + * Received this fault due to closing the actual window. + * It can happen during migration or lost credits. + * Since no mapping, return the paste instruction failure + * to the user space. + */ + ret = do_fail_paste(); + /* + * The user space can retry several times until success (needed + * for migration) or should fallback to SW compression or + * manage with the existing open windows if available. + * Looking at sysfs interface, it can determine whether these + * failures are coming during migration or core removal: + * nr_used_credits > nr_total_credits when lost credits + */ + if (!ret || (ret == -EAGAIN)) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) { struct coproc_instance *cp_inst = fp->private_data; @@ -378,10 +497,29 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EACCES; } + /* + * The initial mmap is done after the window is opened + * with ioctl. But before mmap(), this window can be closed in + * the hypervisor due to lost credit (core removal on pseries).
+ * So if the window is not active, return mmap() failure with + * -EACCES and expects the user space reissue mmap() when it + * is active again or open new window when the credit is available. + * mmap_mutex protects the paste address mmap() with DLPAR + * close/open event and allows mmap() only when the window is + * active. + */ + mutex_lock(&txwin->task_ref.mmap_mutex); + if (txwin->status != VAS_WIN_ACTIVE) { + pr_err("%s(): Window is not active\n", __func__); + rc = -EACCES; + goto out; + } + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); if (!paste_addr) { pr_err("%s(): Window paste address failed\n", __func__); - return -EINVAL; + rc = -EINVAL; + goto out; } pfn = paste_addr >> PAGE_SHIFT; @@ -398,6 +536,11 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, paste_addr, vma->vm_start, rc); + txwin->task_ref.vma = vma; + vma->vm_ops = &vas_vm_ops; + +out: + mutex_unlock(&txwin->task_ref.mmap_mutex); return rc; } diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2ece77f49bc3..abb5e527b4db 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -255,7 +255,7 @@ static struct attribute *spu_attributes[] = { NULL, }; -static struct attribute_group spu_attribute_group = { +static const struct attribute_group spu_attribute_group = { .name = "thermal", .attrs = spu_attributes, }; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 83cea9e7ee72..2eecba3345c3 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -490,7 +490,7 @@ int spu_add_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_add_dev_attr); -int spu_add_dev_attr_group(struct attribute_group *attrs) +int spu_add_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; int rc = 0; @@ -529,7 +529,7 
@@ void spu_remove_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_remove_dev_attr_group(struct attribute_group *attrs) +void spu_remove_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 369206489895..99bd027a7f7c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -340,8 +340,7 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, static void aff_set_ref_point_location(struct spu_gang *gang) { int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; + struct spu_context *tmp, *ctx; mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; lowest_offset = 0; @@ -1053,6 +1052,7 @@ void spuctx_switch_state(struct spu_context *ctx, } } +#ifdef CONFIG_PROC_FS static int show_spu_loadavg(struct seq_file *s, void *private) { int a, b, c; @@ -1074,7 +1074,8 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -}; +} +#endif int __init spu_sched_init(void) { diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 29d2036dcc9d..ba8d4e97095b 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -5,6 +5,8 @@ #include #include +#include + /* * Declaration for the various functions exported by the * pmac_* files. 
Mostly for use by pmac_setup diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 9942289f379b..a6677a111aca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0331f1973f0e..b97bc179f65a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -603,7 +603,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { }; -static struct attribute_group mpipl_group = { +static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, .bin_attrs = mpipl_bin_attr, }; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 410ed5b9de29..16c5860f1372 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -150,7 +150,7 @@ static struct attribute *initiate_attrs[] = { NULL, }; -static struct attribute_group initiate_attr_group = { +static const struct attribute_group initiate_attr_group = { .attrs = initiate_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 7e7d38b17420..18481a8c52fa 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -512,7 +512,7 @@ static struct attribute *image_op_attrs[] = { NULL /* need to NULL terminate the list of attributes */ }; -static struct attribute_group image_op_attr_group = { +static const struct attribute_group image_op_attr_group = { .attrs = image_op_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index f16a43540e30..91b36541b9e5 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ 
b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,6 @@ #include #include #include -#include #ifdef CONFIG_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a..e3d44b36ae98 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c8b50fec56bf..b637bf292047 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, default: /* not happned */ BUG(); - }; + } result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, &bus_addr, iopte_flag); @@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) break; default: BUG(); - }; + } dev->core.of_node = NULL; set_dev_node(&dev->core, 0); diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index ee60b59024b4..9764e1a2ed5c 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ - pci.o pci_dlpar.o eeh_pseries.o msi.o + pci.o pci_dlpar.o eeh_pseries.o msi.o \ + papr_platform_attributes.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC_CORE) += kexec.o 
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o @@ -29,6 +30,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o -obj-$(CONFIG_PPC_VAS) += vas.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index f162156b7b68..09c119b2f623 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -66,6 +66,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, + {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f8899d506ea4..760581c5752f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "pseries.h" diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c7940fcfc911..2119c003fcf9 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m) seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); } +/* + * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to + * read the LPAR name, and the largest output data to 4000 + 2 bytes length. + */ +#define SPLPAR_LPAR_NAME_TOKEN 55 +#define GET_SYS_PARM_BUF_SIZE 4002 +#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE +#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE" +#endif + +/* + * Read the lpar name using the RTAS ibm,get-system-parameter call. 
+ * + * The name read through this call is updated if changes are made by the end + * user on the hypervisor side. + * + * Some hypervisor (like Qemu) may not provide this value. In that case, a non + * null value is returned. + */ +static int read_rtas_lpar_name(struct seq_file *m) +{ + int rc, len, token; + union { + char raw_buffer[GET_SYS_PARM_BUF_SIZE]; + struct { + __be16 len; + char name[GET_SYS_PARM_BUF_SIZE-2]; + }; + } *local_buffer; + + token = rtas_token("ibm,get-system-parameter"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL); + if (!local_buffer) + return -ENOMEM; + + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, sizeof(*local_buffer)); + rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN, + __pa(rtas_data_buf), sizeof(*local_buffer)); + if (!rc) + memcpy(local_buffer->raw_buffer, rtas_data_buf, + sizeof(local_buffer->raw_buffer)); + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(rc)); + + if (!rc) { + /* Force end of string */ + len = min((int) be16_to_cpu(local_buffer->len), + (int) sizeof(local_buffer->name)-1); + local_buffer->name[len] = '\0'; + + seq_printf(m, "partition_name=%s\n", local_buffer->name); + } else + rc = -ENODATA; + + kfree(local_buffer); + return rc; +} + +/* + * Read the LPAR name from the Device Tree. + * + * The value read in the DT is not updated if the end-user is touching the LPAR + * name on the hypervisor side. 
+ */ +static int read_dt_lpar_name(struct seq_file *m) +{ + const char *name; + + if (of_property_read_string(of_root, "ibm,partition-name", &name)) + return -ENOENT; + + seq_printf(m, "partition_name=%s\n", name); + return 0; +} + +static void read_lpar_name(struct seq_file *m) +{ + if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) + pr_err_once("Error can't get the LPAR name"); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) if (firmware_has_feature(FW_FEATURE_SPLPAR)) { /* this call handles the ibm,get-system-parameter contents */ + read_lpar_name(m); parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 85033f392c78..78f3f74c7056 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -26,6 +26,7 @@ #include #include #include "pseries.h" +#include "vas.h" /* vas_migration_handler() */ #include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -265,7 +266,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) return rc; } -int pseries_devicetree_update(s32 scope) +static int pseries_devicetree_update(s32 scope) { char *rtas_buf; __be32 *data; @@ -669,12 +670,16 @@ static int pseries_migrate_partition(u64 handle) if (ret) return ret; + vas_migration_handler(VAS_SUSPEND); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else pseries_cancel_migration(handle, ret); + vas_migration_handler(VAS_RESUME); + return ret; } diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c new file mode 100644 index 000000000000..515150417bb3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -0,0 +1,361 @@ +// 
SPDX-License-Identifier: GPL-2.0-or-later +/* + * Platform energy and frequency attributes driver + * + * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a + * directory structure containing files in keyword - value pairs that specify + * energy and frequency configuration of the system. + * + * The format of exposing the sysfs information is as follows: + * /sys/firmware/papr/energy_scale_info/ + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * + * Copyright 2022 IBM Corp. + */ + +#include +#include + +#include "pseries.h" + +/* + * Flag attributes to fetch either all or one attribute from the HCALL + * flag = BE(0) => fetch all attributes with firstAttributeId = 0 + * flag = BE(1) => fetch a single attribute with firstAttributeId = id + */ +#define ESI_FLAGS_ALL 0 +#define ESI_FLAGS_SINGLE (1ull << 63) + +#define KOBJ_MAX_ATTRS 3 + +#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr) +#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute) +#define CURR_MAX_ESI_ATTRS 8 + +struct energy_scale_attribute { + __be64 id; + __be64 val; + u8 desc[64]; + u8 value_desc[64]; +} __packed; + +struct h_energy_scale_info_hdr { + __be64 num_attrs; + __be64 array_offset; + u8 data_header_version; +} __packed; + +struct papr_attr { + u64 id; + struct kobj_attribute kobj_attr; +}; + +struct papr_group { + struct attribute_group pg; + struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; +}; + +static struct papr_group *papr_groups; +/* /sys/firmware/papr */ +static struct kobject *papr_kobj; +/* /sys/firmware/papr/energy_scale_info */ +static struct kobject *esi_kobj; + +/* + * Energy modes can change dynamically hence making a new hcall each time the + * information needs to be retrieved + */ +static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, max_esi_attrs = 
CURR_MAX_ESI_ATTRS; + struct energy_scale_attribute *curr_esi; + struct h_energy_scale_info_hdr *hdr; + char *buf; + + buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + +retry: + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE, + id, virt_to_phys(buf), + esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); + if (temp_buf) + buf = temp_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO"); + ret = -EIO; + goto out_buf; + } + + hdr = (struct h_energy_scale_info_hdr *) buf; + curr_esi = (struct energy_scale_attribute *) + (buf + be64_to_cpu(hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs) + * sizeof(struct energy_scale_attribute))) { + ret = -EIO; + goto out_buf; + } + + *esi = *curr_esi; + +out_buf: + kfree(buf); + + return ret; +} + +/* + * Extract and export the description of the energy scale attributes + */ +static ssize_t desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.desc); +} + +/* + * Extract and export the numeric value of the energy scale attributes + */ +static ssize_t val_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + 
if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val)); +} + +/* + * Extract and export the value description in string format of the energy + * scale attributes + */ +static ssize_t val_desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.value_desc); +} + +static struct papr_ops_info { + const char *attr_name; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr, + char *buf); +} ops_info[KOBJ_MAX_ATTRS] = { + { "desc", desc_show }, + { "value", val_show }, + { "value_desc", val_desc_show }, +}; + +static void add_attr(u64 id, int index, struct papr_attr *attr) +{ + attr->id = id; + sysfs_attr_init(&attr->kobj_attr.attr); + attr->kobj_attr.attr.name = ops_info[index].attr_name; + attr->kobj_attr.attr.mode = 0444; + attr->kobj_attr.show = ops_info[index].show; +} + +static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc) +{ + int i; + + for (i = 0; i < KOBJ_MAX_ATTRS; i++) { + if (!strcmp(ops_info[i].attr_name, "value_desc") && + !show_val_desc) { + continue; + } + add_attr(id, i, &pg->pgattrs[i]); + pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr; + } + + return sysfs_create_group(esi_kobj, &pg->pg); +} + + +static int __init papr_init(void) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct h_energy_scale_info_hdr *esi_hdr; + struct energy_scale_attribute *esi_attrs; + uint64_t num_attrs; + char *esi_buf; + + if (!firmware_has_feature(FW_FEATURE_LPAR) || + !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) { + return -ENXIO; + } + + esi_buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (esi_buf == NULL) + return -ENOMEM; + /* + * hcall( + * 
uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info + * uint64 flags, // Per the flag request + * uint64 firstAttributeId, // The attribute id + * uint64 bufferAddress, // Guest physical address of the output buffer + * uint64 bufferSize); // The size in bytes of the output buffer + */ +retry: + + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0, + virt_to_phys(esi_buf), esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_esi_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL); + if (temp_esi_buf) + esi_buf = temp_esi_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret); + goto out_free_esi_buf; + } + + esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf; + num_attrs = be64_to_cpu(esi_hdr->num_attrs); + esi_attrs = (struct energy_scale_attribute *) + (esi_buf + be64_to_cpu(esi_hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(esi_hdr->array_offset) + + (num_attrs * sizeof(struct energy_scale_attribute))) { + goto out_free_esi_buf; + } + + papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL); + if (!papr_groups) + goto out_free_esi_buf; + + papr_kobj = kobject_create_and_add("papr", firmware_kobj); + if (!papr_kobj) { + pr_warn("kobject_create_and_add papr failed\n"); + goto out_papr_groups; + } + + esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj); + if (!esi_kobj) { + pr_warn("kobject_create_and_add energy_scale_info failed\n"); + goto out_kobj; + } + + /* Allocate the groups before registering */ + for (idx = 0; idx < num_attrs; idx++) { + papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1, + sizeof(*papr_groups[idx].pg.attrs), + GFP_KERNEL); + if 
(!papr_groups[idx].pg.attrs) + goto out_pgattrs; + + papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld", + be64_to_cpu(esi_attrs[idx].id)); + if (papr_groups[idx].pg.name == NULL) + goto out_pgattrs; + } + + for (idx = 0; idx < num_attrs; idx++) { + bool show_val_desc = true; + + /* Do not add the value desc attr if it does not exist */ + if (strnlen(esi_attrs[idx].value_desc, + sizeof(esi_attrs[idx].value_desc)) == 0) + show_val_desc = false; + + if (add_attr_group(be64_to_cpu(esi_attrs[idx].id), + &papr_groups[idx], + show_val_desc)) { + pr_warn("Failed to create papr attribute group %s\n", + papr_groups[idx].pg.name); + idx = num_attrs; + goto out_pgattrs; + } + } + + kfree(esi_buf); + return 0; +out_pgattrs: + for (i = 0; i < idx ; i++) { + kfree(papr_groups[i].pg.attrs); + kfree(papr_groups[i].pg.name); + } + kobject_put(esi_kobj); +out_kobj: + kobject_put(papr_kobj); +out_papr_groups: + kfree(papr_groups); +out_free_esi_buf: + kfree(esi_buf); + + return -ENOMEM; +} + +machine_device_initcall(pseries, papr_init); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f48e87ac89c9..1238b94b3cc1 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -120,6 +120,10 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* The bits which needs to be overridden */ + u64 health_bitmap_inject_mask; + }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -347,19 +351,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; long rc; /* issue the hcall */ rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); - if (rc != H_SUCCESS) { + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, 
assuming empty health bitmap"); + else { + dev_err(&p->pdev->dev, "Failed to query health information, Err:%ld\n", rc); return -ENXIO; } p->lasthealth_jiffies = jiffies; - p->health_bitmap = ret[0] & ret[1]; - + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); dev_dbg(&p->pdev->dev, "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", ret[0], ret[1]); @@ -669,6 +683,56 @@ out: return rc; } +/* Inject a smart error by updating the health bitmap inject mask */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + +
mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -702,6 +766,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { .size_out = sizeof(struct nd_papr_pdsm_health), .service = papr_pdsm_health, }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, @@ -838,6 +908,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -952,10 +1035,11 @@ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, NULL, }; -static struct attribute_group papr_nd_attribute_group = { +static const struct attribute_group papr_nd_attribute_group = { .name = "papr", .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694..4ba824568119 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't 
freed yet */ + get_device(&host_bridge->dev); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; } diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index ee343ec6ab94..3676cb297767 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -51,7 +51,7 @@ static struct attribute *g[] = { NULL, }; -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = g, }; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 56c9ef9052e9..af162aeeae86 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -21,6 +21,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); extern long pseries_machine_check_realmode(struct pt_regs *regs); +void pSeries_machine_check_log_err(void); #ifdef CONFIG_SMP extern void smp_init_pseries(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 74c9b1b5bc66..f12516c3998c 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); static int ras_check_exception_token; -static void mce_process_errlog_event(struct irq_work *work); -static struct irq_work mce_errlog_process_work = { - .func = mce_process_errlog_event, -}; - #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -60,11 +55,17 @@ struct pseries_mc_errorlog { * XX 2: Reserved. * XXX 3: Type of UE error. 
* - * For error_type != MC_ERROR_TYPE_UE + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB * XXXXXXXX * X 1: Effective address provided. * XXXXX 5: Reserved. * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. */ u8 sub_err_type; u8 reserved_1[6]; @@ -80,6 +81,7 @@ struct pseries_mc_errorlog { #define MC_ERROR_TYPE_TLB 0x04 #define MC_ERROR_TYPE_D_CACHE 0x05 #define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 /* RTAS pseries MCE error sub types */ #define MC_ERROR_UE_INDETERMINATE 0 @@ -90,6 +92,7 @@ struct pseries_mc_errorlog { #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 #define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 #define MC_ERROR_SLB_PARITY 0 #define MC_ERROR_SLB_MULTIHIT 1 @@ -103,6 +106,9 @@ struct pseries_mc_errorlog { #define MC_ERROR_TLB_MULTIHIT 2 #define MC_ERROR_TLB_INDETERMINATE 3 +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) { switch (mlog->error_type) { @@ -112,6 +118,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; default: return 0; } @@ -658,7 +666,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_ERAT: @@ -675,7 +683,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if 
(mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_TLB: @@ -692,7 +700,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_D_CACHE: @@ -701,6 +709,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, case MC_ERROR_TYPE_I_CACHE: mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; @@ -717,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); - unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -731,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) error_type = mce_log->error_type; disposition = mce_handle_err_realmode(disposition, error_type); - - /* - * Enable translation as we will be accessing per-cpu variables - * in save_mce_event() which may fall outside RMO region, also - * leave it enabled because subsequently we will be queuing work - * to workqueues where again per-cpu variables accessed, besides - * fwnmi_release_errinfo() crashes when called in realmode on - * 
pseries. - * Note: All the realmode handling like flushing SLB entries for - * SLB multihit is done by now. - */ out: - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); - - /* - * Queue irq work to log this rtas event later. - * irq_work_queue uses per-cpu variables, so do this in virt - * mode as well. - */ - irq_work_queue(&mce_errlog_process_work); - - mtmsr(msr); - return disposition; } /* * Process MCE rtas errlog event. */ -static void mce_process_errlog_event(struct irq_work *work) +void pSeries_machine_check_log_err(void) { struct rtas_error_log *err; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..069d7b3bb142 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1086,6 +1086,7 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_early = pseries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, + .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c new file mode 100644 index 000000000000..4a7fcde5afc0 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2022-23 IBM Corp. 
+ */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +#ifdef CONFIG_SYSFS +static struct kobject *pseries_vas_kobj; +static struct kobject *gzip_caps_kobj; + +struct vas_caps_entry { + struct kobject kobj; + struct vas_cop_feat_caps *caps; +}; + +#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) + +/* + * This function is used to get the notification from the drmgr when + * QoS credits are changed. Though receiving the target total QoS + * credits here, get the official QoS capabilities from the hypervisor. + */ +static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps, + const char *buf, size_t count) +{ + int err; + u16 creds; + + err = kstrtou16(buf, 0, &creds); + if (!err) + err = vas_reconfig_capabilties(caps->win_type); + + if (err) + return -EINVAL; + + return count; +} + +#define sysfs_caps_entry_read(_name) \ +static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ +{ \ + return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \ +} + +struct vas_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct vas_cop_feat_caps *, char *); + ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); +}; + +#define VAS_ATTR_RO(_name) \ + sysfs_caps_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0444, _name##_show, NULL); + +/* + * Create sysfs interface: + * /sys/devices/vas/vas0/gzip/default_capabilities + * This directory contains the following VAS GZIP capabilities + * for the default credit type. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits + * Total number of default credits assigned to the LPAR which + * can be changed with DLPAR operation. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits + * Number of credits used by the user space. One credit will + * be assigned for each window open.
+ * + * /sys/devices/vas/vas0/gzip/qos_capabilities + * This directory contains the following VAS GZIP capabilities + * for the Quality of Service (QoS) credit type. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits + * Total number of QoS credits assigned to the LPAR. The user + * has to define this value using HMC interface. It can be + * changed dynamically by the user. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits + * Number of credits used by the user space. + * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits + * Update total QoS credits dynamically + */ + +VAS_ATTR_RO(nr_total_credits); +VAS_ATTR_RO(nr_used_credits); + +static struct vas_sysfs_entry update_total_credits_attribute = + __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); + +static struct attribute *vas_def_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + NULL, +}; + +static struct attribute *vas_qos_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + &update_total_credits_attribute.attr, + NULL, +}; + +static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + + if (!entry->show) + return -EIO; + + return entry->show(caps, buf); +} + +static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + if (!entry->store) + return -EIO; + + return entry->store(caps, buf, count); +} + +static void vas_type_release(struct kobject *kobj) +{ + 
struct vas_caps_entry *centry = to_caps_entry(kobj); + kfree(centry); +} + +static const struct sysfs_ops vas_sysfs_ops = { + .show = vas_type_show, + .store = vas_type_store, +}; + +static struct kobj_type vas_def_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_def_capab_attrs, +}; + +static struct kobj_type vas_qos_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_qos_capab_attrs, +}; + +static char *vas_caps_kobj_name(struct vas_caps_entry *centry, + struct kobject **kobj) +{ + struct vas_cop_feat_caps *caps = centry->caps; + + if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) { + kobject_init(&centry->kobj, &vas_qos_attr_type); + *kobj = gzip_caps_kobj; + return "qos_capabilities"; + } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) { + kobject_init(&centry->kobj, &vas_def_attr_type); + *kobj = gzip_caps_kobj; + return "default_capabilities"; + } else + return "Unknown"; +} + +/* + * Add feature specific capability dir entry. + * Ex: VDefGzip or VQosGzip + */ +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + struct vas_caps_entry *centry; + struct kobject *kobj = NULL; + int ret = 0; + char *name; + + centry = kzalloc(sizeof(*centry), GFP_KERNEL); + if (!centry) + return -ENOMEM; + + centry->caps = caps; + name = vas_caps_kobj_name(centry, &kobj); + + if (kobj) { + ret = kobject_add(&centry->kobj, kobj, "%s", name); + + if (ret) { + pr_err("VAS: sysfs kobject add / event failed %d\n", + ret); + kobject_put(&centry->kobj); + } + } + + return ret; +} + +static struct miscdevice vas_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vas", +}; + +/* + * Add VAS and VasCaps (overall capabilities) dir entries.
+ */ +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + int ret; + + ret = misc_register(&vas_miscdev); + if (ret < 0) { + pr_err("%s: register vas misc device failed\n", __func__); + return ret; + } + + /* + * The hypervisor does not expose multiple VAS instances, but can + * see multiple VAS instances on PowerNV. So create 'vas0' directory + * on pseries. + */ + pseries_vas_kobj = kobject_create_and_add("vas0", + &vas_miscdev.this_device->kobj); + if (!pseries_vas_kobj) { + pr_err("Failed to create VAS sysfs entry\n"); + return -ENOMEM; + } + + if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) || + (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) { + gzip_caps_kobj = kobject_create_and_add("gzip", + pseries_vas_kobj); + if (!gzip_caps_kobj) { + pr_err("Failed to create VAS GZIP capability entry\n"); + kobject_put(pseries_vas_kobj); + return -ENOMEM; + } + } + + return 0; +} + +#else +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + return 0; +} + +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + return 0; +} +#endif diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index d243ddc58827..1f59d78c77a1 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -26,9 +26,11 @@ static struct vas_all_caps caps_all; static bool copypaste_feat; +static struct hv_vas_cop_feat_caps hv_cop_caps; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); +static bool migration_in_progress; static long hcall_return_busy_check(long rc) { @@ -107,7 +109,6 @@ static int h_deallocate_vas_window(u64 winid) static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; - u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in Linux VAS implementation. 
@@ -115,7 +116,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, - win->vas_win.winid, lpid, 0, + win->vas_win.winid, win->pid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); @@ -124,8 +125,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win) if (rc == H_SUCCESS) return 0; - pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", - rc, win->vas_win.winid, lpid); + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", + rc, win->vas_win.winid, win->pid); return -EIO; } @@ -310,8 +311,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, cop_feat_caps = &caps->caps; - if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > - atomic_read(&cop_feat_caps->target_lpar_creds)) { + if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > + atomic_read(&cop_feat_caps->nr_total_credits)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; @@ -338,6 +339,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, } } + txwin->pid = mfspr(SPRN_PID); + /* * Allocate / Deallocate window hcalls and setup / free IRQs * have to be protected with mutex. @@ -354,7 +357,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. 
*/ mutex_lock(&vas_pseries_mutex); - rc = allocate_setup_window(txwin, (u64 *)&domain[0], + if (migration_in_progress) + rc = -EBUSY; + else + rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); mutex_unlock(&vas_pseries_mutex); if (rc) @@ -369,13 +375,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, if (rc) goto out_free; - vas_user_win_add_mm_context(&txwin->vas_win.task_ref); txwin->win_type = cop_feat_caps->win_type; mutex_lock(&vas_pseries_mutex); - list_add(&txwin->win_list, &caps->list); + /* + * Possible to lose the acquired credit with DLPAR core + * removal after the window is opened. So if there are any + * closed windows (means with lost credits), do not give new + * window to user space. New windows will be opened only + * after the existing windows are reopened when credits are + * available. + */ + if (!caps->nr_close_wins) { + list_add(&txwin->win_list, &caps->list); + caps->nr_open_windows++; + mutex_unlock(&vas_pseries_mutex); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + return &txwin->vas_win; + } mutex_unlock(&vas_pseries_mutex); - return &txwin->vas_win; + put_vas_user_win_ref(&txwin->vas_win.task_ref); + rc = -EBUSY; + pr_err("No credit is available to allocate window\n"); out_free: /* @@ -385,7 +406,7 @@ out_free: free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: - atomic_dec(&cop_feat_caps->used_lpar_creds); + atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); return ERR_PTR(rc); } @@ -438,14 +459,25 @@ static int vas_deallocate_window(struct vas_window *vwin) caps = &vascaps[win->win_type].caps; mutex_lock(&vas_pseries_mutex); - rc = deallocate_free_window(win); - if (rc) { - mutex_unlock(&vas_pseries_mutex); - return rc; - } + /* + * VAS window is already closed in the hypervisor when + * lost the credit or with migration. So just remove the entry + * from the list, remove task references and free vas_window + * struct. 
+ */ + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + } else + vascaps[win->win_type].nr_close_wins--; list_del(&win->win_list); - atomic_dec(&caps->used_lpar_creds); + atomic_dec(&caps->nr_used_credits); + vascaps[win->win_type].nr_open_windows--; mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -500,6 +532,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(&vcaps->list); + vcaps->feat = feat; caps = &vcaps->caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, @@ -521,7 +554,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); - atomic_set(&caps->target_lpar_creds, + atomic_set(&caps->nr_total_credits, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); @@ -533,16 +566,409 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } } + rc = sysfs_add_vas_caps(caps); + if (rc) + return rc; + copypaste_feat = true; return 0; } +/* + * VAS windows can be closed due to lost credits when the core is + * removed. So reopen them if credits are available due to DLPAR + * core add and set the window active status. When NX sees the page + * fault on the unmapped paste address, the kernel handles the fault + * by setting the remapping to new paste address if the window is + * active. 
+ */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds, + bool migrate) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = &vcaps->caps; + struct pseries_vas_window *win = NULL, *tmp; + int rc, mv_ents = 0; + int flag; + + /* + * Nothing to do if there are no closed windows. + */ + if (!vcaps->nr_close_wins) + return 0; + + /* + * For the core removal, the hypervisor reduces the credits + * assigned to the LPAR and the kernel closes VAS windows + * in the hypervisor depends on reduced credits. The kernel + * uses LIFO (the last windows that are opened will be closed + * first) and expects to open in the same order when credits + * are available. + * For example, 40 windows are closed when the LPAR lost 2 cores + * (dedicated). If 1 core is added, this LPAR can have 20 more + * credits. It means the kernel can reopen 20 windows. So move + * 20 entries in the VAS windows lost and reopen next 20 windows. + * For partition migration, reopen all windows that are closed + * during resume. + */ + if ((vcaps->nr_close_wins > creds) && !migrate) + mv_ents = vcaps->nr_close_wins - creds; + + list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { + if (!mv_ents) + break; + + mv_ents--; + } + + /* + * Open windows if they are closed only with migration or + * DLPAR (lost credit) before. + */ + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { + /* + * This window is closed with DLPAR and migration events. + * So reopen the window with the last event. + * The user space is not suspended with the current + * migration notifier. So the user space can issue DLPAR + * CPU hotplug while migration in progress. In this case + * this window will be opened with the last event. 
+ */ + if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + win->vas_win.status &= ~flag; + continue; + } + + /* + * Nothing to do on this window if it is not closed + * with this flag + */ + if (!(win->vas_win.status & flag)) + continue; + + rc = allocate_setup_window(win, (u64 *)&domain[0], + caps->win_type); + if (rc) + return rc; + + rc = h_modify_vas_window(win); + if (rc) + goto out; + + mutex_lock(&win->vas_win.task_ref.mmap_mutex); + /* + * Set window status to active + */ + win->vas_win.status &= ~flag; + mutex_unlock(&win->vas_win.task_ref.mmap_mutex); + win->win_type = caps->win_type; + if (!--vcaps->nr_close_wins) + break; + } + + return 0; +out: + /* + * Window modify HCALL failed. So close the window to the + * hypervisor and return. + */ + free_irq_setup(win); + h_deallocate_vas_window(win->vas_win.winid); + return rc; +} + +/* + * The hypervisor reduces the available credits if the LPAR lost core. It + * means the excessive windows should not be active and the user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excessive windows and unmap the paste address + * such that the user space receives paste instruction failure. Then up to + * the user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, + bool migrate) +{ + struct pseries_vas_window *win, *tmp; + struct vas_user_win_ref *task_ref; + struct vm_area_struct *vma; + int rc = 0, flag; + + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { + /* + * This window is already closed due to lost credit + * or for migration before. Go for next window. 
+ * For migration, nothing to do since this window + * closed for DLPAR and will be reopened even on + * the destination system with other DLPAR operation. + */ + if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || + (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { + win->vas_win.status |= flag; + continue; + } + + task_ref = &win->vas_win.task_ref; + mutex_lock(&task_ref->mmap_mutex); + vma = task_ref->vma; + /* + * Number of available credits are reduced, So select + * and close windows. + */ + win->vas_win.status |= flag; + + mmap_write_lock(task_ref->mm); + /* + * vma is set in the original mapping. But this mapping + * is done with mmap() after the window is opened with ioctl. + * so we may not see the original mapping if the core remove + * is done before the original mmap() and after the ioctl. + */ + if (vma) + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + + mmap_write_unlock(task_ref->mm); + mutex_unlock(&task_ref->mmap_mutex); + /* + * Close VAS window in the hypervisor, but do not + * free vas_window struct since it may be reused + * when the credit is available later (DLPAR with + * adding cores). This struct will be used + * later when the process issued with close(FD). + */ + rc = deallocate_free_window(win); + /* + * This failure is from the hypervisor. + * No way to stop migration for these failures. + * So ignore error and continue closing other windows. + */ + if (rc && !migrate) + return rc; + + vcap->nr_close_wins++; + + /* + * For migration, do not depend on lpar_creds in case if + * mismatch with the hypervisor value (should not happen). + * So close all active windows in the list and will be + * reopened windows based on the new lpar_creds on the + * destination system during resume. + */ + if (!migrate && !--excess_creds) + break; + } + + return 0; +} + +/* + * Get new VAS capabilities when the core add/removal configuration + * changes. 
Reconfig window configurations based on the credits + * availability from this new capabilities. + */ +int vas_reconfig_capabilties(u8 type) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds; + struct vas_caps *vcaps; + int rc = 0, nr_active_wins; + + if (type >= VAS_MAX_FEAT_TYPE) { + pr_err("Invalid credit type %d\n", type); + return -EINVAL; + } + + vcaps = &vascaps[type]; + caps = &vcaps->caps; + + mutex_lock(&vas_pseries_mutex); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (rc) + goto out; + + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + + old_nr_creds = atomic_read(&caps->nr_total_credits); + + atomic_set(&caps->nr_total_credits, new_nr_creds); + /* + * The total number of available credits may be decreased or + * increased with DLPAR operation. Means some windows have to be + * closed / reopened. Hold the vas_pseries_mutex so that + * the user space can not open new windows. + */ + if (old_nr_creds < new_nr_creds) { + /* + * If the existing target credits is less than the new + * target, reopen windows if they are closed due to + * the previous DLPAR (core removal). + */ + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, + false); + } else { + /* + * # active windows is more than new LPAR available + * credits. So close the excessive windows. + * On pseries, each window will have 1 credit. + */ + nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; + if (nr_active_wins > new_nr_creds) + rc = reconfig_close_windows(vcaps, + nr_active_wins - new_nr_creds, + false); + } + +out: + mutex_unlock(&vas_pseries_mutex); + return rc; +} +/* + * Total number of default credits available (target_credits) + * in LPAR depends on number of cores configured. It varies based on + * whether processors are in shared mode or dedicated mode.
+ * Get the notifier when CPU configuration is changed with DLPAR + * operation so that get the new target_credits (vas default capabilities) + * and then update the existing windows usage if needed. + */ +static int pseries_vas_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *dn = rd->dn; + const __be32 *intserv = NULL; + int len, rc = 0; + + if ((action == OF_RECONFIG_ATTACH_NODE) || + (action == OF_RECONFIG_DETACH_NODE)) + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + /* + * Processor config is not changed + */ + if (!intserv) + return NOTIFY_OK; + + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE); + if (rc) + pr_err("Failed reconfig VAS capabilities with DLPAR\n"); + + return rc; +} + +static struct notifier_block pseries_vas_nb = { + .notifier_call = pseries_vas_notifier, +}; + +/* + * For LPM, all windows have to be closed on the source partition + * before migration and reopen them on the destination partition + * after migration. So closing windows during suspend and + * reopen them during resume. + */ +int vas_migration_handler(int action) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds = 0; + struct vas_caps *vcaps; + int i, rc = 0; + + /* + * NX-GZIP is not enabled. Nothing to do for migration. + */ + if (!copypaste_feat) + return rc; + + mutex_lock(&vas_pseries_mutex); + + if (action == VAS_SUSPEND) + migration_in_progress = true; + else + migration_in_progress = false; + + for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { + vcaps = &vascaps[i]; + caps = &vcaps->caps; + old_nr_creds = atomic_read(&caps->nr_total_credits); + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + /* + * Should not happen. 
But in case it does, print messages, close + * all windows in the list during suspend and reopen + * windows based on new lpar_creds on the destination + * system. + */ + if (old_nr_creds != new_nr_creds) { + pr_err("Target credits mismatch with the hypervisor\n"); + pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", + action, old_nr_creds, new_nr_creds); + pr_err("Used creds: %d, Active creds: %d\n", + atomic_read(&caps->nr_used_credits), + vcaps->nr_open_windows - vcaps->nr_close_wins); + } + } else { + pr_err("state(%d): Get VAS capabilities failed with %d\n", + action, rc); + /* + * We can not stop migration with the current lpm + * implementation. So continue closing all windows in + * the list (during suspend) and return without + * opening windows (during resume) if VAS capabilities + * HCALL failed. + */ + if (action == VAS_RESUME) + goto out; + } + + switch (action) { + case VAS_SUSPEND: + rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, + true); + break; + case VAS_RESUME: + atomic_set(&caps->nr_total_credits, new_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds, true); + break; + default: + /* should not happen */ + pr_err("Invalid migration action %d\n", action); + rc = -EINVAL; + goto out; + } + + /* + * Ignore errors during suspend and return for resume.
+ */ + if (rc && (action == VAS_RESUME)) + goto out; + } + +out: + mutex_unlock(&vas_pseries_mutex); + return rc; +} + static int __init pseries_vas_init(void) { - struct hv_vas_cop_feat_caps *hv_cop_caps; struct hv_vas_all_caps *hv_caps; - int rc; + int rc = 0; /* * Linux supports user space COPY/PASTE only with Radix @@ -566,35 +992,39 @@ static int __init pseries_vas_init(void) caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); - hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); - if (!hv_cop_caps) { - rc = -ENOMEM; - goto out; - } + sysfs_pseries_vas_init(&caps_all); + /* * QOS capabilities available */ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, - VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); if (rc) - goto out_cop; + goto out; } /* * Default capabilities available */ - if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, - VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); - if (rc) - goto out_cop; + VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); + + if (!rc && copypaste_feat) { + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); + + pr_info("GZIP feature is available\n"); + } else { + /* + * Should not happen, but only when get default + * capabilities HCALL failed. So disable copy paste + * feature. 
+ */ + copypaste_feat = false; } - pr_info("GZIP feature is available\n"); - -out_cop: - kfree(hv_cop_caps); out: kfree(hv_caps); return rc; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 4ecb3fcabd10..34177881e998 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -30,6 +30,14 @@ #define VAS_COPY_PASTE_USER_MODE 0x00000001 #define VAS_COP_OP_USER_MODE 0x00000010 +#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 +#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 + +enum vas_migrate_action { + VAS_SUSPEND, + VAS_RESUME, +}; + /* * Co-processor feature - GZIP QoS windows or GZIP default windows */ @@ -72,9 +80,8 @@ struct vas_cop_feat_caps { }; /* Total LPAR available credits. Can be different from max LPAR */ /* credits due to DLPAR operation */ - atomic_t target_lpar_creds; - atomic_t used_lpar_creds; /* Used credits so far */ - u16 avail_lpar_creds; /* Remaining available credits */ + atomic_t nr_total_credits; /* Total credits assigned to LPAR */ + atomic_t nr_used_credits; /* Used credits so far */ }; /* @@ -84,6 +91,9 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ + int nr_open_windows; /* Number of successful open windows */ + u8 feat; /* Feature type */ }; /* @@ -115,6 +125,7 @@ struct pseries_vas_window { u64 domain[6]; /* Associativity domain Ids */ /* this window is allocated */ u64 util; + u32 pid; /* PID associated with this window */ /* List of windows opened which is used for LPM */ struct list_head win_list; @@ -122,4 +133,17 @@ struct pseries_vas_window { char *name; int fault_virq; }; + +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int vas_reconfig_capabilties(u8 type); +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); + +#ifdef CONFIG_PPC_VAS +int 
vas_migration_handler(int action); +#else +static inline int vas_migration_handler(int action) +{ + return 0; +} +#endif #endif /* _VAS_H */ diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7..39186ad6b3c3 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, &gtms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(&gtm->lock); } - if (gtm) + if (!list_empty(&gtms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index d5cb48b61bbd..dbcbaa4c0663 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1404,10 +1404,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, * with device trees generated by older versions of QEMU. * fsl_version will be zero if MPIC_FSL is not set. 
*/ - if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) { - WARN_ON(ppc_md.get_irq != mpic_get_coreint_irq); + if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) ppc_md.get_irq = mpic_get_irq; - } /* Reset */ diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d..bb5bda6b2357 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); @@ -1791,7 +1791,7 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private) if (xive_ops->debug_show) xive_ops->debug_show(m, private); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) xive_debug_show_ipi(m, cpu); return 0; } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 928f95004501..29456c255f9f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -67,6 +67,17 @@ static int __init xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + kfree(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -803,7 +814,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; - int i; + int i, err; if (xive_spapr_disabled()) return false; @@ -828,23 +839,26 @@ bool __init xive_spapr_init(void) } if 
(!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { @@ -855,10 +869,16 @@ bool __init xive_spapr_init(void) /* Initialize XIVE core with our backend */ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + goto err_mem_free; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); + return false; } machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 014e00e74d2b..63792af00417 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -39,6 +39,7 @@ $objdump -R "$vmlinux" | # R_PPC_NONE grep -F -w -v 'R_PPC64_RELATIVE R_PPC64_NONE +R_PPC64_UADDR64 R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA @@ -54,9 +55,3 @@ fi num_bad=$(echo "$bad_relocs" | wc -l) echo "WARNING: $num_bad bad relocations" echo "$bad_relocs" - -# If we see this type of relocation it's an idication that -# we /may/ be using an old version of binutils. 
-if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then - echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" -fi diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0804b9a11934..ea8ec8a960bd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -102,6 +102,7 @@ config RISCV select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HAVE_RSEQ select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES @@ -152,7 +153,7 @@ config PAGE_OFFSET hex default 0xC0000000 if 32BIT default 0x80000000 if 64BIT && !MMU - default 0xffffaf8000000000 if 64BIT + default 0xff60000000000000 if 64BIT config KASAN_SHADOW_OFFSET hex @@ -200,7 +201,7 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int - default 4 if 64BIT + default 5 if 64BIT default 2 config LOCKDEP_SUPPORT @@ -331,19 +332,6 @@ config RISCV_ISA_C If you don't know what to do here, say Y. -menu "supported PMU type" - depends on PERF_EVENTS - -config RISCV_BASE_PMU - bool "Base Performance Monitoring Unit" - def_bool y - help - A base PMU that serves as a reference implementation and has limited - feature of perf. It can run on any RISC-V machines so serves as the - fallback, but this option can also be disable to reduce kernel size. 
- -endmenu - config FPU bool "FPU support" default y diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi new file mode 100644 index 000000000000..854320e17b28 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020-2021 Microchip Technology Inc */ + +/ { + core_pwm0: pwm@41000000 { + compatible = "microchip,corepwm-rtl-v4"; + reg = <0x0 0x41000000 0x0 0xF0>; + microchip,sync-update-mask = /bits/ 32 <0>; + #pwm-cells = <2>; + clocks = <&clkcfg CLK_FIC3>; + status = "disabled"; + }; + + i2c2: i2c@44000000 { + compatible = "microchip,corei2c-rtl-v7"; + reg = <0x0 0x44000000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clkcfg CLK_FIC3>; + interrupt-parent = <&plic>; + interrupts = <122>; + clock-frequency = <100000>; + status = "disabled"; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index 0c748ae1b006..cd2fe80fa81a 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ +/* Copyright (c) 2020-2021 Microchip Technology Inc */ /dts-v1/; @@ -13,25 +13,34 @@ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs"; aliases { - ethernet0 = &emac1; - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; + ethernet0 = &mac1; + serial0 = &mmuart0; + serial1 = &mmuart1; + serial2 = &mmuart2; + serial3 = &mmuart3; + serial4 = &mmuart4; }; chosen { - stdout-path = "serial0:115200n8"; + stdout-path = "serial1:115200n8"; }; cpus { timebase-frequency = ; }; - memory@80000000 { + ddrc_cache_lo: memory@80000000 { device_type = "memory"; - reg = <0x0 
0x80000000 0x0 0x40000000>; - clocks = <&clkcfg 26>; + reg = <0x0 0x80000000 0x0 0x2e000000>; + clocks = <&clkcfg CLK_DDRC>; + status = "okay"; + }; + + ddrc_cache_hi: memory@1000000000 { + device_type = "memory"; + reg = <0x10 0x0 0x0 0x40000000>; + clocks = <&clkcfg CLK_DDRC>; + status = "okay"; }; }; @@ -39,19 +48,19 @@ clock-frequency = <600000000>; }; -&serial0 { +&mmuart1 { status = "okay"; }; -&serial1 { +&mmuart2 { status = "okay"; }; -&serial2 { +&mmuart3 { status = "okay"; }; -&serial3 { +&mmuart4 { status = "okay"; }; @@ -61,28 +70,92 @@ bus-width = <4>; disable-wp; cap-sd-highspeed; + cap-mmc-highspeed; card-detect-delay = <200>; + mmc-ddr-1_8v; + mmc-hs200-1_8v; sd-uhs-sdr12; sd-uhs-sdr25; sd-uhs-sdr50; sd-uhs-sdr104; }; -&emac0 { - phy-mode = "sgmii"; - phy-handle = <&phy0>; - phy0: ethernet-phy@8 { - reg = <8>; - ti,fifo-depth = <0x01>; - }; +&spi0 { + status = "okay"; }; -&emac1 { +&spi1 { + status = "okay"; +}; + +&qspi { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + +&i2c1 { + status = "okay"; +}; + +&i2c2 { + status = "okay"; +}; + +&mac0 { + phy-mode = "sgmii"; + phy-handle = <&phy0>; +}; + +&mac1 { status = "okay"; phy-mode = "sgmii"; phy-handle = <&phy1>; phy1: ethernet-phy@9 { reg = <9>; - ti,fifo-depth = <0x01>; + ti,fifo-depth = <0x1>; + }; + phy0: ethernet-phy@8 { + reg = <8>; + ti,fifo-depth = <0x1>; }; }; + +&gpio2 { + interrupts = <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>, + <53>, <53>, <53>, <53>; + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&usb { + status = "okay"; + dr_mode = "host"; +}; + +&mbox { + status = "okay"; +}; + +&syscontroller { + status = "okay"; +}; + +&pcie { + status = "okay"; +}; + +&core_pwm0 { + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 
869aaf0d5c06..c5c9d1360de0 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright (c) 2020 Microchip Technology Inc */ +/* Copyright (c) 2020-2021 Microchip Technology Inc */ /dts-v1/; +#include "dt-bindings/clock/microchip,mpfs-clock.h" +#include "microchip-mpfs-fabric.dtsi" / { #address-cells = <2>; @@ -13,8 +15,7 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { - clock-frequency = <0>; + cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; device_type = "cpu"; i-cache-block-size = <64>; @@ -22,6 +23,7 @@ i-cache-size = <16384>; reg = <0>; riscv,isa = "rv64imac"; + clocks = <&clkcfg CLK_CPU>; status = "disabled"; cpu0_intc: interrupt-controller { @@ -31,8 +33,7 @@ }; }; - cpu@1 { - clock-frequency = <0>; + cpu1: cpu@1 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -48,6 +49,7 @@ mmu-type = "riscv,sv39"; reg = <1>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -58,8 +60,7 @@ }; }; - cpu@2 { - clock-frequency = <0>; + cpu2: cpu@2 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -75,6 +76,7 @@ mmu-type = "riscv,sv39"; reg = <2>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -85,8 +87,7 @@ }; }; - cpu@3 { - clock-frequency = <0>; + cpu3: cpu@3 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -102,6 +103,7 @@ mmu-type = "riscv,sv39"; reg = <3>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; @@ -112,8 +114,7 @@ }; }; - cpu@4 { - clock-frequency = <0>; + cpu4: cpu@4 { compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -129,6 +130,7 @@ mmu-type = "riscv,sv39"; 
reg = <4>; riscv,isa = "rv64imafdc"; + clocks = <&clkcfg CLK_CPU>; tlb-split; status = "okay"; cpu4_intc: interrupt-controller { @@ -150,8 +152,9 @@ compatible = "simple-bus"; ranges; - cache-controller@2010000 { + cctrllr: cache-controller@2010000 { compatible = "sifive,fu540-c000-ccache", "cache"; + reg = <0x0 0x2010000 0x0 0x1000>; cache-block-size = <64>; cache-level = <2>; cache-sets = <1024>; @@ -159,10 +162,9 @@ cache-unified; interrupt-parent = <&plic>; interrupts = <1>, <2>, <3>; - reg = <0x0 0x2010000 0x0 0x1000>; }; - clint@2000000 { + clint: clint@2000000 { compatible = "sifive,fu540-c000-clint", "sifive,clint0"; reg = <0x0 0x2000000 0x0 0xC000>; interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, @@ -186,15 +188,6 @@ riscv,ndev = <186>; }; - dma@3000000 { - compatible = "sifive,fu540-c000-pdma"; - reg = <0x0 0x3000000 0x0 0x8000>; - interrupt-parent = <&plic>; - interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>, - <30>; - #dma-cells = <1>; - }; - clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; reg = <0x0 0x20002000 0x0 0x1000>; @@ -202,7 +195,7 @@ #clock-cells = <1>; }; - serial0: serial@20000000 { + mmuart0: serial@20000000 { compatible = "ns16550a"; reg = <0x0 0x20000000 0x0 0x400>; reg-io-width = <4>; @@ -210,11 +203,11 @@ interrupt-parent = <&plic>; interrupts = <90>; current-speed = <115200>; - clocks = <&clkcfg 8>; - status = "disabled"; + clocks = <&clkcfg CLK_MMUART0>; + status = "disabled"; /* Reserved for the HSS */ }; - serial1: serial@20100000 { + mmuart1: serial@20100000 { compatible = "ns16550a"; reg = <0x0 0x20100000 0x0 0x400>; reg-io-width = <4>; @@ -222,11 +215,11 @@ interrupt-parent = <&plic>; interrupts = <91>; current-speed = <115200>; - clocks = <&clkcfg 9>; + clocks = <&clkcfg CLK_MMUART1>; status = "disabled"; }; - serial2: serial@20102000 { + mmuart2: serial@20102000 { compatible = "ns16550a"; reg = <0x0 0x20102000 0x0 0x400>; reg-io-width = <4>; @@ -234,11 +227,11 @@ interrupt-parent = <&plic>; interrupts 
= <92>; current-speed = <115200>; - clocks = <&clkcfg 10>; + clocks = <&clkcfg CLK_MMUART2>; status = "disabled"; }; - serial3: serial@20104000 { + mmuart3: serial@20104000 { compatible = "ns16550a"; reg = <0x0 0x20104000 0x0 0x400>; reg-io-width = <4>; @@ -246,7 +239,19 @@ interrupt-parent = <&plic>; interrupts = <93>; current-speed = <115200>; - clocks = <&clkcfg 11>; + clocks = <&clkcfg CLK_MMUART3>; + status = "disabled"; + }; + + mmuart4: serial@20106000 { + compatible = "ns16550a"; + reg = <0x0 0x20106000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <94>; + clocks = <&clkcfg CLK_MMUART4>; + current-speed = <115200>; status = "disabled"; }; @@ -255,37 +260,196 @@ compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc"; reg = <0x0 0x20008000 0x0 0x1000>; interrupt-parent = <&plic>; - interrupts = <88>, <89>; - clocks = <&clkcfg 6>; + interrupts = <88>; + clocks = <&clkcfg CLK_MMC>; max-frequency = <200000000>; status = "disabled"; }; - emac0: ethernet@20110000 { + spi0: spi@20108000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20108000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <54>; + clocks = <&clkcfg CLK_SPI0>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + spi1: spi@20109000 { + compatible = "microchip,mpfs-spi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x20109000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <55>; + clocks = <&clkcfg CLK_SPI1>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + qspi: spi@21000000 { + compatible = "microchip,mpfs-qspi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x21000000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <85>; + clocks = <&clkcfg CLK_QSPI>; + spi-max-frequency = <25000000>; + status = "disabled"; + }; + + i2c0: i2c@2010a000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 
0x2010a000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <58>; + clocks = <&clkcfg CLK_I2C0>; + clock-frequency = <100000>; + status = "disabled"; + }; + + i2c1: i2c@2010b000 { + compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7"; + reg = <0x0 0x2010b000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <61>; + clocks = <&clkcfg CLK_I2C1>; + clock-frequency = <100000>; + status = "disabled"; + }; + + mac0: ethernet@20110000 { compatible = "cdns,macb"; reg = <0x0 0x20110000 0x0 0x2000>; - interrupt-parent = <&plic>; - interrupts = <64>, <65>, <66>, <67>; - local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 4>, <&clkcfg 2>; - clock-names = "pclk", "hclk"; - status = "disabled"; #address-cells = <1>; #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <64>, <65>, <66>, <67>, <68>, <69>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>; + clock-names = "pclk", "hclk"; + status = "disabled"; }; - emac1: ethernet@20112000 { + mac1: ethernet@20112000 { compatible = "cdns,macb"; reg = <0x0 0x20112000 0x0 0x2000>; - interrupt-parent = <&plic>; - interrupts = <70>, <71>, <72>, <73>; - local-mac-address = [00 00 00 00 00 00]; - clocks = <&clkcfg 5>, <&clkcfg 2>; - status = "disabled"; - clock-names = "pclk", "hclk"; #address-cells = <1>; #size-cells = <0>; + interrupt-parent = <&plic>; + interrupts = <70>, <71>, <72>, <73>, <74>, <75>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; + clock-names = "pclk", "hclk"; + status = "disabled"; }; + gpio0: gpio@20120000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20120000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO0>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio1: gpio@20121000 { + 
compatible = "microchip,mpfs-gpio"; + reg = <000 0x20121000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO1>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + gpio2: gpio@20122000 { + compatible = "microchip,mpfs-gpio"; + reg = <0x0 0x20122000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupt-controller; + #interrupt-cells = <1>; + clocks = <&clkcfg CLK_GPIO2>; + gpio-controller; + #gpio-cells = <2>; + status = "disabled"; + }; + + rtc: rtc@20124000 { + compatible = "microchip,mpfs-rtc"; + reg = <0x0 0x20124000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <80>, <81>; + clocks = <&clkcfg CLK_RTC>; + clock-names = "rtc"; + status = "disabled"; + }; + + usb: usb@20201000 { + compatible = "microchip,mpfs-musb"; + reg = <0x0 0x20201000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <86>, <87>; + clocks = <&clkcfg CLK_USB>; + interrupt-names = "dma","mc"; + status = "disabled"; + }; + + pcie: pcie@2000000000 { + compatible = "microchip,pcie-host-1.0"; + #address-cells = <0x3>; + #interrupt-cells = <0x1>; + #size-cells = <0x2>; + device_type = "pci"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; + reg-names = "cfg", "apb"; + bus-range = <0x0 0x7f>; + interrupt-parent = <&plic>; + interrupts = <119>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + interrupt-map-mask = <0 0 0 7>; + clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>; + clock-names = "fic0", "fic1", "fic3"; + ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; + msi-parent = <&pcie>; + msi-controller; + microchip,axi-m-atr0 = <0x10 0x0>; + status = "disabled"; + pcie_intc: legacy-interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; + + mbox: mailbox@37020000 { + compatible = "microchip,mpfs-mailbox"; + reg = 
<0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>; + interrupt-parent = <&plic>; + interrupts = <96>; + #mbox-cells = <1>; + status = "disabled"; + }; + + syscontroller: syscontroller { + compatible = "microchip,mpfs-sys-controller"; + mboxes = <&mbox 0>; + }; }; }; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index f120fcc43d0a..7cd10ded7bf8 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -108,6 +108,7 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 8b56a7f1eb06..e0e5c7c09ab8 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -100,6 +100,7 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index ae711692eec9..e935f27b10fd 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -47,6 +47,7 @@ #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) #define SATP_MODE_39 _AC(0x8000000000000000, UL) #define SATP_MODE_48 _AC(0x9000000000000000, UL) +#define SATP_MODE_57 _AC(0xa000000000000000, UL) #define SATP_ASID_BITS 16 #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) @@ -65,6 +66,7 @@ #define IRQ_S_EXT 9 #define IRQ_VS_EXT 10 #define IRQ_M_EXT 11 +#define IRQ_PMU_OVF 13 /* Exception causes */ #define EXC_INST_MISALIGNED 0 @@ -150,9 +152,69 @@ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 #define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define 
CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f #define CSR_CYCLEH 0xc80 #define CSR_TIMEH 0xc81 #define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f + +#define CSR_SSCOUNTOVF 0xda0 #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 @@ -240,7 +302,10 @@ # define RV_IRQ_SOFT IRQ_S_SOFT # define RV_IRQ_TIMER IRQ_S_TIMER # define RV_IRQ_EXT IRQ_S_EXT -#endif /* CONFIG_RISCV_M_MODE */ +# define RV_IRQ_PMU IRQ_PMU_OVF +# define SIP_LCOFIP (_AC(0x1, UL) << 
IRQ_PMU_OVF) + +#endif /* !CONFIG_RISCV_M_MODE */ /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */ #define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 58a718573ad6..3cfece8b6568 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -25,6 +25,7 @@ enum fixed_addresses { FIX_PTE, FIX_PMD, FIX_PUD, + FIX_P4D, FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 5ce50468aff1..0734e42f74f2 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -34,7 +34,33 @@ extern unsigned long elf_hwcap; #define RISCV_ISA_EXT_s ('s' - 'a') #define RISCV_ISA_EXT_u ('u' - 'a') +/* + * Increase this to a higher value as the kernel supports more ISA extensions. + */ #define RISCV_ISA_EXT_MAX 64 +#define RISCV_ISA_EXT_NAME_LEN_MAX 32 + +/* The base ID for multi-letter ISA extensions */ +#define RISCV_ISA_EXT_BASE 26 + +/* + * This enum represents the logical ID for each multi-letter RISC-V ISA extension. + * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed + * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter + * extensions while all the multi-letter extensions should define the next + * available logical extension id. 
+ */ +enum riscv_isa_ext_id { + RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE, + RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, +}; + +struct riscv_isa_ext_data { + /* Name of the extension displayed to userspace via /proc/cpuinfo */ + char uprop[RISCV_ISA_EXT_NAME_LEN_MAX]; + /* The logical ISA extension ID */ + unsigned int isa_ext_id; +}; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 004372f8da54..1526e410e802 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -41,6 +41,7 @@ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so * define the PAGE_OFFSET value for SV39. */ +#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 062efd3a1d5d..d42c901f9a97 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -9,77 +9,5 @@ #define _ASM_RISCV_PERF_EVENT_H #include -#include -#include - -#ifdef CONFIG_RISCV_BASE_PMU -#define RISCV_BASE_COUNTERS 2 - -/* - * The RISCV_MAX_COUNTERS parameter should be specified. - */ - -#define RISCV_MAX_COUNTERS 2 - -/* - * These are the indexes of bits in counteren register *minus* 1, - * except for cycle. It would be coherent if it can directly mapped - * to counteren bit definition, but there is a *time* register at - * counteren[1]. Per-cpu structure is scarce resource here. - * - * According to the spec, an implementation can support counter up to - * mhpmcounter31, but many high-end processors has at most 6 general - * PMCs, we give the definition to MHPMCOUNTER8 here. 
- */ -#define RISCV_PMU_CYCLE 0 -#define RISCV_PMU_INSTRET 1 -#define RISCV_PMU_MHPMCOUNTER3 2 -#define RISCV_PMU_MHPMCOUNTER4 3 -#define RISCV_PMU_MHPMCOUNTER5 4 -#define RISCV_PMU_MHPMCOUNTER6 5 -#define RISCV_PMU_MHPMCOUNTER7 6 -#define RISCV_PMU_MHPMCOUNTER8 7 - -#define RISCV_OP_UNSUPP (-EOPNOTSUPP) - -struct cpu_hw_events { - /* # currently enabled events*/ - int n_events; - /* currently enabled events */ - struct perf_event *events[RISCV_MAX_COUNTERS]; - /* vendor-defined PMU data */ - void *platform; -}; - -struct riscv_pmu { - struct pmu *pmu; - - /* generic hw/cache events table */ - const int *hw_events; - const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - /* method used to map hw/cache events */ - int (*map_hw_event)(u64 config); - int (*map_cache_event)(u64 config); - - /* max generic hw events in map */ - int max_events; - /* number total counters, 2(base) + x(general) */ - int num_counters; - /* the width of the counter */ - int counter_width; - - /* vendor-defined PMU features */ - void *platform; - - irqreturn_t (*handle_irq)(int irq_num, void *dev); - int irq; -}; - -#endif -#ifdef CONFIG_PERF_EVENTS #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs -#endif - #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 11823004b87a..947f23d7b6af 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, } } +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); + + set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, + p4d_t *p4d) +{ + if (pgtable_l5_enabled) { + unsigned long pfn = virt_to_pfn(p4d); 
+ + set_pgd_safe(pgd, + __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + #define pud_alloc_one pud_alloc_one static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) } #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) + +#define p4d_alloc_one p4d_alloc_one +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + if (pgtable_l5_enabled) { + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (p4d_t *)get_zeroed_page(gfp); + } + + return NULL; +} + +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + free_page((unsigned long)p4d); +} + +#define p4d_free p4d_free +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (pgtable_l5_enabled) + __p4d_free(mm, p4d); +} + +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index bbbdd66e5e2f..7e246e9f8d70 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -9,16 +9,24 @@ #include extern bool pgtable_l4_enabled; +extern bool pgtable_l5_enabled; #define PGDIR_SHIFT_L3 30 #define PGDIR_SHIFT_L4 39 +#define PGDIR_SHIFT_L5 48 #define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) -#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3) +#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \ + (pgtable_l4_enabled ? 
PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)) /* Size of region mapped by a page global directory */ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +/* p4d is folded into pgd in case of 4-level page table */ +#define P4D_SHIFT 39 +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + /* pud is folded into pgd in case of 3-level page table */ #define PUD_SHIFT 30 #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) @@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled; #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) +/* Page 4th Directory entry */ +typedef struct { + unsigned long p4d; +} p4d_t; + +#define p4d_val(x) ((x).p4d) +#define __p4d(x) ((p4d_t) { (x) }) +#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t)) + /* Page Upper Directory entry */ typedef struct { unsigned long pud; @@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud) return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); } +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + if (pgtable_l5_enabled) + return false; + + return true; +} + #define mm_pud_folded mm_pud_folded static inline bool mm_pud_folded(struct mm_struct *mm) { @@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e)) + static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { if (pgtable_l4_enabled) @@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d) set_p4d(p4d, __p4d(0)); } +static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) +{ + return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); +} + +static inline unsigned long _p4d_pfn(p4d_t p4d) +{ + return p4d_val(p4d) >> _PAGE_PFN_SHIFT; +} + static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) @@ -173,6 +212,7 @@ static inline pud_t 
*p4d_pgtable(p4d_t p4d) return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } +#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d)) static inline struct page *p4d_page(p4d_t p4d) { @@ -190,4 +230,68 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) return (pud_t *)p4d; } +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (pgtable_l5_enabled) + *pgdp = pgd; + else + set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); +} + +static inline int pgd_none(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (pgd_val(pgd) == 0); + + return 0; +} + +static inline int pgd_present(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (pgd_val(pgd) & _PAGE_PRESENT); + + return 1; +} + +static inline int pgd_bad(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return !pgd_present(pgd); + + return 0; +} + +static inline void pgd_clear(pgd_t *pgd) +{ + if (pgtable_l5_enabled) + set_pgd(pgd, __pgd(0)); +} + +static inline p4d_t *pgd_pgtable(pgd_t pgd) +{ + if (pgtable_l5_enabled) + return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + + return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); +} +#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd)) + +static inline struct page *pgd_page(pgd_t pgd) +{ + return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); +} +#define pgd_page(pgd) pgd_page(pgd) + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +#define p4d_offset p4d_offset +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + if (pgtable_l5_enabled) + return pgd_pgtable(*pgd) + p4d_index(address); + + return (p4d_t *)pgd; +} + #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e3549e50de95..046b44225623 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -63,7 +63,8 @@ * position vmemmap directly below the VMALLOC region. */ #ifdef CONFIG_64BIT -#define VA_BITS (pgtable_l4_enabled ? 
48 : 39) +#define VA_BITS (pgtable_l5_enabled ? \ + 57 : (pgtable_l4_enabled ? 48 : 39)) #else #define VA_BITS 32 #endif @@ -103,7 +104,6 @@ #ifndef __ASSEMBLY__ -#include #include #include #include @@ -134,6 +134,8 @@ struct pt_alloc_ops { phys_addr_t (*alloc_pmd)(uintptr_t va); pud_t *(*get_pud_virt)(phys_addr_t pa); phys_addr_t (*alloc_pud)(uintptr_t va); + p4d_t *(*get_p4d_virt)(phys_addr_t pa); + phys_addr_t (*alloc_p4d)(uintptr_t va); #endif }; diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 06133b4f8e20..9e3c2cf1edaf 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -29,6 +29,7 @@ enum sbi_ext_id { SBI_EXT_RFENCE = 0x52464E43, SBI_EXT_HSM = 0x48534D, SBI_EXT_SRST = 0x53525354, + SBI_EXT_PMU = 0x504D55, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -112,6 +113,98 @@ enum sbi_srst_reset_reason { SBI_SRST_RESET_REASON_SYS_FAILURE, }; +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, +}; + +#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0) +#define RISCV_PMU_RAW_EVENT_IDX 0x20000 + +/** General pmu event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, +}; + +/** + * Special "firmware" events provided by the firmware, even if the hardware + * does not support performance events. These events are encoded as a raw + * event type in Linux kernel perf framework. 
+ */ +enum sbi_pmu_fw_generic_events_t { + SBI_PMU_FW_MISALIGNED_LOAD = 0, + SBI_PMU_FW_MISALIGNED_STORE = 1, + SBI_PMU_FW_ACCESS_LOAD = 2, + SBI_PMU_FW_ACCESS_STORE = 3, + SBI_PMU_FW_ILLEGAL_INSN = 4, + SBI_PMU_FW_SET_TIMER = 5, + SBI_PMU_FW_IPI_SENT = 6, + SBI_PMU_FW_IPI_RECVD = 7, + SBI_PMU_FW_FENCE_I_SENT = 8, + SBI_PMU_FW_FENCE_I_RECVD = 9, + SBI_PMU_FW_SFENCE_VMA_SENT = 10, + SBI_PMU_FW_SFENCE_VMA_RCVD = 11, + SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, + SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13, + + SBI_PMU_FW_HFENCE_GVMA_SENT = 14, + SBI_PMU_FW_HFENCE_GVMA_RCVD = 15, + SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16, + SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17, + + SBI_PMU_FW_HFENCE_VVMA_SENT = 18, + SBI_PMU_FW_HFENCE_VVMA_RCVD = 19, + SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20, + SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21, + SBI_PMU_FW_MAX, +}; + +/* SBI PMU event types */ +enum sbi_pmu_event_type { + SBI_PMU_EVENT_TYPE_HW = 0x0, + SBI_PMU_EVENT_TYPE_CACHE = 0x1, + SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_FW = 0xf, +}; + +/* SBI PMU counter types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) +#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) +#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3) +#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4) +#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) +#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) +#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET (1 << 0) + #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f @@ -125,6 +218,8 @@ enum sbi_srst_reset_reason { #define SBI_ERR_DENIED -4 #define SBI_ERR_INVALID_ADDRESS -5 #define SBI_ERR_ALREADY_AVAILABLE
-6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 extern unsigned long sbi_spec_version; struct sbiret { diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ffc87e76b1dd..e0133d113216 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -53,7 +53,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o -obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index ad0a7e9f828b..d2a936195295 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -63,12 +64,73 @@ int riscv_of_parent_hartid(struct device_node *node) } #ifdef CONFIG_PROC_FS +#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ + { \ + .uprop = #UPROP, \ + .isa_ext_id = EXTID, \ + } +/** + * Here are the ordering rules of extension naming defined by the RISC-V + * specification: + * 1. All extensions should be separated from other multi-letter extensions + * by an underscore. + * 2. The first letter following the 'Z' conventionally indicates the most + * closely related alphabetical extension category, IMAFDQLCBKJTPVH. + * If multiple 'Z' extensions are named, they should be ordered first + * by category, then alphabetically within a category. + * 3. Standard supervisor-level extensions (starts with 'S') should be + * listed after standard unprivileged extensions. If multiple + * supervisor-level extensions are listed, they should be ordered + * alphabetically. + * 4. Non-standard extensions (starts with 'X') must be listed after all + * standard extensions. They must be separated from other multi-letter + * extensions by an underscore.
+ */ +static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), +}; + +static void print_isa_ext(struct seq_file *f) +{ + struct riscv_isa_ext_data *edata; + int i = 0, arr_sz; + + arr_sz = ARRAY_SIZE(isa_ext_arr) - 1; + + /* No extension support available */ + if (arr_sz <= 0) + return; + + for (i = 0; i <= arr_sz; i++) { + edata = &isa_ext_arr[i]; + if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id)) + continue; + seq_printf(f, "_%s", edata->uprop); + } +} + +/** + * These are the only valid base (single letter) ISA extensions as per the spec. + * The array also gives the canonical order in which they appear in the spec. + * Some of the extensions may just be placeholders for now (B, K, P, J). + * This should be updated once the corresponding extensions are ratified. + */ +static const char base_riscv_exts[13] = "imafdqcbkjpvh"; static void print_isa(struct seq_file *f, const char *isa) { - /* Print the entire ISA as it is */ + int i; + seq_puts(f, "isa\t\t: "); - seq_write(f, isa, strlen(isa)); + /* Print the rv[64/32] part */ + seq_write(f, isa, 4); + for (i = 0; i < sizeof(base_riscv_exts); i++) { + if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a')) + /* Print only the enabled base ISA extensions */ + seq_write(f, &base_riscv_exts[i], 1); + } + print_isa_ext(f); seq_puts(f, "\n"); } @@ -79,7 +141,9 @@ static void print_mmu(struct seq_file *f) #if defined(CONFIG_32BIT) strncpy(sv_type, "sv32", 5); #elif defined(CONFIG_64BIT) - if (pgtable_l4_enabled) + if (pgtable_l5_enabled) + strncpy(sv_type, "sv57", 5); + else if (pgtable_l4_enabled) strncpy(sv_type, "sv48", 5); else strncpy(sv_type, "sv39", 5); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index d959d207a40d..1b2d42d7f589 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -7,12 +7,15 @@ */ #include +#include #include #include
#include #include #include +#define NUM_ALPHA_EXTS ('z' - 'a' + 1) + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -63,8 +66,8 @@ void __init riscv_fill_hwcap(void) { struct device_node *node; const char *isa; - char print_str[BITS_PER_LONG + 1]; - size_t i, j, isa_len; + char print_str[NUM_ALPHA_EXTS + 1]; + int i, j; static unsigned long isa2hwcap[256] = {0}; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; @@ -80,7 +83,8 @@ void __init riscv_fill_hwcap(void) for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; - unsigned long this_isa = 0; + DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); + const char *temp; if (riscv_of_processor_hartid(node) < 0) continue; @@ -90,23 +94,106 @@ void __init riscv_fill_hwcap(void) continue; } - i = 0; - isa_len = strlen(isa); + temp = isa; #if IS_ENABLED(CONFIG_32BIT) if (!strncmp(isa, "rv32", 4)) - i += 4; + isa += 4; #elif IS_ENABLED(CONFIG_64BIT) if (!strncmp(isa, "rv64", 4)) - i += 4; + isa += 4; #endif - for (; i < isa_len; ++i) { - this_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; - /* - * TODO: X, Y and Z extension parsing for Host ISA - * bitmap will be added in-future. - */ - if ('a' <= isa[i] && isa[i] < 'x') - this_isa |= (1UL << (isa[i] - 'a')); + /* The riscv,isa DT property must start with rv64 or rv32 */ + if (temp == isa) + continue; + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); + for (; *isa; ++isa) { + const char *ext = isa++; + const char *ext_end = isa; + bool ext_long = false, ext_err = false; + + switch (*ext) { + case 's': + /** + * Workaround for invalid single-letter 's' & 'u'(QEMU). + * No need to set the bit in riscv_isa as 's' & 'u' are + * not valid ISA extensions. It works until multi-letter + * extension starting with "Su" appears. 
+ */ + if (ext[-1] != '_' && ext[1] == 'u') { + ++isa; + ext_err = true; + break; + } + fallthrough; + case 'x': + case 'z': + ext_long = true; + /* Multi-letter extension must be delimited */ + for (; *isa && *isa != '_'; ++isa) + if (unlikely(!islower(*isa) + && !isdigit(*isa))) + ext_err = true; + /* Parse backwards */ + ext_end = isa; + if (unlikely(ext_err)) + break; + if (!isdigit(ext_end[-1])) + break; + /* Skip the minor version */ + while (isdigit(*--ext_end)) + ; + if (ext_end[0] != 'p' + || !isdigit(ext_end[-1])) { + /* Advance it to offset the pre-decrement */ + ++ext_end; + break; + } + /* Skip the major version */ + while (isdigit(*--ext_end)) + ; + ++ext_end; + break; + default: + if (unlikely(!islower(*ext))) { + ext_err = true; + break; + } + /* Find next extension */ + if (!isdigit(*isa)) + break; + /* Skip the minor version */ + while (isdigit(*++isa)) + ; + if (*isa != 'p') + break; + if (!isdigit(*++isa)) { + --isa; + break; + } + /* Skip the major version */ + while (isdigit(*++isa)) + ; + break; + } + if (*isa != '_') + --isa; + +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !memcmp(ext, name, sizeof(name) - 1)) \ + set_bit(bit, this_isa); \ + } while (false) \ + + if (unlikely(ext_err)) + continue; + if (!ext_long) { + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + set_bit(*ext - 'a', this_isa); + } else { + SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); + } +#undef SET_ISA_EXT_MAP } /* @@ -119,10 +206,11 @@ void __init riscv_fill_hwcap(void) else elf_hwcap = this_hwcap; - if (riscv_isa[0]) - riscv_isa[0] &= this_isa; + if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX)) + bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else - riscv_isa[0] = this_isa; + bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + } /* We don't support systems with F but without D, so mask those out @@ -133,13 +221,13 @@ void __init riscv_fill_hwcap(void) } memset(print_str, 0, 
sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) print_str[j++] = (char)('a' + i); - pr_info("riscv: ISA extensions %s\n", print_str); + pr_info("riscv: base ISA extensions %s\n", print_str); memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (elf_hwcap & BIT_MASK(i)) print_str[j++] = (char)('a' + i); pr_info("riscv: ELF capabilities %s\n", print_str); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d6a46ed0bf05..c8b9ce274b9a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -225,6 +225,10 @@ ret_from_syscall: * (If it was configured with SECCOMP_RET_ERRNO/TRACE) */ ret_from_syscall_rejected: +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + call rseq_syscall +#endif /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c deleted file mode 100644 index c835f0362d94..000000000000 --- a/arch/riscv/kernel/perf_event.c +++ /dev/null @@ -1,485 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * Copyright 2014 Tilera Corporation. All Rights Reserved. - * Copyright (C) 2018 Andes Technology Corporation - * - * Perf_events support for RISC-V platforms. - * - * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough - * functionality for perf event to fully work, this file provides - * the very basic framework only. 
- * - * For platform portings, please check Documentations/riscv/pmu.txt. - * - * The Copyright line includes x86 and tile ones. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct riscv_pmu *riscv_pmu __read_mostly; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* - * Hardware & cache maps and their methods - */ - -static const int riscv_hw_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE, - [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET, - [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] -[PERF_COUNT_HW_CACHE_OP_MAX] -[PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(DTLB)] = 
{ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, -}; - -static int riscv_map_hw_event(u64 config) -{ - if (config >= riscv_pmu->max_events) - return -EINVAL; - - return riscv_pmu->hw_events[config]; -} - -static int riscv_map_cache_decode(u64 config, unsigned int *type, - unsigned int *op, unsigned int *result) -{ - return -ENOENT; -} - -static int riscv_map_cache_event(u64 config) -{ - unsigned int type, op, result; - int err = -ENOENT; - int code; - - err = riscv_map_cache_decode(config, &type, &op, &result); - if (!riscv_pmu->cache_events || err) - return err; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - code = (*riscv_pmu->cache_events)[type][op][result]; - if (code == RISCV_OP_UNSUPP) - return -EINVAL; - - return code; -} - -/* - * Low-level functions: reading/writing counters - */ - -static inline u64 read_counter(int idx) -{ - u64 val = 0; - - switch (idx) { - case RISCV_PMU_CYCLE: - val = csr_read(CSR_CYCLE); - break; - case RISCV_PMU_INSTRET: 
- val = csr_read(CSR_INSTRET); - break; - default: - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); - return -EINVAL; - } - - return val; -} - -static inline void write_counter(int idx, u64 value) -{ - /* currently not supported */ - WARN_ON_ONCE(1); -} - -/* - * pmu->read: read and update the counter - * - * Other architectures' implementation often have a xxx_perf_event_update - * routine, which can return counter values when called in the IRQ, but - * return void when being called by the pmu->read method. - */ -static void riscv_pmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 prev_raw_count, new_raw_count; - u64 oldval; - int idx = hwc->idx; - u64 delta; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_counter(idx); - - oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count); - } while (oldval != prev_raw_count); - - /* - * delta is the value to update the counter we maintain in the kernel. - */ - delta = (new_raw_count - prev_raw_count) & - ((1ULL << riscv_pmu->counter_width) - 1); - local64_add(delta, &event->count); - /* - * Something like local64_sub(delta, &hwc->period_left) here is - * needed if there is an interrupt for perf. - */ -} - -/* - * State transition functions: - * - * stop()/start() & add()/del() - */ - -/* - * pmu->stop: stop the counter - */ -static void riscv_pmu_stop(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - riscv_pmu->pmu->read(event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -/* - * pmu->start: start the event. 
- */ -static void riscv_pmu_start(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - if (flags & PERF_EF_RELOAD) { - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - - /* - * Set the counter to the period to the next interrupt here, - * if you have any. - */ - } - - hwc->state = 0; - perf_event_update_userpage(event); - - /* - * Since we cannot write to counters, this serves as an initialization - * to the delta-mechanism in pmu->read(); otherwise, the delta would be - * wrong when pmu->read is called for the first time. - */ - local64_set(&hwc->prev_count, read_counter(hwc->idx)); -} - -/* - * pmu->add: add the event to PMU. - */ -static int riscv_pmu_add(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (cpuc->n_events == riscv_pmu->num_counters) - return -ENOSPC; - - /* - * We don't have general conunters, so no binding-event-to-counter - * process here. - * - * Indexing using hwc->config generally not works, since config may - * contain extra information, but here the only info we have in - * hwc->config is the event index. - */ - hwc->idx = hwc->config; - cpuc->events[hwc->idx] = event; - cpuc->n_events++; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (flags & PERF_EF_START) - riscv_pmu->pmu->start(event, PERF_EF_RELOAD); - - return 0; -} - -/* - * pmu->del: delete the event from PMU. - */ -static void riscv_pmu_del(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - cpuc->events[hwc->idx] = NULL; - cpuc->n_events--; - riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); - perf_event_update_userpage(event); -} - -/* - * Interrupt: a skeletion for reference. 
- */ - -static DEFINE_MUTEX(pmc_reserve_mutex); - -static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) -{ - return IRQ_NONE; -} - -static int reserve_pmc_hardware(void) -{ - int err = 0; - - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) { - err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq, - IRQF_PERCPU, "riscv-base-perf", NULL); - } - mutex_unlock(&pmc_reserve_mutex); - - return err; -} - -static void release_pmc_hardware(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0) - free_irq(riscv_pmu->irq, NULL); - mutex_unlock(&pmc_reserve_mutex); -} - -/* - * Event Initialization/Finalization - */ - -static atomic_t riscv_active_events = ATOMIC_INIT(0); - -static void riscv_event_destroy(struct perf_event *event) -{ - if (atomic_dec_return(&riscv_active_events) == 0) - release_pmc_hardware(); -} - -static int riscv_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - int err; - int code; - - if (atomic_inc_return(&riscv_active_events) == 1) { - err = reserve_pmc_hardware(); - - if (err) { - pr_warn("PMC hardware not available\n"); - atomic_dec(&riscv_active_events); - return -EBUSY; - } - } - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - code = riscv_pmu->map_hw_event(attr->config); - break; - case PERF_TYPE_HW_CACHE: - code = riscv_pmu->map_cache_event(attr->config); - break; - case PERF_TYPE_RAW: - return -EOPNOTSUPP; - default: - return -ENOENT; - } - - event->destroy = riscv_event_destroy; - if (code < 0) { - event->destroy(event); - return code; - } - - /* - * idx is set to -1 because the index of a general event should not be - * decided until binding to some counter in pmu->add(). - * - * But since we don't have such support, later in pmu->add(), we just - * use hwc->config as the index instead. 
- */ - hwc->config = code; - hwc->idx = -1; - - return 0; -} - -/* - * Initialization - */ - -static struct pmu min_pmu = { - .name = "riscv-base", - .event_init = riscv_event_init, - .add = riscv_pmu_add, - .del = riscv_pmu_del, - .start = riscv_pmu_start, - .stop = riscv_pmu_stop, - .read = riscv_pmu_read, -}; - -static const struct riscv_pmu riscv_base_pmu = { - .pmu = &min_pmu, - .max_events = ARRAY_SIZE(riscv_hw_event_map), - .map_hw_event = riscv_map_hw_event, - .hw_events = riscv_hw_event_map, - .map_cache_event = riscv_map_cache_event, - .cache_events = &riscv_cache_event_map, - .counter_width = 63, - .num_counters = RISCV_BASE_COUNTERS + 0, - .handle_irq = &riscv_base_pmu_handle_irq, - - /* This means this PMU has no IRQ. */ - .irq = -1, -}; - -static const struct of_device_id riscv_pmu_of_ids[] = { - {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu}, - { /* sentinel value */ } -}; - -static int __init init_hw_perf_events(void) -{ - struct device_node *node = of_find_node_by_type(NULL, "pmu"); - const struct of_device_id *of_id; - - riscv_pmu = &riscv_base_pmu; - - if (node) { - of_id = of_match_node(riscv_pmu_of_ids, node); - - if (of_id) - riscv_pmu = of_id->data; - of_node_put(node); - } - - perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW); - return 0; -} -arch_initcall(init_hw_perf_events); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..16da3c3b53a1 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) } } + rseq_signal_deliver(ksig, regs); + /* Set up the stack frame */ ret = setup_rt_frame(ksig, oldset, regs); diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 07d1d2152ba5..e0609e1f0864 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -1,64 +1,316 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 
Copyright (C) 2022 Michael T. Kloos + */ #include #include -ENTRY(__memmove) -WEAK(memmove) - move t0, a0 - move t1, a1 +SYM_FUNC_START(__memmove) +SYM_FUNC_START_WEAK(memmove) + /* + * Returns + * a0 - dest + * + * Parameters + * a0 - Inclusive first byte of dest + * a1 - Inclusive first byte of src + * a2 - Length of copy n + * + * Because the return matches the parameter register a0, + * we will not clobber or modify that register. + * + * Note: This currently only works on little-endian. + * To port to big-endian, reverse the direction of shifts + * in the 2 misaligned fixup copy loops. + */ - beq a0, a1, exit_memcpy - beqz a2, exit_memcpy - srli t2, a2, 0x2 + /* Return if nothing to do */ + beq a0, a1, return_from_memmove + beqz a2, return_from_memmove - slt t3, a0, a1 - beqz t3, do_reverse + /* + * Register Uses + * Forward Copy: a1 - Index counter of src + * Reverse Copy: a4 - Index counter of src + * Forward Copy: t3 - Index counter of dest + * Reverse Copy: t4 - Index counter of dest + * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest + * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest + * Both Copy Modes: t0 - Link / Temporary for load-store + * Both Copy Modes: t1 - Temporary for load-store + * Both Copy Modes: t2 - Temporary for load-store + * Both Copy Modes: a5 - dest to src alignment offset + * Both Copy Modes: a6 - Shift amount + * Both Copy Modes: a7 - Inverse Shift amount + * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops + */ - andi a2, a2, 0x3 - li t4, 1 - beqz t2, byte_copy + /* + * Solve for some register values now. + * Byte copy does not need t5 or t6. + */ + mv t3, a0 + add t4, a0, a2 + add a4, a1, a2 -word_copy: - lw t3, 0(a1) - addi t2, t2, -1 - addi a1, a1, 4 - sw t3, 0(a0) - addi a0, a0, 4 - bnez t2, word_copy - beqz a2, exit_memcpy - j byte_copy + /* + * Byte copy if copying less than (2 * SZREG) bytes.
This can + * cause problems with the bulk copy implementation and is + * small enough not to bother. + */ + andi t0, a2, -(2 * SZREG) + beqz t0, byte_copy -do_reverse: - add a0, a0, a2 - add a1, a1, a2 - andi a2, a2, 0x3 - li t4, -1 - beqz t2, reverse_byte_copy + /* + * Now solve for t5 and t6. + */ + andi t5, t3, -SZREG + andi t6, t4, -SZREG + /* + * If dest(Register t3) rounded down to the nearest naturally + * aligned SZREG address, does not equal dest, then add SZREG + * to find the low-bound of SZREG alignment in the dest memory + * region. Note that this could overshoot the dest memory + * region if n is less than SZREG. This is one reason why + * we always byte copy if n is less than SZREG. + * Otherwise, dest is already naturally aligned to SZREG. + */ + beq t5, t3, 1f + addi t5, t5, SZREG + 1: -reverse_word_copy: - addi a1, a1, -4 - addi t2, t2, -1 - lw t3, 0(a1) - addi a0, a0, -4 - sw t3, 0(a0) - bnez t2, reverse_word_copy - beqz a2, exit_memcpy + /* + * If the dest and src are co-aligned to SZREG, then there is + * no need for the full rigmarole of a full misaligned fixup copy. + * Instead, do a simpler co-aligned copy. + */ + xor t0, a0, a1 + andi t1, t0, (SZREG - 1) + beqz t1, coaligned_copy + /* Fall through to misaligned fixup copy */ -reverse_byte_copy: - addi a0, a0, -1 - addi a1, a1, -1 +misaligned_fixup_copy: + bltu a1, a0, misaligned_fixup_copy_reverse +misaligned_fixup_copy_forward: + jal t0, byte_copy_until_aligned_forward + + andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a1, t3 /* Find the difference between src and dest */ + andi a1, a1, -SZREG /* Align the src pointer */ + addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. 
+ */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Forward + * load_val0 = load_ptr[0]; + * do { + * load_val1 = load_ptr[1]; + * store_ptr += 2; + * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val0 = load_ptr[2]; + * load_ptr += 2; + * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t0, (0 * SZREG)(a1) + 1: + REG_L t1, (1 * SZREG)(a1) + addi t3, t3, (2 * SZREG) + srl t0, t0, a6 + sll t2, t1, a7 + or t2, t0, t2 + REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) + + beq t3, a2, 2f + + REG_L t0, (2 * SZREG)(a1) + addi a1, a1, (2 * SZREG) + srl t1, t1, a6 + sll t2, t0, a7 + or t2, t1, t2 + REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) + + bne t3, t6, 1b + 2: + mv t3, t6 /* Fix the dest pointer in case the loop was broken */ + + add a1, t3, a5 /* Restore the src pointer */ + j byte_copy_forward /* Copy any remaining bytes */ + +misaligned_fixup_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a4, t4 /* Find the difference between src and dest */ + andi a4, a4, -SZREG /* Align the src pointer */ + addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. 
+ */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Reverse + * load_val1 = load_ptr[0]; + * do { + * load_val0 = load_ptr[-1]; + * store_ptr -= 2; + * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val1 = load_ptr[-2]; + * load_ptr -= 2; + * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t1, ( 0 * SZREG)(a4) + 1: + REG_L t0, (-1 * SZREG)(a4) + addi t4, t4, (-2 * SZREG) + sll t1, t1, a7 + srl t2, t0, a6 + or t2, t1, t2 + REG_S t2, ( 1 * SZREG)(t4) + + beq t4, a2, 2f + + REG_L t1, (-2 * SZREG)(a4) + addi a4, a4, (-2 * SZREG) + sll t0, t0, a7 + srl t2, t1, a6 + or t2, t0, t2 + REG_S t2, ( 0 * SZREG)(t4) + + bne t4, t5, 1b + 2: + mv t4, t5 /* Fix the dest pointer in case the loop was broken */ + + add a4, t4, a5 /* Restore the src pointer */ + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * Simple copy loops for SZREG co-aligned memory locations. + * These also make calls to do byte copies for any unaligned + * data at their terminations. + */ +coaligned_copy: + bltu a1, a0, coaligned_copy_reverse + +coaligned_copy_forward: + jal t0, byte_copy_until_aligned_forward + + 1: + REG_L t1, ( 0 * SZREG)(a1) + addi a1, a1, SZREG + addi t3, t3, SZREG + REG_S t1, (-1 * SZREG)(t3) + bne t3, t6, 1b + + j byte_copy_forward /* Copy any remaining bytes */ + +coaligned_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + 1: + REG_L t1, (-1 * SZREG)(a4) + addi a4, a4, -SZREG + addi t4, t4, -SZREG + REG_S t1, ( 0 * SZREG)(t4) + bne t4, t5, 1b + + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * These are basically sub-functions within the function. They + * are used to byte copy until the dest pointer is in alignment. + * At which point, a bulk copy method can be used by the + * calling code. These work on the same registers as the bulk + * copy loops. 
Therefore, the register values can be picked + * up from where they were left and we avoid code duplication + * without any overhead except the call in and return jumps. + */ +byte_copy_until_aligned_forward: + beq t3, t5, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t5, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +byte_copy_until_aligned_reverse: + beq t4, t6, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t6, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +/* + * Simple byte copy loops. + * These will byte copy until they reach the end of data to copy. + * At that point, they will call to return from memmove. + */ byte_copy: - lb t3, 0(a1) - addi a2, a2, -1 - sb t3, 0(a0) - add a1, a1, t4 - add a0, a0, t4 - bnez a2, byte_copy + bltu a1, a0, byte_copy_reverse -exit_memcpy: - move a0, t0 - move a1, t1 - ret -END(__memmove) +byte_copy_forward: + beq t3, t4, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t4, 1b + 2: + ret + +byte_copy_reverse: + beq t4, t3, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t3, 1b + 2: + +return_from_memmove: + ret + +SYM_FUNC_END(memmove) +SYM_FUNC_END(__memmove) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f6275b317129..9535bea8688c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -38,14 +38,16 @@ EXPORT_SYMBOL(kernel_map); #endif #ifdef CONFIG_64BIT -u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39; +u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? 
SATP_MODE_57 : SATP_MODE_39; #else -u64 satp_mode = SATP_MODE_32; +u64 satp_mode __ro_after_init = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); +EXPORT_SYMBOL(pgtable_l5_enabled); phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); @@ -227,6 +229,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); @@ -318,6 +321,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) #endif /* CONFIG_XIP_KERNEL */ +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d)) +#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d)) +#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d)) +#endif /* CONFIG_XIP_KERNEL */ + static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); @@ -432,6 +445,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va) return __pa(vaddr); } +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa) +{ + return (p4d_t *)((uintptr_t)pa); +} + +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_P4D); + 
return (p4d_t *)set_fixmap_offset(FIX_P4D, pa); +} + +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) +{ + return (p4d_t *)__va(pa); +} + +static phys_addr_t __init alloc_p4d_early(uintptr_t va) +{ + /* Only one P4D is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_p4d; +} + +static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_p4d_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + static void __init create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -459,21 +510,55 @@ static void __init create_pud_mapping(pud_t *pudp, create_pmd_mapping(nextp, va, pa, sz, prot); } -#define pgd_next_t pud_t -#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \ - pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)) -#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \ - pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa)) +static void __init create_p4d_mapping(p4d_t *p4dp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pud_t *nextp; + phys_addr_t next_phys; + uintptr_t p4d_index = p4d_index(va); + + if (sz == P4D_SIZE) { + if (p4d_val(p4dp[p4d_index]) == 0) + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot); + return; + } + + if (p4d_val(p4dp[p4d_index]) == 0) { + next_phys = pt_ops.alloc_pud(va); + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pud_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); + nextp = pt_ops.get_pud_virt(next_phys); + } + + create_pud_mapping(nextp, va, pa, sz, prot); +} + +#define pgd_next_t p4d_t +#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \ + pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? 
\ + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))) +#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \ + pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \ + pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa))) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + (pgtable_l5_enabled ? \ + create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \ (pgtable_l4_enabled ? \ - create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \ - create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)) -#define fixmap_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd) -#define trampoline_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd) -#define early_dtb_pgd_next (pgtable_l4_enabled ? \ - (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd) + create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \ + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))) +#define fixmap_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)) +#define trampoline_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +#define early_dtb_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? 
\ + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) @@ -482,6 +567,7 @@ static void __init create_pud_mapping(pud_t *pudp, create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next ((uintptr_t)fixmap_pte) #define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) #endif /* __PAGETABLE_PMD_FOLDED */ @@ -575,6 +661,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif /* CONFIG_STRICT_KERNEL_RWX */ #ifdef CONFIG_64BIT +static void __init disable_pgtable_l5(void) +{ + pgtable_l5_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L4; + satp_mode = SATP_MODE_48; +} + static void __init disable_pgtable_l4(void) { pgtable_l4_enabled = false; @@ -591,12 +684,12 @@ static void __init disable_pgtable_l4(void) static __init void set_satp_mode(void) { u64 identity_satp, hw_satp; - uintptr_t set_satp_mode_pmd; + uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; + bool check_l4 = false; - set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; - create_pgd_mapping(early_pg_dir, - set_satp_mode_pmd, (uintptr_t)early_pud, - PGDIR_SIZE, PAGE_TABLE); + create_p4d_mapping(early_p4d, + set_satp_mode_pmd, (uintptr_t)early_pud, + P4D_SIZE, PAGE_TABLE); create_pud_mapping(early_pud, set_satp_mode_pmd, (uintptr_t)early_pmd, PUD_SIZE, PAGE_TABLE); @@ -608,6 +701,11 @@ static __init void set_satp_mode(void) set_satp_mode_pmd + PMD_SIZE, set_satp_mode_pmd + PMD_SIZE, PMD_SIZE, PAGE_KERNEL_EXEC); +retry: + create_pgd_mapping(early_pg_dir, + set_satp_mode_pmd, + check_l4 ? 
(uintptr_t)early_pud : (uintptr_t)early_p4d, + PGDIR_SIZE, PAGE_TABLE); identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; @@ -616,10 +714,17 @@ static __init void set_satp_mode(void) hw_satp = csr_swap(CSR_SATP, 0ULL); local_flush_tlb_all(); - if (hw_satp != identity_satp) + if (hw_satp != identity_satp) { + if (!check_l4) { + disable_pgtable_l5(); + check_l4 = true; + goto retry; + } disable_pgtable_l4(); + } memset(early_pg_dir, 0, PAGE_SIZE); + memset(early_p4d, 0, PAGE_SIZE); memset(early_pud, 0, PAGE_SIZE); memset(early_pmd, 0, PAGE_SIZE); } @@ -693,10 +798,13 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); - if (pgtable_l4_enabled) { + if (pgtable_l5_enabled) + create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); + + if (pgtable_l4_enabled) create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); - } if (IS_ENABLED(CONFIG_64BIT)) { create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, @@ -732,6 +840,8 @@ void __init pt_ops_set_early(void) pt_ops.get_pmd_virt = get_pmd_virt_early; pt_ops.alloc_pud = alloc_pud_early; pt_ops.get_pud_virt = get_pud_virt_early; + pt_ops.alloc_p4d = alloc_p4d_early; + pt_ops.get_p4d_virt = get_p4d_virt_early; #endif } @@ -752,6 +862,8 @@ void __init pt_ops_set_fixmap(void) pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); #endif } @@ -768,6 +880,8 @@ void __init pt_ops_set_late(void) pt_ops.get_pmd_virt = get_pmd_virt_late; pt_ops.alloc_pud = alloc_pud_late; pt_ops.get_pud_virt = 
get_pud_virt_late; + pt_ops.alloc_p4d = alloc_p4d_late; + pt_ops.get_p4d_virt = get_p4d_virt_late; #endif } @@ -828,6 +942,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED + /* Setup fixmap P4D and PUD */ + if (pgtable_l5_enabled) + create_p4d_mapping(fixmap_p4d, FIXADDR_START, + (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE); /* Setup fixmap PUD and PMD */ if (pgtable_l4_enabled) create_pud_mapping(fixmap_pud, FIXADDR_START, @@ -837,6 +955,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); if (pgtable_l4_enabled) create_pud_mapping(trampoline_pud, kernel_map.virt_addr, (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); @@ -938,6 +1059,7 @@ static void __init setup_vm_final(void) clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); clear_fixmap(FIX_PUD); + clear_fixmap(FIX_P4D); /* Move to swapper page table */ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index cd1a145257b7..a22e418dbd82 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -111,6 +111,8 @@ static void __init kasan_populate_pud(pgd_t *pgd, * pt_ops facility. */ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else if (pgd_none(*pgd)) { + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); } else { base_pud = (pud_t *)pgd_page_vaddr(*pgd); if (base_pud == lm_alias(kasan_early_shadow_pud)) { @@ -152,13 +154,72 @@ static void __init kasan_populate_pud(pgd_t *pgd, set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); } -#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? 
\ +static void __init kasan_populate_p4d(pgd_t *pgd, + unsigned long vaddr, unsigned long end, + bool early) +{ + phys_addr_t phys_addr; + p4d_t *p4dp, *base_p4d; + unsigned long next; + + if (early) { + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. + */ + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else { + base_p4d = (p4d_t *)pgd_page_vaddr(*pgd); + if (base_p4d == lm_alias(kasan_early_shadow_p4d)) + base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE); + } + + p4dp = base_p4d + p4d_index(vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { + if (early) { + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud)); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } else { + phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); + if (phys_addr) { + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + } + + kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early); + } while (p4dp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole P4D to be populated before setting the P4D in + * the page table, otherwise, if we did set the P4D before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. + */ + if (!early) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE)); +} + +#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)kasan_early_shadow_p4d : \ + (pgtable_l4_enabled ? 
\ (uintptr_t)kasan_early_shadow_pud : \ - (uintptr_t)kasan_early_shadow_pmd) + (uintptr_t)kasan_early_shadow_pmd)) #define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ + (pgtable_l5_enabled ? \ + kasan_populate_p4d(pgdp, vaddr, next, early) : \ (pgtable_l4_enabled ? \ kasan_populate_pud(pgdp, vaddr, next, early) : \ - kasan_populate_pmd((pud_t *)pgdp, vaddr, next)) + kasan_populate_pmd((pud_t *)pgdp, vaddr, next))) static void __init kasan_populate_pgd(pgd_t *pgdp, unsigned long vaddr, unsigned long end, @@ -221,6 +282,14 @@ asmlinkage void __init kasan_early_init(void) PAGE_TABLE)); } + if (pgtable_l5_enabled) { + for (i = 0; i < PTRS_PER_P4D; ++i) + set_p4d(kasan_early_shadow_p4d + i, + pfn_p4d(PFN_DOWN + (__pa(((uintptr_t)kasan_early_shadow_pud))), + PAGE_TABLE)); + } + kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), KASAN_SHADOW_START, KASAN_SHADOW_END, true); @@ -246,9 +315,27 @@ static void __init kasan_populate(void *start, void *end) memset(start, KASAN_SHADOW_INIT, end - start); } +static void __init kasan_shallow_populate_pmd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + pmd_t *pmdp, *base_pmd; + bool is_kasan_pte; + + base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp); + pmdp = base_pmd + pmd_index(vaddr); + + do { + next = pmd_addr_end(vaddr, end); + is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte)); + + if (is_kasan_pte) + pmd_clear(pmdp); + } while (pmdp++, vaddr = next, vaddr != end); +} + static void __init kasan_shallow_populate_pud(pgd_t *pgdp, - unsigned long vaddr, unsigned long end, - bool kasan_populate) + unsigned long vaddr, unsigned long end) { unsigned long next; pud_t *pudp, *base_pud; @@ -258,21 +345,60 @@ static void __init kasan_shallow_populate_pud(pgd_t *pgdp, base_pud = (pud_t *)pgd_page_vaddr(*pgdp); pudp = base_pud + pud_index(vaddr); - if (kasan_populate) - memcpy(base_pud, (void *)kasan_early_shadow_pgd_next, - sizeof(pud_t) * PTRS_PER_PUD); - do { next = 
pud_addr_end(vaddr, end); is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); - if (is_kasan_pmd) { - base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); - } + if (!is_kasan_pmd) + continue; + + base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); + + if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) + continue; + + memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE); + kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next); } while (pudp++, vaddr = next, vaddr != end); } +static void __init kasan_shallow_populate_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp, *base_p4d; + pud_t *base_pud; + bool is_kasan_pud; + + base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp); + p4dp = base_p4d + p4d_index(vaddr); + + do { + next = p4d_addr_end(vaddr, end); + is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud)); + + if (!is_kasan_pud) + continue; + + base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); + + if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) + continue; + + memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE); + kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \ + (pgtable_l5_enabled ? \ + kasan_shallow_populate_p4d(pgdp, vaddr, next) : \ + (pgtable_l4_enabled ? 
\ + kasan_shallow_populate_pud(pgdp, vaddr, next) : \ + kasan_shallow_populate_pmd(pgdp, vaddr, next))) + static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { unsigned long next; @@ -293,7 +419,8 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) continue; - kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next); + memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE); + kasan_shallow_populate_pgd_next(pgd_k, vaddr, next); } while (pgd_k++, vaddr = next, vaddr != end); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2e11ef97c8b9..9b80e8bed3f6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -122,7 +122,6 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_ALLOCATOR @@ -157,7 +156,7 @@ config S390 select HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES + select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FENTRY @@ -232,20 +231,8 @@ source "kernel/livepatch/Kconfig" menu "Processor type and features" -config HAVE_MARCH_Z900_FEATURES - def_bool n - -config HAVE_MARCH_Z990_FEATURES - def_bool n - select HAVE_MARCH_Z900_FEATURES - -config HAVE_MARCH_Z9_109_FEATURES - def_bool n - select HAVE_MARCH_Z990_FEATURES - config HAVE_MARCH_Z10_FEATURES def_bool n - select HAVE_MARCH_Z9_109_FEATURES config HAVE_MARCH_Z196_FEATURES def_bool n @@ -271,41 +258,13 @@ choice prompt "Processor type" default MARCH_Z196 -config MARCH_Z900 - bool "IBM zSeries model z800 and z900" - select HAVE_MARCH_Z900_FEATURES - depends on 
$(cc-option,-march=z900) - help - Select this to enable optimizations for model z800/z900 (2064 and - 2066 series). This will enable some optimizations that are not - available on older ESA/390 (31 Bit) only CPUs. - -config MARCH_Z990 - bool "IBM zSeries model z890 and z990" - select HAVE_MARCH_Z990_FEATURES - depends on $(cc-option,-march=z990) - help - Select this to enable optimizations for model z890/z990 (2084 and - 2086 series). The kernel will be slightly faster but will not work - on older machines. - -config MARCH_Z9_109 - bool "IBM System z9" - select HAVE_MARCH_Z9_109_FEATURES - depends on $(cc-option,-march=z9-109) - help - Select this to enable optimizations for IBM System z9 (2094 and - 2096 series). The kernel will be slightly faster but will not work - on older machines. - config MARCH_Z10 bool "IBM System z10" select HAVE_MARCH_Z10_FEATURES depends on $(cc-option,-march=z10) help - Select this to enable optimizations for IBM System z10 (2097 and - 2098 series). The kernel will be slightly faster but will not work - on older machines. + Select this to enable optimizations for IBM System z10 (2097 and 2098 + series). This is the oldest machine generation currently supported. config MARCH_Z196 bool "IBM zEnterprise 114 and 196" @@ -354,15 +313,6 @@ config MARCH_Z15 endchoice -config MARCH_Z900_TUNE - def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT - -config MARCH_Z990_TUNE - def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT - -config MARCH_Z9_109_TUNE - def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT - config MARCH_Z10_TUNE def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT @@ -398,21 +348,8 @@ config TUNE_DEFAULT Tune the generated code for the target processor for which the kernel will be compiled. 
-config TUNE_Z900 - bool "IBM zSeries model z800 and z900" - depends on $(cc-option,-mtune=z900) - -config TUNE_Z990 - bool "IBM zSeries model z890 and z990" - depends on $(cc-option,-mtune=z990) - -config TUNE_Z9_109 - bool "IBM System z9" - depends on $(cc-option,-mtune=z9-109) - config TUNE_Z10 bool "IBM System z10" - depends on $(cc-option,-mtune=z10) config TUNE_Z196 bool "IBM zEnterprise 114 and 196" @@ -587,6 +524,7 @@ config KERNEL_NOBP config EXPOLINE def_bool n + depends on $(cc-option,-mindirect-branch=thunk) prompt "Avoid speculative indirect branches in the kernel" help Compile the kernel with the expoline compiler options to guard @@ -597,6 +535,19 @@ config EXPOLINE If unsure, say N. +config EXPOLINE_EXTERN + def_bool n + depends on EXPOLINE + depends on CC_IS_GCC && GCC_VERSION >= 110200 + depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC)) + prompt "Generate expolines as extern functions." + help + This option is required for some tooling like kpatch. The kernel is + compiled with -mindirect-branch=thunk-extern and requires a newer + compiler. + + If unsure, say N. + choice prompt "Expoline default" depends on EXPOLINE @@ -658,20 +609,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config PACK_STACK - def_bool y - prompt "Pack kernel stack" - help - This option enables the compiler option -mkernel-backchain if it - is available. If the option is available the compiler supports - the new stack layout which dramatically reduces the minimum stack - frame size. With an old compiler a non-leaf function needs a - minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With - -mkernel-backchain the minimum size drops to 16 byte on 31 bit - and 24 byte on 64 bit. - - Say Y if you are unsure. 
- config CHECK_STACK def_bool y depends on !VMAP_STACK diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 609e3697324b..7a65bca1e5af 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -21,7 +21,7 @@ endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables @@ -36,9 +36,6 @@ CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD -mflags-$(CONFIG_MARCH_Z900) := -march=z900 -mflags-$(CONFIG_MARCH_Z990) := -march=z990 -mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 mflags-$(CONFIG_MARCH_Z10) := -march=z10 mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 @@ -51,9 +48,6 @@ export CC_FLAGS_MARCH := $(mflags-y) aflags-y += $(mflags-y) cflags-y += $(mflags-y) -cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 -cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990 -cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 @@ -68,11 +62,6 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),) -cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) @@ -86,14 +75,18 @@ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) endif ifdef CONFIG_EXPOLINE - ifneq ($(call 
cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) + ifdef CONFIG_EXPOLINE_EXTERN + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern + CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern + else CC_FLAGS_EXPOLINE := -mindirect-branch=thunk CC_FLAGS_EXPOLINE += -mfunction-return=thunk - CC_FLAGS_EXPOLINE += -mindirect-branch-table - export CC_FLAGS_EXPOLINE - cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE - aflags-y += -DCC_USING_EXPOLINE endif + CC_FLAGS_EXPOLINE += -mindirect-branch-table + export CC_FLAGS_EXPOLINE + cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER @@ -111,7 +104,7 @@ endif # Test CFI features of binutils cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1) -KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) +KBUILD_CFLAGS += -mpacked-stack -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 3a252d140c55..666692429db0 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * * There are 5 different IPL methods * 1) load the image directly into ram at address 0 and do an PSW restart diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S index badf5c49717d..9b033622191c 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/crypto/chacha-s390.S @@ -312,7 +312,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -339,7 +339,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn 
rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -366,7 +366,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -472,7 +472,7 @@ ENDPROC(chacha20_vx_4x) #define T3 %v30 ENTRY(chacha20_vx) - .insn rilu,0xc20e00000000,LEN,256 # clgfi LEN,256 + clgfi LEN,256 jle chacha20_vx_4x stmg %r6,%r7,6*8(SP) @@ -725,7 +725,7 @@ ENTRY(chacha20_vx) VPERM C0,C0,C0,BEPERM VPERM D0,D0,D0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D2,D2,T2 # +K[3]+2 @@ -754,7 +754,7 @@ ENTRY(chacha20_vx) VPERM C0,C1,C1,BEPERM VPERM D0,D1,D1,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -780,7 +780,7 @@ ENTRY(chacha20_vx) VPERM C0,C2,C2,BEPERM VPERM D0,D2,D2,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -807,7 +807,7 @@ ENTRY(chacha20_vx) VPERM C0,C3,C3,BEPERM VPERM D0,D3,D3,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D3,D2,T1 # K[3]+4 @@ -837,7 +837,7 @@ ENTRY(chacha20_vx) VPERM C0,C4,C4,BEPERM VPERM D0,D4,D4,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -864,7 +864,7 @@ ENTRY(chacha20_vx) VPERM C0,C5,C5,BEPERM VPERM D0,D5,D5,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e8f15dbb89d0..3765c2d81df5 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c0c8a1f6c35d..ae75da592ccb 100644 --- a/arch/s390/include/asm/ap.h 
+++ b/arch/s390/include/asm/ap.h @@ -13,6 +13,7 @@ #define _ASM_S390_AP_H_ #include +#include /** * The ap_qid_t identifier of an ap queue. diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h new file mode 100644 index 000000000000..fb62df5e16a2 --- /dev/null +++ b/arch/s390/include/asm/asm-extable.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H + +#include +#include + +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS 3 + +#define __EX_TABLE(_section, _fault, _target, _type) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.short 0;) \ + stringify_in_c(.previous) + +#define __EX_TABLE_UA(_section, _fault, _target, _type, _reg) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.macro extable_reg reg;) \ + stringify_in_c(.set found, 0;) \ + stringify_in_c(.set regnr, 0;) \ + stringify_in_c(.irp rs,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15;) \ + stringify_in_c(.ifc "\reg", "%%\rs";) \ + stringify_in_c(.set found, 1;) \ + stringify_in_c(.short regnr;) \ + stringify_in_c(.endif;) \ + stringify_in_c(.set regnr, regnr+1;) \ + stringify_in_c(.endr;) \ + stringify_in_c(.ifne (found != 1);) \ + stringify_in_c(.error "extable_reg: bad register argument";) \ + stringify_in_c(.endif;) \ + stringify_in_c(.endm;) \ + stringify_in_c(extable_reg _reg;) \ + stringify_in_c(.purgem extable_reg;) \ + stringify_in_c(.previous) + +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) +#define EX_TABLE_AMODE31(_fault, _target) \ + __EX_TABLE(.amode31.ex_table, _fault, _target, 
EX_TYPE_FIXUP) +#define EX_TABLE_UA(_fault, _target, _reg) \ + __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UACCESS, _reg) + +#endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 1d40630128a5..191dc7898b0f 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -256,8 +256,6 @@ static inline bool test_bit_inv(unsigned long nr, return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - /** * __flogr - find leftmost one * @word - The word to search @@ -376,16 +374,6 @@ static inline int fls(unsigned int word) return fls64(word); } -#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - -#include -#include -#include -#include -#include - -#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - #include #include #include diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e3f12db46cfc..feaba12dbecb 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -10,6 +10,7 @@ #define _ASM_S390_CPU_MF_H #include +#include #include asm(".include \"asm/cpu_mf-insn.h\"\n"); @@ -159,7 +160,7 @@ struct hws_trailer_entry { /* Load program parameter */ static inline void lpp(void *pp) { - asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory"); + asm volatile("lpp 0(%0)\n" :: "a" (pp) : "memory"); } /* Query counter information */ @@ -168,7 +169,7 @@ static inline int qctri(struct cpumf_ctr_info *info) int rc = -EINVAL; asm volatile ( - "0: .insn s,0xb28e0000,%1\n" + "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(1b, 2b) @@ -182,7 +183,7 @@ static inline int lcctl(u64 ctl) int cc; asm volatile ( - " .insn s,0xb2840000,%1\n" + " lcctl %1\n" " ipm %0\n" " srl %0,28\n" : "=d" (cc) : "Q" (ctl) : "cc"); @@ -196,7 +197,7 @@ static inline int __ecctr(u64 ctr, u64 *content) int cc; asm volatile ( - " .insn rre,0xb2e40000,%0,%2\n" + " ecctr %0,%2\n" " ipm %1\n" " srl %1,28\n" : "=d" (_content), "=d" (cc) : "d" 
(ctr) : "cc"); @@ -246,7 +247,7 @@ static inline int qsi(struct hws_qsi_info_block *info) int cc = 1; asm volatile( - "0: .insn s,0xb2860000,%1\n" + "0: qsi %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) @@ -261,7 +262,7 @@ static inline int lsctl(struct hws_lsctl_request_block *req) cc = 1; asm volatile( - "0: .insn s,0xb2870000,0(%1)\n" + "0: lsctl 0(%1)\n" "1: ipm %0\n" " srl %0,28\n" "2:\n" diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index c6ebfd31f1db..97456d98fe76 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -5,7 +5,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_CRW_H diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index bdcd64f0c1d7..56e99c286d12 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -11,6 +11,7 @@ #include #include +#include enum diag_stat_enum { DIAG_STAT_X008, diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 8511f0e59290..af6ba52743e9 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -25,7 +25,7 @@ struct exception_table_entry { int insn, fixup; - long handler; + short type, data; }; extern struct exception_table_entry *__start_amode31_ex_table; @@ -38,28 +38,6 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } -typedef bool (*ex_handler_t)(const struct exception_table_entry *, - struct pt_regs *); - -static inline ex_handler_t -ex_fixup_handler(const struct exception_table_entry *x) -{ - if (likely(!x->handler)) - return NULL; - return (ex_handler_t)((unsigned long)&x->handler + x->handler); -} - -static inline bool ex_handle(const struct exception_table_entry *x, - struct pt_regs *regs) -{ - ex_handler_t handler = ex_fixup_handler(x); - - if (unlikely(handler)) - return handler(x, regs); - 
regs->psw.addr = extable_fixup(x); - return true; -} - #define ARCH_HAS_RELATIVE_EXTABLE static inline void swap_ex_entry_fixup(struct exception_table_entry *a, @@ -69,13 +47,26 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler; - if (a->handler) - a->handler += delta; - b->handler = tmp.handler; - if (b->handler) - b->handler -= delta; + a->type = b->type; + b->type = tmp.type; + a->data = b->data; + b->data = tmp.data; } #define swap_ex_entry_fixup swap_ex_entry_fixup +#ifdef CONFIG_BPF_JIT + +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); + +#else /* !CONFIG_BPF_JIT */ + +static inline bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + return false; +} + +#endif /* CONFIG_BPF_JIT */ + +bool fixup_exception(struct pt_regs *regs); + #endif diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index a959b815a58b..b714ed0ef688 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -45,6 +45,7 @@ #define _ASM_S390_FPU_API_H #include +#include void save_fpu_regs(void); void load_fpu_regs(void); diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index c22debfcebf1..e08c882dccaa 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 60f9241e5e4a..bea47e7cc6a0 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -45,9 +45,9 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + set_pte(ptep, __pte(_REGION3_ENTRY_EMPTY)); else - pte_val(*ptep) 
= _SEGMENT_ENTRY_EMPTY; + set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY)); } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 9f75d67b8c20..89902f754740 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -81,8 +81,13 @@ static __always_inline void inc_irq_stat(enum interruption_class irq) } struct ext_code { - unsigned short subcode; - unsigned short code; + union { + struct { + unsigned short subcode; + unsigned short code; + }; + unsigned int int_code; + }; }; typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 5eb722c984e4..598095f4b924 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -71,6 +71,7 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); void __kretprobe_trampoline(void); +void trampoline_probe_handler(struct pt_regs *regs); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1ffea75b8ebc..c76777b15fec 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,27 +2,9 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#include #include #define __ALIGN .align 16, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -/* - * Helper macro for exception table entries - */ - -#define __EX_TABLE(_section, _fault, _target) \ - stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 8;) \ - stringify_in_c(.long (_fault) - .;) \ - stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.quad 0;) \ - stringify_in_c(.previous) - -#define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target) -#define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target) - 
#endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 1262f5003acf..56002aeacabf 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -34,12 +34,22 @@ struct lowcore { __u32 ext_int_code_addr; }; __u32 svc_int_code; /* 0x0088 */ - __u16 pgm_ilc; /* 0x008c */ - __u16 pgm_code; /* 0x008e */ + union { + struct { + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + }; + __u32 pgm_int_code; + }; __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ - __u8 per_code; /* 0x0096 */ - __u8 per_atmid; /* 0x0097 */ + union { + struct { + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ + }; + __u16 per_code_combined; + }; __u64 per_address; /* 0x0098 */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ @@ -153,11 +163,9 @@ struct lowcore { __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ - /* br %r1 trampoline */ - __u16 br_r1_trampoline; /* 0x0400 */ - __u32 return_lpswe; /* 0x0402 */ - __u32 return_mcck_lpswe; /* 0x0406 */ - __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ + __u32 return_lpswe; /* 0x0400 */ + __u32 return_mcck_lpswe; /* 0x0404 */ + __u8 pad_0x0408[0x0e00-0x0408]; /* 0x0408 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index e12ff0f29d1a..82aae78e1315 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -4,6 +4,7 @@ #include #include +#include typedef struct { spinlock_t lock; diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 55c9051dddfd..292083083830 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_NMI_H diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 0033dcd663b1..2cfcd5ac3a8b 100644
--- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -10,15 +10,18 @@ #ifdef CC_USING_EXPOLINE -_LC_BR_R1 = __LC_BR_R1 - /* * The expoline macros are used to create thunks in the same format * as gcc generates them. The 'comdat' section flag makes sure that * the various thunks are merged into a single copy. */ .macro __THUNK_PROLOG_NAME name +#ifdef CONFIG_EXPOLINE_EXTERN + .pushsection .text,"ax",@progbits + .align 16,0x07 +#else .pushsection .text.\name,"axG",@progbits,\name,comdat +#endif .globl \name .hidden \name .type \name,@function @@ -26,37 +29,49 @@ _LC_BR_R1 = __LC_BR_R1 CFI_STARTPROC .endm - .macro __THUNK_EPILOG + .macro __THUNK_EPILOG_NAME name CFI_ENDPROC +#ifdef CONFIG_EXPOLINE_EXTERN + .size \name, .-\name +#endif .popsection .endm - .macro __THUNK_PROLOG_BR r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 + .macro __THUNK_PROLOG_BR r1 + __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_PROLOG_BC d0,r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1 + .macro __THUNK_EPILOG_BR r1 + __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BR r1,r2 - jg __s390_indirect_jump_r\r2\()use_r\r1 + .macro __THUNK_BR r1 + jg __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BC d0,r1,r2 - jg __s390_indirect_branch_\d0\()_\r2\()use_\r1 + .macro __THUNK_BRASL r1,r2 + brasl \r1,__s390_indirect_jump_r\r2 .endm - .macro __THUNK_BRASL r1,r2,r3 - brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 - .endm - - .macro __DECODE_RR expand,reg,ruse + .macro __DECODE_R expand,reg .set __decode_fail,1 .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 .ifc \reg,%r\r1 + \expand \r1 + .set __decode_fail,0 + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_R failed" + .endif + .endm + + .macro __DECODE_RR expand,rsave,rtarget + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc 
\ruse,%r\r2 + .ifc \rtarget,%r\r2 \expand \r1,\r2 .set __decode_fail,0 .endif @@ -68,125 +83,47 @@ _LC_BR_R1 = __LC_BR_R1 .endif .endm - .macro __DECODE_RRR expand,rsave,rtarget,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rsave,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rtarget,%r\r2 - .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r3 - \expand \r1,\r2,\r3 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_RRR failed" - .endif - .endm - - .macro __DECODE_DRR expand,disp,reg,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \reg,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r2 - \expand \disp,\r1,\r2 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_DRR failed" - .endif - .endm - - .macro __THUNK_EX_BR reg,ruse - # Be very careful when adding instructions to this macro! - # The ALTERNATIVE replacement code has a .+10 which targets - # the "br \reg" after the code has been patched. -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + .macro __THUNK_EX_BR reg exrl 0,555f j . -#else - .ifc \reg,%r1 - ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 - j . - .else - larl \ruse,555f - ex 0,0(\ruse) - j . - .endif -#endif 555: br \reg .endm - .macro __THUNK_EX_BC disp,reg,ruse -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,556f - j . +#ifdef CONFIG_EXPOLINE_EXTERN + .macro GEN_BR_THUNK reg + .endm + .macro GEN_BR_THUNK_EXTERN reg #else - larl \ruse,556f - ex 0,0(\ruse) - j . 
+ .macro GEN_BR_THUNK reg #endif -556: b \disp(\reg) + __DECODE_R __THUNK_PROLOG_BR,\reg + __THUNK_EX_BR \reg + __DECODE_R __THUNK_EPILOG_BR,\reg .endm - .macro GEN_BR_THUNK reg,ruse=%r1 - __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse - __THUNK_EX_BR \reg,\ruse - __THUNK_EPILOG - .endm - - .macro GEN_B_THUNK disp,reg,ruse=%r1 - __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse - __THUNK_EX_BC \disp,\reg,\ruse - __THUNK_EPILOG - .endm - - .macro BR_EX reg,ruse=%r1 -557: __DECODE_RR __THUNK_BR,\reg,\ruse + .macro BR_EX reg +557: __DECODE_R __THUNK_BR,\reg .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection .endm - .macro B_EX disp,reg,ruse=%r1 -558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse - .pushsection .s390_indirect_branches,"a",@progbits - .long 558b-. - .popsection - .endm - - .macro BASR_EX rsave,rtarget,ruse=%r1 -559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .macro BASR_EX rsave,rtarget +559: __DECODE_RR __THUNK_BRASL,\rsave,\rtarget .pushsection .s390_indirect_branches,"a",@progbits .long 559b-. 
.popsection .endm #else - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg .endm - .macro GEN_B_THUNK disp,reg,ruse=%r1 - .endm - - .macro BR_EX reg,ruse=%r1 + .macro BR_EX reg br \reg .endm - .macro B_EX disp,reg,ruse=%r1 - b \disp(\reg) - .endm - - .macro BASR_EX rsave,rtarget,ruse=%r1 + .macro BASR_EX rsave,rtarget basr \rsave,\rtarget .endm #endif /* CC_USING_EXPOLINE */ diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 3c89279d2a4b..147a8d547ef9 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -39,7 +39,7 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_oldmem_kernel(void *dst, void *src, size_t count); +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index cfc4d6fb2385..61dea67bb9c7 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -92,11 +92,31 @@ typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) #define pgste_val(x) ((x).pgste) -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define p4d_val(x) ((x).p4d) -#define pgd_val(x) ((x).pgd) + +static inline unsigned long pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} + +static inline unsigned long pud_val(pud_t pud) +{ + return pud.pud; +} + +static inline unsigned long p4d_val(p4d_t p4d) +{ + return p4d.p4d; +} + +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 90824be5ce9a..fdb9745ee998 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h 
@@ -283,9 +283,6 @@ int zpci_dma_exit_device(struct zpci_dev *zdev); int __init zpci_irq_init(void); void __init zpci_irq_exit(void); -int zpci_set_irq(struct zpci_dev *zdev); -int zpci_clear_irq(struct zpci_dev *zdev); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f14a555eff74..17eb618f1348 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -103,17 +103,17 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { - pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); + set_pgd(pgd, __pgd(_REGION1_ENTRY | __pa(p4d))); } static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); + set_p4d(p4d, __p4d(_REGION2_ENTRY | __pa(pud))); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd))); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -129,7 +129,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); + set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte))); } #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 008a6c856fa4..9df679152620 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -538,6 +538,36 @@ static inline int mm_alloc_pgste(struct mm_struct *mm) return 0; } +static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) +{ + return __pte(pte_val(pte) & ~pgprot_val(prot)); +} + +static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) +{ + 
return __pte(pte_val(pte) | pgprot_val(prot)); +} + +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) & ~pgprot_val(prot)); +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) | pgprot_val(prot)); +} + +static inline pud_t clear_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) & ~pgprot_val(prot)); +} + +static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) | pgprot_val(prot)); +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages @@ -570,7 +600,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new unsigned long address = (unsigned long)ptr | 1; asm volatile( - " .insn rre,0xb98a0000,%[r1],%[address]" + " cspg %[r1],%[address]" : [r1] "+&d" (r1.pair), "+m" (*ptr) : [address] "d" (address) : "cc"); @@ -804,15 +834,13 @@ static inline int pte_soft_dirty(pte_t pte) static inline pte_t pte_mksoft_dirty(pte_t pte) { - pte_val(pte) |= _PAGE_SOFT_DIRTY; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_mksoft_dirty pte_mksoft_dirty static inline pte_t pte_clear_soft_dirty(pte_t pte) { - pte_val(pte) &= ~_PAGE_SOFT_DIRTY; - return pte; + return clear_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_clear_soft_dirty pte_clear_soft_dirty @@ -823,14 +851,12 @@ static inline int pmd_soft_dirty(pmd_t pmd) static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } /* @@ -881,32 +907,57 @@ static inline pgprot_t pte_pgprot(pte_t pte) * pgd/pmd/pte modification functions */ +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ 
+ WRITE_ONCE(*pgdp, pgd); +} + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + WRITE_ONCE(*p4dp, p4d); +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + static inline void pgd_clear(pgd_t *pgd) { if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; + set_pgd(pgd, __pgd(_REGION1_ENTRY_EMPTY)); } static inline void p4d_clear(p4d_t *p4d) { if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; + set_p4d(p4d, __p4d(_REGION2_ENTRY_EMPTY)); } static inline void pud_clear(pud_t *pud) { if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pud_val(*pud) = _REGION3_ENTRY_EMPTY; + set_pud(pud, __pud(_REGION3_ENTRY_EMPTY)); } static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); } /* @@ -915,79 +966,74 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte_val(pte) &= _PAGE_CHG_MASK; - pte_val(pte) |= pgprot_val(newprot); + pte = clear_pte_bit(pte, __pgprot(~_PAGE_CHG_MASK)); + pte = set_pte_bit(pte, newprot); /* * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX * has the invalid bit set, clear it again for readable, young pages */ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); /* * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page * protection bit set, clear it 
again for writable, dirty pages */ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_WRITE; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_WRITE)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_WRITE; + pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE)); if (pte_val(pte) & _PAGE_DIRTY) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_DIRTY; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_DIRTY)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY; + pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY | _PAGE_SOFT_DIRTY)); if (pte_val(pte) & _PAGE_WRITE) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkold(pte_t pte) { - pte_val(pte) &= ~_PAGE_YOUNG; - pte_val(pte) |= _PAGE_INVALID; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG)); + return set_pte_bit(pte, __pgprot(_PAGE_INVALID)); } static inline pte_t pte_mkyoung(pte_t pte) { - pte_val(pte) |= _PAGE_YOUNG; + pte = set_pte_bit(pte, __pgprot(_PAGE_YOUNG)); if (pte_val(pte) & _PAGE_READ) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); return pte; } static inline pte_t pte_mkspecial(pte_t pte) { - pte_val(pte) |= _PAGE_SPECIAL; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SPECIAL)); } #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= _PAGE_LARGE; - return pte; + return set_pte_bit(pte, 
__pgprot(_PAGE_LARGE)); } #endif @@ -1006,7 +1052,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, if (__builtin_constant_p(opt) && opt == 0) { /* Invalidation + TLB flush for the pte */ asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + " ipte %[r1],%[r2],0,%[m4]" : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), [m4] "i" (local)); return; @@ -1015,7 +1061,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, /* Invalidate ptes with options + TLB flush of the ptes */ opt = opt | (asce & _ASCE_ORIGIN); asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (opt) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } @@ -1028,7 +1074,7 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr, /* Invalidate a range of ptes + TLB flush of the ptes */ do { asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (nr) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } while (nr != 255); @@ -1114,7 +1160,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (full) { res = *ptep; - *ptep = __pte(_PAGE_INVALID); + set_pte(ptep, __pte(_PAGE_INVALID)); } else { res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } @@ -1198,11 +1244,11 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { if (pte_present(entry)) - pte_val(entry) &= ~_PAGE_UNUSED; + entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED)); if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else - *ptep = entry; + set_pte(ptep, entry); } /* @@ -1213,9 +1259,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; - pte_val(__pte) = physpage | pgprot_val(pgprot); + __pte = __pte(physpage | pgprot_val(pgprot)); if (!MACHINE_HAS_NX) - pte_val(__pte) &= 
~_PAGE_NOEXEC; + __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1355,61 +1401,57 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pmd_t pmd_mkclean(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY)); if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pud_t pud_wrprotect(pud_t pud) { - pud_val(pud) &= ~_REGION3_ENTRY_WRITE; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkwrite(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_WRITE; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); if (pud_val(pud) & _REGION3_ENTRY_DIRTY) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } static inline pud_t pud_mkclean(pud_t 
pud) { - pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkdirty(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY)); if (pud_val(pud) & _REGION3_ENTRY_WRITE) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } @@ -1433,37 +1475,39 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | - _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; - pmd_val(pmd) |= massage_pgprot_pmd(newprot); + unsigned long mask; + + mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + mask |= _SEGMENT_ENTRY_DIRTY; + mask |= _SEGMENT_ENTRY_YOUNG; + mask |= _SEGMENT_ENTRY_LARGE; + mask |= _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = __pmd(pmd_val(pmd) & mask); + pmd = set_pmd_bit(pmd, __pgprot(massage_pgprot_pmd(newprot))); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) - pmd_val(pmd) |= 
_SEGMENT_ENTRY_INVALID; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return __pmd(physpage + massage_pgprot_pmd(pgprot)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ @@ -1491,7 +1535,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), [m4] "i" (local) @@ -1499,7 +1543,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) @@ -1518,7 +1562,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)), [m4] "i" (local) @@ -1526,7 +1570,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) @@ -1585,16 +1629,15 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { if (!MACHINE_HAS_NX) - pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC; - *pmdp = entry; + entry = clear_pmd_bit(entry, 
__pgprot(_SEGMENT_ENTRY_NOEXEC)); + set_pmd(pmdp, entry); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_LARGE)); + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -1611,7 +1654,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, { if (full) { pmd_t pmd = *pmdp; - *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); return pmd; } return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); @@ -1690,12 +1733,12 @@ static inline int has_transparent_hugepage(void) static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { - pte_t pte; + unsigned long pteval; - pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; - pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; - pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; - return pte; + pteval = _PAGE_INVALID | _PAGE_PROTECT; + pteval |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pteval |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; + return __pte(pteval); } static inline unsigned long __swp_type(swp_entry_t entry) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5581b64a4236..84ec63145325 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -225,8 +225,7 @@ static inline unsigned long __ecag(unsigned int asi, unsigned char parm) { unsigned long val; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (asi << 8 | parm)); + asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm)); return val; } @@ -313,11 +312,11 @@ static __always_inline void __noreturn disabled_wait(void) * Basic 
Program Check Handler. */ extern void s390_base_pgm_handler(void); -extern void (*s390_base_pgm_handler_fn)(void); +extern void (*s390_base_pgm_handler_fn)(struct pt_regs *regs); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int memcpy_real(void *, void *, size_t); +extern int memcpy_real(void *, unsigned long, size_t); extern void memcpy_absolute(void *, void *, size_t); #define mem_assign_absolute(dest, val) do { \ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 22b3213c6c9d..04cb1e7582a6 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens */ #ifndef _ASM_S390_SCLP_H diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index f16f4d054ae2..7f5d4763357b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -3,7 +3,6 @@ * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , */ #ifndef __ASM_SMP_H #define __ASM_SMP_H diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index dd00d98804ec..275f4258fbd5 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -36,22 +36,14 @@ static inline bool on_stack(struct stack_info *info, /* * Stack layout of a C stack frame. + * Kernel uses the packed stack layout (-mpacked-stack). 
*/ -#ifndef __PACK_STACK -struct stack_frame { - unsigned long back_chain; - unsigned long empty1[5]; - unsigned long gprs[10]; - unsigned int empty2[8]; -}; -#else struct stack_frame { unsigned long empty1[5]; unsigned int empty2[8]; unsigned long gprs[10]; unsigned long back_chain; }; -#endif /* * Unlike current_stack_pointer() which simply returns current value of %r15 diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 50d9b04ecbd1..2cfce42aa7fc 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -148,7 +148,7 @@ struct ptff_qui { asm volatile( \ " lgr 0,%[reg0]\n" \ " lgr 1,%[reg1]\n" \ - " .insn e,0x0104\n" \ + " ptff\n" \ " ipm %[rc]\n" \ " srl %[rc],28\n" \ : [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \ @@ -187,14 +187,10 @@ static inline unsigned long get_tod_clock(void) static inline unsigned long get_tod_clock_fast(void) { -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES unsigned long clk; asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; -#else - return get_tod_clock(); -#endif } static inline cycles_t get_cycles(void) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6448bb5be10c..a6e2cd89b609 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -25,9 +25,7 @@ static inline void __tlb_flush_idte(unsigned long asce) if (MACHINE_HAS_TLB_GUEST) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (opt), "a" (asce) : "cc"); + asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); } /* diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e1be769cbf9f..1f150a7cfb3d 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,6 +13,7 @@ /* * User space memory access functions */ +#include #include #include #include @@ -79,8 +80,6 @@ union oac { }; }; -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES 
- #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ @@ -90,14 +89,10 @@ union oac { "0: mvcos %[_to],%[_from],%[_size]\n" \ "1: xr %[rc],%[rc]\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %[rc],%[retval]\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%[rc]) EX_TABLE_UA(1b,2b,%[rc]) \ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ - [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ + [spec] "d" (oac_spec.val) \ : "cc", "0"); \ __rc; \ }) @@ -178,22 +173,6 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign return rc; } -#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - size = raw_copy_to_user(ptr, x, size); - return size ? -EFAULT : 0; -} - -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - size = raw_copy_from_user(x, ptr, size); - return size ? -EFAULT : 0; -} - -#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. 
@@ -289,7 +268,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -int copy_to_user_real(void __user *dest, void *src, unsigned long count); +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); int __noreturn __put_kernel_bad(void); @@ -302,13 +281,9 @@ int __noreturn __put_kernel_bad(void); "0: " insn " %2,%1\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+Q" (*(to)) \ - : "d" (val), "K" (-EFAULT) \ + : "d" (val) \ : "cc"); \ __rc; \ }) @@ -349,13 +324,9 @@ int __noreturn __get_kernel_bad(void); "0: " insn " %1,%2\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+d" (val) \ - : "Q" (*(from)), "K" (-EFAULT) \ + : "Q" (*(from)) \ : "cc"); \ __rc; \ }) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 22fd202856bc..2f04a5499d74 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -288,7 +288,7 @@ struct zcrypt_device_matrix_ext { * 0x08: CEX3A * 0x0a: CEX4 * 0x0b: CEX5 - * 0x0c: CEX6 and CEX7 + * 0x0c: CEX6, CEX7 or CEX8 * 0x0d: device is disabled * * ZCRYPT_QDEPTH_MASK diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index be8007f367aa..c8d1b6aa823e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,9 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += kprobes_insn_page.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o 
+obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 8e00bb228662..7c74f0e17e5a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -50,9 +50,7 @@ int main(void) BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); - OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); - OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ @@ -123,14 +121,12 @@ int main(void) OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); - OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); - OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); - OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); + OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); + OFFSET(__LC_OS_INFO, lowcore, os_info); /* hardware defined lowcore locations 0x1000 - 0x18ff */ OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index d255c69c1779..172c23c8ca00 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -3,8 +3,7 @@ * arch/s390/kernel/base.S * * Copyright IBM Corp. 
2006, 2007 - * Author(s): Heiko Carstens - * Michael Holzheu + * Author(s): Michael Holzheu */ #include @@ -15,18 +14,28 @@ GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 +__PT_R0 = __PT_GPRS +__PT_R8 = __PT_GPRS + 64 + ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + lgr %r2,%r11 larl %r1,s390_base_pgm_handler_fn lg %r9,0(%r1) ltgr %r9,%r9 jz 1f BASR_EX %r14,%r9 - lmg %r0,%r15,__LC_SAVE_AREA_SYNC - lpswe __LC_PGM_OLD_PSW -1: lpswe disabled_wait_psw-0b(%r13) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW +1: larl %r13,disabled_wait_psw + lpswe 0(%r13) ENDPROC(s390_base_pgm_handler) .align 8 diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index d66825e53fce..7ee3651d00ab 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -3,7 +3,6 @@ * Extract CPU cache information and expose them via sysfs. * * Copyright IBM Corp. 
2012 - * Author(s): Heiko Carstens */ #include @@ -71,8 +70,6 @@ void show_cacheinfo(struct seq_file *m) struct cacheinfo *cache; int idx; - if (!test_facility(34)) - return; this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { cache = this_cpu_ci->info_list + idx; @@ -132,8 +129,6 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); @@ -157,8 +152,6 @@ int populate_cache_leaves(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); for (idx = 0, level = 0; level < this_cpu_ci->num_levels && idx < this_cpu_ci->num_leaves; idx++, level++) { diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index cca142fbb516..eee1ad3e1b29 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -89,7 +89,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) _sigregs32 user_sregs; int i; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index af8202121642..69819b765250 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -132,28 +132,27 @@ static inline void *load_real_addr(void *addr) /* * Copy memory of the old, dumped system to a kernel space virtual address */ -int copy_oldmem_kernel(void *dst, void *src, size_t count) +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; void *ra; int rc; while (count) { - from 
= __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_kernel(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_kernel(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } @@ -163,7 +162,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) } else { ra = dst; } - if (memcpy_real(ra, (void *) from, len)) + if (memcpy_real(ra, src, len)) return -EFAULT; } dst += len; @@ -176,31 +175,30 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) /* * Copy memory of the old, dumped system to a user space virtual address */ -static int copy_oldmem_user(void __user *dst, void *src, size_t count) +static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; int rc; while (count) { - from = __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_user(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_user(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - 
oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } - rc = copy_to_user_real(dst, (void *) from, count); + rc = copy_to_user_real(dst, src, len); if (rc) return rc; } @@ -217,12 +215,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - void *src; + unsigned long src; int rc; if (!csize) return 0; - src = (void *) (pfn << PAGE_SHIFT) + offset; + src = pfn_to_phys(pfn) + offset; if (userbuf) rc = copy_oldmem_user((void __force __user *) buf, src, csize); else @@ -429,10 +427,10 @@ static void *nt_prpsinfo(void *ptr) static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; + unsigned long addr; Elf64_Nhdr note; - void *addr; - if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(&note, addr, sizeof(note))) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 76a656b2146f..a778714e4d8b 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3cdf68c53614..08cc86a0db90 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -2,7 +2,6 @@ /* * Copyright IBM Corp. 
2007, 2009 * Author(s): Hongjie Yang , - * Heiko Carstens */ #define KMSG_COMPONENT "setup" @@ -18,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -149,22 +149,10 @@ static __init void setup_topology(void) topology_max_mnest = max_mnest; } -static void early_pgm_check_handler(void) +static void early_pgm_check_handler(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - unsigned long cr0, cr0_new; - unsigned long addr; - - addr = S390_lowcore.program_old_psw.addr; - fixup = s390_search_extables(addr); - if (!fixup) + if (!fixup_exception(regs)) disabled_wait(); - /* Disable low address protection before storing into lowcore. */ - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~(1UL << 28); - __ctl_load(cr0_new, 0, 0); - S390_lowcore.program_old_psw.addr = extable_fixup(fixup); - __ctl_load(cr0, 0, 0); } static noinline __init void setup_lowcore_early(void) @@ -294,6 +282,11 @@ static void __init check_image_bootable(void) disabled_wait(); } +static void __init sort_amode31_extable(void) +{ + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); +} + void __init startup_init(void) { sclp_early_adjust_va(); @@ -302,6 +295,7 @@ void __init startup_init(void) time_early_init(); init_kernel_storage_key(); lockdep_off(); + sort_amode31_extable(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 01bae1d51113..a601a518b569 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -6,11 +6,11 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Heiko Carstens */ #include #include +#include #include #include #include @@ -98,11 +98,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro STCK savearea - ALTERNATIVE ".insn s,0xb2050000,\savearea", \ - ".insn s,0xb27c0000,\savearea", 25 - .endm - /* * The TSTMSK macro 
generates a test-under-mask instruction by * calculating the memory offset for the specified mask value. @@ -191,7 +186,6 @@ _LPP_OFFSET = __LC_LPP #endif GEN_BR_THUNK %r14 - GEN_BR_THUNK %r14,%r13 .section .kprobes.text, "ax" .Ldummy: @@ -232,7 +226,7 @@ ENTRY(__switch_to) aghi %r3,__TASK_pid mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 BR_EX %r14 ENDPROC(__switch_to) @@ -443,7 +437,7 @@ ENDPROC(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler ENTRY(\name) - STCK __LC_INT_CLOCK + stckf __LC_INT_CLOCK stpt __LC_SYS_ENTER_TIMER STBEAR __LC_LAST_BREAK BPOFF @@ -515,7 +509,7 @@ ENTRY(psw_idle) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON - STCK __CLOCK_IDLE_ENTER(%r2) + stckf __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) .globl psw_idle_exit @@ -527,7 +521,7 @@ ENDPROC(psw_idle) * Machine check handler routines */ ENTRY(mcck_int_handler) - STCK __LC_MCCK_CLOCK + stckf __LC_MCCK_CLOCK BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer @@ -654,7 +648,7 @@ ENTRY(mcck_int_handler) ENDPROC(mcck_int_handler) ENTRY(restart_int_handler) - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6083090be1f4..56e5e3712fbb 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 89c0870d5679..1852d46babb1 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -4,8 +4,7 @@ * * Copyright IBM Corp. 
2009,2014 * - * Author(s): Heiko Carstens , - * Martin Schwidefsky + * Author(s): Martin Schwidefsky */ #include @@ -59,20 +58,11 @@ asm( ); #ifdef CONFIG_EXPOLINE -asm( - " .align 16\n" - "ftrace_shared_hotpatch_trampoline_ex:\n" - " lmg %r0,%r1,2(%r1)\n" - " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n" - " j .\n" - "ftrace_shared_hotpatch_trampoline_ex_end:\n" -); - asm( " .align 16\n" "ftrace_shared_hotpatch_trampoline_exrl:\n" " lmg %r0,%r1,2(%r1)\n" - " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */ + " exrl %r0,0f\n" " j .\n" "0: br %r1\n" "ftrace_shared_hotpatch_trampoline_exrl_end:\n" @@ -91,12 +81,8 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) tend = ftrace_shared_hotpatch_trampoline_br_end; #ifdef CONFIG_EXPOLINE if (!nospec_disable) { - tstart = ftrace_shared_hotpatch_trampoline_ex; - tend = ftrace_shared_hotpatch_trampoline_ex_end; - if (test_facility(35)) { /* exrl */ - tstart = ftrace_shared_hotpatch_trampoline_exrl; - tend = ftrace_shared_hotpatch_trampoline_exrl_end; - } + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; } #endif /* CONFIG_EXPOLINE */ if (end) @@ -194,25 +180,26 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } -static void brcl_disable(void *brcl) +static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) { - u8 op = 0x04; /* set mask field to zero */ + u16 old; + u8 op; - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + if (get_kernel_nofault(old, addr)) + return -EFAULT; + if (old != expected) + return -EINVAL; + /* set mask field to all ones or zeroes */ + op = enable ? 
0xf4 : 0x04; + s390_kernel_write((char *)addr + 1, &op, sizeof(op)); + return 0; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - brcl_disable((void *)rec->ip); - return 0; -} - -static void brcl_enable(void *brcl) -{ - u8 op = 0xf4; /* set mask field to all ones */ - - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + /* Expect brcl 0xf,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) @@ -223,8 +210,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (IS_ERR(trampoline)) return PTR_ERR(trampoline); s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); - brcl_enable((void *)rec->ip); - return 0; + /* Expect brcl 0x0,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true); } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -297,14 +284,24 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - brcl_disable(ftrace_graph_caller); + int rc; + + /* Expect brc 0xf,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); + if (rc) + return rc; text_poke_sync_lock(); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - brcl_enable(ftrace_graph_caller); + int rc; + + /* Expect brc 0x0,... 
*/ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (rc) + return rc; text_poke_sync_lock(); return 0; } diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h index 69e416f4c6b0..7f75a9616406 100644 --- a/arch/s390/kernel/ftrace.h +++ b/arch/s390/kernel/ftrace.h @@ -16,8 +16,6 @@ extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; extern const char ftrace_shared_hotpatch_trampoline_br[]; extern const char ftrace_shared_hotpatch_trampoline_br_end[]; -extern const char ftrace_shared_hotpatch_trampoline_ex[]; -extern const char ftrace_shared_hotpatch_trampoline_ex_end[]; extern const char ftrace_shared_hotpatch_trampoline_exrl[]; extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; extern const char ftrace_plt_template[]; diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 42f9a325a257..d7b8b6ad574d 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 5ad1dde23dc5..28ae7df26c4a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 
2005, 2012 * Author(s): Michael Holzheu - * Heiko Carstens * Volker Sameske */ @@ -20,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index cb7099682340..3033f616e256 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -342,7 +342,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) struct ext_int_info *p; int index; - ext_code = *(struct ext_code *) &regs->int_code; + ext_code.int_code = regs->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) set_cpu_flag(CIF_NOHZ_DELAY); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index e27a7d3b0364..e32c14fd1282 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -372,33 +372,26 @@ static int kprobe_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_handler); -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -static void __used kretprobe_trampoline_holder(void) +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) { - asm volatile(".global __kretprobe_trampoline\n" - "__kretprobe_trampoline: bcr 0,0\n"); + /* Replace fake return address with real one. 
*/ + regs->gprs[14] = (unsigned long)correct_ret_addr; } +NOKPROBE_SYMBOL(arch_kretprobe_fixup_return); /* - * Called when the probe at kretprobe trampoline is hit + * Called from __kretprobe_trampoline */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +void trampoline_probe_handler(struct pt_regs *regs) { - regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + kretprobe_trampoline_handler(regs, NULL); } NOKPROBE_SYMBOL(trampoline_probe_handler); +/* assembler function that handles the kretprobes must not be probed itself */ +NOKPROBE_SYMBOL(__kretprobe_trampoline); + /* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" @@ -465,7 +458,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe *p = kprobe_running(); - const struct exception_table_entry *entry; switch(kcb->kprobe_status) { case KPROBE_HIT_SS: @@ -487,10 +479,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. */ - entry = s390_search_extables(regs->psw.addr); - if (entry && ex_handle(entry, regs)) + if (fixup_exception(regs)) return 1; - /* * fixup_exception() could not handle it, * Let do_page_fault() fix it. 
@@ -554,18 +544,13 @@ int kprobe_exceptions_notify(struct notifier_block *self, } NOKPROBE_SYMBOL(kprobe_exceptions_notify); -static struct kprobe trampoline = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline); + return 0; } int arch_trampoline_kprobe(struct kprobe *p) { - return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline; + return 0; } NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 3b895971c3d0..6652e54cf3db 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -88,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) if (stsi(si, 2, 2, 2)) return; cpascii(lgr_info->name, si->name, sizeof(si->name)); - memcpy(&lgr_info->lpar_number, &si->lpar_number, - sizeof(lgr_info->lpar_number)); + lgr_info->lpar_number = si->lpar_number; } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index a16467b3825e..088d57a3083f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -3,7 +3,6 @@ * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, - * Heiko Carstens * Michael Holzheu */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index a24177dcd12a..4786bfe02144 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -2,8 +2,6 @@ /* * Copyright IBM Corp. 
2008, 2009 * - * Author(s): Heiko Carstens , - * */ #include @@ -13,6 +11,18 @@ #include #include + +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) +#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) +/* packed stack: allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 + +#ifdef CONFIG_FUNCTION_TRACER + GEN_BR_THUNK %r1 GEN_BR_THUNK %r14 @@ -22,26 +32,14 @@ ENTRY(ftrace_stub) BR_EX %r14 ENDPROC(ftrace_stub) -#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PTREGS (STACK_FRAME_OVERHEAD) -#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) -#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) -#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) -#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) -#ifdef __PACK_STACK -/* allocate just enough for r14, r15 and backchain */ -#define TRACED_FUNC_FRAME_SIZE 24 -#else -#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD -#endif - .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller .if \allregs == 1 - lghi %r14,0 # save condition code - ipm %r14 # don't put any instructions - sllg %r14,%r14,16 # clobbering CC before this point + # save psw mask + # don't put any instructions clobbering CC before this point + epsw %r1,%r14 + risbg %r14,%r1,0,31,32 .endif lgr %r1,%r15 @@ -57,13 +55,7 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) - stosm (STACK_PTREGS_PSW)(%r15),0 -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS -#else - lghi %r14,_PIF_FTRACE_FULL_REGS - stg %r14,STACK_PTREGS_FLAGS(%r15) -#endif .else xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) .endif @@ -141,3 +133,35 @@ 
SYM_FUNC_START(return_to_handler) SYM_FUNC_END(return_to_handler) #endif +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_KPROBES + +SYM_FUNC_START(__kretprobe_trampoline) + + stg %r14,(__SF_GPRS+8*8)(%r15) + lay %r15,-STACK_FRAME_SIZE(%r15) + stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) + + # store original stack pointer in backchain and pt_regs + lay %r7,STACK_FRAME_SIZE(%r15) + stg %r7,__SF_BACKCHAIN(%r15) + stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) + + # store full psw + epsw %r2,%r3 + risbg %r3,%r2,0,31,32 + stg %r3,STACK_PTREGS_PSW(%r15) + larl %r1,__kretprobe_trampoline + stg %r1,STACK_PTREGS_PSW+8(%r15) + + lay %r2,STACK_PTREGS(%r15) + brasl %r14,trampoline_probe_handler + + mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) + lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) + lpswe __SF_EMPTY(%r15) + +SYM_FUNC_END(__kretprobe_trampoline) + +#endif /* CONFIG_KPROBES */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b032e556eeb7..c0dd72db77b8 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -517,15 +517,9 @@ int module_finalize(const Elf_Ehdr *hdr, ij = me->core_layout.base + me->arch.plt_offset + me->arch.plt_size - PLT_ENTRY_SIZE; - if (test_facility(35)) { - ij[0] = 0xc6000000; /* exrl %r0,.+10 */ - ij[1] = 0x0005a7f4; /* j . */ - ij[2] = 0x000007f1; /* br %r1 */ - } else { - ij[0] = 0x44000000 | (unsigned int) - offsetof(struct lowcore, br_r1_trampoline); - ij[1] = 0xa7f40000; /* j . */ - } + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . 
*/ + ij[2] = 0x000007f1; /* br %r1 */ } secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 651a51914e34..fc60e29b8690 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 60e6fec27bba..717bbcc056e5 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -105,6 +105,7 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, branch, sizeof(branch)); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -117,42 +118,20 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) if (thunk[0] == 0xc6 && thunk[1] == 0x00) /* exrl %r0, */ br = thunk + (*(int *)(thunk + 2)) * 2; - else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 && - thunk[6] == 0x44 && thunk[7] == 0x00 && - (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 && - (thunk[1] & 0xf0) == (thunk[8] & 0xf0)) - /* larl %rx, + ex %r0,0(%rx) */ - br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - /* Check for unconditional branch 0x07f? or 0x47f???? 
*/ - if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) continue; - - memcpy(insnbuf + 2, branch, sizeof(branch)); switch (type) { case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brcl to b, replace with bc + nopr */ - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brcl to br, replace with bcr + nop */ - } break; case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brasl to b, replace with bas + nopr */ - insnbuf[0] = 0x4d; - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brasl to br, replace with basr + nop */ - insnbuf[0] = 0x0d; - } break; } diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 4bef35b79b93..6b5b64e67eee 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * OS info structure has to be page aligned @@ -45,7 +46,7 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size) */ void os_info_entry_add(int nr, void *ptr, u64 size) { - os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].addr = __pa(ptr); os_info.entry[nr].size = size; os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0); os_info.csum = os_info_csum(&os_info); @@ -62,7 +63,7 @@ void __init os_info_init(void) os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); + mem_assign_absolute(S390_lowcore.os_info, __pa(ptr)); } #ifdef CONFIG_CRASH_DUMP @@ -90,7 +91,7 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_oldmem_kernel(buf_align, (void *) addr, size)) { + if 
(copy_oldmem_kernel(buf_align, addr, size)) { msg = "copy failed"; goto fail_free; } @@ -123,15 +124,14 @@ static void os_info_old_init(void) return; if (!oldmem_data.start) goto fail; - if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_oldmem_kernel(os_info_old, (void *) addr, - sizeof(*os_info_old))) + if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ee8707abdb6a..483ab5e10164 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1451,6 +1451,8 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) /* Get the CPU speed, try sampling facility first and CPU attributes second. */ static void cfdiag_get_cpu_speed(void) { + unsigned long mhz; + if (cpum_sf_avail()) { /* Sampling facility first */ struct hws_qsi_info_block si; @@ -1464,12 +1466,9 @@ static void cfdiag_get_cpu_speed(void) /* Fallback: CPU speed extract static part. Used in case * CPU Measurement Sampling Facility is turned off. 
*/ - if (test_facility(34)) { - unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); - - if (mhz != -1UL) - cfdiag_cpu_speed = mhz & 0xffffffff; - } + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; } static int cfset_init(void) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index d9d4a806979e..7a74ea5f7531 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -172,8 +172,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) static int __init setup_hwcaps(void) { /* instructions named N3, "backported" to esa-mode */ - if (test_facility(0)) - elf_hwcap |= HWCAP_ESAN3; + elf_hwcap |= HWCAP_ESAN3; /* z/Architecture mode active */ elf_hwcap |= HWCAP_ZARCH; @@ -191,8 +190,7 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_LDISP; /* extended-immediate */ - if (test_facility(21)) - elf_hwcap |= HWCAP_EIMM; + elf_hwcap |= HWCAP_EIMM; /* extended-translation facility 3 enhancement */ if (test_facility(22) && test_facility(30)) @@ -262,21 +260,7 @@ static int __init setup_elf_platform(void) get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: + default: /* Use "z10" as default. 
*/ strcpy(elf_platform, "z10"); break; case 0x2817: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 0ea3d02b378d..ed3439515bb2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -147,38 +147,36 @@ void ptrace_disable(struct task_struct *task) static inline unsigned long __peek_user_per(struct task_struct *child, addr_t addr) { - struct per_struct_kernel *dummy = NULL; - - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* Control bits of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy->cr10) + else if (addr == offsetof(struct per_struct_kernel, cr10)) /* Start address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy->cr11) + else if (addr == offsetof(struct per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? -1UL : child->thread.per_user.end; - else if (addr == (addr_t) &dummy->bits) + else if (addr == offsetof(struct per_struct_kernel, bits)) /* Single-step bit. */ return test_thread_flag(TIF_SINGLE_STEP) ? (1UL << (BITS_PER_LONG - 1)) : 0; - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return child->thread.per_user.start; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* End address of the user specified per set. 
*/ return child->thread.per_user.end; - else if (addr == (addr_t) &dummy->perc_atmid) + else if (addr == offsetof(struct per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (unsigned long) child->thread.per_event.cause << (BITS_PER_LONG - 16); - else if (addr == (addr_t) &dummy->address) + else if (addr == offsetof(struct per_struct_kernel, address)) /* Address of the last PER trap */ return child->thread.per_event.address; - else if (addr == (addr_t) &dummy->access_id) + else if (addr == offsetof(struct per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (unsigned long) child->thread.per_event.paid << (BITS_PER_LONG - 8); @@ -196,61 +194,60 @@ static inline unsigned long __peek_user_per(struct task_struct *child, */ static unsigned long __peek_user(struct task_struct *child, addr_t addr) { - struct user *dummy = NULL; addr_t offset, tmp; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { /* Return a clean psw mask. */ tmp &= PSW_MASK_USER | PSW_MASK_RI; tmp |= PSW_USER_BITS; } - } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb reading * from acrs[15]. Result is a 64 bit value. Read the * 32 bit acrs[15] value and shift it by 32. Sick... 
*/ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) tmp = ((unsigned long) child->thread.acrs[15]) << 32; else tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; tmp <<= BITS_PER_LONG - 32; - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(addr_t *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -258,11 +255,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) tmp = *(addr_t *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); tmp = __peek_user_per(child, addr); } else @@ -281,8 +278,8 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell... 
*/ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -294,8 +291,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) static inline void __poke_user_per(struct task_struct *child, addr_t addr, addr_t data) { - struct per_struct_kernel *dummy = NULL; - /* * There are only three fields in the per_info struct that the * debugger user can write to. @@ -308,14 +303,14 @@ static inline void __poke_user_per(struct task_struct *child, * addresses are used only if single stepping is not in effect. * Writes to any other field in per_info are ignored. */ - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. 
*/ child->thread.per_user.end = data; } @@ -328,16 +323,15 @@ static inline void __poke_user_per(struct task_struct *child, */ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t offset; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; @@ -359,36 +353,36 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) regs->int_code = 0x20000 | (data & 0xffff); } *(addr_t *)((addr_t) &regs->psw + addr) = data; - } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb writing * to acrs[15] with a 64 bit value. Ignore the lower * half of the value and write the upper 32 bit to * acrs[15]. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) child->thread.acrs[15] = (unsigned int) (data >> 32); else *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ task_pt_regs(child)->orig_gpr2 = data; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent writes of padding hole between * orig_gpr2 and fp_regs on s390.
*/ return 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -397,12 +391,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) return -EINVAL; child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(addr_t *)((addr_t) child->thread.fpu.vxrs + 2*offset) = data; @@ -410,11 +404,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) *(addr_t *)((addr_t) child->thread.fpu.fprs + offset) = data; - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); __poke_user_per(child, addr, data); } @@ -431,8 +425,8 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell indeed... 
*/ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -540,37 +534,35 @@ long arch_ptrace(struct task_struct *child, long request, static inline __u32 __peek_user_per_compat(struct task_struct *child, addr_t addr) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* Control bits of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy32->cr10) + else if (addr == offsetof(struct compat_per_struct_kernel, cr10)) /* Start address of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->cr11) + else if (addr == offsetof(struct compat_per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PSW32_ADDR_INSN : child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->bits) + else if (addr == offsetof(struct compat_per_struct_kernel, bits)) /* Single-step bit. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0x80000000 : 0; - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return (__u32) child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* End address of the user specified per set. 
*/ return (__u32) child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->perc_atmid) + else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (__u32) child->thread.per_event.cause << 16; - else if (addr == (addr_t) &dummy32->address) + else if (addr == offsetof(struct compat_per_struct_kernel, address)) /* Address of the last PER trap */ return (__u32) child->thread.per_event.address; - else if (addr == (addr_t) &dummy32->access_id) + else if (addr == offsetof(struct compat_per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (__u32) child->thread.per_event.paid << 24; return 0; @@ -581,21 +573,20 @@ static inline __u32 __peek_user_per_compat(struct task_struct *child, */ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { /* Fake a 31 bit psw mask. */ tmp = (__u32)(regs->psw.mask >> 32); tmp &= PSW32_MASK_USER | PSW32_MASK_RI; tmp |= PSW32_USER_BITS; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Fake a 31 bit psw address. 
*/ tmp = (__u32) regs->psw.addr | (__u32)(regs->psw.mask & PSW_MASK_BA); @@ -603,38 +594,38 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) /* gpr 0-15 */ tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4); } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs.
are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(__u32 *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -642,11 +633,11 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) tmp = *(__u32 *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); tmp = __peek_user_per_compat(child, addr); } else @@ -673,16 +664,14 @@ static int peek_user_compat(struct task_struct *child, static inline void __poke_user_per_compat(struct task_struct *child, addr_t addr, __u32 data) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. 
*/ child->thread.per_user.end = data; } @@ -693,16 +682,15 @@ static inline void __poke_user_per_compat(struct task_struct *child, static int __poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - struct compat_user *dummy32 = NULL; __u32 tmp = (__u32) data; addr_t offset; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { __u32 mask = PSW32_MASK_USER; mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; @@ -716,7 +704,7 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Build a 64 bit psw address from 31 bit address. */ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; /* Transfer 31 bit amode bit to psw mask. 
*/ @@ -732,27 +720,27 @@ static int __poke_user_compat(struct task_struct *child, /* gpr 0-15 */ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp; } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent writess of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -760,12 +748,12 @@ static int __poke_user_compat(struct task_struct *child, return -EINVAL; child->thread.fpu.fpc = data; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs.
are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(__u32 *)((addr_t) child->thread.fpu.vxrs + 2*offset) = tmp; @@ -773,11 +761,11 @@ static int __poke_user_compat(struct task_struct *child, *(__u32 *)((addr_t) child->thread.fpu.fprs + offset) = tmp; - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); __poke_user_per_compat(child, addr, data); } diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index fe396673e8a6..9438368c3632 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 
2005 * - * Author(s): Rolf Adelsberger, - * Heiko Carstens + * Author(s): Rolf Adelsberger * */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 05327be3a982..84e23fcc1106 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -490,7 +490,6 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 307f5d99514d..5ff8d915ec7a 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -141,7 +141,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2bad902d8437..127da1850b06 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -5,7 +5,6 @@ * Copyright IBM Corp. 
1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd @@ -208,7 +207,6 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; @@ -671,7 +669,7 @@ static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, bool is_boot_cpu, void *regs) { if (is_boot_cpu) - copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); else __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs)); save_area_add_regs(sa, regs); @@ -1253,7 +1251,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc) src.odd = sizeof(S390_lowcore); dst.even = (unsigned long) lc; dst.odd = sizeof(*lc); - pfx = (unsigned long) lc; + pfx = __pa(lc); asm volatile( " mvcl %[dst],%[src]\n" @@ -1293,8 +1291,8 @@ static int __init smp_reinit_ipl_cpu(void) local_irq_restore(flags); free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); - memblock_free_late(lc_ipl->mcck_stack - STACK_INIT_OFFSET, THREAD_SIZE); - memblock_free_late((unsigned long) lc_ipl, sizeof(*lc_ipl)); + memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); return 0; } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index b7bb1981e9ee..7ee455e8e3d5 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -3,7 +3,6 @@ * Stack trace management functions * * Copyright IBM Corp. 
2006 - * Author(s): Heiko Carstens */ #include diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index ef3f2659876c..b5e364358ce4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index 868e4a604110..2c8b14cc5556 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -6,6 +6,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 58f8291950cb..c6eecd4a5302 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "cpu" diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 2b780786fc68..674c65019434 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "entry.h" @@ -53,9 +54,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); } else { - const struct exception_table_entry *fixup; - fixup = s390_search_extables(regs->psw.addr); - if (!fixup || !ex_handle(fixup, regs)) + if (!fixup_exception(regs)) die(regs, str); } } @@ -244,16 +243,12 @@ static void space_switch_exception(struct pt_regs *regs) static void monitor_event_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - if (user_mode(regs)) return; switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: - fixup = s390_search_extables(regs->psw.addr); - if (fixup) - ex_handle(fixup, regs); + fixup_exception(regs); break; case BUG_TRAP_TYPE_WARN: break; @@ -291,7 
+286,6 @@ static void __init test_monitor_call(void) void __init trap_init(void) { - sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); local_mcck_enable(); test_monitor_call(); } @@ -303,7 +297,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) unsigned int trapnr; irqentry_state_t state; - regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_code = S390_lowcore.pgm_int_code; regs->int_parm_long = S390_lowcore.trans_exc_code; state = irqentry_enter(regs); @@ -328,7 +322,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) set_thread_flag(TIF_PER_TRAP); ev->address = S390_lowcore.per_address; - ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->cause = S390_lowcore.per_code_combined; ev->paid = S390_lowcore.per_access_id; } else { /* PER event in kernel is kprobes */ diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bd3ef121c379..b88345ef8bd9 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -177,9 +177,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = EMU_ADDRESSING; \ @@ -194,9 +192,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(ptr) __ptr = (ptr); \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)__ptr & mask) \ + if ((u64 __force)__ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (put_user(*(input), __ptr)) \ __rc = EMU_ADDRESSING; \ @@ -213,9 +209,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = 
EMU_ADDRESSING; \ @@ -327,10 +321,6 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) break; case 0xc6: switch (insn->opc1) { - case 0x02: /* pfdrl */ - if (!test_facility(34)) - rc = EMU_ILLEGAL_OP; - break; case 0x04: /* cghrl */ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); break; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 42c43521878f..2e526f11b91e 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -49,7 +49,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index f216a1b2f825..9436f3053b88 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -128,13 +128,12 @@ static int do_account_vtime(struct task_struct *tsk) timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */ - alternative_io("stpt %0\n .insn s,0xb2050000,%1\n", - "stpt %0\n .insn s,0xb27c0000,%1\n", - 25, - ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)), - ASM_NO_INPUT_CLOBBER("cc")); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stckf %1" /* Store current tod clock value */ + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ca96f84db2cc..ab73d99483fe 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -6,7 +6,6 @@ * * Author(s): Carsten Otte * Christian Borntraeger - * Heiko Carstens * Christian Ehrhardt * Jason J. 
Herne */ diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 69feb8ed3312..5d415b3db6d1 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,6 +7,7 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index bccbf394ae7e..f7f5adea8940 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 1999, 2008 * Author(s): Martin Schwidefsky , - * Heiko Carstens , */ #include diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline.S new file mode 100644 index 000000000000..92ed8409a7a4 --- /dev/null +++ b/arch/s390/lib/expoline.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include + +.macro GEN_ALL_BR_THUNK_EXTERN + .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GEN_BR_THUNK_EXTERN %r\r1 + .endr +.endm + +GEN_ALL_BR_THUNK_EXTERN diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bc7973359ae2..c01f02887de4 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -16,10 +17,14 @@ #include #include -struct kunit *current_test; +static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) +static bool force_bt; +module_param_named(backtrace, force_bt, bool, 0444); +MODULE_PARM_DESC(backtrace, "print backtraces for all tests"); + /* * To avoid printk line limit split backtrace by lines */ @@ -98,7 +103,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } - if (ret) + if (ret || force_bt) print_backtrace(bt); 
kfree(bt); return ret; @@ -124,7 +129,10 @@ static struct unwindme *unwindme; #define UWM_CALLER 0x8 /* Unwind starting from caller. */ #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ -#define UWM_PGM 0x40 /* Unwind from program check handler. */ +#define UWM_PGM 0x40 /* Unwind from program check handler */ +#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ +#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ +#define UWM_KRETPROBE 0x200 /* Unwind kretprobe handlers. */ static __always_inline unsigned long get_psw_addr(void) { @@ -136,8 +144,56 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } -#ifdef CONFIG_KPROBES -static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct unwindme *u = unwindme; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, + (u->flags & UWM_SP) ? 
u->sp : 0); + + return 0; +} + +static noinline notrace void test_unwind_kretprobed_func(void) +{ + asm volatile(" nop\n"); +} + +static noinline void test_unwind_kretprobed_func_caller(void) +{ + test_unwind_kretprobed_func(); +} + +static int test_unwind_kretprobe(struct unwindme *u) +{ + int ret; + struct kretprobe my_kretprobe; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + + memset(&my_kretprobe, 0, sizeof(my_kretprobe)); + my_kretprobe.handler = kretprobe_ret_handler; + my_kretprobe.maxactive = 1; + my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func; + + ret = register_kretprobe(&my_kretprobe); + + if (ret < 0) { + kunit_err(current_test, "register_kretprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kretprobed_func_caller(); + unregister_kretprobe(&my_kretprobe); + unwindme = NULL; + return u->ret; +} + +static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -145,8 +201,97 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) (u->flags & UWM_SP) ? u->sp : 0); return 0; } + +extern const char test_unwind_kprobed_insn[]; + +static noinline void test_unwind_kprobed_func(void) +{ + asm volatile( + " nopr %%r7\n" + "test_unwind_kprobed_insn:\n" + " nopr %%r7\n" + :); +} + +static int test_unwind_kprobe(struct unwindme *u) +{ + struct kprobe kp; + int ret; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE) + kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + memset(&kp, 0, sizeof(kp)); + kp.pre_handler = kprobe_pre_handler; + kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ? 
+ (kprobe_opcode_t *)test_unwind_kprobed_func : + (kprobe_opcode_t *)test_unwind_kprobed_insn; + ret = register_kprobe(&kp); + if (ret < 0) { + kunit_err(current_test, "register_kprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kprobed_func(); + unregister_kprobe(&kp); + unwindme = NULL; + return u->ret; +} + +static void notrace __used test_unwind_ftrace_handler(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *fops, + struct ftrace_regs *fregs) +{ + struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2]; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); +} + +static noinline int test_unwind_ftraced_func(struct unwindme *u) +{ + return READ_ONCE(u)->ret; +} + +static int test_unwind_ftrace(struct unwindme *u) +{ + int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_ops *fops; + + fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); + fops->func = test_unwind_ftrace_handler; + fops->flags = FTRACE_OPS_FL_DYNAMIC | + FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_SAVE_REGS | + FTRACE_OPS_FL_PERMANENT; +#else + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); #endif + ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); + if (ret) { + kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret); + return -1; + } + + ret = register_ftrace_function(fops); + if (!ret) { + ret = test_unwind_ftraced_func(u); + unregister_ftrace_function(fops); + } else { + kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret); + } + + ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0); + return ret; +} + /* This function may or may not appear in the backtrace. 
*/ static noinline int unwindme_func4(struct unwindme *u) { @@ -157,35 +302,12 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; -#ifdef CONFIG_KPROBES - } else if (u->flags & UWM_PGM) { - struct kprobe kp; - int ret; - - unwindme = u; - memset(&kp, 0, sizeof(kp)); - kp.symbol_name = "do_report_trap"; - kp.pre_handler = pgm_pre_handler; - ret = register_kprobe(&kp); - if (ret < 0) { - kunit_err(current_test, "register_kprobe failed %d\n", ret); - return -EINVAL; - } - - /* - * Trigger operation exception; use insn notation to bypass - * llvm's integrated assembler sanity checks. - */ - asm volatile( - " .insn e,0x0000\n" /* illegal opcode */ - "0: nopr %%r7\n" - EX_TABLE(0b, 0b) - :); - - unregister_kprobe(&kp); - unwindme = NULL; - return u->ret; -#endif + } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { + return test_unwind_kprobe(u); + } else if (u->flags & (UWM_KRETPROBE)) { + return test_unwind_kretprobe(u); + } else if (u->flags & UWM_FTRACE) { + return test_unwind_ftrace(u); } else { struct pt_regs regs; @@ -255,7 +377,7 @@ static int test_unwind_irq(struct unwindme *u) } /* Spawns a task and passes it to test_unwind(). 
*/ -static int test_unwind_task(struct kunit *test, struct unwindme *u) +static int test_unwind_task(struct unwindme *u) { struct task_struct *task; int ret; @@ -270,7 +392,7 @@ static int test_unwind_task(struct kunit *test, struct unwindme *u) */ task = kthread_run(unwindme_func1, u, "%s", __func__); if (IS_ERR(task)) { - kunit_err(test, "kthread_run() failed\n"); + kunit_err(current_test, "kthread_run() failed\n"); return PTR_ERR(task); } /* @@ -293,49 +415,43 @@ struct test_params { /* * Create required parameter list for tests */ +#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f } static const struct test_params param_list[] = { - {.flags = UWM_DEFAULT, .name = "UWM_DEFAULT"}, - {.flags = UWM_SP, .name = "UWM_SP"}, - {.flags = UWM_REGS, .name = "UWM_REGS"}, - {.flags = UWM_SWITCH_STACK, - .name = "UWM_SWITCH_STACK"}, - {.flags = UWM_SP | UWM_REGS, - .name = "UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP, - .name = "WM_CALLER | UWM_SP"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = "UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - {.flags = UWM_THREAD, .name = "UWM_THREAD"}, - {.flags = UWM_THREAD | UWM_SP, - .name = "UWM_THREAD | UWM_SP"}, - {.flags = UWM_THREAD | UWM_CALLER | UWM_SP, - .name = "UWM_THREAD | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ, .name = "UWM_IRQ"}, - {.flags = UWM_IRQ | UWM_SWITCH_STACK, - .name = "UWM_IRQ | UWM_SWITCH_STACK"}, - {.flags = UWM_IRQ | UWM_SP, - .name = "UWM_IRQ | UWM_SP"}, - {.flags = UWM_IRQ | UWM_REGS, - .name = "UWM_IRQ | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = 
"UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - #ifdef CONFIG_KPROBES - {.flags = UWM_PGM, .name = "UWM_PGM"}, - {.flags = UWM_PGM | UWM_SP, - .name = "UWM_PGM | UWM_SP"}, - {.flags = UWM_PGM | UWM_REGS, - .name = "UWM_PGM | UWM_REGS"}, - {.flags = UWM_PGM | UWM_SP | UWM_REGS, - .name = "UWM_PGM | UWM_SP | UWM_REGS"}, - #endif + TEST_WITH_FLAGS(UWM_DEFAULT), + TEST_WITH_FLAGS(UWM_SP), + TEST_WITH_FLAGS(UWM_REGS), + TEST_WITH_FLAGS(UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_THREAD), + TEST_WITH_FLAGS(UWM_THREAD | UWM_SP), + TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_PGM), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP), + TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS), }; /* @@ -360,7 +476,7 @@ static void test_unwind_flags(struct kunit *test) params = (const struct test_params 
*)test->param_value; u.flags = params->flags; if (u.flags & UWM_THREAD) - KUNIT_EXPECT_EQ(test, 0, test_unwind_task(test, &u)); + KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u)); else if (u.flags & UWM_IRQ) KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u)); else diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index b709239feb5d..d7b3b193d108 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -8,13 +8,10 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ -#include #include #include -#include #include -#include -#include +#include #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) @@ -34,32 +31,8 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES -static DEFINE_STATIC_KEY_FALSE(have_mvcos); - -static int __init uaccess_init(void) -{ - if (test_facility(27)) - static_branch_enable(&have_mvcos); - return 0; -} -early_initcall(uaccess_init); - -static inline int copy_with_mvcos(void) -{ - if (static_branch_likely(&have_mvcos)) - return 1; - return 0; -} -#else -static inline int copy_with_mvcos(void) -{ - return 1; -} -#endif - -static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size, unsigned long key) +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -72,7 +45,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "0: mvcos 0(%2),0(%1),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -83,61 +56,18 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "3: mvcos 0(%2),0(%1),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n, key); - return copy_from_user_mvcp(to, from, n, key); -} - unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { return raw_copy_from_user_key(to, from, n, 0); @@ -160,8 +90,8 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size, unsigned long key) +static unsigned long raw_copy_to_user_key(void 
__user *to, const void *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -174,7 +104,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "0: mvcos 0(%1),0(%2),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -185,61 +115,18 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "3: mvcos 0(%1),0(%2),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n, key); - return copy_to_user_mvcs(to, from, n, key); -} - unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { return raw_copy_to_user_key(to, from, n, 0); @@ -257,7 +144,7 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, } EXPORT_SYMBOL(_copy_to_user_key); -static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) +unsigned long __clear_user(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; union oac spec = { @@ -268,7 +155,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + "0: mvcos 0(%1),0(%4),%0\n" " jz 4f\n" "1: algr %0,%2\n" " slgr %1,%2\n" @@ -278,7 +165,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size " slgr %3,%1\n" " clgr %0,%3\n" /* copy crosses next page boundary? 
*/ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + "3: mvcos 0(%1),0(%4),%3\n" " slgr %0,%3\n" " j 5f\n" "4: slgr %0,%0\n" @@ -289,46 +176,4 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size : "cc", "memory", "0"); return size; } - -static inline unsigned long clear_user_xc(void __user *to, unsigned long size) -{ - unsigned long tmp1, tmp2; - - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - " xc 0(1,%1),0(%1)\n" - "0: aghi %0,257\n" - " la %2,255(%1)\n" /* %2 = ptr + 255 */ - " srl %2,12\n" - " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ - " slgr %2,%1\n" - " clgr %0,%2\n" /* clear crosses next page boundary? */ - " jnh 5f\n" - " aghi %2,-1\n" - "1: ex %2,0(%3)\n" - " aghi %2,1\n" - " slgr %0,%2\n" - " j 5f\n" - "2: xc 0(256,%1),0(%1)\n" - " la %1,256(%1)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,0(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - if (copy_with_mvcos()) - return clear_user_mvcos(to, size); - return clear_user_xc(to, size); -} EXPORT_SYMBOL(__clear_user); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index cd67e94c16aa..57e4f3a24829 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,7 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o pageattr.o pgtable.o pgalloc.o +obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 000000000000..8ac8ad2474a0 --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +const struct 
exception_table_entry *s390_search_extables(unsigned long addr) +{ + const struct exception_table_entry *fixup; + size_t num; + + fixup = search_exception_tables(addr); + if (fixup) + return fixup; + num = __stop_amode31_ex_table - __start_amode31_ex_table; + return search_extable(__start_amode31_ex_table, num, addr); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->psw.addr = extable_fixup(ex); + return true; +} + +static bool ex_handler_uaccess(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->gprs[ex->data] = -EFAULT; + regs->psw.addr = extable_fixup(ex); + return true; +} + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + ex = s390_search_extables(instruction_pointer(regs)); + if (!ex) + return false; + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS: + return ex_handler_uaccess(ex, regs); + } + panic("invalid exception table entry"); +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ff16ce0d04ee..e173b6187ad5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -227,27 +228,10 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -const struct exception_table_entry *s390_search_extables(unsigned long addr) -{ - const struct exception_table_entry *fixup; - - fixup = search_extable(__start_amode31_ex_table, - __stop_amode31_ex_table - __start_amode31_ex_table, - addr); - if (!fixup) - fixup = search_exception_tables(addr); - return fixup; -} - static noinline void do_no_context(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - - /* Are we prepared to handle this kernel fault? 
*/ - fixup = s390_search_extables(regs->psw.addr); - if (fixup && ex_handle(fixup, regs)) + if (fixup_exception(regs)) return; - /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index dfee0ebb2fac..af03cacf34ec 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -974,18 +974,18 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, return -EAGAIN; if (prot == PROT_NONE && !pmd_i) { - pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } if (prot == PROT_READ && !pmd_p) { - pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; - pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } if (bits & GMAP_NOTIFY_MPROT) - pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); /* Shadow GMAP protection needs split PMDs */ if (bits & GMAP_NOTIFY_SHADOW) @@ -1151,7 +1151,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; *val = *(unsigned long *) address; - pte_val(*ptep) |= _PAGE_YOUNG; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ rc = 0; } @@ -1278,7 +1278,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) { asm volatile( - " .insn rrf,0xb98e0000,%0,%1,0,0" + " idte %0,0,%1" : : "a" (asce), "a" (vaddr) : "cc", "memory"); } @@ -2275,7 +2275,7 @@ EXPORT_SYMBOL_GPL(ptep_notify); static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, unsigned long gaddr) { - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); } @@ -2294,7 +2294,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, { gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); - pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); @@ -2302,7 +2302,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *pmdp = new; + set_pmd(pmdp, new); } static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, @@ -2324,7 +2324,7 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, _SEGMENT_ENTRY_GMAP_UC)); if (purge) __pmdp_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } spin_unlock(&gmap->guest_table_lock); } @@ -2447,7 +2447,7 @@ static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, return false; /* Clear UC indication and reset protection */ - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC))); gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); return true; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 082793d497ec..10e51ef9c79a 100644 --- 
a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -73,8 +73,8 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + unsigned long pteval; int present; - pte_t pte; if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) present = pud_present(__pud(rste)); @@ -102,29 +102,21 @@ static inline pte_t __rste_to_pte(unsigned long rste) * u unused, l large */ if (present) { - pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; - pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, - _PAGE_READ); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, - _PAGE_WRITE); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, - _PAGE_INVALID); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, - _PAGE_PROTECT); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, - _PAGE_DIRTY); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, - _PAGE_YOUNG); + pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; + pteval |= _PAGE_LARGE | _PAGE_PRESENT; + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_SOFT_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, - _PAGE_NOEXEC); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); } else - pte_val(pte) = _PAGE_INVALID; - return pte; + pteval = _PAGE_INVALID; + return __pte(pteval); } static void clear_huge_pte_skeys(struct 
mm_struct *mm, unsigned long rste) @@ -168,7 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _SEGMENT_ENTRY_LARGE; clear_huge_pte_skeys(mm, rste); - pte_val(*ptep) = rste; + set_pte(ptep, __pte(rste)); } pte_t huge_ptep_get(pte_t *ptep) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 483b9dbe0970..9f988d4582ed 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -175,7 +175,7 @@ static void __init kasan_early_pgtable_populate(unsigned long address, page = kasan_early_alloc_segment(); memset(page, 0, _SEGMENT_SIZE); } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; + set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot)); address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -194,16 +194,16 @@ static void __init kasan_early_pgtable_populate(unsigned long address, switch (mode) { case POPULATE_ONE2ONE: page = (void *)address; - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_MAP: page = kasan_early_alloc_pages(0); memset(page, 0, PAGE_SIZE); - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_ZERO_SHADOW: page = kasan_early_shadow_page; - pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero)); break; case POPULATE_SHALLOW: /* should never happen */ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 9663ce3625bc..421efa46946b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -4,8 +4,6 @@ * * Copyright IBM Corp. 
2009, 2015 * - * Author(s): Heiko Carstens , - * */ #include @@ -14,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -123,7 +122,7 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, /* * Copy memory in real mode (kernel to kernel) */ -int memcpy_real(void *dest, void *src, size_t count) +int memcpy_real(void *dest, unsigned long src, size_t count) { unsigned long _dest = (unsigned long)dest; unsigned long _src = (unsigned long)src; @@ -175,7 +174,7 @@ void memcpy_absolute(void *dest, void *src, size_t count) /* * Copy memory from kernel (real) to user (virtual) */ -int copy_to_user_real(void __user *dest, void *src, unsigned long count) +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) { int offs = 0, size, rc; char *buf; @@ -201,15 +200,15 @@ out: /* * Check if physical address is within prefix or zero page */ -static int is_swapped(unsigned long addr) +static int is_swapped(phys_addr_t addr) { - unsigned long lc; + phys_addr_t lc; int cpu; if (addr < sizeof(struct lowcore)) return 1; for_each_online_cpu(cpu) { - lc = (unsigned long) lowcore_ptr[cpu]; + lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; return 1; @@ -225,7 +224,8 @@ static int is_swapped(unsigned long addr) */ void *xlate_dev_mem_ptr(phys_addr_t addr) { - void *bounce = (void *) addr; + void *ptr = phys_to_virt(addr); + void *bounce = ptr; unsigned long size; cpus_read_lock(); @@ -234,7 +234,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) size = PAGE_SIZE - (addr & ~PAGE_MASK); bounce = (void *) __get_free_page(GFP_ATOMIC); if (bounce) - memcpy_absolute(bounce, (void *) addr, size); + memcpy_absolute(bounce, ptr, size); } preempt_enable(); cpus_read_unlock(); @@ -244,8 +244,8 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) /* * Free converted buffer for /dev/mem access (if necessary) */ -void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf) +void 
unxlate_dev_mem_ptr(phys_addr_t addr, void *ptr) { - if ((void *) addr != buf) - free_page((unsigned long) buf); + if (addr != virt_to_phys(ptr)) + free_page((unsigned long)ptr); } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 18a6381097a9..d5ea09d78938 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 654019181a37..85195c18b2e8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -98,9 +98,9 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); if (flags & SET_MEMORY_NX) - pte_val(new) |= _PAGE_NOEXEC; + new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); else if (flags & SET_MEMORY_X) - pte_val(new) &= ~_PAGE_NOEXEC; + new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; addr += PAGE_SIZE; @@ -127,11 +127,11 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) prot &= ~_PAGE_NOEXEC; ptep = pt_dir; for (i = 0; i < PTRS_PER_PTE; i++) { - pte_val(*ptep) = pte_addr | prot; + set_pte(ptep, __pte(pte_addr | prot)); pte_addr += PAGE_SIZE; ptep++; } - pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); update_page_count(PG_DIRECT_MAP_1M, -1); @@ -148,9 +148,9 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); if (flags & SET_MEMORY_NX) - pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; + new = clear_pmd_bit(new, 
__pgprot(_SEGMENT_ENTRY_NOEXEC)); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -208,11 +208,11 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) prot &= ~_SEGMENT_ENTRY_NOEXEC; pmdp = pm_dir; for (i = 0; i < PTRS_PER_PMD; i++) { - pmd_val(*pmdp) = pmd_addr | prot; + set_pmd(pmdp, __pmd(pmd_addr | prot)); pmd_addr += PMD_SIZE; pmdp++; } - pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + new = __pud(__pa(pm_dir) | _REGION3_ENTRY); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); update_page_count(PG_DIRECT_MAP_2G, -1); @@ -229,9 +229,9 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); if (flags & SET_MEMORY_NX) - pud_val(new) |= _REGION_ENTRY_NOEXEC; + new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pud_val(new) &= ~_REGION_ENTRY_NOEXEC; + new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } @@ -347,23 +347,24 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + pte_t *ptep, pte; int nr, i, j; - pte_t *pte; for (i = 0; i < numpages;) { address = (unsigned long)page_to_virt(page + i); - pte = virt_to_kpte(address); - nr = (unsigned long)pte >> ilog2(sizeof(long)); + ptep = virt_to_kpte(address); + nr = (unsigned long)ptep >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); + set_pte(ptep, pte); address += PAGE_SIZE; - pte++; + ptep++; } } else { - ipte_range(pte, address, nr); + ipte_range(ptep, address, nr); } i += nr; } diff --git a/arch/s390/mm/pgalloc.c 
b/arch/s390/mm/pgalloc.c index fd35c1a0213b..2de48b2c1b04 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -53,17 +53,17 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_KERNEL, 2); + struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return NULL; - arch_set_page_dat(page, 2); + arch_set_page_dat(page, CRST_ALLOC_ORDER); return (unsigned long *) page_to_virt(page); } void crst_table_free(struct mm_struct *mm, unsigned long *table) { - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); } static void __crst_table_upgrade(void *arg) @@ -403,7 +403,7 @@ void __tlb_remove_table(void *_table) switch (half) { case 0x00U: /* pmd, pud, or p4d */ - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); return; case 0x01U: /* lower 2K of a 4K page table */ case 0x02U: /* higher 2K of a 4K page table */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index c16232cd0ec5..697df02362af 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -115,7 +115,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pte_val(*ptep) |= _PAGE_INVALID; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID))); mm->context.flush_mm = 1; } else ptep_ipte_global(mm, addr, ptep, nodat); @@ -224,15 +224,15 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. 
*/ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; + entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY)); + entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ pgste_val(pgste) |= PGSTE_UC_BIT; } #endif - *ptep = entry; + set_pte(ptep, entry); return pgste; } @@ -275,12 +275,12 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm, pgste = pgste_update_all(old, pgste, mm); if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) - pte_val(old) |= _PAGE_UNUSED; + old = set_pte_bit(old, __pgprot(_PAGE_UNUSED)); } pgste = pgste_set_pte(ptep, pgste, new); pgste_set_unlock(ptep, pgste); } else { - *ptep = new; + set_pte(ptep, new); } return old; } @@ -345,14 +345,14 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm = vma->vm_mm; if (!MACHINE_HAS_NX) - pte_val(pte) &= ~_PAGE_NOEXEC; + pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); pgste = pgste_set_pte(ptep, pgste, pte); pgste_set_unlock(ptep, pgste); } else { - *ptep = pte; + set_pte(ptep, pte); } preempt_enable(); } @@ -417,7 +417,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID))); mm->context.flush_mm = 1; if (mm_has_pgste(mm)) gmap_pmdp_invalidate(mm, addr); @@ -469,7 +469,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -482,7 +482,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_lazy(mm, addr, 
pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -539,7 +539,7 @@ pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pudp_flush_direct(mm, addr, pudp); - *pudp = new; + set_pud(pudp, new); preempt_enable(); return old; } @@ -579,9 +579,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); ptep++; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -646,12 +646,12 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, if (prot == PROT_NONE && !pte_i) { ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); - pte_val(entry) |= _PAGE_INVALID; + entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID)); } if (prot == PROT_READ && !pte_p) { ptep_flush_direct(mm, addr, ptep, nodat); - pte_val(entry) &= ~_PAGE_INVALID; - pte_val(entry) |= _PAGE_PROTECT; + entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); + entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } pgste_val(pgste) |= bit; pgste = pgste_set_pte(ptep, pgste, entry); @@ -675,8 +675,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, !(pte_val(pte) & _PAGE_PROTECT))) { pgste_val(spgste) |= PGSTE_VSIE_BIT; tpgste = pgste_get_lock(tptep); - pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | - (pte_val(pte) & _PAGE_PROTECT); + tpte = __pte((pte_val(spte) & PAGE_MASK) | + (pte_val(pte) & _PAGE_PROTECT)); /* don't touch the storage key - it belongs to parent pgste */ tpgste = pgste_set_pte(tptep, tpgste, tpte); pgste_set_unlock(tptep, tpgste); @@ -773,10 +773,10 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - 
pte_val(pte) |= _PAGE_PROTECT; + pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; + pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); + set_pte(ptep, pte); } pgste_set_unlock(ptep, pgste); return dirty; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7d9705eeb02f..c2583f921ca8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens */ #include @@ -175,9 +174,9 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, if (!new_page) goto out; - pte_val(*pte) = __pa(new_page) | prot; + set_pte(pte, __pte(__pa(new_page) | prot)); } else { - pte_val(*pte) = __pa(addr) | prot; + set_pte(pte, __pte(__pa(addr) | prot)); } } else { continue; @@ -243,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, IS_ALIGNED(next, PMD_SIZE) && MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { - pmd_val(*pmd) = __pa(addr) | prot; + set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; } else if (!direct && MACHINE_HAS_EDAT1) { @@ -258,7 +257,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, */ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); if (new_page) { - pmd_val(*pmd) = __pa(new_page) | prot; + set_pmd(pmd, __pmd(__pa(new_page) | prot)); if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(next, PMD_SIZE)) { vmemmap_use_new_sub_pmd(addr, next); @@ -339,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE) && MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { - pud_val(*pud) = __pa(addr) | prot; + set_pud(pud, __pud(__pa(addr) | prot)); pages++; continue; } @@ -585,13 +584,9 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - if 
(nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) { - /* - * Lowcore must be executable for LPSWE - * and expoline trampoline branch instructions. - */ + /* lowcore must be executable for LPSWE */ + if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - } pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9ff2bd83aad7..aede9a3ca3f7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -7,7 +7,6 @@ * - HAVE_MARCH_Z196_FEATURES: laal, laalg * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf - * - PACK_STACK * - 64BIT * * Copyright IBM Corp. 2012,2015 @@ -26,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -570,15 +570,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) if (nospec_uses_trampoline()) { jit->r14_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r14 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); - /* ex 0,0(%r1) */ - EMIT4_DISP(0x44000000, REG_0, REG_1, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); /* j . */ EMIT4_PCREL(0xa7f40000, 0); } @@ -589,20 +582,12 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); - /* br %r1 */ - _EMIT2(0x07f1); - } else { - /* ex 0,S390_lowcore.br_r1_tampoline */ - EMIT4_DISP(0x44000000, REG_0, REG_0, - offsetof(struct lowcore, br_r1_trampoline)); - /* j . 
*/ - EMIT4_PCREL(0xa7f40000, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); } } @@ -622,19 +607,10 @@ static int get_probe_mem_regno(const u8 *insn) return insn[1] >> 4; } -static bool ex_handler_bpf(const struct exception_table_entry *x, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { - int regno; - u8 *insn; - regs->psw.addr = extable_fixup(x); - insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); - regno = get_probe_mem_regno(insn); - if (WARN_ON_ONCE(regno < 0)) - /* JIT bug - unexpected instruction. */ - return false; - regs->gprs[regno] = 0; + regs->gprs[x->data] = 0; return true; } @@ -642,16 +618,17 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, int probe_prg, int nop_prg) { struct exception_table_entry *ex; + int reg, prg; s64 delta; u8 *insn; - int prg; int i; if (!fp->aux->extable) /* Do nothing during early JIT passes. */ return 0; insn = jit->prg_buf + probe_prg; - if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + reg = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(reg < 0)) /* JIT bug - unexpected probe instruction. */ return -1; if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) @@ -678,7 +655,8 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, /* JIT bug - landing pad and extable must be close. 
*/ return -1; ex->fixup = delta; - ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + ex->type = EX_TYPE_BPF; + ex->data = reg; jit->excnt++; } return 0; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index be077b39da33..63f3e057c168 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 4dd58b196cea..1710d006ee93 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 2b6062c486f5..500cd2dbdf53 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -99,7 +99,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) } /* Register adapter interruptions */ -int zpci_set_irq(struct zpci_dev *zdev) +static int zpci_set_irq(struct zpci_dev *zdev) { int rc; @@ -115,7 +115,7 @@ int zpci_set_irq(struct zpci_dev *zdev) } /* Clear adapter interruptions */ -int zpci_clear_irq(struct zpci_dev *zdev) +static int zpci_clear_irq(struct zpci_dev *zdev) { int rc; diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index c5b35ea129cf..080c88620723 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/tools/gcc-thunk-extern.sh b/arch/s390/tools/gcc-thunk-extern.sh new file mode 100755 index 000000000000..20bcbf6dd7ab --- /dev/null +++ b/arch/s390/tools/gcc-thunk-extern.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Borrowed from gcc: gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c +# Checks that we don't get error: section type conflict with ‘put_page’. 
+ +cat << "END" | $@ -x c - -fno-PIE -march=z10 -mindirect-branch=thunk-extern -mfunction-return=thunk-extern -mindirect-branch-table -O2 -c -o /dev/null +int a; +int b (void); +void c (int); + +static void +put_page (void) +{ + if (b ()) + c (a); +} + +__attribute__ ((__section__ (".init.text"), __cold__)) void +d (void) +{ + put_page (); + put_page (); +} +END diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 606324e56e4e..530dd941d140 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -27,24 +27,16 @@ static struct facility_def facility_defs[] = { */ .name = "FACILITIES_ALS", .bits = (int[]){ -#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES 0, /* N3 instructions */ 1, /* z/Arch mode installed */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES 18, /* long displacement facility */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES 21, /* extended-immediate facility */ 25, /* store clock fast */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES 27, /* mvcos */ 32, /* compare and swap and store */ 33, /* compare and swap and store 2 */ 34, /* general instructions extension */ 35, /* execute extensions */ -#endif #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 45, /* fast-BCR, etc. 
*/ #endif diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 95bdbaaa5ae6..f46c7d2d439c 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -21,6 +21,9 @@ config XTENSA select DMA_REMAP if MMU select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW + select GENERIC_LIB_CMPDI2 + select GENERIC_LIB_MULDI3 + select GENERIC_LIB_UCMPDI2 select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select HAVE_ARCH_AUDITSYSCALL @@ -32,6 +35,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS if GCC_VERSION >= 120000 select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI @@ -89,6 +93,9 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN def_bool !CPU_BIG_ENDIAN +config CC_HAVE_CALL0_ABI + def_bool $(success,test "$(shell,echo __XTENSA_CALL0_ABI__ | $(CC) -mabi=call0 -E -P - 2>/dev/null)" = 1) + menu "Processor type and features" choice @@ -221,6 +228,15 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. +config SECONDARY_RESET_VECTOR + bool "Secondary cores use alternative reset vector" + default y + depends on HAVE_SMP + help + Secondary cores may be configured to use alternative reset vector, + or all cores may use primary reset vector. + Say Y here to supply handler for the alternative reset location. + config FAST_SYSCALL_XTENSA bool "Enable fast atomic syscalls" default n @@ -247,6 +263,38 @@ config FAST_SYSCALL_SPILL_REGISTERS If unsure, say N. +choice + prompt "Kernel ABI" + default KERNEL_ABI_DEFAULT + help + Select ABI for the kernel code. This ABI is independent of the + supported userspace ABI and any combination of the + kernel/userspace ABI is possible and should work. + + In case both kernel and userspace support only call0 ABI + all register windows support code will be omitted from the + build. + + If unsure, choose the default ABI. 
+ +config KERNEL_ABI_DEFAULT + bool "Default ABI" + help + Select this option to compile kernel code with the default ABI + selected for the toolchain. + Normally cores with windowed registers option use windowed ABI and + cores without it use call0 ABI. + +config KERNEL_ABI_CALL0 + bool "Call0 ABI" if CC_HAVE_CALL0_ABI + help + Select this option to compile kernel code with call0 ABI even with + toolchain that defaults to windowed ABI. + When this option is not selected the default toolchain ABI will + be used for the kernel code. + +endchoice + config USER_ABI_CALL0 bool diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index ee2769519eaf..5097caa7bf0c 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -35,6 +35,10 @@ KBUILD_CFLAGS += -ffreestanding -D__linux__ KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,) KBUILD_CFLAGS += $(call cc-option,-mno-serialize-volatile,) +ifneq ($(CONFIG_KERNEL_ABI_CALL0),) +KBUILD_CFLAGS += -mabi=call0 +KBUILD_AFLAGS += -mabi=call0 +endif KBUILD_AFLAGS += -mlongcalls -mtext-section-literals @@ -51,13 +55,9 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Find libgcc.a - -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - head-y := arch/xtensa/kernel/head.o -libs-y += arch/xtensa/lib/ $(LIBGCC) +libs-y += arch/xtensa/lib/ boot := arch/xtensa/boot diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi index 9bf8bad1dd18..c33932568aa7 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi @@ -8,19 +8,19 @@ reg = <0x00000000 0x08000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "data"; reg = <0x00000000 0x06000000>; }; - partition@0x6000000 { + partition@6000000 { label = "boot loader area"; reg = <0x06000000 0x00800000>; }; - 
partition@0x6800000 { + partition@6800000 { label = "kernel image"; reg = <0x06800000 0x017e0000>; }; - partition@0x7fe0000 { + partition@7fe0000 { label = "boot environment"; reg = <0x07fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi index 40c2f81f7cb6..7bde2ab2d6fb 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi @@ -8,19 +8,19 @@ reg = <0x08000000 0x01000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x00400000>; }; - partition@0x400000 { + partition@400000 { label = "kernel image"; reg = <0x00400000 0x00600000>; }; - partition@0xa00000 { + partition@a00000 { label = "data"; reg = <0x00a00000 0x005e0000>; }; - partition@0xfe0000 { + partition@fe0000 { label = "boot environment"; reg = <0x00fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi index fb8d3a9f33c2..0655b868749a 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi @@ -8,11 +8,11 @@ reg = <0x08000000 0x00400000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x003f0000>; }; - partition@0x3f0000 { + partition@3f0000 { label = "boot environment"; reg = <0x003f0000 0x00010000>; }; diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 809c507d1825..e3474ca411ff 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -191,7 +191,39 @@ #endif .endm -#define XTENSA_STACK_ALIGNMENT 16 + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 
+1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm #if defined(__XTENSA_WINDOWED_ABI__) diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index 9138077e567d..f856d2bcb9f3 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -37,4 +37,11 @@ #endif #endif +/* Xtensa ABI requires stack alignment to be at least 16 */ +#if XCHAL_DATA_WIDTH > 16 +#define XTENSA_STACK_ALIGNMENT XCHAL_DATA_WIDTH +#else +#define XTENSA_STACK_ALIGNMENT 16 +#endif + #endif diff --git a/arch/xtensa/include/asm/pci-bridge.h b/arch/xtensa/include/asm/pci-bridge.h index 405526912d9a..e320aa5bbedb 100644 --- a/arch/xtensa/include/asm/pci-bridge.h +++ b/arch/xtensa/include/asm/pci-bridge.h @@ -73,13 +73,4 @@ static inline void pcibios_init_resource(struct resource *res, res->child = NULL; } - -/* These are used for config access before all the PCI probing has been done. 
*/ -int early_read_config_byte(struct pci_controller*, int, int, int, u8*); -int early_read_config_word(struct pci_controller*, int, int, int, u16*); -int early_read_config_dword(struct pci_controller*, int, int, int, u32*); -int early_write_config_byte(struct pci_controller*, int, int, int, u8); -int early_write_config_word(struct pci_controller*, int, int, int, u16); -int early_write_config_dword(struct pci_controller*, int, int, int, u32); - #endif /* _XTENSA_PCI_BRIDGE_H */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8da562f5da73..0a91376131c5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -412,6 +412,10 @@ extern void update_mmu_cache(struct vm_area_struct * vma, typedef pte_t *pte_addr_t; +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); +#define __HAVE_ARCH_UPDATE_MMU_TLB + #endif /* !defined (__ASSEMBLY__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index abad7c3df46f..4489a27d527a 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -18,11 +18,7 @@ #include #include -/* Xtensa ABI requires stack alignment to be at least 16 */ - -#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) - -#define ARCH_SLAB_MINALIGN STACK_ALIGN +#define ARCH_SLAB_MINALIGN XTENSA_STACK_ALIGNMENT /* * User space process size: 1 GB. 
@@ -239,8 +235,8 @@ extern unsigned long __get_wchan(struct task_struct *p); #define xtensa_set_sr(x, sr) \ ({ \ - unsigned int v = (unsigned int)(x); \ - __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \ + __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \ + "a"((unsigned int)(x))); \ }) #define xtensa_get_sr(sr) \ diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index b109416dc07e..308f209a4740 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -44,6 +44,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * This struct defines the way the registers are stored on the @@ -77,14 +78,12 @@ struct pt_regs { /* current register frame. * Note: The ESF for kernel exceptions ends after 16 registers! */ - unsigned long areg[16]; + unsigned long areg[XCHAL_NUM_AREGS]; }; -#include - # define arch_has_single_step() (1) # define task_pt_regs(tsk) ((struct pt_regs*) \ - (task_stack_page(tsk) + KERNEL_STACK_SIZE - (XCHAL_NUM_AREGS-16)*4) - 1) + (task_stack_page(tsk) + KERNEL_STACK_SIZE) - 1) # define user_mode(regs) (((regs)->ps & 0x00000020)!=0) # define instruction_pointer(regs) ((regs)->pc) # define return_pointer(regs) (MAKE_PC_FROM_RA((regs)->areg[0], \ diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index d4082c6a121b..5fd6cd15e0fb 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -13,7 +13,8 @@ obj-$(CONFIG_MMU) += pci-dma.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o -obj-$(CONFIG_SMP) += smp.o mxhead.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SECONDARY_RESET_VECTOR) += mxhead.o obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index f1fd1390d069..37278e2785fb 100644 --- 
a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -63,7 +63,7 @@ int main(void) DEFINE(PT_AREG15, offsetof (struct pt_regs, areg[15])); DEFINE(PT_WINDOWBASE, offsetof (struct pt_regs, windowbase)); DEFINE(PT_WINDOWSTART, offsetof(struct pt_regs, windowstart)); - DEFINE(PT_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_KERNEL_SIZE, offsetof(struct pt_regs, areg[16])); DEFINE(PT_AREG_END, offsetof (struct pt_regs, areg[XCHAL_NUM_AREGS])); DEFINE(PT_USER_SIZE, offsetof(struct pt_regs, areg[XCHAL_NUM_AREGS])); DEFINE(PT_XTREGS_OPT, offsetof(struct pt_regs, xtregs_opt)); diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index a1029a5b6a1d..6b6eff658795 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -341,8 +341,8 @@ KABI_W _bbsi.l a2, 3, 1f /* Copy spill slots of a0 and a1 to imitate movsp * in order to keep exception stack continuous */ - l32i a3, a1, PT_SIZE - l32i a0, a1, PT_SIZE + 4 + l32i a3, a1, PT_KERNEL_SIZE + l32i a0, a1, PT_KERNEL_SIZE + 4 s32e a3, a1, -16 s32e a0, a1, -12 #endif @@ -488,11 +488,12 @@ KABI_W or a3, a3, a2 common_exception_return: #if XTENSA_FAKE_NMI - l32i a2, a1, PT_EXCCAUSE - movi a3, EXCCAUSE_MAPPED_NMI - beq a2, a3, .LNMIexit + l32i abi_tmp0, a1, PT_EXCCAUSE + movi abi_tmp1, EXCCAUSE_MAPPED_NMI + l32i abi_saved1, a1, PT_PS + beq abi_tmp0, abi_tmp1, .Lrestore_state #endif -1: +.Ltif_loop: irq_save a2, a3 #ifdef CONFIG_TRACE_IRQFLAGS abi_call trace_hardirqs_off @@ -503,7 +504,7 @@ common_exception_return: l32i abi_saved1, a1, PT_PS GET_THREAD_INFO(a2, a1) l32i a4, a2, TI_FLAGS - _bbci.l abi_saved1, PS_UM_BIT, 6f + _bbci.l abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel /* Specific to a user exception exit: * We need to check some flags for signal handling and rescheduling, @@ -512,12 +513,12 @@ common_exception_return: * Note that we don't disable interrupts here. 
*/ - _bbsi.l a4, TIF_NEED_RESCHED, 3f + _bbsi.l a4, TIF_NEED_RESCHED, .Lresched movi a2, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL - bnone a4, a2, 5f + bnone a4, a2, .Lexit_tif_loop_user -2: l32i a4, a1, PT_DEPC - bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f + l32i a4, a1, PT_DEPC + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state /* Call do_signal() */ @@ -527,48 +528,41 @@ common_exception_return: rsil a2, 0 mov abi_arg0, a1 abi_call do_notify_resume # int do_notify_resume(struct pt_regs*) - j 1b - -3: /* Reschedule */ + j .Ltif_loop +.Lresched: #ifdef CONFIG_TRACE_IRQFLAGS abi_call trace_hardirqs_on #endif rsil a2, 0 abi_call schedule # void schedule (void) - j 1b + j .Ltif_loop +.Lexit_tif_loop_kernel: #ifdef CONFIG_PREEMPTION -6: - _bbci.l a4, TIF_NEED_RESCHED, 4f + _bbci.l a4, TIF_NEED_RESCHED, .Lrestore_state /* Check current_thread_info->preempt_count */ l32i a4, a2, TI_PRE_COUNT - bnez a4, 4f + bnez a4, .Lrestore_state abi_call preempt_schedule_irq - j 4f #endif + j .Lrestore_state -#if XTENSA_FAKE_NMI -.LNMIexit: - l32i abi_saved1, a1, PT_PS - _bbci.l abi_saved1, PS_UM_BIT, 4f -#endif - -5: +.Lexit_tif_loop_user: #ifdef CONFIG_HAVE_HW_BREAKPOINT - _bbci.l a4, TIF_DB_DISABLED, 7f + _bbci.l a4, TIF_DB_DISABLED, 1f abi_call restore_dbreak -7: +1: #endif #ifdef CONFIG_DEBUG_TLB_SANITY l32i a4, a1, PT_DEPC - bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state abi_call check_tlb_sanity #endif -6: -4: + +.Lrestore_state: #ifdef CONFIG_TRACE_IRQFLAGS extui a4, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH bgei a4, LOCKLEVEL, 1f @@ -606,7 +600,7 @@ user_exception_exit: rsr a1, depc # restore stack pointer l32i a2, a1, PT_WMASK # register frames saved (in bits 4...9) rotw -1 # we restore a4..a7 - _bltui a6, 16, 1f # only have to restore current window? + _bltui a6, 16, .Lclear_regs # only have to restore current window? /* The working registers are a0 and a3. 
We are restoring to * a4..a7. Be careful not to destroy what we have just restored. @@ -618,18 +612,19 @@ user_exception_exit: mov a2, a6 mov a3, a5 -2: rotw -1 # a0..a3 become a4..a7 +1: rotw -1 # a0..a3 become a4..a7 addi a3, a7, -4*4 # next iteration addi a2, a6, -16 # decrementing Y in WMASK l32i a4, a3, PT_AREG_END + 0 l32i a5, a3, PT_AREG_END + 4 l32i a6, a3, PT_AREG_END + 8 l32i a7, a3, PT_AREG_END + 12 - _bgeui a2, 16, 2b + _bgeui a2, 16, 1b /* Clear unrestored registers (don't leak anything to user-land */ -1: rsr a0, windowbase +.Lclear_regs: + rsr a0, windowbase rsr a3, sar sub a3, a0, a3 beqz a3, 2f @@ -706,12 +701,12 @@ kernel_exception_exit: addi a0, a1, -16 l32i a3, a0, 0 l32i a4, a0, 4 - s32i a3, a1, PT_SIZE+0 - s32i a4, a1, PT_SIZE+4 + s32i a3, a1, PT_KERNEL_SIZE + 0 + s32i a4, a1, PT_KERNEL_SIZE + 4 l32i a3, a0, 8 l32i a4, a0, 12 - s32i a3, a1, PT_SIZE+8 - s32i a4, a1, PT_SIZE+12 + s32i a3, a1, PT_KERNEL_SIZE + 8 + s32i a4, a1, PT_KERNEL_SIZE + 12 /* Common exception exit. 
* We restore the special register and the current window frame, and @@ -821,7 +816,7 @@ ENTRY(debug_exception) bbsi.l a2, PS_UM_BIT, 2f # jump if user mode - addi a2, a1, -16-PT_SIZE # assume kernel stack + addi a2, a1, -16 - PT_KERNEL_SIZE # assume kernel stack 3: l32i a0, a3, DT_DEBUG_SAVE s32i a1, a2, PT_AREG1 diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 61cf6497a646..0dde21e0d3de 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz) .data = data, }; stop_machine_cpuslocked(patch_text_stop_machine, - &patch, NULL); + &patch, cpu_online_mask); } else { unsigned long flags; diff --git a/arch/xtensa/kernel/mxhead.S b/arch/xtensa/kernel/mxhead.S index 9f3843742726..b702c0908b1f 100644 --- a/arch/xtensa/kernel/mxhead.S +++ b/arch/xtensa/kernel/mxhead.S @@ -37,11 +37,13 @@ _SetupOCD: * xt-gdb to single step via DEBUG exceptions received directly * by ocd. */ +#if XCHAL_HAVE_WINDOWED movi a1, 1 movi a0, 0 wsr a1, windowstart wsr a0, windowbase rsync +#endif movi a1, LOCKLEVEL wsr a1, ps diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index bd80df890b1e..e8bfbca5f001 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -232,10 +232,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, p->thread.ra = MAKE_RA_FOR_CALL( (unsigned long)ret_from_fork, 0x1); - /* This does not copy all the regs. - * In a bout of brilliance or madness, - * ARs beyond a0-a15 exist past the end of the struct. 
- */ *childregs = *regs; childregs->areg[1] = usp; childregs->areg[2] = 0; @@ -265,14 +261,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, childregs->wmask = 1; childregs->windowstart = 1; childregs->windowbase = 0; - } else { - int len = childregs->wmask & ~0xf; - memcpy(&childregs->areg[XCHAL_NUM_AREGS - len/4], - &regs->areg[XCHAL_NUM_AREGS - len/4], len); } - childregs->syscall = regs->syscall; - if (clone_flags & CLONE_SETTLS) childregs->threadptr = tls; } else { diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 8db20cfb44ab..9191738f9941 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -140,7 +140,7 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); static int __init parse_tag_cmdline(const bp_tag_t* tag) { - strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); + strscpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); return 0; } @@ -230,7 +230,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(xtensa_dt_io_area, NULL); if (!command_line[0]) - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); } #endif /* CONFIG_USE_OF */ @@ -260,7 +260,7 @@ void __init init_arch(bp_tag_t *bp_start) #ifdef CONFIG_CMDLINE_BOOL if (!command_line[0]) - strlcpy(command_line, default_command_line, COMMAND_LINE_SIZE); + strscpy(command_line, default_command_line, COMMAND_LINE_SIZE); #endif /* Early hook for platforms */ @@ -289,7 +289,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = command_line; platform_setup(cmdline_p); - strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); /* Reserve some memory regions */ @@ -349,7 +349,7 @@ void __init setup_arch(char **cmdline_p) #endif /* CONFIG_VECTORS_ADDR */ -#ifdef CONFIG_SMP +#ifdef CONFIG_SECONDARY_RESET_VECTOR mem_reserve(__pa(_SecondaryResetVector_text_start), 
__pa(_SecondaryResetVector_text_end)); #endif diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 407ece204e7c..1073fe4a584d 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -88,7 +88,7 @@ ENDPROC(_UserExceptionVector) * Kernel exception vector. (Exceptions with PS.UM == 0, PS.EXCM == 0) * * We get this exception when we were already in kernel space. - * We decrement the current stack pointer (kernel) by PT_SIZE and + * We decrement the current stack pointer (kernel) by PT_KERNEL_SIZE and * jump to the first-level handler associated with the exception cause. * * Note: we need to preserve space for the spill region. @@ -100,7 +100,7 @@ ENTRY(_KernelExceptionVector) xsr a3, excsave1 # save a3, and get dispatch table wsr a2, depc # save a2 - addi a2, a1, -16-PT_SIZE # adjust stack pointer + addi a2, a1, -16 - PT_KERNEL_SIZE # adjust stack pointer s32i a0, a2, PT_AREG0 # save a0 to ESF rsr a0, exccause # retrieve exception cause s32i a0, a2, PT_DEPC # mark it as a regular exception diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index eee270a039a4..965a3952c47b 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -207,7 +207,7 @@ SECTIONS RELOCATE_ENTRY(_xip_data, .data); RELOCATE_ENTRY(_xip_init_data, .init.data); #endif -#if defined(CONFIG_SMP) +#if defined(CONFIG_SECONDARY_RESET_VECTOR) RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); #endif @@ -303,7 +303,7 @@ SECTIONS #define LAST .DoubleExceptionVector.text #endif -#if defined(CONFIG_SMP) +#if defined(CONFIG_SECONDARY_RESET_VECTOR) SECTION_VECTOR4 (_SecondaryResetVector_text, .SecondaryResetVector.text, diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d79edbb98d2a..b0bc8897c924 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -59,32 +59,18 @@ extern long long __ashldi3(long long, int); 
extern long long __lshrdi3(long long, int); extern int __divsi3(int, int); extern int __modsi3(int, int); -extern long long __muldi3(long long, long long); extern int __mulsi3(int, int); extern unsigned int __udivsi3(unsigned int, unsigned int); extern unsigned int __umodsi3(unsigned int, unsigned int); -extern unsigned long long __umoddi3(unsigned long long, unsigned long long); -extern unsigned long long __udivdi3(unsigned long long, unsigned long long); -extern int __ucmpdi2(int, int); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(__divsi3); EXPORT_SYMBOL(__modsi3); -EXPORT_SYMBOL(__muldi3); EXPORT_SYMBOL(__mulsi3); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); -EXPORT_SYMBOL(__udivdi3); -EXPORT_SYMBOL(__umoddi3); -EXPORT_SYMBOL(__ucmpdi2); - -void __xtensa_libgcc_window_spill(void) -{ - BUG(); -} -EXPORT_SYMBOL(__xtensa_libgcc_window_spill); unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile index 9437ca51f18a..5848c133f7ea 100644 --- a/arch/xtensa/lib/Makefile +++ b/arch/xtensa/lib/Makefile @@ -4,5 +4,7 @@ # lib-y += memcopy.o memset.o checksum.o \ + ashldi3.o ashrdi3.o lshrdi3.o \ + divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \ usercopy.o strncpy_user.o strnlen_user.o lib-$(CONFIG_PCI) += pci-auto.o diff --git a/arch/xtensa/lib/ashldi3.S b/arch/xtensa/lib/ashldi3.S new file mode 100644 index 000000000000..67fb0da9e432 --- /dev/null +++ b/arch/xtensa/lib/ashldi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__ashldi3) + + abi_entry_default + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + abi_ret_default + +.Llow_only: + sll uh, ul + movi ul, 0 + abi_ret_default + +ENDPROC(__ashldi3) diff --git 
a/arch/xtensa/lib/ashrdi3.S b/arch/xtensa/lib/ashrdi3.S new file mode 100644 index 000000000000..cbf052c512cc --- /dev/null +++ b/arch/xtensa/lib/ashrdi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__ashrdi3) + + abi_entry_default + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + abi_ret_default + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + abi_ret_default + +ENDPROC(__ashrdi3) diff --git a/arch/xtensa/lib/divsi3.S b/arch/xtensa/lib/divsi3.S new file mode 100644 index 000000000000..b044b4744a8b --- /dev/null +++ b/arch/xtensa/lib/divsi3.S @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__divsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 
0) ? -quotient : quotient */ + abi_ret_default + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */ + abi_ret_default + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + abi_ret_default + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__divsi3) diff --git a/arch/xtensa/lib/lshrdi3.S b/arch/xtensa/lib/lshrdi3.S new file mode 100644 index 000000000000..129ef8d1725b --- /dev/null +++ b/arch/xtensa/lib/lshrdi3.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + +ENTRY(__lshrdi3) + + abi_entry_default + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + srl uh, uh + abi_ret_default + +.Lhigh_only: + srl ul, uh + movi uh, 0 + abi_ret_default + +ENDPROC(__lshrdi3) diff --git a/arch/xtensa/lib/modsi3.S b/arch/xtensa/lib/modsi3.S new file mode 100644 index 000000000000..d00e77181e20 --- /dev/null +++ b/arch/xtensa/lib/modsi3.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__modsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 
/* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + abi_ret_default + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__modsi3) + +#if !XCHAL_HAVE_NSA + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .size __nsau_data, . - __nsau_data +#endif /* !XCHAL_HAVE_NSA */ diff --git a/arch/xtensa/lib/mulsi3.S b/arch/xtensa/lib/mulsi3.S new file mode 100644 index 000000000000..91a9d7c62f96 --- /dev/null +++ b/arch/xtensa/lib/mulsi3.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +ENTRY(__mulsi3) + + abi_entry_default + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + abi_ret_default +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. 
*/ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + abi_ret_default + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + abi_ret_default + +ENDPROC(__mulsi3) diff --git a/arch/xtensa/lib/udivsi3.S b/arch/xtensa/lib/udivsi3.S new file mode 100644 index 000000000000..d2477e0786cf --- /dev/null +++ b/arch/xtensa/lib/udivsi3.S @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__udivsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if 
!XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + abi_ret_default + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + abi_ret_default + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + abi_ret_default + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__udivsi3) diff --git a/arch/xtensa/lib/umodsi3.S b/arch/xtensa/lib/umodsi3.S new file mode 100644 index 000000000000..5f031bfa0354 --- /dev/null +++ b/arch/xtensa/lib/umodsi3.S @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +ENTRY(__umodsi3) + + abi_entry_default +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + abi_ret_default + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. 
+ The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + abi_ret_default + +ENDPROC(__umodsi3) diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index f436cf2efd8b..27a477dae232 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -162,6 +162,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + local_flush_tlb_page(vma, address); +} + #ifdef CONFIG_DEBUG_TLB_SANITY static unsigned get_pte_for_vaddr(unsigned vaddr) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 9fb99d18e3c2..be3aaaad8bee 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -174,7 +174,7 @@ static int tuntap_open(struct iss_net_private *lp) memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strlcpy(ifr.ifr_name, dev_name, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, dev_name, sizeof(ifr.ifr_name)); err = simc_ioctl(fd, TUNSETIFF, &ifr); if (err < 0) { @@ -249,7 +249,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init) return 0; } - strlcpy(lp->tp.info.tuntap.dev_name, dev_name, + strscpy(lp->tp.info.tuntap.dev_name, dev_name, sizeof(lp->tp.info.tuntap.dev_name)); setup_etheraddr(dev, mac_str); diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 626dedd110cb..fca0d0669aa9 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -351,6 +351,7 @@ phys_addr_t __weak mips_cdmm_phys_base(void) np = of_find_compatible_node(NULL, NULL, "mti,mips-cdmm"); if (np) { err = of_address_to_resource(np, 0, &res); + of_node_put(np); if (!err) return res.start; } diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6bcdb4e6a0d1..d5de3f77d3aa 100644 --- 
a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -623,7 +623,7 @@ config S3C24XX_DMAC config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" - depends on MACH_TX49XX || MACH_TX39XX + depends on MACH_TX49XX select DMA_ENGINE help Support the TXx9 SoC internal DMA controller. This can be diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index d0d52c1d4aee..992cc285f2fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -133,7 +133,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) * or equal to the system's PAGE_SIZE, with a preference if * both are equal. */ - pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; + pgsize_bitmap = tdev->iommu.domain->pgsize_bitmap; if (pgsize_bitmap & PAGE_SIZE) { tdev->iommu.pgshift = PAGE_SHIFT; } else { diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index bb95edf74415..1ab31074f5b3 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -15,7 +15,6 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); @@ -117,8 +116,7 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) extern bool translation_pre_enabled(struct amd_iommu *iommu); -extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev); +extern bool amd_iommu_is_attach_deferred(struct device *dev); extern int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line); diff --git a/drivers/iommu/amd/init.c 
b/drivers/iommu/amd/init.c index 7bfe37e52e21..b4a798c7b347 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -671,7 +671,7 @@ void amd_iommu_restart_event_logging(struct amd_iommu *iommu) * This function resets the command buffer if the IOMMU stopped fetching * commands from it. */ -void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) { iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); @@ -990,6 +990,7 @@ static bool copy_device_table(void) get_order(dev_table_size)); if (old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); + memunmap(old_devtb); return false; } @@ -1020,6 +1021,7 @@ static bool copy_device_table(void) if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || (int_tab_len != DTE_INTTABLEN)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); + memunmap(old_devtb); return false; } @@ -1953,9 +1955,11 @@ static int __init amd_iommu_init_pci(void) for_each_iommu(iommu) { ret = iommu_init_pci(iommu); - if (ret) - break; - + if (ret) { + pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", + iommu->index, ret); + goto out; + } /* Need to setup range after PCI init */ iommu_set_cwwb_range(iommu); } @@ -1971,6 +1975,11 @@ static int __init amd_iommu_init_pci(void) * active. 
*/ ret = amd_iommu_init_api(); + if (ret) { + pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n", + ret); + goto out; + } init_device_table_dma(); @@ -1980,6 +1989,7 @@ static int __init amd_iommu_init_pci(void) if (!ret) print_iommu_info(); +out: return ret; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a18b549951bb..a1ada7bff44e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2221,8 +2221,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(&region->list, head); } -bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +bool amd_iommu_is_attach_deferred(struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); @@ -2275,13 +2274,6 @@ static int amd_iommu_def_domain_type(struct device *dev) const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, - .domain_free = amd_iommu_domain_free, - .attach_dev = amd_iommu_attach_device, - .detach_dev = amd_iommu_detach_device, - .map = amd_iommu_map, - .iotlb_sync_map = amd_iommu_iotlb_sync_map, - .unmap = amd_iommu_unmap, - .iova_to_phys = amd_iommu_iova_to_phys, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, @@ -2290,9 +2282,18 @@ const struct iommu_ops amd_iommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, - .flush_iotlb_all = amd_iommu_flush_iotlb_all, - .iotlb_sync = amd_iommu_iotlb_sync, .def_domain_type = amd_iommu_def_domain_type, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map, + .unmap = amd_iommu_unmap, + .iotlb_sync_map = amd_iommu_iotlb_sync_map, + .iova_to_phys = amd_iommu_iova_to_phys, + .flush_iotlb_all = 
amd_iommu_flush_iotlb_all, + .iotlb_sync = amd_iommu_iotlb_sync, + .free = amd_iommu_domain_free, + } }; /***************************************************************************** diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 58da08cc3d01..e56b137ceabd 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -24,7 +24,6 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Joerg Roedel "); -#define MAX_DEVICES 0x10000 #define PRI_QUEUE_SIZE 512 struct pri_queue { @@ -71,7 +70,6 @@ struct fault { struct pasid_state *state; struct mm_struct *mm; u64 address; - u16 devid; u32 pasid; u16 tag; u16 finish; @@ -125,6 +123,15 @@ static void free_device_state(struct device_state *dev_state) { struct iommu_group *group; + /* Get rid of any remaining pasid states */ + free_pasid_states(dev_state); + + /* + * Wait until the last reference is dropped before freeing + * the device state. + */ + wait_event(dev_state->wq, !atomic_read(&dev_state->count)); + /* * First detach device from domain - No more PRI requests will arrive * from that device after it is unbound from the IOMMUv2 domain. @@ -537,7 +544,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) ret = NOTIFY_DONE; /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) { + if (amd_iommu_is_attach_deferred(&pdev->dev)) { amd_iommu_complete_ppr(pdev, iommu_fault->pasid, PPR_INVALID, tag); goto out; @@ -850,15 +857,7 @@ void amd_iommu_free_device(struct pci_dev *pdev) spin_unlock_irqrestore(&state_lock, flags); - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - put_device_state(dev_state); - /* - * Wait until the last reference is dropped before freeing - * the device state. 
- */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); free_device_state(dev_state); } EXPORT_SYMBOL(amd_iommu_free_device); @@ -955,8 +954,8 @@ out: static void __exit amd_iommu_v2_exit(void) { - struct device_state *dev_state; - int i; + struct device_state *dev_state, *next; + unsigned long flags; if (!amd_iommu_v2_supported()) return; @@ -969,18 +968,18 @@ static void __exit amd_iommu_v2_exit(void) * The loop below might call flush_workqueue(), so call * destroy_workqueue() after it */ - for (i = 0; i < MAX_DEVICES; ++i) { - dev_state = get_device_state(i); - - if (dev_state == NULL) - continue; + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry_safe(dev_state, next, &state_list, list) { WARN_ON_ONCE(1); put_device_state(dev_state); - amd_iommu_free_device(dev_state->pdev); + list_del(&dev_state->list); + free_device_state(dev_state); } + spin_unlock_irqrestore(&state_lock, flags); + destroy_workqueue(iommu_wq); } diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 565ef5598811..decafb07ad08 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -765,15 +765,6 @@ static void apple_dart_get_resv_regions(struct device *dev, static const struct iommu_ops apple_dart_iommu_ops = { .domain_alloc = apple_dart_domain_alloc, - .domain_free = apple_dart_domain_free, - .attach_dev = apple_dart_attach_dev, - .detach_dev = apple_dart_detach_dev, - .map_pages = apple_dart_map_pages, - .unmap_pages = apple_dart_unmap_pages, - .flush_iotlb_all = apple_dart_flush_iotlb_all, - .iotlb_sync = apple_dart_iotlb_sync, - .iotlb_sync_map = apple_dart_iotlb_sync_map, - .iova_to_phys = apple_dart_iova_to_phys, .probe_device = apple_dart_probe_device, .release_device = apple_dart_release_device, .device_group = apple_dart_device_group, @@ -782,6 +773,17 @@ static const struct iommu_ops apple_dart_iommu_ops = { .get_resv_regions = apple_dart_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, 
.pgsize_bitmap = -1UL, /* Restricted during dart probe */ + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = apple_dart_attach_dev, + .detach_dev = apple_dart_detach_dev, + .map_pages = apple_dart_map_pages, + .unmap_pages = apple_dart_unmap_pages, + .flush_iotlb_all = apple_dart_flush_iotlb_all, + .iotlb_sync = apple_dart_iotlb_sync, + .iotlb_sync_map = apple_dart_iotlb_sync_map, + .iova_to_phys = apple_dart_iova_to_phys, + .free = apple_dart_domain_free, + } }; static irqreturn_t apple_dart_irq(int irq, void *dev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6dc6d8b6b368..627a3ed5ee8f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1558,6 +1558,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); + cond_resched(); } /* @@ -2841,17 +2842,9 @@ static int arm_smmu_dev_disable_feature(struct device *dev, static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, - .domain_free = arm_smmu_domain_free, - .attach_dev = arm_smmu_attach_dev, - .map_pages = arm_smmu_map_pages, - .unmap_pages = arm_smmu_unmap_pages, - .flush_iotlb_all = arm_smmu_flush_iotlb_all, - .iotlb_sync = arm_smmu_iotlb_sync, - .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, - .enable_nesting = arm_smmu_enable_nesting, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, @@ -2865,6 +2858,16 @@ static struct iommu_ops arm_smmu_ops = { .page_response = arm_smmu_page_response, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = 
arm_smmu_attach_dev, + .map_pages = arm_smmu_map_pages, + .unmap_pages = arm_smmu_unmap_pages, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, + .iotlb_sync = arm_smmu_iotlb_sync, + .iova_to_phys = arm_smmu_iova_to_phys, + .enable_nesting = arm_smmu_enable_nesting, + .free = arm_smmu_domain_free, + } }; /* Probing and initialisation functions */ @@ -2911,32 +2914,20 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static void arm_smmu_cmdq_free_bitmap(void *data) -{ - unsigned long *bitmap = data; - bitmap_free(bitmap); -} - static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) { - int ret = 0; struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; - atomic_long_t *bitmap; atomic_set(&cmdq->owner_prod, 0); atomic_set(&cmdq->lock, 0); - bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL); - if (!bitmap) { - dev_err(smmu->dev, "failed to allocate cmdq bitmap\n"); - ret = -ENOMEM; - } else { - cmdq->valid_map = bitmap; - devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap); - } + cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, + GFP_KERNEL); + if (!cmdq->valid_map) + return -ENOMEM; - return ret; + return 0; } static int arm_smmu_init_queues(struct arm_smmu_device *smmu) @@ -2981,10 +2972,10 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) { unsigned int i; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents; void *strtab = smmu->strtab_cfg.strtab; - cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL); + cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, + sizeof(*cfg->l1_desc), GFP_KERNEL); if (!cfg->l1_desc) return -ENOMEM; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4bc75c4ce402..568cce590ccc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -807,7 +807,7 @@ static 
int arm_smmu_init_domain_context(struct iommu_domain *domain, * Request context fault interrupt. Do this last to avoid the * handler seeing a half-initialised domain state. */ - irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + irq = smmu->irqs[cfg->irptndx]; if (smmu->impl && smmu->impl->context_fault) context_fault = smmu->impl->context_fault; @@ -858,7 +858,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) arm_smmu_write_context_bank(smmu, cfg->cbndx); if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { - irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; + irq = smmu->irqs[cfg->irptndx]; devm_free_irq(smmu->dev, irq, domain); } @@ -1583,25 +1583,27 @@ static int arm_smmu_def_domain_type(struct device *dev) static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, - .domain_free = arm_smmu_domain_free, - .attach_dev = arm_smmu_attach_dev, - .map_pages = arm_smmu_map_pages, - .unmap_pages = arm_smmu_unmap_pages, - .flush_iotlb_all = arm_smmu_flush_iotlb_all, - .iotlb_sync = arm_smmu_iotlb_sync, - .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize, .device_group = arm_smmu_device_group, - .enable_nesting = arm_smmu_enable_nesting, - .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = arm_smmu_attach_dev, + .map_pages = arm_smmu_map_pages, + .unmap_pages = arm_smmu_unmap_pages, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, + .iotlb_sync = arm_smmu_iotlb_sync, + .iova_to_phys = arm_smmu_iova_to_phys, + .enable_nesting = 
arm_smmu_enable_nesting, + .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, + .free = arm_smmu_domain_free, + } }; static void arm_smmu_device_reset(struct arm_smmu_device *smmu) @@ -1951,8 +1953,8 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) return ret; } -static int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { struct device *dev = smmu->dev; struct acpi_iort_node *node = @@ -1968,7 +1970,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, return ret; /* Ignore the configuration access interrupt */ - smmu->num_global_irqs = 1; + *global_irqs = 1; + *pmu_irqs = 0; if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK) smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; @@ -1976,25 +1979,24 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, return 0; } #else -static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { return -ENODEV; } #endif -static int arm_smmu_device_dt_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu, + u32 *global_irqs, u32 *pmu_irqs) { const struct arm_smmu_match_data *data; - struct device *dev = &pdev->dev; + struct device *dev = smmu->dev; bool legacy_binding; - if (of_property_read_u32(dev->of_node, "#global-interrupts", - &smmu->num_global_irqs)) { - dev_err(dev, "missing #global-interrupts property\n"); - return -ENODEV; - } + if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs)) + return dev_err_probe(dev, -ENODEV, + "missing #global-interrupts property\n"); + *pmu_irqs = 0; data = of_device_get_match_data(dev); smmu->version = data->version; @@ -2073,6 +2075,7 @@ 
static int arm_smmu_device_probe(struct platform_device *pdev) struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; int num_irqs, i, err; + u32 global_irqs, pmu_irqs; irqreturn_t (*global_fault)(int irq, void *dev); smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); @@ -2083,10 +2086,9 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->dev = dev; if (dev->of_node) - err = arm_smmu_device_dt_probe(pdev, smmu); + err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs); else - err = arm_smmu_device_acpi_probe(pdev, smmu); - + err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs); if (err) return err; @@ -2105,31 +2107,25 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (IS_ERR(smmu)) return PTR_ERR(smmu); - num_irqs = 0; - while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { - num_irqs++; - if (num_irqs > smmu->num_global_irqs) - smmu->num_context_irqs++; - } + num_irqs = platform_irq_count(pdev); - if (!smmu->num_context_irqs) { - dev_err(dev, "found %d interrupts but expected at least %d\n", - num_irqs, smmu->num_global_irqs + 1); - return -ENODEV; - } + smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs; + if (smmu->num_context_irqs <= 0) + return dev_err_probe(dev, -ENODEV, + "found %d interrupts but expected at least %d\n", + num_irqs, global_irqs + pmu_irqs + 1); - smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs), - GFP_KERNEL); - if (!smmu->irqs) { - dev_err(dev, "failed to allocate %d irqs\n", num_irqs); - return -ENOMEM; - } + smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs, + sizeof(*smmu->irqs), GFP_KERNEL); + if (!smmu->irqs) + return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n", + smmu->num_context_irqs); - for (i = 0; i < num_irqs; ++i) { - int irq = platform_get_irq(pdev, i); + for (i = 0; i < smmu->num_context_irqs; i++) { + int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i); if (irq < 0) - return -ENODEV; + return 
irq; smmu->irqs[i] = irq; } @@ -2165,17 +2161,18 @@ static int arm_smmu_device_probe(struct platform_device *pdev) else global_fault = arm_smmu_global_fault; - for (i = 0; i < smmu->num_global_irqs; ++i) { - err = devm_request_irq(smmu->dev, smmu->irqs[i], - global_fault, - IRQF_SHARED, - "arm-smmu global fault", - smmu); - if (err) { - dev_err(dev, "failed to request global IRQ %d (%u)\n", - i, smmu->irqs[i]); - return err; - } + for (i = 0; i < global_irqs; i++) { + int irq = platform_get_irq(pdev, i); + + if (irq < 0) + return irq; + + err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED, + "arm-smmu global fault", smmu); + if (err) + return dev_err_probe(dev, err, + "failed to request global IRQ %d (%u)\n", + i, irq); } err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 432de2f742c3..2b9b42fb6f30 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -318,11 +318,10 @@ struct arm_smmu_device { unsigned long pa_size; unsigned long pgsize_bitmap; - u32 num_global_irqs; - u32 num_context_irqs; + int num_context_irqs; + int num_clks; unsigned int *irqs; struct clk_bulk_data *clks; - int num_clks; spinlock_t global_sync_lock; diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index b91874cb6cf3..4c077c38fbd6 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -590,19 +590,21 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops qcom_iommu_ops = { .capable = qcom_iommu_capable, .domain_alloc = qcom_iommu_domain_alloc, - .domain_free = qcom_iommu_domain_free, - .attach_dev = qcom_iommu_attach_dev, - .detach_dev = qcom_iommu_detach_dev, - .map = qcom_iommu_map, - .unmap = qcom_iommu_unmap, - .flush_iotlb_all = qcom_iommu_flush_iotlb_all, - .iotlb_sync = 
qcom_iommu_iotlb_sync, - .iova_to_phys = qcom_iommu_iova_to_phys, .probe_device = qcom_iommu_probe_device, .release_device = qcom_iommu_release_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = qcom_iommu_attach_dev, + .detach_dev = qcom_iommu_detach_dev, + .map = qcom_iommu_map, + .unmap = qcom_iommu_unmap, + .flush_iotlb_all = qcom_iommu_flush_iotlb_all, + .iotlb_sync = qcom_iommu_iotlb_sync, + .iova_to_phys = qcom_iommu_iova_to_phys, + .free = qcom_iommu_domain_free, + } }; static int qcom_iommu_sec_ptbl_init(struct device *dev) @@ -827,20 +829,20 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) ret = devm_of_platform_populate(dev); if (ret) { dev_err(dev, "Failed to populate iommu contexts\n"); - return ret; + goto err_pm_disable; } ret = iommu_device_sysfs_add(&qcom_iommu->iommu, dev, NULL, dev_name(dev)); if (ret) { dev_err(dev, "Failed to register iommu in sysfs\n"); - return ret; + goto err_pm_disable; } ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_pm_disable; } bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); @@ -852,6 +854,10 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) } return 0; + +err_pm_disable: + pm_runtime_disable(dev); + return ret; } static int qcom_iommu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4c1d633ecc49..b1ea554fda63 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -526,6 +526,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; struct iova_domain *iovad; + int ret; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return 
-EINVAL; @@ -560,6 +561,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, } init_iova_domain(iovad, 1UL << order, base_pfn); + ret = iova_domain_init_rcaches(iovad); + if (ret) + return ret; /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 939ffa768986..71f2018e23fe 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1309,17 +1309,19 @@ static int exynos_iommu_of_xlate(struct device *dev, static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, - .domain_free = exynos_iommu_domain_free, - .attach_dev = exynos_iommu_attach_device, - .detach_dev = exynos_iommu_detach_device, - .map = exynos_iommu_map, - .unmap = exynos_iommu_unmap, - .iova_to_phys = exynos_iommu_iova_to_phys, .device_group = generic_device_group, .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = exynos_iommu_attach_device, + .detach_dev = exynos_iommu_detach_device, + .map = exynos_iommu_map, + .unmap = exynos_iommu_unmap, + .iova_to_phys = exynos_iommu_iova_to_phys, + .free = exynos_iommu_domain_free, + } }; static int __init exynos_iommu_init(void) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index a47f47307109..69a4a62dc3b9 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -453,13 +453,15 @@ static void fsl_pamu_release_device(struct device *dev) static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, - .domain_free = fsl_pamu_domain_free, - .attach_dev = fsl_pamu_attach_device, - .detach_dev = 
fsl_pamu_detach_device, - .iova_to_phys = fsl_pamu_iova_to_phys, .probe_device = fsl_pamu_probe_device, .release_device = fsl_pamu_release_device, .device_group = fsl_pamu_device_group, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .iova_to_phys = fsl_pamu_iova_to_phys, + .free = fsl_pamu_domain_free, + } }; int __init pamu_domain_init(void) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 62e23ff3c987..ed796eea4581 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -344,15 +344,15 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, static int show_device_domain_translation(struct device *dev, void *data) { - struct dmar_domain *domain = find_domain(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *domain = info->domain; struct seq_file *m = data; u64 path[6] = { 0 }; if (!domain) return 0; - seq_printf(m, "Device %s with pasid %d @0x%llx\n", - dev_name(dev), domain->default_pasid, + seq_printf(m, "Device %s @0x%llx\n", dev_name(dev), (u64)virt_to_phys(domain->pgd)); seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 915bff76fe96..4de960834a1b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -66,8 +66,6 @@ static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)]; static int alloc_iommu(struct dmar_drhd_unit *drhd); static void free_iommu(struct intel_iommu *iommu); -extern const struct iommu_ops intel_iommu_ops; - static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { /* @@ -789,7 +787,8 @@ static int __init dmar_acpi_dev_scope_init(void) andd->device_name); continue; } - if (acpi_bus_get_device(h, &adev)) { + adev = acpi_fetch_acpi_dev(h); + if (!adev) { pr_err("Failed to get device for ACPI object 
%s\n", andd->device_name); continue; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1ce1741a7fa4..df5c62ecf942 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -13,38 +13,18 @@ #define pr_fmt(fmt) "DMAR: " fmt #define dev_fmt(fmt) pr_fmt(fmt) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include #include #include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "../irq_remapping.h" #include "../iommu-sva-lib.h" @@ -316,14 +296,9 @@ static LIST_HEAD(dmar_satc_units); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; -static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static int intel_iommu_attach_device(struct iommu_domain *domain, - struct device *dev); -static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -342,21 +317,6 @@ static int iommu_skip_te_disable; int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); -#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) -struct device_domain_info *get_domain_info(struct device *dev) -{ - struct device_domain_info *info; - - if (!dev) - return NULL; - - info = dev_iommu_priv_get(dev); - if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO)) - return NULL; - - return info; -} - DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -452,39 +412,6 @@ static 
int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -static struct kmem_cache *iommu_domain_cache; -static struct kmem_cache *iommu_devinfo_cache; - -static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did) -{ - struct dmar_domain **domains; - int idx = did >> 8; - - domains = iommu->domains[idx]; - if (!domains) - return NULL; - - return domains[did & 0xff]; -} - -static void set_iommu_domain(struct intel_iommu *iommu, u16 did, - struct dmar_domain *domain) -{ - struct dmar_domain **domains; - int idx = did >> 8; - - if (!iommu->domains[idx]) { - size_t size = 256 * sizeof(struct dmar_domain *); - iommu->domains[idx] = kzalloc(size, GFP_ATOMIC); - } - - domains = iommu->domains[idx]; - if (WARN_ON(!domains)) - return; - else - domains[did & 0xff] = domain; -} - void *alloc_pgtable_page(int node) { struct page *page; @@ -501,26 +428,6 @@ void free_pgtable_page(void *vaddr) free_page((unsigned long)vaddr); } -static inline void *alloc_domain_mem(void) -{ - return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); -} - -static void free_domain_mem(void *vaddr) -{ - kmem_cache_free(iommu_domain_cache, vaddr); -} - -static inline void * alloc_devinfo_mem(void) -{ - return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); -} - -static inline void free_devinfo_mem(void *vaddr) -{ - kmem_cache_free(iommu_devinfo_cache, vaddr); -} - static inline int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; @@ -794,11 +701,6 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, return &context[devfn]; } -static bool attach_deferred(struct device *dev) -{ - return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO; -} - /** * is_downstream_to_pci_bridge - test if a device belongs to the PCI * sub-hierarchy of a candidate PCI-PCI bridge @@ -925,7 +827,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) } if (pdev && 
drhd->include_all) { - got_pdev: +got_pdev: if (bus && devfn) { *bus = pdev->bus->number; *devfn = pdev->devfn; @@ -934,7 +836,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) } } iommu = NULL; - out: +out: if (iommu_is_dummy(iommu, dev)) iommu = NULL; @@ -1573,18 +1475,6 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } - if (!has_iotlb_device) { - struct subdev_domain_info *sinfo; - - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - info = get_domain_info(sinfo->pdev); - if (info && info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - } - domain->has_iotlb_device = has_iotlb_device; } @@ -1682,7 +1572,6 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, { unsigned long flags; struct device_domain_info *info; - struct subdev_domain_info *sinfo; if (!domain->has_iotlb_device) return; @@ -1691,27 +1580,9 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - info = get_domain_info(sinfo->pdev); - __iommu_flush_dev_iotlb(info, addr, mask); - } spin_unlock_irqrestore(&device_domain_lock, flags); } -static void domain_flush_piotlb(struct intel_iommu *iommu, - struct dmar_domain *domain, - u64 addr, unsigned long npages, bool ih) -{ - u16 did = domain->iommu_did[iommu->seq_id]; - - if (domain->default_pasid) - qi_flush_piotlb(iommu, did, domain->default_pasid, - addr, npages, ih); - - if (!list_empty(&domain->devices)) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih); -} - static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1727,7 +1598,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain_use_first_level(domain)) { - domain_flush_piotlb(iommu, domain, addr, pages, ih); + 
qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { /* * Fallback to domain selective flush if no PSI support or @@ -1776,14 +1647,13 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = dmar_domain->iommu_did[iommu->seq_id]; if (domain_use_first_level(dmar_domain)) - domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0); + qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); if (!cap_caching_mode(iommu->cap)) - iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), - 0, MAX_AGAW_PFN_WIDTH); + iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } } @@ -1846,7 +1716,6 @@ static void iommu_disable_translation(struct intel_iommu *iommu) static int iommu_init_domains(struct intel_iommu *iommu) { u32 ndomains; - size_t size; ndomains = cap_ndoms(iommu->cap); pr_debug("%s: Number of Domains supported <%d>\n", @@ -1858,24 +1727,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) if (!iommu->domain_ids) return -ENOMEM; - size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **); - iommu->domains = kzalloc(size, GFP_KERNEL); - - if (iommu->domains) { - size = 256 * sizeof(struct dmar_domain *); - iommu->domains[0] = kzalloc(size, GFP_KERNEL); - } - - if (!iommu->domains || !iommu->domains[0]) { - pr_err("%s: Allocating domain array failed\n", - iommu->name); - bitmap_free(iommu->domain_ids); - kfree(iommu->domains); - iommu->domain_ids = NULL; - iommu->domains = NULL; - return -ENOMEM; - } - /* * If Caching mode is set, then invalid translations are tagged * with domain-id 0, hence we need to pre-allocate it. 
We also @@ -1902,7 +1753,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) struct device_domain_info *info, *tmp; unsigned long flags; - if (!iommu->domains || !iommu->domain_ids) + if (!iommu->domain_ids) return; spin_lock_irqsave(&device_domain_lock, flags); @@ -1923,15 +1774,8 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { - if ((iommu->domains) && (iommu->domain_ids)) { - int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8; - int i; - - for (i = 0; i < elems; i++) - kfree(iommu->domains[i]); - kfree(iommu->domains); + if (iommu->domain_ids) { bitmap_free(iommu->domain_ids); - iommu->domains = NULL; iommu->domain_ids = NULL; } @@ -1973,17 +1817,15 @@ static struct dmar_domain *alloc_domain(unsigned int type) { struct dmar_domain *domain; - domain = alloc_domain_mem(); + domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - memset(domain, 0, sizeof(*domain)); domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); - INIT_LIST_HEAD(&domain->subdevices); return domain; } @@ -2010,11 +1852,8 @@ static int domain_attach_iommu(struct dmar_domain *domain, } set_bit(num, iommu->domain_ids); - set_iommu_domain(iommu, num, domain); - domain->iommu_did[iommu->seq_id] = num; domain->nid = iommu->node; - domain_update_iommu_cap(domain); } @@ -2033,8 +1872,6 @@ static void domain_detach_iommu(struct dmar_domain *domain, if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = domain->iommu_did[iommu->seq_id]; clear_bit(num, iommu->domain_ids); - set_iommu_domain(iommu, num, NULL); - domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } @@ -2067,7 +1904,7 @@ static void domain_exit(struct dmar_domain *domain) put_pages_list(&freelist); } - free_domain_mem(domain); + kfree(domain); } /* @@ -2550,15 +2387,6 @@ static void 
domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); } -static inline void unlink_domain_info(struct device_domain_info *info) -{ - assert_spin_locked(&device_domain_lock); - list_del(&info->link); - list_del(&info->global); - if (info->dev) - dev_iommu_priv_set(info->dev, NULL); -} - static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info, *tmp; @@ -2570,24 +2398,6 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); } -struct dmar_domain *find_domain(struct device *dev) -{ - struct device_domain_info *info; - - if (unlikely(!dev || !dev->iommu)) - return NULL; - - if (unlikely(attach_deferred(dev))) - return NULL; - - /* No lock here, assumes no domain exit in normal case */ - info = get_domain_info(dev); - if (likely(info)) - return info->domain; - - return NULL; -} - static inline struct device_domain_info * dmar_search_domain_by_dev_info(int segment, int bus, int devfn) { @@ -2648,93 +2458,20 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, struct device *dev, struct dmar_domain *domain) { - struct dmar_domain *found = NULL; - struct device_domain_info *info; + struct device_domain_info *info = dev_iommu_priv_get(dev); unsigned long flags; int ret; - info = alloc_devinfo_mem(); - if (!info) - return NULL; - - if (!dev_is_real_dma_subdevice(dev)) { - info->bus = bus; - info->devfn = devfn; - info->segment = iommu->segment; - } else { - struct pci_dev *pdev = to_pci_dev(dev); - - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->segment = pci_domain_nr(pdev->bus); - } - - info->ats_supported = info->pasid_supported = info->pri_supported = 0; - info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; - info->ats_qdep = 0; - info->dev = dev; - info->domain = domain; - info->iommu = iommu; - info->pasid_table = NULL; - 
info->auxd_enabled = 0; - INIT_LIST_HEAD(&info->subdevices); - - if (dev && dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(info->dev); - - if (ecap_dev_iotlb_support(iommu->ecap) && - pci_ats_supported(pdev) && - dmar_find_matched_atsr_unit(pdev)) - info->ats_supported = 1; - - if (sm_supported(iommu)) { - if (pasid_supported(iommu)) { - int features = pci_pasid_features(pdev); - if (features >= 0) - info->pasid_supported = features | 1; - } - - if (info->ats_supported && ecap_prs(iommu->ecap) && - pci_pri_supported(pdev)) - info->pri_supported = 1; - } - } - spin_lock_irqsave(&device_domain_lock, flags); - if (dev) - found = find_domain(dev); - - if (!found) { - struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, - info->devfn); - if (info2) { - found = info2->domain; - info2->dev = dev; - } - } - - if (found) { - spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); - /* Caller must free the original domain */ - return found; - } - + info->domain = domain; spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); spin_unlock(&iommu->lock); - if (ret) { spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); return NULL; } - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - if (dev) - dev_iommu_priv_set(dev, info); spin_unlock_irqrestore(&device_domain_lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. 
*/ @@ -3460,70 +3197,6 @@ error: return ret; } -static inline int iommu_domain_cache_init(void) -{ - int ret = 0; - - iommu_domain_cache = kmem_cache_create("iommu_domain", - sizeof(struct dmar_domain), - 0, - SLAB_HWCACHE_ALIGN, - - NULL); - if (!iommu_domain_cache) { - pr_err("Couldn't create iommu_domain cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -static inline int iommu_devinfo_cache_init(void) -{ - int ret = 0; - - iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", - sizeof(struct device_domain_info), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_devinfo_cache) { - pr_err("Couldn't create devinfo cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -static int __init iommu_init_mempool(void) -{ - int ret; - ret = iova_cache_get(); - if (ret) - return ret; - - ret = iommu_domain_cache_init(); - if (ret) - goto domain_error; - - ret = iommu_devinfo_cache_init(); - if (!ret) - return ret; - - kmem_cache_destroy(iommu_domain_cache); -domain_error: - iova_cache_put(); - - return -ENOMEM; -} - -static void __init iommu_exit_mempool(void) -{ - kmem_cache_destroy(iommu_devinfo_cache); - kmem_cache_destroy(iommu_domain_cache); - iova_cache_put(); -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -3691,7 +3364,7 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ -static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) || @@ -4020,7 +3693,31 @@ static void intel_iommu_free_dmars(void) } } -int dmar_find_matched_atsr_unit(struct pci_dev *dev) +static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) +{ + struct dmar_satc_unit *satcu; + struct acpi_dmar_satc *satc; + struct device *tmp; + int i; + + dev = pci_physfn(dev); + rcu_read_lock(); + + 
list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { + satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); + if (satc->segment != pci_domain_nr(dev->bus)) + continue; + for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp) + if (to_pci_dev(tmp) == dev) + goto out; + } + satcu = NULL; +out: + rcu_read_unlock(); + return satcu; +} + +static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) { int i, ret = 1; struct pci_bus *bus; @@ -4028,8 +3725,20 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) struct device *tmp; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; + struct dmar_satc_unit *satcu; dev = pci_physfn(dev); + satcu = dmar_find_matched_satc_unit(dev); + if (satcu) + /* + * This device supports ATS as it is in SATC table. + * When IOMMU is in legacy mode, enabling ATS is done + * automatically by HW for the device that requires + * ATS, hence OS should not enable this device ATS + * to avoid duplicated TLB invalidation. 
+ */ + return !(satcu->atc_required && !sm_supported(iommu)); + for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; /* If it's an integrated device, allow ATS */ @@ -4375,12 +4084,6 @@ int __init intel_iommu_init(void) force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || platform_optin_force_iommu(); - if (iommu_init_mempool()) { - if (force_on) - panic("tboot: Failed to initialize iommu memory\n"); - return -ENOMEM; - } - down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) @@ -4501,7 +4204,6 @@ int __init intel_iommu_init(void) out_free_dmar: intel_iommu_free_dmars(); up_write(&dmar_global_lock); - iommu_exit_mempool(); return ret; } @@ -4552,13 +4254,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) intel_pasid_free_table(info->dev); } - unlink_domain_info(info); + list_del(&info->link); spin_lock_irqsave(&iommu->lock, flags); domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - - free_devinfo_mem(info); } static void dmar_remove_one_dev_info(struct device *dev) @@ -4567,7 +4267,7 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); @@ -4637,183 +4337,6 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) domain_exit(to_dmar_domain(domain)); } -/* - * Check whether a @domain could be attached to the @dev through the - * aux-domain attach/detach APIs. 
- */ -static inline bool -is_aux_domain(struct device *dev, struct iommu_domain *domain) -{ - struct device_domain_info *info = get_domain_info(dev); - - return info && info->auxd_enabled && - domain->type == IOMMU_DOMAIN_UNMANAGED; -} - -static inline struct subdev_domain_info * -lookup_subdev_info(struct dmar_domain *domain, struct device *dev) -{ - struct subdev_domain_info *sinfo; - - if (!list_empty(&domain->subdevices)) { - list_for_each_entry(sinfo, &domain->subdevices, link_domain) { - if (sinfo->pdev == dev) - return sinfo; - } - } - - return NULL; -} - -static int auxiliary_link_device(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info = get_domain_info(dev); - struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); - - assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info)) - return -EINVAL; - - if (!sinfo) { - sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); - if (!sinfo) - return -ENOMEM; - sinfo->domain = domain; - sinfo->pdev = dev; - list_add(&sinfo->link_phys, &info->subdevices); - list_add(&sinfo->link_domain, &domain->subdevices); - } - - return ++sinfo->users; -} - -static int auxiliary_unlink_device(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info = get_domain_info(dev); - struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); - int ret; - - assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) - return -EINVAL; - - ret = --sinfo->users; - if (!ret) { - list_del(&sinfo->link_phys); - list_del(&sinfo->link_domain); - kfree(sinfo); - } - - return ret; -} - -static int aux_domain_add_dev(struct dmar_domain *domain, - struct device *dev) -{ - int ret; - unsigned long flags; - struct intel_iommu *iommu; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return -ENODEV; - - if (domain->default_pasid <= 0) { - u32 pasid; - - /* No private data needed for the default pasid */ - pasid = 
ioasid_alloc(NULL, PASID_MIN, - pci_max_pasids(to_pci_dev(dev)) - 1, - NULL); - if (pasid == INVALID_IOASID) { - pr_err("Can't allocate default pasid\n"); - return -ENODEV; - } - domain->default_pasid = pasid; - } - - spin_lock_irqsave(&device_domain_lock, flags); - ret = auxiliary_link_device(domain, dev); - if (ret <= 0) - goto link_failed; - - /* - * Subdevices from the same physical device can be attached to the - * same domain. For such cases, only the first subdevice attachment - * needs to go through the full steps in this function. So if ret > - * 1, just goto out. - */ - if (ret > 1) - goto out; - - /* - * iommu->lock must be held to attach domain to iommu and setup the - * pasid entry for second level translation. - */ - spin_lock(&iommu->lock); - ret = domain_attach_iommu(domain, iommu); - if (ret) - goto attach_failed; - - /* Setup the PASID entry for mediated devices: */ - if (domain_use_first_level(domain)) - ret = domain_setup_first_level(iommu, domain, dev, - domain->default_pasid); - else - ret = intel_pasid_setup_second_level(iommu, domain, dev, - domain->default_pasid); - if (ret) - goto table_failed; - - spin_unlock(&iommu->lock); -out: - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; - -table_failed: - domain_detach_iommu(domain, iommu); -attach_failed: - spin_unlock(&iommu->lock); - auxiliary_unlink_device(domain, dev); -link_failed: - spin_unlock_irqrestore(&device_domain_lock, flags); - if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_free(domain->default_pasid); - - return ret; -} - -static void aux_domain_remove_dev(struct dmar_domain *domain, - struct device *dev) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - - if (!is_aux_domain(dev, &domain->domain)) - return; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - iommu = info->iommu; - - if (!auxiliary_unlink_device(domain, dev)) { - spin_lock(&iommu->lock); - 
intel_pasid_tear_down_entry(iommu, dev, - domain->default_pasid, false); - domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); - } - - spin_unlock_irqrestore(&device_domain_lock, flags); - - if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_free(domain->default_pasid); -} - static int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -4825,13 +4348,6 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; - if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && - !ecap_nest(iommu->ecap)) { - dev_err(dev, "%s: iommu not support nested translation\n", - iommu->name); - return -EINVAL; - } - /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -4873,15 +4389,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } - if (is_aux_domain(dev, domain)) - return -EPERM; - /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { - struct dmar_domain *old_domain; + struct device_domain_info *info = dev_iommu_priv_get(dev); - old_domain = find_domain(dev); - if (old_domain) + if (info->domain) dmar_remove_one_dev_info(dev); } @@ -4892,212 +4404,12 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } -static int intel_iommu_aux_attach_device(struct iommu_domain *domain, - struct device *dev) -{ - int ret; - - if (!is_aux_domain(dev, domain)) - return -EPERM; - - ret = prepare_domain_attach_device(domain, dev); - if (ret) - return ret; - - return aux_domain_add_dev(to_dmar_domain(domain), dev); -} - static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { dmar_remove_one_dev_info(dev); } -static void intel_iommu_aux_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - 
aux_domain_remove_dev(to_dmar_domain(domain), dev); -} - -#ifdef CONFIG_INTEL_IOMMU_SVM -/* - * 2D array for converting and sanitizing IOMMU generic TLB granularity to - * VT-d granularity. Invalidation is typically included in the unmap operation - * as a result of DMA or VFIO unmap. However, for assigned devices guest - * owns the first level page tables. Invalidations of translation caches in the - * guest are trapped and passed down to the host. - * - * vIOMMU in the guest will only expose first level page tables, therefore - * we do not support IOTLB granularity for request without PASID (second level). - * - * For example, to find the VT-d granularity encoding for IOTLB - * type and page selective granularity within PASID: - * X: indexed by iommu cache type - * Y: indexed by enum iommu_inv_granularity - * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] - */ - -static const int -inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { - /* - * PASID based IOTLB invalidation: PASID selective (per PASID), - * page selective (address granularity) - */ - {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, - /* PASID based dev TLBs */ - {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, - /* PASID cache */ - {-EINVAL, -EINVAL, -EINVAL} -}; - -static inline int to_vtd_granularity(int type, int granu) -{ - return inv_type_granu_table[type][granu]; -} - -static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) -{ - u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; - - /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. - * IOMMU cache invalidate API passes granu_size in bytes, and number of - * granu size in contiguous memory. 
- */ - return order_base_2(nr_pages); -} - -static int -intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - int cache_type; - u8 bus, devfn; - u16 did, sid; - int ret = 0; - u64 size = 0; - - if (!inv_info || !dmar_domain) - return -EINVAL; - - if (!dev || !dev_is_pci(dev)) - return -ENODEV; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) - return -EINVAL; - - spin_lock_irqsave(&device_domain_lock, flags); - spin_lock(&iommu->lock); - info = get_domain_info(dev); - if (!info) { - ret = -EINVAL; - goto out_unlock; - } - did = dmar_domain->iommu_did[iommu->seq_id]; - sid = PCI_DEVID(bus, devfn); - - /* Size is only valid in address selective invalidation */ - if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) - size = to_vtd_size(inv_info->granu.addr_info.granule_size, - inv_info->granu.addr_info.nb_granules); - - for_each_set_bit(cache_type, - (unsigned long *)&inv_info->cache, - IOMMU_CACHE_INV_TYPE_NR) { - int granu = 0; - u64 pasid = 0; - u64 addr = 0; - - granu = to_vtd_granularity(cache_type, inv_info->granularity); - if (granu == -EINVAL) { - pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", - cache_type, inv_info->granularity); - break; - } - - /* - * PASID is stored in different locations based on the - * granularity. 
- */ - if (inv_info->granularity == IOMMU_INV_GRANU_PASID && - (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) - pasid = inv_info->granu.pasid_info.pasid; - else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && - (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) - pasid = inv_info->granu.addr_info.pasid; - - switch (BIT(cache_type)) { - case IOMMU_CACHE_INV_TYPE_IOTLB: - /* HW will ignore LSB bits based on address mask */ - if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && - size && - (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { - pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n", - inv_info->granu.addr_info.addr, size); - } - - /* - * If granu is PASID-selective, address is ignored. - * We use npages = -1 to indicate that. - */ - qi_flush_piotlb(iommu, did, pasid, - mm_to_dma_pfn(inv_info->granu.addr_info.addr), - (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, - inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); - - if (!info->ats_enabled) - break; - /* - * Always flush device IOTLB if ATS is enabled. vIOMMU - * in the guest may assume IOTLB flush is inclusive, - * which is more efficient. - */ - fallthrough; - case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: - /* - * PASID based device TLB invalidation does not support - * IOMMU_INV_GRANU_PASID granularity but only supports - * IOMMU_INV_GRANU_ADDR. - * The equivalent of that is we set the size to be the - * entire range of 64 bit. User only provides PASID info - * without address info. So we set addr to 0. 
- */ - if (inv_info->granularity == IOMMU_INV_GRANU_PASID) { - size = 64 - VTD_PAGE_SHIFT; - addr = 0; - } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) { - addr = inv_info->granu.addr_info.addr; - } - - if (info->ats_enabled) - qi_flush_dev_iotlb_pasid(iommu, sid, - info->pfsid, pasid, - info->ats_qdep, addr, - size); - else - pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); - break; - default: - dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", - cache_type); - ret = -EINVAL; - } - } -out_unlock: - spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); - - return ret; -} -#endif - static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5245,28 +4557,73 @@ static bool intel_iommu_capable(enum iommu_cap cap) static struct iommu_device *intel_iommu_probe_device(struct device *dev) { + struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; + struct device_domain_info *info; struct intel_iommu *iommu; + unsigned long flags; + u8 bus, devfn; - iommu = device_to_iommu(dev, NULL, NULL); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return ERR_PTR(-ENODEV); - if (translation_pre_enabled(iommu)) - dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO); + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + if (dev_is_real_dma_subdevice(dev)) { + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } else { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } + + info->dev = dev; + info->iommu = iommu; + if (dev_is_pci(dev)) { + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && + dmar_ats_supported(pdev, iommu)) + info->ats_supported = 1; + + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { + int features = pci_pasid_features(pdev); + + if (features >= 0) + 
info->pasid_supported = features | 1; + } + + if (info->ats_supported && ecap_prs(iommu->ecap) && + pci_pri_supported(pdev)) + info->pri_supported = 1; + } + } + + spin_lock_irqsave(&device_domain_lock, flags); + list_add(&info->global, &device_domain_list); + dev_iommu_priv_set(dev, info); + spin_unlock_irqrestore(&device_domain_lock, flags); return &iommu->iommu; } static void intel_iommu_release_device(struct device *dev) { - struct intel_iommu *iommu; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return; + struct device_domain_info *info = dev_iommu_priv_get(dev); + unsigned long flags; dmar_remove_one_dev_info(dev); + spin_lock_irqsave(&device_domain_lock, flags); + dev_iommu_priv_set(dev, NULL); + list_del(&info->global); + spin_unlock_irqrestore(&device_domain_lock, flags); + + kfree(info); set_dma_ops(dev, NULL); } @@ -5335,14 +4692,14 @@ static void intel_iommu_get_resv_regions(struct device *device, int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) { - struct device_domain_info *info; + struct device_domain_info *info = dev_iommu_priv_get(dev); struct context_entry *context; struct dmar_domain *domain; unsigned long flags; u64 ctx_lo; int ret; - domain = find_domain(dev); + domain = info->domain; if (!domain) return -EINVAL; @@ -5350,8 +4707,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = get_domain_info(dev); - if (!info || !info->pasid_supported) + if (!info->pasid_supported) goto out; context = iommu_context_addr(iommu, info->bus, info->devfn, 0); @@ -5391,49 +4747,9 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -static int intel_iommu_enable_auxd(struct device *dev) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags; - int ret; - - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu || dmar_disabled) - return -EINVAL; - - if 
(!sm_supported(iommu) || !pasid_supported(iommu)) - return -EINVAL; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) - return -ENODEV; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - info->auxd_enabled = 1; - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - -static int intel_iommu_disable_auxd(struct device *dev) -{ - struct device_domain_info *info; - unsigned long flags; - - spin_lock_irqsave(&device_domain_lock, flags); - info = get_domain_info(dev); - if (!WARN_ON(!info)) - info->auxd_enabled = 0; - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - static int intel_iommu_enable_sva(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; int ret; @@ -5462,7 +4778,7 @@ static int intel_iommu_enable_sva(struct device *dev) static int intel_iommu_disable_sva(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; int ret; @@ -5475,7 +4791,7 @@ static int intel_iommu_disable_sva(struct device *dev) static int intel_iommu_enable_iopf(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); if (info && info->pri_supported) return 0; @@ -5487,9 +4803,6 @@ static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { - case IOMMU_DEV_FEAT_AUX: - return intel_iommu_enable_auxd(dev); - case IOMMU_DEV_FEAT_IOPF: return intel_iommu_enable_iopf(dev); @@ -5505,9 +4818,6 @@ static int intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { - case IOMMU_DEV_FEAT_AUX: - return intel_iommu_disable_auxd(dev); - case IOMMU_DEV_FEAT_IOPF: return 0; @@ -5519,48 +4829,11 @@ 
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) } } -static bool -intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) +static bool intel_iommu_is_attach_deferred(struct device *dev) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); - if (feat == IOMMU_DEV_FEAT_AUX) - return scalable_mode_support() && info && info->auxd_enabled; - - return false; -} - -static int -intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - - return dmar_domain->default_pasid > 0 ? - dmar_domain->default_pasid : -EINVAL; -} - -static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) -{ - return attach_deferred(dev); -} - -static int -intel_iommu_enable_nesting(struct iommu_domain *domain) -{ - struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long flags; - int ret = -ENODEV; - - spin_lock_irqsave(&device_domain_lock, flags); - if (list_empty(&dmar_domain->devices)) { - dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; - dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; - ret = 0; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - return ret; + return translation_pre_enabled(info->iommu) && !info->domain; } /* @@ -5598,40 +4871,34 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, - .domain_free = intel_iommu_domain_free, - .enable_nesting = intel_iommu_enable_nesting, - .attach_dev = intel_iommu_attach_device, - .detach_dev = intel_iommu_detach_device, - .aux_attach_dev = intel_iommu_aux_attach_device, - .aux_detach_dev = intel_iommu_aux_detach_device, - .aux_get_pasid = intel_iommu_aux_get_pasid, - .map_pages = intel_iommu_map_pages, - .unmap_pages = intel_iommu_unmap_pages, - 
.iotlb_sync_map = intel_iommu_iotlb_sync_map, - .flush_iotlb_all = intel_flush_iotlb_all, - .iotlb_sync = intel_iommu_tlb_sync, - .iova_to_phys = intel_iommu_iova_to_phys, .probe_device = intel_iommu_probe_device, .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .device_group = intel_iommu_device_group, - .dev_feat_enabled = intel_iommu_dev_feat_enabled, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM - .cache_invalidate = intel_iommu_sva_invalidate, - .sva_bind_gpasid = intel_svm_bind_gpasid, - .sva_unbind_gpasid = intel_svm_unbind_gpasid, .sva_bind = intel_svm_bind, .sva_unbind = intel_svm_unbind, .sva_get_pasid = intel_svm_get_pasid, .page_response = intel_svm_page_response, #endif + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map_pages = intel_iommu_map_pages, + .unmap_pages = intel_iommu_unmap_pages, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, + .flush_iotlb_all = intel_flush_iotlb_all, + .iotlb_sync = intel_iommu_tlb_sync, + .iova_to_phys = intel_iommu_iova_to_phys, + .free = intel_iommu_domain_free, + } }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 07c390aed1fe..f8d215d85695 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -150,7 +150,7 @@ int intel_pasid_alloc_table(struct device *dev) int size; might_sleep(); - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; @@ -197,7 +197,7 @@ void intel_pasid_free_table(struct 
device *dev) struct pasid_entry *table; int i, max_pde; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !dev_is_pci(dev) || !info->pasid_table) return; @@ -223,7 +223,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) { struct device_domain_info *info; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info) return NULL; @@ -234,7 +234,7 @@ static int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !info->pasid_table) return 0; @@ -254,7 +254,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) return NULL; dir = pasid_table->table; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; @@ -487,7 +487,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device_domain_info *info; u16 sid, qdep, pfsid; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (!info || !info->ats_enabled) return; @@ -762,164 +762,3 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } - -static int -intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, - struct iommu_gpasid_bind_data_vtd *pasid_data) -{ - /* - * Not all guest PASID table entry fields are passed down during bind, - * here we only set up the ones that are dependent on guest settings. - * Execution related bits such as NXE, SMEP are not supported. - * Other fields, such as snoop related, are set based on host needs - * regardless of guest settings. 
- */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) { - if (!ecap_srs(iommu->ecap)) { - pr_err_ratelimited("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } - pasid_set_sre(pte); - /* Enable write protect WP if guest requested */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_WPE) - pasid_set_wpe(pte); - } - - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { - if (!ecap_eafs(iommu->ecap)) { - pr_err_ratelimited("No extended access flag support on %s\n", - iommu->name); - return -EINVAL; - } - pasid_set_eafe(pte); - } - - /* - * Memory type is only applicable to devices inside processor coherent - * domain. Will add MTS support once coherent devices are available. - */ - if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) { - pr_warn_ratelimited("No memory type support %s\n", - iommu->name); - return -EINVAL; - } - - return 0; -} - -/** - * intel_pasid_setup_nested() - Set up PASID entry for nested translation. - * This could be used for guest shared virtual address. In this case, the - * first level page tables are used for GVA-GPA translation in the guest, - * second level page tables are used for GPA-HPA translation. 
- * - * @iommu: IOMMU which the device belong to - * @dev: Device to be set up for translation - * @gpgd: FLPTPTR: First Level Page translation pointer in GPA - * @pasid: PASID to be programmed in the device PASID table - * @pasid_data: Additional PASID info from the guest bind request - * @domain: Domain info for setting up second level page tables - * @addr_width: Address width of the first level (guest) - */ -int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, - pgd_t *gpgd, u32 pasid, - struct iommu_gpasid_bind_data_vtd *pasid_data, - struct dmar_domain *domain, int addr_width) -{ - struct pasid_entry *pte; - struct dma_pte *pgd; - int ret = 0; - u64 pgd_val; - int agaw; - u16 did; - - if (!ecap_nest(iommu->ecap)) { - pr_err_ratelimited("IOMMU: %s: No nested translation support\n", - iommu->name); - return -EINVAL; - } - - if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) { - pr_err_ratelimited("Domain is not in nesting mode, %x\n", - domain->flags); - return -EINVAL; - } - - pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte)) - return -EINVAL; - - /* - * Caller must ensure PASID entry is not in use, i.e. not bind the - * same PASID to the same device twice. - */ - if (pasid_pte_is_present(pte)) - return -EBUSY; - - pasid_clear_entry(pte); - - /* Sanity checking performed by caller to make sure address - * width matching in two dimensions: - * 1. CPU vs. IOMMU - * 2. Guest vs. Host. 
- */ - switch (addr_width) { -#ifdef CONFIG_X86 - case ADDR_WIDTH_5LEVEL: - if (!cpu_feature_enabled(X86_FEATURE_LA57) || - !cap_5lp_support(iommu->cap)) { - dev_err_ratelimited(dev, - "5-level paging not supported\n"); - return -EINVAL; - } - - pasid_set_flpm(pte, 1); - break; -#endif - case ADDR_WIDTH_4LEVEL: - pasid_set_flpm(pte, 0); - break; - default: - dev_err_ratelimited(dev, "Invalid guest address width %d\n", - addr_width); - return -EINVAL; - } - - /* First level PGD is in GPA, must be supported by the second level */ - if ((uintptr_t)gpgd > domain->max_addr) { - dev_err_ratelimited(dev, - "Guest PGD %lx not supported, max %llx\n", - (uintptr_t)gpgd, domain->max_addr); - return -EINVAL; - } - pasid_set_flptr(pte, (uintptr_t)gpgd); - - ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data); - if (ret) - return ret; - - /* Setup the second level based on the given domain */ - pgd = domain->pgd; - - agaw = iommu_skip_agaw(domain, iommu, &pgd); - if (agaw < 0) { - dev_err_ratelimited(dev, "Invalid domain page table\n"); - return -EINVAL; - } - pgd_val = virt_to_phys(pgd); - pasid_set_slptr(pte, pgd_val); - pasid_set_fault_enable(pte); - - did = domain->iommu_did[iommu->seq_id]; - pasid_set_domain_id(pte, did); - - pasid_set_address_width(pte, agaw); - pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - - pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); - pasid_set_present(pte); - pasid_flush_caches(iommu, pte, pasid, did); - - return ret; -} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d5552e2c160d..ab4408c824a5 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -118,10 +118,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); -int intel_pasid_setup_nested(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, u32 pasid, - struct 
iommu_gpasid_bind_data_vtd *pasid_data, - struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 51ac2096b3da..23a38763c1d1 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -168,11 +168,6 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) return 0; } -static inline bool intel_svm_capable(struct intel_iommu *iommu) -{ - return iommu->flags & VTD_FLAG_SVM_CAPABLE; -} - void intel_svm_check(struct intel_iommu *iommu) { if (!pasid_supported(iommu)) @@ -200,7 +195,7 @@ static void __flush_svm_range_dev(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { - struct device_domain_info *info = get_domain_info(sdev->dev); + struct device_domain_info *info = dev_iommu_priv_get(sdev->dev); if (WARN_ON(!pages)) return; @@ -318,193 +313,6 @@ out: return 0; } -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm_dev *sdev = NULL; - struct dmar_domain *dmar_domain; - struct device_domain_info *info; - struct intel_svm *svm = NULL; - unsigned long iflags; - int ret = 0; - - if (WARN_ON(!iommu) || !data) - return -EINVAL; - - if (data->format != IOMMU_PASID_FORMAT_INTEL_VTD) - return -EINVAL; - - /* IOMMU core ensures argsz is more than the start of the union */ - if (data->argsz < offsetofend(struct iommu_gpasid_bind_data, vendor.vtd)) - return -EINVAL; - - /* Make sure no undefined flags are used in vendor data */ - if (data->vendor.vtd.flags & ~(IOMMU_SVA_VTD_GPASID_LAST - 1)) - return -EINVAL; - - if (!dev_is_pci(dev)) - return -ENOTSUPP; - - /* VT-d supports devices with full 20 bit PASIDs only */ - if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) - return -EINVAL; - - /* - * We only check host PASID range, we 
have no knowledge to check - * guest PASID range. - */ - if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) - return -EINVAL; - - info = get_domain_info(dev); - if (!info) - return -EINVAL; - - dmar_domain = to_dmar_domain(domain); - - mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev); - if (ret) - goto out; - - if (sdev) { - /* - * Do not allow multiple bindings of the same device-PASID since - * there is only one SL page tables per PASID. We may revisit - * once sharing PGD across domains are supported. - */ - dev_warn_ratelimited(dev, "Already bound with PASID %u\n", - svm->pasid); - ret = -EBUSY; - goto out; - } - - if (!svm) { - /* We come here when PASID has never been bond to a device. */ - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto out; - } - /* REVISIT: upper layer/VFIO can track host process that bind - * the PASID. ioasid_set = mm might be sufficient for vfio to - * check pasid VMM ownership. We can drop the following line - * once VFIO and IOASID set check is in place. - */ - svm->mm = get_task_mm(current); - svm->pasid = data->hpasid; - if (data->flags & IOMMU_SVA_GPASID_VAL) { - svm->gpasid = data->gpasid; - svm->flags |= SVM_FLAG_GUEST_PASID; - } - pasid_private_add(data->hpasid, svm); - INIT_LIST_HEAD_RCU(&svm->devs); - mmput(svm->mm); - } - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!sdev) { - ret = -ENOMEM; - goto out; - } - sdev->dev = dev; - sdev->sid = PCI_DEVID(info->bus, info->devfn); - sdev->iommu = iommu; - - /* Only count users if device has aux domains */ - if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) - sdev->users = 1; - - /* Set up device context entry for PASID if not enabled already */ - ret = intel_iommu_enable_pasid(iommu, sdev->dev); - if (ret) { - dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); - kfree(sdev); - goto out; - } - - /* - * PASID table is per device for better security. 
Therefore, for - * each bind of a new device even with an existing PASID, we need to - * call the nested mode setup function here. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_nested(iommu, dev, - (pgd_t *)(uintptr_t)data->gpgd, - data->hpasid, &data->vendor.vtd, dmar_domain, - data->addr_width); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", - data->hpasid, ret); - /* - * PASID entry should be in cleared state if nested mode - * set up failed. So we only need to clear IOASID tracking - * data such that free call will succeed. - */ - kfree(sdev); - goto out; - } - - svm->flags |= SVM_FLAG_GUEST_MODE; - - init_rcu_head(&sdev->rcu); - list_add_rcu(&sdev->list, &svm->devs); - out: - if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - pasid_private_remove(data->hpasid); - kfree(svm); - } - - mutex_unlock(&pasid_mutex); - return ret; -} - -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm_dev *sdev; - struct intel_svm *svm; - int ret; - - if (WARN_ON(!iommu)) - return -EINVAL; - - mutex_lock(&pasid_mutex); - ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); - if (ret) - goto out; - - if (sdev) { - if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) - sdev->users--; - if (!sdev->users) { - list_del_rcu(&sdev->list); - intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); - kfree_rcu(sdev, rcu); - - if (list_empty(&svm->devs)) { - /* - * We do not free the IOASID here in that - * IOMMU driver did not allocate it. - * Unlike native SVM, IOASID for guest use was - * allocated prior to the bind call. - * In any case, if the free call comes before - * the unbind, IOMMU driver will get notified - * and perform cleanup. 
- */ - pasid_private_remove(pasid); - kfree(svm); - } - } - } -out: - mutex_unlock(&pasid_mutex); - return ret; -} - static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, unsigned int flags) { @@ -519,7 +327,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct mm_struct *mm, unsigned int flags) { - struct device_domain_info *info = get_domain_info(dev); + struct device_domain_info *info = dev_iommu_priv_get(dev); unsigned long iflags, sflags; struct intel_svm_dev *sdev; struct intel_svm *svm; @@ -725,7 +533,7 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid) u16 sid, did; int qdep; - info = get_domain_info(dev); + info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev))) return; @@ -1116,28 +924,6 @@ int intel_svm_page_response(struct device *dev, goto out; } - /* - * For responses from userspace, need to make sure that the - * pasid has been bound to its mm. - */ - if (svm->flags & SVM_FLAG_GUEST_MODE) { - struct mm_struct *mm; - - mm = get_task_mm(current); - if (!mm) { - ret = -EINVAL; - goto out; - } - - if (mm != svm->mm) { - ret = -ENODEV; - mmput(mm); - goto out; - } - - mmput(mm); - } - /* * Per VT-d spec. 
v3.0 ch7.7, system software must respond * with page group response if private data is present (PDP) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 107dcf5938d6..f2c45b85b9fc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -323,13 +323,14 @@ err_out: void iommu_release_device(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops; if (!dev->iommu) return; iommu_device_unlink(dev->iommu->iommu_dev, dev); + ops = dev_iommu_ops(dev); ops->release_device(dev); iommu_group_remove_device(dev); @@ -790,9 +791,6 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group, dma_addr_t start, end, addr; size_t map_size = 0; - if (domain->ops->apply_resv_region) - domain->ops->apply_resv_region(dev, domain, entry); - start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); @@ -833,11 +831,12 @@ out: return ret; } -static bool iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +static bool iommu_is_attach_deferred(struct device *dev) { - if (domain->ops->is_attach_deferred) - return domain->ops->is_attach_deferred(domain, dev); + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (ops->is_attach_deferred) + return ops->is_attach_deferred(dev); return false; } @@ -894,7 +893,7 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain && !iommu_is_attach_deferred(group->domain, dev)) + if (group->domain && !iommu_is_attach_deferred(dev)) ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); if (ret) @@ -1255,10 +1254,10 @@ int iommu_page_response(struct device *dev, struct iommu_fault_event *evt; struct iommu_fault_page_request *prm; struct dev_iommu *param = dev->iommu; + const struct iommu_ops *ops = dev_iommu_ops(dev); bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - if 
(!domain || !domain->ops->page_response) + if (!ops->page_response) return -ENODEV; if (!param || !param->fault_param) @@ -1299,7 +1298,7 @@ int iommu_page_response(struct device *dev, msg->pasid = 0; } - ret = domain->ops->page_response(dev, evt, msg); + ret = ops->page_response(dev, evt, msg); list_del(&evt->list); kfree(evt); break; @@ -1524,7 +1523,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group); static int iommu_get_def_domain_type(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) return IOMMU_DOMAIN_DMA; @@ -1583,7 +1582,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group, */ static struct iommu_group *iommu_group_get_for_dev(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_group *group; int ret; @@ -1591,9 +1590,6 @@ static struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!ops) - return ERR_PTR(-EINVAL); - group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) return ERR_PTR(-EINVAL); @@ -1748,7 +1744,7 @@ static int iommu_group_do_dma_attach(struct device *dev, void *data) struct iommu_domain *domain = data; int ret = 0; - if (!iommu_is_attach_deferred(domain, dev)) + if (!iommu_is_attach_deferred(dev)) ret = __iommu_attach_device(domain, dev); return ret; @@ -1762,10 +1758,10 @@ static int __iommu_group_dma_attach(struct iommu_group *group) static int iommu_group_do_probe_finalize(struct device *dev, void *data) { - struct iommu_domain *domain = data; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (domain->ops->probe_finalize) - domain->ops->probe_finalize(dev); + if (ops->probe_finalize) + ops->probe_finalize(dev); return 0; } @@ -1954,10 +1950,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, if (!domain) return NULL; - domain->ops = 
bus->iommu_ops; domain->type = type; /* Assume all sizes by default; the driver may override this later */ - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + if (!domain->ops) + domain->ops = bus->iommu_ops->default_domain_ops; if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); @@ -1975,7 +1972,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { iommu_put_dma_cookie(domain); - domain->ops->domain_free(domain); + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); @@ -2023,228 +2020,16 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { - const struct iommu_ops *ops = domain->ops; - - if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(dev)) return __iommu_attach_device(domain, dev); return 0; } -/* - * Check flags and other user provided data for valid combinations. We also - * make sure no reserved fields or unused flags are set. This is to ensure - * not breaking userspace in the future when these fields or flags are used. 
- */ -static int iommu_check_cache_invl_data(struct iommu_cache_invalidate_info *info) -{ - u32 mask; - int i; - - if (info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) - return -EINVAL; - - mask = (1 << IOMMU_CACHE_INV_TYPE_NR) - 1; - if (info->cache & ~mask) - return -EINVAL; - - if (info->granularity >= IOMMU_INV_GRANU_NR) - return -EINVAL; - - switch (info->granularity) { - case IOMMU_INV_GRANU_ADDR: - if (info->cache & IOMMU_CACHE_INV_TYPE_PASID) - return -EINVAL; - - mask = IOMMU_INV_ADDR_FLAGS_PASID | - IOMMU_INV_ADDR_FLAGS_ARCHID | - IOMMU_INV_ADDR_FLAGS_LEAF; - - if (info->granu.addr_info.flags & ~mask) - return -EINVAL; - break; - case IOMMU_INV_GRANU_PASID: - mask = IOMMU_INV_PASID_FLAGS_PASID | - IOMMU_INV_PASID_FLAGS_ARCHID; - if (info->granu.pasid_info.flags & ~mask) - return -EINVAL; - - break; - case IOMMU_INV_GRANU_DOMAIN: - if (info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB) - return -EINVAL; - break; - default: - return -EINVAL; - } - - /* Check reserved padding fields */ - for (i = 0; i < sizeof(info->padding); i++) { - if (info->padding[i]) - return -EINVAL; - } - - return 0; -} - -int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev, - void __user *uinfo) -{ - struct iommu_cache_invalidate_info inv_info = { 0 }; - u32 minsz; - int ret; - - if (unlikely(!domain->ops->cache_invalidate)) - return -ENODEV; - - /* - * No new spaces can be added before the variable sized union, the - * minimum size is the offset to the union. 
- */ - minsz = offsetof(struct iommu_cache_invalidate_info, granu); - - /* Copy minsz from user to get flags and argsz */ - if (copy_from_user(&inv_info, uinfo, minsz)) - return -EFAULT; - - /* Fields before the variable size union are mandatory */ - if (inv_info.argsz < minsz) - return -EINVAL; - - /* PASID and address granu require additional info beyond minsz */ - if (inv_info.granularity == IOMMU_INV_GRANU_PASID && - inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.pasid_info)) - return -EINVAL; - - if (inv_info.granularity == IOMMU_INV_GRANU_ADDR && - inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.addr_info)) - return -EINVAL; - - /* - * User might be using a newer UAPI header which has a larger data - * size, we shall support the existing flags within the current - * size. Copy the remaining user data _after_ minsz but not more - * than the current kernel supported size. - */ - if (copy_from_user((void *)&inv_info + minsz, uinfo + minsz, - min_t(u32, inv_info.argsz, sizeof(inv_info)) - minsz)) - return -EFAULT; - - /* Now the argsz is validated, check the content */ - ret = iommu_check_cache_invl_data(&inv_info); - if (ret) - return ret; - - return domain->ops->cache_invalidate(domain, dev, &inv_info); -} -EXPORT_SYMBOL_GPL(iommu_uapi_cache_invalidate); - -static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data) -{ - u64 mask; - int i; - - if (data->version != IOMMU_GPASID_BIND_VERSION_1) - return -EINVAL; - - /* Check the range of supported formats */ - if (data->format >= IOMMU_PASID_FORMAT_LAST) - return -EINVAL; - - /* Check all flags */ - mask = IOMMU_SVA_GPASID_VAL; - if (data->flags & ~mask) - return -EINVAL; - - /* Check reserved padding fields */ - for (i = 0; i < sizeof(data->padding); i++) { - if (data->padding[i]) - return -EINVAL; - } - - return 0; -} - -static int iommu_sva_prepare_bind_data(void __user *udata, - struct iommu_gpasid_bind_data *data) -{ - u32 minsz; - - /* - * No new 
spaces can be added before the variable sized union, the - * minimum size is the offset to the union. - */ - minsz = offsetof(struct iommu_gpasid_bind_data, vendor); - - /* Copy minsz from user to get flags and argsz */ - if (copy_from_user(data, udata, minsz)) - return -EFAULT; - - /* Fields before the variable size union are mandatory */ - if (data->argsz < minsz) - return -EINVAL; - /* - * User might be using a newer UAPI header, we shall let IOMMU vendor - * driver decide on what size it needs. Since the guest PASID bind data - * can be vendor specific, larger argsz could be the result of extension - * for one vendor but it should not affect another vendor. - * Copy the remaining user data _after_ minsz - */ - if (copy_from_user((void *)data + minsz, udata + minsz, - min_t(u32, data->argsz, sizeof(*data)) - minsz)) - return -EFAULT; - - return iommu_check_bind_data(data); -} - -int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev, - void __user *udata) -{ - struct iommu_gpasid_bind_data data = { 0 }; - int ret; - - if (unlikely(!domain->ops->sva_bind_gpasid)) - return -ENODEV; - - ret = iommu_sva_prepare_bind_data(udata, &data); - if (ret) - return ret; - - return domain->ops->sva_bind_gpasid(domain, dev, &data); -} -EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid); - -int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid) -{ - if (unlikely(!domain->ops->sva_unbind_gpasid)) - return -ENODEV; - - return domain->ops->sva_unbind_gpasid(dev, pasid); -} -EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); - -int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, - void __user *udata) -{ - struct iommu_gpasid_bind_data data = { 0 }; - int ret; - - if (unlikely(!domain->ops->sva_bind_gpasid)) - return -ENODEV; - - ret = iommu_sva_prepare_bind_data(udata, &data); - if (ret) - return ret; - - return iommu_sva_unbind_gpasid(domain, dev, data.hpasid); -} 
-EXPORT_SYMBOL_GPL(iommu_uapi_sva_unbind_gpasid); - static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if (iommu_is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(dev)) return; if (unlikely(domain->ops->detach_dev == NULL)) @@ -2458,7 +2243,7 @@ static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp, size_t *mapped) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t pgsize, count; int ret; @@ -2481,7 +2266,7 @@ static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; @@ -2541,7 +2326,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, static int _iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; int ret; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); @@ -2570,7 +2355,7 @@ static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t pgsize, count; pgsize = iommu_pgsize(domain, iova, iova, size, &count); @@ -2583,7 +2368,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; unsigned 
long orig_iova = iova; unsigned int min_pagesz; @@ -2659,7 +2444,7 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { - const struct iommu_ops *ops = domain->ops; + const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; @@ -2792,17 +2577,17 @@ EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); void iommu_get_resv_regions(struct device *dev, struct list_head *list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (ops && ops->get_resv_regions) + if (ops->get_resv_regions) ops->get_resv_regions(dev, list); } void iommu_put_resv_regions(struct device *dev, struct list_head *list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (ops && ops->put_resv_regions) + if (ops->put_resv_regions) ops->put_resv_regions(dev, list); } @@ -2959,8 +2744,6 @@ EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); /* * The device drivers should do the necessary cleanups before calling this. - * For example, before disabling the aux-domain feature, the device driver - * should detach all aux-domains. Otherwise, this will return -EBUSY. */ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { @@ -2988,50 +2771,6 @@ bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); -/* - * Aux-domain specific attach/detach. - * - * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns - * true. Also, as long as domains are attached to a device through this - * interface, any tries to call iommu_attach_device() should fail - * (iommu_detach_device() can't fail, so we fail when trying to re-attach). 
- * This should make us safe against a device being attached to a guest as a - * whole while there are still pasid users on it (aux and sva). - */ -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - int ret = -ENODEV; - - if (domain->ops->aux_attach_dev) - ret = domain->ops->aux_attach_dev(domain, dev); - - if (!ret) - trace_attach_device_to_domain(dev); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_aux_attach_device); - -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ - if (domain->ops->aux_detach_dev) { - domain->ops->aux_detach_dev(domain, dev); - trace_detach_device_from_domain(dev); - } -} -EXPORT_SYMBOL_GPL(iommu_aux_detach_device); - -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - int ret = -ENODEV; - - if (domain->ops->aux_get_pasid) - ret = domain->ops->aux_get_pasid(domain, dev); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); - /** * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device @@ -3053,9 +2792,9 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { struct iommu_group *group; struct iommu_sva *handle = ERR_PTR(-EINVAL); - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (!ops || !ops->sva_bind) + if (!ops->sva_bind) return ERR_PTR(-ENODEV); group = iommu_group_get(dev); @@ -3096,9 +2835,9 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) { struct iommu_group *group; struct device *dev = handle->dev; - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev_iommu_ops(dev); - if (!ops || !ops->sva_unbind) + if (!ops->sva_unbind) return; group = iommu_group_get(dev); @@ -3115,9 +2854,9 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); u32 iommu_sva_get_pasid(struct iommu_sva *handle) { - const struct iommu_ops *ops = handle->dev->bus->iommu_ops; + const struct iommu_ops *ops = 
dev_iommu_ops(handle->dev); - if (!ops || !ops->sva_get_pasid) + if (!ops->sva_get_pasid) return IOMMU_PASID_INVALID; return ops->sva_get_pasid(handle); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b28c9435b898..db77aa675145 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -15,13 +15,14 @@ /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); -static void init_iova_rcaches(struct iova_domain *iovad); static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); @@ -64,8 +65,6 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); rb_insert_color(&iovad->anchor.node, &iovad->rbroot); - cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead); - init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -95,10 +94,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = to_iova(iovad->cached32_node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) @@ -488,6 +488,13 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +static void 
iova_domain_free_rcaches(struct iova_domain *iovad) +{ + cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + free_iova_rcaches(iovad); +} + /** * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. @@ -497,9 +504,9 @@ void put_iova_domain(struct iova_domain *iovad) { struct iova *iova, *tmp; - cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, - &iovad->cpuhp_dead); - free_iova_rcaches(iovad); + if (iovad->rcaches) + iova_domain_free_rcaches(iovad); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); } @@ -608,6 +615,7 @@ EXPORT_SYMBOL_GPL(reserve_iova); */ #define IOVA_MAG_SIZE 128 +#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ struct iova_magazine { unsigned long size; @@ -620,6 +628,13 @@ struct iova_cpu_rcache { struct iova_magazine *prev; }; +struct iova_rcache { + spinlock_t lock; + unsigned long depot_size; + struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_cpu_rcache __percpu *cpu_rcaches; +}; + static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { return kzalloc(sizeof(struct iova_magazine), flags); @@ -693,28 +708,54 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; } -static void init_iova_rcaches(struct iova_domain *iovad) +int iova_domain_init_rcaches(struct iova_domain *iovad) { - struct iova_cpu_rcache *cpu_rcache; - struct iova_rcache *rcache; unsigned int cpu; - int i; + int i, ret; + + iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE, + sizeof(struct iova_rcache), + GFP_KERNEL); + if (!iovad->rcaches) + return -ENOMEM; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); rcache->depot_size = 0; - rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); - if (WARN_ON(!rcache->cpu_rcaches)) - continue; + 
rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), + cache_line_size()); + if (!rcache->cpu_rcaches) { + ret = -ENOMEM; + goto out_err; + } for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_init(&cpu_rcache->lock); cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); + if (!cpu_rcache->loaded || !cpu_rcache->prev) { + ret = -ENOMEM; + goto out_err; + } } } + + ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + if (ret) + goto out_err; + return 0; + +out_err: + free_iova_rcaches(iovad); + return ret; } +EXPORT_SYMBOL_GPL(iova_domain_init_rcaches); /* * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and @@ -831,7 +872,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, { unsigned int log_size = order_base_2(size); - if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches) return 0; return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); @@ -849,6 +890,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; + if (!rcache->cpu_rcaches) + break; for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); iova_magazine_free(cpu_rcache->loaded); @@ -858,6 +901,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) for (j = 0; j < rcache->depot_size; ++j) iova_magazine_free(rcache->depot[j]); } + + kfree(iovad->rcaches); + iovad->rcaches = NULL; } /* diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ca752bdc710f..8fdb84b3642b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -719,6 +719,7 @@ static int ipmmu_init_platform_device(struct device *dev, static const struct soc_device_attribute soc_needs_opt_in[] = { { .family = "R-Car Gen3", }, + { .family = "R-Car Gen4", }, { 
.family = "RZ/G2", }, { /* sentinel */ } }; @@ -743,7 +744,7 @@ static bool ipmmu_device_is_allowed(struct device *dev) unsigned int i; /* - * R-Car Gen3 and RZ/G2 use the allow list to opt-in devices. + * R-Car Gen3/4 and RZ/G2 use the allow list to opt-in devices. * For Other SoCs, this returns true anyway. */ if (!soc_device_match(soc_needs_opt_in)) @@ -868,14 +869,6 @@ static struct iommu_group *ipmmu_find_group(struct device *dev) static const struct iommu_ops ipmmu_ops = { .domain_alloc = ipmmu_domain_alloc, - .domain_free = ipmmu_domain_free, - .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, - .flush_iotlb_all = ipmmu_flush_iotlb_all, - .iotlb_sync = ipmmu_iotlb_sync, - .iova_to_phys = ipmmu_iova_to_phys, .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, @@ -883,6 +876,16 @@ static const struct iommu_ops ipmmu_ops = { ? generic_device_group : ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .flush_iotlb_all = ipmmu_flush_iotlb_all, + .iotlb_sync = ipmmu_iotlb_sync, + .iova_to_phys = ipmmu_iova_to_phys, + .free = ipmmu_domain_free, + } }; /* ----------------------------------------------------------------------------- @@ -926,7 +929,7 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = { .utlb_offset_base = 0, }; -static const struct ipmmu_features ipmmu_features_r8a779a0 = { +static const struct ipmmu_features ipmmu_features_rcar_gen4 = { .use_ns_alias_offset = false, .has_cache_leaf_nodes = true, .number_of_contexts = 16, @@ -982,7 +985,10 @@ static const struct of_device_id ipmmu_of_ids[] = { .data = &ipmmu_features_rcar_gen3, }, { .compatible = "renesas,ipmmu-r8a779a0", - .data = 
&ipmmu_features_r8a779a0, + .data = &ipmmu_features_rcar_gen4, + }, { + .compatible = "renesas,rcar-gen4-ipmmu", + .data = &ipmmu_features_rcar_gen4, }, { /* Terminator */ }, @@ -1006,7 +1012,9 @@ static int ipmmu_probe(struct platform_device *pdev) bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 3a38352b603f..50f57624610f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -558,11 +558,6 @@ fail: return ret; } -static bool msm_iommu_capable(enum iommu_cap cap) -{ - return false; -} - static void print_ctx_regs(void __iomem *base, int ctx) { unsigned int fsr = GET_FSR(base, ctx); @@ -672,27 +667,28 @@ fail: } static struct iommu_ops msm_iommu_ops = { - .capable = msm_iommu_capable, .domain_alloc = msm_iommu_domain_alloc, - .domain_free = msm_iommu_domain_free, - .attach_dev = msm_iommu_attach_dev, - .detach_dev = msm_iommu_detach_dev, - .map = msm_iommu_map, - .unmap = msm_iommu_unmap, - /* - * Nothing is needed here, the barrier to guarantee - * completion of the tlb sync operation is implicitly - * taken care when the iommu client does a writel before - * kick starting the other master. 
- */ - .iotlb_sync = NULL, - .iotlb_sync_map = msm_iommu_sync_map, - .iova_to_phys = msm_iommu_iova_to_phys, .probe_device = msm_iommu_probe_device, .release_device = msm_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = msm_iommu_attach_dev, + .detach_dev = msm_iommu_detach_dev, + .map = msm_iommu_map, + .unmap = msm_iommu_unmap, + /* + * Nothing is needed here, the barrier to guarantee + * completion of the tlb sync operation is implicitly + * taken care when the iommu client does a writel before + * kick starting the other master. + */ + .iotlb_sync = NULL, + .iotlb_sync_map = msm_iommu_sync_map, + .iova_to_phys = msm_iommu_iova_to_phys, + .free = msm_iommu_domain_free, + } }; static int msm_iommu_probe(struct platform_device *pdev) @@ -710,36 +706,32 @@ static int msm_iommu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&iommu->ctx_list); iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); - if (IS_ERR(iommu->pclk)) { - dev_err(iommu->dev, "could not get smmu_pclk\n"); - return PTR_ERR(iommu->pclk); - } + if (IS_ERR(iommu->pclk)) + return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk), + "could not get smmu_pclk\n"); ret = clk_prepare(iommu->pclk); - if (ret) { - dev_err(iommu->dev, "could not prepare smmu_pclk\n"); - return ret; - } + if (ret) + return dev_err_probe(iommu->dev, ret, + "could not prepare smmu_pclk\n"); iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); if (IS_ERR(iommu->clk)) { - dev_err(iommu->dev, "could not get iommu_clk\n"); clk_unprepare(iommu->pclk); - return PTR_ERR(iommu->clk); + return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk), + "could not get iommu_clk\n"); } ret = clk_prepare(iommu->clk); if (ret) { - dev_err(iommu->dev, "could not prepare iommu_clk\n"); clk_unprepare(iommu->pclk); - return ret; + return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n"); } r 
= platform_get_resource(pdev, IORESOURCE_MEM, 0); iommu->base = devm_ioremap_resource(iommu->dev, r); if (IS_ERR(iommu->base)) { - dev_err(iommu->dev, "could not get iommu base\n"); - ret = PTR_ERR(iommu->base); + ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n"); goto fail; } ioaddr = r->start; @@ -831,16 +823,4 @@ static struct platform_driver msm_iommu_driver = { .probe = msm_iommu_probe, .remove = msm_iommu_remove, }; - -static int __init msm_iommu_driver_init(void) -{ - int ret; - - ret = platform_driver_register(&msm_iommu_driver); - if (ret != 0) - pr_err("Failed to register IOMMU driver\n"); - - return ret; -} -subsys_initcall(msm_iommu_driver_init); - +builtin_platform_driver(msm_iommu_driver); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 77df61092be3..55c87d6f4a1f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -210,33 +210,27 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - for_each_m4u(data) { - if (pm_runtime_get_if_in_use(data->dev) <= 0) - continue; + unsigned long flags; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + data->plat_data->inv_sel_reg); - writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); - wmb(); /* Make sure the tlb flush all done */ - - pm_runtime_put(data->dev); - } + spin_lock_irqsave(&data->tlb_lock, flags); + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + data->plat_data->inv_sel_reg); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ + spin_unlock_irqrestore(&data->tlb_lock, flags); } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, size_t granule, struct mtk_iommu_data *data) { - bool has_pm = !!data->dev->pm_domain; unsigned long flags; int ret; u32 tmp; for_each_m4u(data) { - if (has_pm) { - if 
(pm_runtime_get_if_in_use(data->dev) <= 0) - continue; - } + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, @@ -252,17 +246,18 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, /* tlb sync */ ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp, tmp != 0, 10, 1000); + + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); + spin_unlock_irqrestore(&data->tlb_lock, flags); + if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); mtk_iommu_tlb_flush_all(data); } - /* Clear the CPE status */ - writel_relaxed(0, data->base + REG_MMU_CPE_DONE); - spin_unlock_irqrestore(&data->tlb_lock, flags); - if (has_pm) - pm_runtime_put(data->dev); + pm_runtime_put(data->dev); } } @@ -688,15 +683,6 @@ static void mtk_iommu_get_resv_regions(struct device *dev, static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, - .domain_free = mtk_iommu_domain_free, - .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, - .flush_iotlb_all = mtk_iommu_flush_iotlb_all, - .iotlb_sync = mtk_iommu_iotlb_sync, - .iotlb_sync_map = mtk_iommu_sync_map, - .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, @@ -705,6 +691,17 @@ static const struct iommu_ops mtk_iommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = mtk_iommu_unmap, + .flush_iotlb_all = mtk_iommu_flush_iotlb_all, + .iotlb_sync = mtk_iommu_iotlb_sync, + .iotlb_sync_map = 
mtk_iommu_sync_map, + .iova_to_phys = mtk_iommu_iova_to_phys, + .free = mtk_iommu_domain_free, + } }; static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) @@ -1014,6 +1011,13 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); + + /* + * Users may allocate dma buffer before they call pm_runtime_get, + * in which case it will lack the necessary tlb flush. + * Thus, make sure to update the tlb after each PM resume. + */ + mtk_iommu_tlb_flush_all(data); return 0; } diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 4052aad75a81..eaba793fa261 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -550,12 +550,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, - .domain_free = mtk_iommu_domain_free, - .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, - .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .probe_finalize = mtk_iommu_probe_finalize, .release_device = mtk_iommu_release_device, @@ -563,6 +557,14 @@ static const struct iommu_ops mtk_iommu_ops = { .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = mtk_iommu_unmap, + .iova_to_phys = mtk_iommu_iova_to_phys, + .free = mtk_iommu_domain_free, + } }; static const struct of_device_id mtk_iommu_of_ids[] = { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 
980e4af3f06b..4aab631ef517 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1734,16 +1734,18 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev) static const struct iommu_ops omap_iommu_ops = { .domain_alloc = omap_iommu_domain_alloc, - .domain_free = omap_iommu_domain_free, - .attach_dev = omap_iommu_attach_dev, - .detach_dev = omap_iommu_detach_dev, - .map = omap_iommu_map, - .unmap = omap_iommu_unmap, - .iova_to_phys = omap_iommu_iova_to_phys, .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = omap_iommu_attach_dev, + .detach_dev = omap_iommu_detach_dev, + .map = omap_iommu_map, + .unmap = omap_iommu_unmap, + .iova_to_phys = omap_iommu_iova_to_phys, + .free = omap_iommu_domain_free, + } }; static int __init omap_iommu_init(void) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7f23ad61c094..ab57c4b8fade 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1187,17 +1187,19 @@ static int rk_iommu_of_xlate(struct device *dev, static const struct iommu_ops rk_iommu_ops = { .domain_alloc = rk_iommu_domain_alloc, - .domain_free = rk_iommu_domain_free, - .attach_dev = rk_iommu_attach_device, - .detach_dev = rk_iommu_detach_device, - .map = rk_iommu_map, - .unmap = rk_iommu_unmap, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, - .iova_to_phys = rk_iommu_iova_to_phys, .device_group = rk_iommu_device_group, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = rk_iommu_attach_device, + .detach_dev = rk_iommu_detach_device, + .map = rk_iommu_map, + .unmap = rk_iommu_unmap, + .iova_to_phys = rk_iommu_iova_to_phys, + .free = rk_iommu_domain_free, + } 
}; static int rk_iommu_probe(struct platform_device *pdev) @@ -1407,9 +1409,4 @@ static struct platform_driver rk_iommu_driver = { .suppress_bind_attrs = true, }, }; - -static int __init rk_iommu_init(void) -{ - return platform_driver_register(&rk_iommu_driver); -} -subsys_initcall(rk_iommu_init); +builtin_platform_driver(rk_iommu_driver); diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 50860ebdd087..3833e86c6e7b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,16 +363,18 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, - .domain_free = s390_domain_free, - .attach_dev = s390_iommu_attach_device, - .detach_dev = s390_iommu_detach_device, - .map = s390_iommu_map, - .unmap = s390_iommu_unmap, - .iova_to_phys = s390_iommu_iova_to_phys, .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = s390_iommu_attach_device, + .detach_dev = s390_iommu_detach_device, + .map = s390_iommu_map, + .unmap = s390_iommu_unmap, + .iova_to_phys = s390_iommu_iova_to_phys, + .free = s390_domain_free, + } }; static int __init s390_iommu_init(void) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 27ac818b0354..bd409bab6286 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -416,20 +416,22 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { .domain_alloc = sprd_iommu_domain_alloc, - .domain_free = sprd_iommu_domain_free, - .attach_dev = sprd_iommu_attach_device, - .detach_dev = sprd_iommu_detach_device, - .map = sprd_iommu_map, - .unmap = sprd_iommu_unmap, - .iotlb_sync_map = sprd_iommu_sync_map, - .iotlb_sync = 
sprd_iommu_sync, - .iova_to_phys = sprd_iommu_iova_to_phys, .probe_device = sprd_iommu_probe_device, .release_device = sprd_iommu_release_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = sprd_iommu_attach_device, + .detach_dev = sprd_iommu_detach_device, + .map = sprd_iommu_map, + .unmap = sprd_iommu_unmap, + .iotlb_sync_map = sprd_iommu_sync_map, + .iotlb_sync = sprd_iommu_sync, + .iova_to_phys = sprd_iommu_iova_to_phys, + .free = sprd_iommu_domain_free, + } }; static const struct of_device_id sprd_iommu_of_match[] = { diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 92997021e188..c54ab477b8fd 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -760,19 +760,21 @@ static int sun50i_iommu_of_xlate(struct device *dev, static const struct iommu_ops sun50i_iommu_ops = { .pgsize_bitmap = SZ_4K, - .attach_dev = sun50i_iommu_attach_device, - .detach_dev = sun50i_iommu_detach_device, .device_group = sun50i_iommu_device_group, .domain_alloc = sun50i_iommu_domain_alloc, - .domain_free = sun50i_iommu_domain_free, - .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, - .iotlb_sync = sun50i_iommu_iotlb_sync, - .iova_to_phys = sun50i_iommu_iova_to_phys, - .map = sun50i_iommu_map, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, .release_device = sun50i_iommu_release_device, - .unmap = sun50i_iommu_unmap, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = sun50i_iommu_attach_device, + .detach_dev = sun50i_iommu_detach_device, + .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync = sun50i_iommu_iotlb_sync, + .iova_to_phys = sun50i_iommu_iova_to_phys, + .map = sun50i_iommu_map, + .unmap = sun50i_iommu_unmap, + .free = sun50i_iommu_domain_free, + } }; static void 
sun50i_iommu_report_fault(struct sun50i_iommu *iommu, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6a358f92c7e5..a6700a40a6f8 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -238,11 +238,6 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, return pte & GART_PAGE_MASK; } -static bool gart_iommu_capable(enum iommu_cap cap) -{ - return false; -} - static struct iommu_device *gart_iommu_probe_device(struct device *dev) { if (!dev_iommu_fwspec_get(dev)) @@ -276,21 +271,22 @@ static void gart_iommu_sync(struct iommu_domain *domain, } static const struct iommu_ops gart_iommu_ops = { - .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, - .domain_free = gart_iommu_domain_free, - .attach_dev = gart_iommu_attach_dev, - .detach_dev = gart_iommu_detach_dev, .probe_device = gart_iommu_probe_device, .release_device = gart_iommu_release_device, .device_group = generic_device_group, - .map = gart_iommu_map, - .unmap = gart_iommu_unmap, - .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, - .iotlb_sync_map = gart_iommu_sync_map, - .iotlb_sync = gart_iommu_sync, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = gart_iommu_attach_dev, + .detach_dev = gart_iommu_detach_dev, + .map = gart_iommu_map, + .unmap = gart_iommu_unmap, + .iova_to_phys = gart_iommu_iova_to_phys, + .iotlb_sync_map = gart_iommu_sync_map, + .iotlb_sync = gart_iommu_sync, + .free = gart_iommu_domain_free, + } }; int tegra_gart_suspend(struct gart_device *gart) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 2561ce8a2ce8..1fea68e551f1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -272,11 +272,6 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) clear_bit(id, smmu->asids); } -static bool tegra_smmu_capable(enum iommu_cap cap) -{ - return false; -} - 
static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) { struct tegra_smmu_as *as; @@ -969,19 +964,20 @@ static int tegra_smmu_of_xlate(struct device *dev, } static const struct iommu_ops tegra_smmu_ops = { - .capable = tegra_smmu_capable, .domain_alloc = tegra_smmu_domain_alloc, - .domain_free = tegra_smmu_domain_free, - .attach_dev = tegra_smmu_attach_dev, - .detach_dev = tegra_smmu_detach_dev, .probe_device = tegra_smmu_probe_device, .release_device = tegra_smmu_release_device, .device_group = tegra_smmu_device_group, - .map = tegra_smmu_map, - .unmap = tegra_smmu_unmap, - .iova_to_phys = tegra_smmu_iova_to_phys, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = tegra_smmu_attach_dev, + .detach_dev = tegra_smmu_detach_dev, + .map = tegra_smmu_map, + .unmap = tegra_smmu_unmap, + .iova_to_phys = tegra_smmu_iova_to_phys, + .free = tegra_smmu_domain_free, + } }; static void tegra_smmu_ahb_enable(void) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index f2aa34f57454..25be4b822aa0 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1008,12 +1008,6 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) static struct iommu_ops viommu_ops = { .domain_alloc = viommu_domain_alloc, - .domain_free = viommu_domain_free, - .attach_dev = viommu_attach_dev, - .map = viommu_map, - .unmap = viommu_unmap, - .iova_to_phys = viommu_iova_to_phys, - .iotlb_sync = viommu_iotlb_sync, .probe_device = viommu_probe_device, .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, @@ -1022,6 +1016,14 @@ static struct iommu_ops viommu_ops = { .put_resv_regions = generic_iommu_put_resv_regions, .of_xlate = viommu_of_xlate, .owner = THIS_MODULE, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = viommu_attach_dev, + .map = viommu_map, + .unmap = viommu_unmap, + .iova_to_phys 
= viommu_iova_to_phys, + .iotlb_sync = viommu_iotlb_sync, + .free = viommu_domain_free, + } }; static int viommu_init_vqs(struct viommu_dev *viommu) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c1fdf2896021..1943a007e2d5 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -756,7 +756,7 @@ MODULE_DEVICE_TABLE (pci, pci_ids); /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver macio_pci_driver = { - .name = (char *) "macio", + .name = "macio", .id_table = pci_ids, .probe = macio_pci_probe, diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index cd267392289c..3d0d0b9d471d 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -21,6 +21,7 @@ #ifdef CONFIG_PPC #include #include +#include #else #include #include diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index f69b964b9952..e2228b6fc09b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -149,6 +149,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), CRASHTYPE(REFCOUNT_INC_OVERFLOW), CRASHTYPE(REFCOUNT_ADD_OVERFLOW), CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d6137c70ebbe..305fc2ec3f25 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -106,6 +106,7 @@ void __init lkdtm_perms_init(void); void lkdtm_WRITE_RO(void); void lkdtm_WRITE_RO_AFTER_INIT(void); void lkdtm_WRITE_KERN(void); +void lkdtm_WRITE_OPD(void); void lkdtm_EXEC_DATA(void); void lkdtm_EXEC_STACK(void); void lkdtm_EXEC_KMALLOC(void); diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 2dede2ef658f..2c6aba3ff32b 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -10,6 +10,7 @@ #include #include #include +#include /* 
Whether or not to fill the target memory area with do_nothing(). */ #define CODE_WRITE true @@ -21,7 +22,7 @@ /* This is non-const, so it will end up in the .data section. */ static u8 data_area[EXEC_SIZE]; -/* This is cost, so it will end up in the .rodata section. */ +/* This is const, so it will end up in the .rodata section. */ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ @@ -31,31 +32,51 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; * This just returns to the caller. It is designed to be copied into * non-executable memory regions. */ -static void do_nothing(void) +static noinline void do_nothing(void) { return; } /* Must immediately follow do_nothing for size calculuations to work out. */ -static void do_overwritten(void) +static noinline void do_overwritten(void) { pr_info("do_overwritten wasn't overwritten!\n"); return; } +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + static noinline void execute_location(void *dst, bool write) { - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing, EXEC_SIZE); + memcpy(dst, do_nothing_text, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); 
} @@ -65,16 +86,19 @@ static void execute_user_location(void *dst) int copied; /* Intentionally crossing kernel/user memory boundary. */ - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); - copied = access_process_vm(current, (unsigned long)dst, do_nothing, + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -113,8 +137,9 @@ void lkdtm_WRITE_KERN(void) size_t size; volatile unsigned char *ptr; - size = (unsigned long)do_overwritten - (unsigned long)do_nothing; - ptr = (unsigned char *)do_overwritten; + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy((void *)ptr, (unsigned char *)do_nothing, size); @@ -124,6 +149,23 @@ void lkdtm_WRITE_KERN(void) do_overwritten(); } +void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + void lkdtm_EXEC_DATA(void) { execute_location(data_area, CODE_WRITE); @@ -151,7 +193,8 @@ void lkdtm_EXEC_VMALLOC(void) void lkdtm_EXEC_RODATA(void) { - 
execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); } void lkdtm_EXEC_USERSPACE(void) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 0a3c80ba66be..e6991ff67526 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void) module_init(rpadlpar_io_init); module_exit(rpadlpar_io_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots"); diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index d05ca6ebbb9d..afdcb91601d2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -56,6 +56,36 @@ config ARM_PMU Say y if you want to use CPU performance monitors on ARM-based systems. +config RISCV_PMU + depends on RISCV + bool "RISC-V PMU framework" + default y + help + Say y if you want to use CPU performance monitors on RISCV-based + systems. This provides the core PMU framework that abstracts common + PMU functionalities in a core library so that different PMU drivers + can reuse it. + +config RISCV_PMU_LEGACY + depends on RISCV_PMU + bool "RISC-V legacy PMU implementation" + default y + help + Say y if you want to use the legacy CPU performance monitor + implementation on RISC-V based systems. This only allows counting + of cycle/instruction counter and doesn't support counter overflow, + or programmable counters. It will be removed in future. + +config RISCV_PMU_SBI + depends on RISCV_PMU && RISCV_SBI + bool "RISC-V PMU based on SBI PMU extension" + default y + help + Say y if you want to use the CPU performance monitor + using SBI PMU extension on RISC-V based systems. This option provides + full perf feature support i.e. counter overflow, privilege mode + filtering, counter configuration. 
+ config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 4f43080ec54e..57a279c61df5 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -10,6 +10,9 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o +obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o +obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o +obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index c49108a72865..00d4c45a8017 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -654,6 +654,7 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) { struct smmu_pmu *smmu_pmu = data; + DECLARE_BITMAP(ovs, BITS_PER_TYPE(u64)); u64 ovsr; unsigned int idx; @@ -663,7 +664,8 @@ static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); - for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) { + bitmap_from_u64(ovs, ovsr); + for_each_set_bit(idx, ovs, smmu_pmu->num_counters) { struct perf_event *event = smmu_pmu->events[idx]; struct hw_perf_event *hwc; diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c new file mode 100644 index 000000000000..b2b8d2074ed0 --- /dev/null +++ b/drivers/perf/riscv_pmu.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This implementation is based on old RISC-V perf and ARM perf event code + * which are in turn based on sparc64 and x86 code. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + switchcase_csr_read_32(CSR_CYCLEH, ret) + default : + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +/* + * Read the CSR of a corresponding counter. + */ +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr) +{ + if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H || + (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) { + pr_err("Invalid performance counter csr %lx\n", csr); + return -EINVAL; + } + + return csr_read_num(csr); +} + +u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) +{ + int cwidth; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (!rvpmu->ctr_get_width) + /** + * If the pmu driver doesn't support counter width, set it to default + * maximum allowed by the specification. 
+ */ + cwidth = 63; + else { + if (hwc->idx == -1) + /* Handle init case where idx is not initialized yet */ + cwidth = rvpmu->ctr_get_width(0); + else + cwidth = rvpmu->ctr_get_width(hwc->idx); + } + + return GENMASK_ULL(cwidth, 0); +} + +u64 riscv_pmu_event_update(struct perf_event *event) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + unsigned long cmask; + u64 oldval, delta; + + if (!rvpmu->ctr_read) + return 0; + + cmask = riscv_pmu_ctr_get_width_mask(event); + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = rvpmu->ctr_read(event); + oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count); + } while (oldval != prev_raw_count); + + delta = (new_raw_count - prev_raw_count) & cmask; + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return delta; +} + +static void riscv_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + + if (!(hwc->state & PERF_HES_STOPPED)) { + if (rvpmu->ctr_stop) { + rvpmu->ctr_stop(event, 0); + hwc->state |= PERF_HES_STOPPED; + } + riscv_pmu_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +int riscv_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int overflow = 0; + uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + /* + * Limit the maximum period to prevent the counter value + * from overtaking the one 
we are about to program. In + * effect we are reducing max_period to account for + * interrupt latency (and we are being very conservative). + */ + if (left > (max_period >> 1)) + left = (max_period >> 1); + + local64_set(&hwc->prev_count, (u64)-left); + perf_event_update_userpage(event); + + return overflow; +} + +static void riscv_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); + u64 init_val; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + hwc->state = 0; + riscv_pmu_event_set_period(event); + init_val = local64_read(&hwc->prev_count) & max_period; + rvpmu->ctr_start(event, init_val); + perf_event_update_userpage(event); +} + +static int riscv_pmu_add(struct perf_event *event, int flags) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = rvpmu->ctr_get_idx(event); + if (idx < 0) + return idx; + + hwc->idx = idx; + cpuc->events[idx] = event; + cpuc->n_events++; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + riscv_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. 
*/ + perf_event_update_userpage(event); + + return 0; +} + +static void riscv_pmu_del(struct perf_event *event, int flags) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + + riscv_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[hwc->idx] = NULL; + /* The firmware need to reset the counter mapping */ + if (rvpmu->ctr_stop) + rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET); + cpuc->n_events--; + if (rvpmu->ctr_clear_idx) + rvpmu->ctr_clear_idx(event); + perf_event_update_userpage(event); + hwc->idx = -1; +} + +static void riscv_pmu_read(struct perf_event *event) +{ + riscv_pmu_event_update(event); +} + +static int riscv_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + int mapped_event; + u64 event_config = 0; + uint64_t cmask; + + hwc->flags = 0; + mapped_event = rvpmu->event_map(event, &event_config); + if (mapped_event < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapped_event; + } + + /* + * idx is set to -1 because the index of a general event should not be + * decided until binding to some counter in pmu->add(). + * config will contain the information about counter CSR + * the idx will contain the counter index + */ + hwc->config = event_config; + hwc->idx = -1; + hwc->event_base = mapped_event; + + if (!is_sampling_event(event)) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. 
+ */ + cmask = riscv_pmu_ctr_get_width_mask(event); + hwc->sample_period = cmask >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +struct riscv_pmu *riscv_pmu_alloc(void) +{ + struct riscv_pmu *pmu; + int cpuid, i; + struct cpu_hw_events *cpuc; + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + goto out; + + pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL); + if (!pmu->hw_events) { + pr_info("failed to allocate per-cpu PMU data.\n"); + goto out_free_pmu; + } + + for_each_possible_cpu(cpuid) { + cpuc = per_cpu_ptr(pmu->hw_events, cpuid); + cpuc->n_events = 0; + for (i = 0; i < RISCV_MAX_COUNTERS; i++) + cpuc->events[i] = NULL; + } + pmu->pmu = (struct pmu) { + .event_init = riscv_pmu_event_init, + .add = riscv_pmu_add, + .del = riscv_pmu_del, + .start = riscv_pmu_start, + .stop = riscv_pmu_stop, + .read = riscv_pmu_read, + }; + + return pmu; + +out_free_pmu: + kfree(pmu); +out: + return NULL; +} diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c new file mode 100644 index 000000000000..342778782359 --- /dev/null +++ b/drivers/perf/riscv_pmu_legacy.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This implementation is based on old RISC-V perf and ARM perf event code + * which are in turn based on sparc64 and x86 code. 
+ */ + +#include +#include +#include + +#define RISCV_PMU_LEGACY_CYCLE 0 +#define RISCV_PMU_LEGACY_INSTRET 1 +#define RISCV_PMU_LEGACY_NUM_CTR 2 + +static bool pmu_init_done; + +static int pmu_legacy_ctr_get_idx(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (event->attr.type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + if (attr->config == PERF_COUNT_HW_CPU_CYCLES) + return RISCV_PMU_LEGACY_CYCLE; + else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS) + return RISCV_PMU_LEGACY_INSTRET; + else + return -EOPNOTSUPP; +} + +/* For legacy config & counter index are same */ +static int pmu_legacy_event_map(struct perf_event *event, u64 *config) +{ + return pmu_legacy_ctr_get_idx(event); +} + +static u64 pmu_legacy_read_ctr(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 val; + + if (idx == RISCV_PMU_LEGACY_CYCLE) { + val = riscv_pmu_ctr_read_csr(CSR_CYCLE); + if (IS_ENABLED(CONFIG_32BIT)) + val = (u64)riscv_pmu_ctr_read_csr(CSR_CYCLEH) << 32 | val; + } else if (idx == RISCV_PMU_LEGACY_INSTRET) { + val = riscv_pmu_ctr_read_csr(CSR_INSTRET); + if (IS_ENABLED(CONFIG_32BIT)) + val = ((u64)riscv_pmu_ctr_read_csr(CSR_INSTRETH)) << 32 | val; + } else + return 0; + + return val; +} + +static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival) +{ + struct hw_perf_event *hwc = &event->hw; + u64 initial_val = pmu_legacy_read_ctr(event); + + /** + * The legacy method doesn't really have a start/stop method. + * It also can not update the counter with a initial value. + * But we still need to set the prev_count so that read() can compute + * the delta. Just use the current counter value to set the prev_count. + */ + local64_set(&hwc->prev_count, initial_val); +} + +/** + * This is just a simple implementation to allow legacy implementations + * compatible with new RISC-V PMU driver framework. + * This driver only allows reading two counters i.e CYCLE & INSTRET. 
+ * However, it can not start or stop the counter. Thus, it is not very useful + * will be removed in future. + */ +static void pmu_legacy_init(struct riscv_pmu *pmu) +{ + pr_info("Legacy PMU implementation is available\n"); + + pmu->num_counters = RISCV_PMU_LEGACY_NUM_CTR; + pmu->ctr_start = pmu_legacy_ctr_start; + pmu->ctr_stop = NULL; + pmu->event_map = pmu_legacy_event_map; + pmu->ctr_get_idx = pmu_legacy_ctr_get_idx; + pmu->ctr_get_width = NULL; + pmu->ctr_clear_idx = NULL; + pmu->ctr_read = pmu_legacy_read_ctr; + + perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); +} + +static int pmu_legacy_device_probe(struct platform_device *pdev) +{ + struct riscv_pmu *pmu = NULL; + + pmu = riscv_pmu_alloc(); + if (!pmu) + return -ENOMEM; + pmu_legacy_init(pmu); + + return 0; +} + +static struct platform_driver pmu_legacy_driver = { + .probe = pmu_legacy_device_probe, + .driver = { + .name = RISCV_PMU_LEGACY_PDEV_NAME, + }, +}; + +static int __init riscv_pmu_legacy_devinit(void) +{ + int ret; + struct platform_device *pdev; + + if (likely(pmu_init_done)) + return 0; + + ret = platform_driver_register(&pmu_legacy_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple(RISCV_PMU_LEGACY_PDEV_NAME, -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&pmu_legacy_driver); + return PTR_ERR(pdev); + } + + return ret; +} +late_initcall(riscv_pmu_legacy_devinit); + +void riscv_pmu_legacy_skip_init(void) +{ + pmu_init_done = true; +} diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c new file mode 100644 index 000000000000..a1317a483512 --- /dev/null +++ b/drivers/perf/riscv_pmu_sbi.c @@ -0,0 +1,790 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V performance counter support. + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * This code is based on ARM perf event code which is in turn based on + * sparc64 and x86 code. 
+ */ + +#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +/** + * RISC-V doesn't have hetergenous harts yet. This need to be part of + * per_cpu in case of harts with different pmu counters + */ +static union sbi_pmu_ctr_info *pmu_ctr_list; +static unsigned int riscv_pmu_irq; + +struct sbi_pmu_event_data { + union { + union { + struct hw_gen_event { + uint32_t event_code:16; + uint32_t event_type:4; + uint32_t reserved:12; + } hw_gen_event; + struct hw_cache_event { + uint32_t result_id:1; + uint32_t op_id:2; + uint32_t cache_id:13; + uint32_t event_type:4; + uint32_t reserved:12; + } hw_cache_event; + }; + uint32_t event_idx; + }; +}; + +static const struct sbi_pmu_event_data pmu_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_CPU_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = { + SBI_PMU_HW_INSTRUCTIONS, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = { + SBI_PMU_HW_CACHE_REFERENCES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = { + SBI_PMU_HW_CACHE_MISSES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = { + SBI_PMU_HW_BRANCH_INSTRUCTIONS, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = { + SBI_PMU_HW_BRANCH_MISSES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_BUS_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = { + SBI_PMU_HW_STALLED_CYCLES_FRONTEND, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 
{.hw_gen_event = { + SBI_PMU_HW_STALLED_CYCLES_BACKEND, + SBI_PMU_EVENT_TYPE_HW, 0}}, + [PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = { + SBI_PMU_HW_REF_CPU_CYCLES, + SBI_PMU_EVENT_TYPE_HW, 0}}, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x +static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +[PERF_COUNT_HW_CACHE_OP_MAX] +[PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ), + C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + 
[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), 
C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), + C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), + C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, + }, + }, +}; + +static int pmu_sbi_ctr_get_width(int idx) +{ + return pmu_ctr_list[idx].width; +} + +static bool 
pmu_sbi_ctr_is_fw(int cidx) +{ + union sbi_pmu_ctr_info *info; + + info = &pmu_ctr_list[cidx]; + if (!info) + return false; + + return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; +} + +static int pmu_sbi_ctr_get_idx(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + struct sbiret ret; + int idx; + uint64_t cbase = 0; + uint64_t cmask = GENMASK_ULL(rvpmu->num_counters - 1, 0); + unsigned long cflags = 0; + + if (event->attr.exclude_kernel) + cflags |= SBI_PMU_CFG_FLAG_SET_SINH; + if (event->attr.exclude_user) + cflags |= SBI_PMU_CFG_FLAG_SET_UINH; + + /* retrieve the available counter index */ + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, hwc->event_base, hwc->config, 0); + if (ret.error) { + pr_debug("Not able to find a counter for event %lx config %llx\n", + hwc->event_base, hwc->config); + return sbi_err_map_linux_errno(ret.error); + } + + idx = ret.value; + if (idx >= rvpmu->num_counters || !pmu_ctr_list[idx].value) + return -ENOENT; + + /* Additional sanity check for the counter id */ + if (pmu_sbi_ctr_is_fw(idx)) { + if (!test_and_set_bit(idx, cpuc->used_fw_ctrs)) + return idx; + } else { + if (!test_and_set_bit(idx, cpuc->used_hw_ctrs)) + return idx; + } + + return -ENOENT; +} + +static void pmu_sbi_ctr_clear_idx(struct perf_event *event) +{ + + struct hw_perf_event *hwc = &event->hw; + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + int idx = hwc->idx; + + if (pmu_sbi_ctr_is_fw(idx)) + clear_bit(idx, cpuc->used_fw_ctrs); + else + clear_bit(idx, cpuc->used_hw_ctrs); +} + +static int pmu_event_find_cache(u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 
0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx; + + return ret; +} + +static bool pmu_sbi_is_fw_event(struct perf_event *event) +{ + u32 type = event->attr.type; + u64 config = event->attr.config; + + if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1)) + return true; + else + return false; +} + +static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) +{ + u32 type = event->attr.type; + u64 config = event->attr.config; + int bSoftware; + u64 raw_config_val; + int ret; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[event->attr.config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI specification, the upper 16 bits must be unused for + * a raw event. Use the MSB (63b) to distinguish between hardware + * raw event and firmware events. 
+ */ + bSoftware = config >> 63; + raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK; + if (bSoftware) { + if (raw_config_val < SBI_PMU_FW_MAX) + ret = (raw_config_val & 0xFFFF) | + (SBI_PMU_EVENT_TYPE_FW << 16); + else + return -EINVAL; + } else { + ret = RISCV_PMU_RAW_EVENT_IDX; + *econfig = raw_config_val; + } + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/* + * Read the current value of the counter bound to @event. + * + * Firmware events are read through the SBI_EXT_PMU_COUNTER_FW_READ call; + * if that call reports an error the function returns 0. All other events + * are read directly from the CSR recorded in pmu_ctr_list[] for the + * counter index. On CONFIG_32BIT the CSR at (csr + 0x80) is read as well + * and placed in bits 63:32 of the result, so the shift must be exactly 32. + */ +static u64 pmu_sbi_ctr_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct sbiret ret; + union sbi_pmu_ctr_info info; + u64 val = 0; + + if (pmu_sbi_is_fw_event(event)) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, + hwc->idx, 0, 0, 0, 0, 0); + if (!ret.error) + val = ret.value; + } else { + info = pmu_ctr_list[idx]; + val = riscv_pmu_ctr_read_csr(info.csr); + if (IS_ENABLED(CONFIG_32BIT)) + val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32 | val; + } + + return val; +} + +static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) +{ + struct sbiret ret; + struct hw_perf_event *hwc = &event->hw; + unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, + 1, flag, ival, ival >> 32, 0); + if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED)) + pr_err("Starting counter idx %d failed with error %d\n", + hwc->idx, sbi_err_map_linux_errno(ret.error)); +} + +static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) +{ + struct sbiret ret; + struct hw_perf_event *hwc = &event->hw; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); + if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && + flag != SBI_PMU_STOP_FLAG_RESET) + pr_err("Stopping counter idx %d failed with error %d\n", + hwc->idx, sbi_err_map_linux_errno(ret.error)); +} + +static int pmu_sbi_find_num_ctrs(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); + if
(!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} + +/* + * Allocate pmu_ctr_list[] with @nctr entries and cache the counter info + * word returned by SBI_EXT_PMU_COUNTER_GET_INFO for every counter index + * in [0, @nctr). Indices whose query fails are skipped and keep the + * zeroed entry from kcalloc(). + * + * Returns 0 on success or -ENOMEM if the list cannot be allocated. + */ +static int pmu_sbi_get_ctrinfo(int nctr) +{ + struct sbiret ret; + int i, num_hw_ctr = 0, num_fw_ctr = 0; + union sbi_pmu_ctr_info cinfo; + + pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL); + if (!pmu_ctr_list) + return -ENOMEM; + + /* The list holds exactly nctr entries; index nctr would be out of bounds */ + for (i = 0; i < nctr; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + if (ret.error) + /* The logical counter ids are not expected to be contiguous */ + continue; + cinfo.value = ret.value; + if (cinfo.type == SBI_PMU_CTR_TYPE_FW) + num_fw_ctr++; + else + num_hw_ctr++; + pmu_ctr_list[i].value = cinfo.value; + } + + pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr); + + return 0; +} + +static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) +{ + /** + * No need to check the error because we are disabling all the counters + * which may include counters that are not enabled yet. + */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0); +} + +static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) +{ + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + + /* No need to check the error here as we can't do anything about the error */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, + cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0); +} + +/** + * This function starts all the used counters in two step approach. + * Any counter that did not overflow can be start in a single step + * while the overflowed counters need to be started with updated initialization + * value.
+ */ +static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, + unsigned long ctr_ovf_mask) +{ + int idx = 0; + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + struct perf_event *event; + unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; + unsigned long ctr_start_mask = 0; + uint64_t max_period; + struct hw_perf_event *hwc; + u64 init_val = 0; + + ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask; + + /* Start all the counters that did not overflow in a single shot */ + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask, + 0, 0, 0, 0); + + /* Reinitialize and start all the counter that overflowed */ + while (ctr_ovf_mask) { + if (ctr_ovf_mask & 0x01) { + event = cpu_hw_evt->events[idx]; + hwc = &event->hw; + max_period = riscv_pmu_ctr_get_width_mask(event); + init_val = local64_read(&hwc->prev_count) & max_period; + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, + flag, init_val, 0, 0); + } + ctr_ovf_mask = ctr_ovf_mask >> 1; + idx++; + } +} + +static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) +{ + struct perf_sample_data data; + struct pt_regs *regs; + struct hw_perf_event *hw_evt; + union sbi_pmu_ctr_info *info; + int lidx, hidx, fidx; + struct riscv_pmu *pmu; + struct perf_event *event; + unsigned long overflow; + unsigned long overflowed_ctrs = 0; + struct cpu_hw_events *cpu_hw_evt = dev; + + if (WARN_ON_ONCE(!cpu_hw_evt)) + return IRQ_NONE; + + /* Firmware counter don't support overflow yet */ + fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + event = cpu_hw_evt->events[fidx]; + if (!event) { + csr_clear(CSR_SIP, SIP_LCOFIP); + return IRQ_NONE; + } + + pmu = to_riscv_pmu(event->pmu); + pmu_sbi_stop_hw_ctrs(pmu); + + /* Overflow status register should only be read after counter are stopped */ + overflow = csr_read(CSR_SSCOUNTOVF); + + /** + * Overflow interrupt pending bit should only be cleared after stopping + * all the counters to avoid any race 
condition. + */ + csr_clear(CSR_SIP, SIP_LCOFIP); + + /* No overflow bit is set */ + if (!overflow) + return IRQ_NONE; + + regs = get_irq_regs(); + + for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) { + struct perf_event *event = cpu_hw_evt->events[lidx]; + + /* Skip if invalid event or user did not request a sampling */ + if (!event || !is_sampling_event(event)) + continue; + + info = &pmu_ctr_list[lidx]; + /* Do a sanity check */ + if (!info || info->type != SBI_PMU_CTR_TYPE_HW) + continue; + + /* compute hardware counter index */ + hidx = info->csr - CSR_CYCLE; + /* check if the corresponding bit is set in sscountovf */ + if (!(overflow & (1 << hidx))) + continue; + + /* + * Keep a track of overflowed counters so that they can be started + * with updated initial value. + */ + overflowed_ctrs |= 1 << lidx; + hw_evt = &event->hw; + riscv_pmu_event_update(event); + perf_sample_data_init(&data, 0, hw_evt->last_period); + if (riscv_pmu_event_set_period(event)) { + /* + * Unlike other ISAs, RISC-V don't have to disable interrupts + * to avoid throttling here. As per the specification, the + * interrupt remains disabled until the OF bit is set. + * Interrupts are enabled again only during the start. + * TODO: We will need to stop the guest counters once + * virtualization support is added. 
+ */ + perf_event_overflow(event, &data, regs); + } + } + pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs); + + return IRQ_HANDLED; +} + +static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node); + struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); + + /* Enable the access for TIME csr only from the user mode now */ + csr_write(CSR_SCOUNTEREN, 0x2); + + /* Stop all the counters so that they can be enabled from perf */ + pmu_sbi_stop_all(pmu); + + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + cpu_hw_evt->irq = riscv_pmu_irq; + csr_clear(CSR_IP, BIT(RV_IRQ_PMU)); + csr_set(CSR_IE, BIT(RV_IRQ_PMU)); + enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); + } + + return 0; +} + +static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) +{ + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + disable_percpu_irq(riscv_pmu_irq); + csr_clear(CSR_IE, BIT(RV_IRQ_PMU)); + } + + /* Disable all counters access for user mode now */ + csr_write(CSR_SCOUNTEREN, 0x0); + + return 0; +} + +static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev) +{ + int ret; + struct cpu_hw_events __percpu *hw_events = pmu->hw_events; + struct device_node *cpu, *child; + struct irq_domain *domain = NULL; + + if (!riscv_isa_extension_available(NULL, SSCOFPMF)) + return -EOPNOTSUPP; + + for_each_of_cpu_node(cpu) { + child = of_get_compatible_child(cpu, "riscv,cpu-intc"); + if (!child) { + pr_err("Failed to find INTC node\n"); + return -ENODEV; + } + domain = irq_find_host(child); + of_node_put(child); + if (domain) + break; + } + if (!domain) { + pr_err("Failed to find INTC IRQ root domain\n"); + return -ENODEV; + } + + riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU); + if (!riscv_pmu_irq) { + pr_err("Failed to map PMU interrupt for node\n"); + return -ENODEV; + } + + ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", 
hw_events); + if (ret) { + pr_err("registering percpu irq failed [%d]\n", ret); + return ret; + } + + return 0; +} + +static int pmu_sbi_device_probe(struct platform_device *pdev) +{ + struct riscv_pmu *pmu = NULL; + int num_counters; + int ret = -ENODEV; + + pr_info("SBI PMU extension is available\n"); + pmu = riscv_pmu_alloc(); + if (!pmu) + return -ENOMEM; + + num_counters = pmu_sbi_find_num_ctrs(); + if (num_counters < 0) { + pr_err("SBI PMU extension doesn't provide any counters\n"); + goto out_free; + } + + /* cache all the information about counters now */ + if (pmu_sbi_get_ctrinfo(num_counters)) + goto out_free; + + ret = pmu_sbi_setup_irqs(pmu, pdev); + if (ret < 0) { + pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n"); + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; + } + pmu->num_counters = num_counters; + pmu->ctr_start = pmu_sbi_ctr_start; + pmu->ctr_stop = pmu_sbi_ctr_stop; + pmu->event_map = pmu_sbi_event_map; + pmu->ctr_get_idx = pmu_sbi_ctr_get_idx; + pmu->ctr_get_width = pmu_sbi_ctr_get_width; + pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx; + pmu->ctr_read = pmu_sbi_ctr_read; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); + if (ret) + return ret; + + ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); + if (ret) { + cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); + return ret; + } + + return 0; + +out_free: + kfree(pmu); + return ret; +} + +static struct platform_driver pmu_sbi_driver = { + .probe = pmu_sbi_device_probe, + .driver = { + .name = RISCV_PMU_PDEV_NAME, + }, +}; + +static int __init pmu_sbi_devinit(void) +{ + int ret; + struct platform_device *pdev; + + if (sbi_spec_version < sbi_mk_version(0, 3) || + sbi_probe_extension(SBI_EXT_PMU) <= 0) { + return 0; + } + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, + "perf/riscv/pmu:starting", + pmu_sbi_starting_cpu, 
pmu_sbi_dying_cpu); + if (ret) { + pr_err("CPU hotplug notifier could not be registered: %d\n", + ret); + return ret; + } + + ret = platform_driver_register(&pmu_sbi_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&pmu_sbi_driver); + return PTR_ERR(pdev); + } + + /* Notify legacy implementation that SBI pmu is available*/ + riscv_pmu_legacy_skip_init(); + + return ret; +} +device_initcall(pmu_sbi_devinit) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index db5987281010..e9edf3b6ed7c 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include #include diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c index 6bf1058de873..36bbd6b6e210 100644 --- a/drivers/s390/char/diag_ftp.c +++ b/drivers/s390/char/diag_ftp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 8a30e77db469..86dd2cde0f78 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 998933e83610..15971997cfe2 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 2007,2012 * - * Author(s): Heiko Carstens , - * Peter Oberparleiter + * Author(s): Peter Oberparleiter */ #define KMSG_COMPONENT "sclp_cmd" diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index c365110f2dae..10383e936461 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 
2007 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "sclp_config" diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 215d4b4a5ff5..e915a343fcf5 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -184,7 +184,7 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) sccb->evbuf.asa_size = SDIAS_ASA_SIZE_64; sccb->evbuf.event_status = 0; sccb->evbuf.blk_cnt = nr_blks; - sccb->evbuf.asa = (unsigned long)dest; + sccb->evbuf.asa = __pa(dest); sccb->evbuf.fbn = start_blk; sccb->evbuf.lbn = 0; sccb->evbuf.dbs = 1; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3ba2d934a3e8..516783ba950f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -229,8 +229,7 @@ static int __init zcore_reipl_init(void) rc = memcpy_hsa_kernel(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); else - rc = memcpy_real(zcore_ipl_block, (void *) ipib_info.ipib, - PAGE_SIZE); + rc = memcpy_real(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); if (rc || (__force u32)csum_partial(zcore_ipl_block, zcore_ipl_block->hdr.len, 0) != ipib_info.checksum) { TRACE("Checksum does not match\n"); diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index e56535c99888..c0ed364bf446 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -44,7 +44,7 @@ int register_adapter_interrupt(struct airq_struct *airq) if (!airq->handler || airq->isc > MAX_ISC) return -EINVAL; if (!airq->lsi_ptr) { - airq->lsi_ptr = kzalloc(1, GFP_KERNEL); + airq->lsi_ptr = cio_dma_zalloc(1); if (!airq->lsi_ptr) return -ENOMEM; airq->flags |= AIRQ_PTR_ALLOCATED; @@ -79,7 +79,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq) synchronize_rcu(); isc_unregister(airq->isc); if (airq->flags & AIRQ_PTR_ALLOCATED) { - kfree(airq->lsi_ptr); + cio_dma_free(airq->lsi_ptr, 1); airq->lsi_ptr = NULL; airq->flags &= ~AIRQ_PTR_ALLOCATED; } diff --git a/drivers/s390/cio/crw.c b/drivers/s390/cio/crw.c index 
fc285ca41141..7b02a6349c4d 100644 --- a/drivers/s390/cio/crw.c +++ b/drivers/s390/cio/crw.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 180913007824..acf1edd36549 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -5,6 +5,7 @@ #include +#include #include #include #include diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 1986243f9cd3..fdf16cb70881 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ap_bus.h" #include "ap_debug.h" @@ -91,6 +92,7 @@ static atomic64_t ap_bindings_complete_count = ATOMIC64_INIT(0); static DECLARE_COMPLETION(ap_init_apqn_bindings_complete); static struct ap_config_info *ap_qci_info; +static struct ap_config_info *ap_qci_info_old; /* * AP bus related debug feature things. @@ -228,9 +230,14 @@ static void __init ap_init_qci_info(void) ap_qci_info = kzalloc(sizeof(*ap_qci_info), GFP_KERNEL); if (!ap_qci_info) return; + ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); + if (!ap_qci_info_old) + return; if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); + kfree(ap_qci_info_old); ap_qci_info = NULL; + ap_qci_info_old = NULL; return; } AP_DBF_INFO("%s successful fetched initial qci info\n", __func__); @@ -247,6 +254,8 @@ static void __init ap_init_qci_info(void) __func__, ap_max_domain_id); } } + + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); } /* @@ -314,7 +323,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); * false otherwise. 
*/ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, - int *q_depth, int *q_ml, bool *q_decfg) + int *q_depth, int *q_ml, bool *q_decfg, bool *q_cstop) { struct ap_queue_status status; union { @@ -357,6 +366,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, *q_depth = tapq_info.tapq_gr2.qd; *q_ml = tapq_info.tapq_gr2.ml; *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; switch (*q_type) { /* For CEX2 and CEX3 the available functions * are not reflected by the facilities bits. @@ -1067,6 +1077,23 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits) return 0; } +static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits, + unsigned long *newmap) +{ + unsigned long size; + int rc; + + size = BITS_TO_LONGS(bits) * sizeof(unsigned long); + if (*str == '+' || *str == '-') { + memcpy(newmap, bitmap, size); + rc = modify_bitmap(str, newmap, bits); + } else { + memset(newmap, 0, size); + rc = hex2bitmap(str, newmap, bits); + } + return rc; +} + int ap_parse_mask_str(const char *str, unsigned long *bitmap, int bits, struct mutex *lock) @@ -1086,14 +1113,7 @@ int ap_parse_mask_str(const char *str, kfree(newmap); return -ERESTARTSYS; } - - if (*str == '+' || *str == '-') { - memcpy(newmap, bitmap, size); - rc = modify_bitmap(str, newmap, bits); - } else { - memset(newmap, 0, size); - rc = hex2bitmap(str, newmap, bits); - } + rc = ap_parse_bitmap_str(str, bitmap, bits, newmap); if (rc == 0) memcpy(bitmap, newmap, size); mutex_unlock(lock); @@ -1286,12 +1306,69 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_card_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newapm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away 
when we invoke the callback function. + */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(newapm, ap_perms.aqm); + if (rc) + rc = -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int apmask_commit(unsigned long *newapm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)]; + + /* + * Check if any bits in the apmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_card_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.apm, newapm, APMASKSIZE); + + return 0; +} + static ssize_t apmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newapm, AP_DEVICES); - rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm); + if (rc) + goto done; + + rc = apmask_commit(newapm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; @@ -1317,12 +1394,69 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_queue_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newaqm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away when we invoke the callback function. 
+ */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(ap_perms.apm, newaqm); + if (rc) + return -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int aqmask_commit(unsigned long *newaqm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)]; + + /* + * Check if any bits in the aqmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_queue_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.aqm, newaqm, AQMASKSIZE); + + return 0; +} + static ssize_t aqmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newaqm, AP_DOMAINS); - rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm); + if (rc) + goto done; + + rc = aqmask_commit(newaqm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; @@ -1339,7 +1473,17 @@ static ssize_t scans_show(struct bus_type *bus, char *buf) atomic64_read(&ap_scan_bus_count)); } -static BUS_ATTR_RO(scans); +static ssize_t scans_store(struct bus_type *bus, const char *buf, + size_t count) +{ + AP_DBF_INFO("%s force AP bus rescan\n", __func__); + + ap_bus_force_rescan(); + + return count; +} + +static BUS_ATTR_RW(scans); static ssize_t bindings_show(struct bus_type *bus, char *buf) { @@ -1446,24 +1590,24 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) AP_QID_QUEUE(qid), rawtype); return 0; } - /* up to CEX7 known and fully supported */ - if (rawtype <= AP_DEVICE_TYPE_CEX7) + /* up to CEX8 known and fully supported */ + if (rawtype <= AP_DEVICE_TYPE_CEX8) return rawtype; /* - * unknown new type > CEX7, check for 
compatibility + * unknown new type > CEX8, check for compatibility * to the highest known and supported type which is - * currently CEX7 with the help of the QACT function. + * currently CEX8 with the help of the QACT function. */ if (ap_qact_available()) { struct ap_queue_status status; union ap_qact_ap_info apinfo = {0}; apinfo.mode = (func >> 26) & 0x07; - apinfo.cat = AP_DEVICE_TYPE_CEX7; + apinfo.cat = AP_DEVICE_TYPE_CEX8; status = ap_qact(qid, 0, &apinfo); if (status.response_code == AP_RESPONSE_NORMAL && apinfo.cat >= AP_DEVICE_TYPE_CEX2A - && apinfo.cat <= AP_DEVICE_TYPE_CEX7) + && apinfo.cat <= AP_DEVICE_TYPE_CEX8) comp_type = apinfo.cat; } if (!comp_type) @@ -1505,6 +1649,49 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data; } +/* Helper function for notify_config_changed */ +static int __drv_notify_config_changed(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_config_changed) + ap_drv->on_config_changed(ap_qci_info, ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about an qci config change */ +static inline void notify_config_changed(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_config_changed); +} + +/* Helper function for notify_scan_complete */ +static int __drv_notify_scan_complete(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_scan_complete) + ap_drv->on_scan_complete(ap_qci_info, + ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about bus scan complete */ +static inline void notify_scan_complete(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_scan_complete); +} + /* * Helper function for ap_scan_bus(). 
* Remove card device and associated queue devices. @@ -1524,7 +1711,7 @@ static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac) */ static inline void ap_scan_domains(struct ap_card *ac) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1553,7 +1740,8 @@ static inline void ap_scan_domains(struct ap_card *ac) continue; } /* domain is valid, get info from this APQN */ - if (!ap_queue_info(qid, &type, &func, &depth, &ml, &decfg)) { + if (!ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) { if (aq) { AP_DBF_INFO("%s(%d,%d) queue_info() failed, rm queue dev\n", __func__, ac->id, dom); @@ -1572,6 +1760,7 @@ static inline void ap_scan_domains(struct ap_card *ac) } aq->card = ac; aq->config = !decfg; + aq->chkstop = chkstop; dev = &aq->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = &ac->ap_dev.device; @@ -1588,13 +1777,43 @@ static inline void ap_scan_domains(struct ap_card *ac) if (decfg) AP_DBF_INFO("%s(%d,%d) new (decfg) queue dev created\n", __func__, ac->id, dom); + else if (chkstop) + AP_DBF_INFO("%s(%d,%d) new (chkstop) queue dev created\n", + __func__, ac->id, dom); else AP_DBF_INFO("%s(%d,%d) new queue dev created\n", __func__, ac->id, dom); goto put_dev_and_continue; } - /* Check config state on the already existing queue device */ + /* handle state changes on already existing queue device */ spin_lock_bh(&aq->lock); + /* checkstop state */ + if (chkstop && !aq->chkstop) { + /* checkstop on */ + aq->chkstop = true; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = AP_RESPONSE_CHECKSTOPPED; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop on\n", + __func__, ac->id, dom); + /* 'receive' pending messages with -EAGAIN */ + ap_flush_queue(aq); + goto put_dev_and_continue; + } else if (!chkstop && aq->chkstop) { + /* checkstop off */ + aq->chkstop = false; + if (aq->dev_state > 
AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_OPERATING; + aq->sm_state = AP_SM_STATE_RESET_START; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop off\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + /* config state change */ if (decfg && aq->config) { /* config off this queue device */ aq->config = false; @@ -1603,14 +1822,13 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->last_err_rc = AP_RESPONSE_DECONFIGURED; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config off\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config off\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); /* 'receive' pending messages with -EAGAIN */ ap_flush_queue(aq); goto put_dev_and_continue; - } - if (!decfg && !aq->config) { + } else if (!decfg && !aq->config) { /* config on this queue device */ aq->config = true; if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { @@ -1618,8 +1836,8 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->sm_state = AP_SM_STATE_RESET_START; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config on\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config on\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); goto put_dev_and_continue; } @@ -1646,7 +1864,7 @@ put_dev_and_continue: */ static inline void ap_scan_adapter(int ap) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1680,8 +1898,8 @@ static inline void ap_scan_adapter(int ap) for (dom = 0; dom <= ap_max_domain_id; dom++) if (ap_test_config_usage_domain(dom)) { qid = AP_MKQID(ap, dom); - if (ap_queue_info(qid, &type, &func, - &depth, &ml, &decfg)) + if (ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) break; } if (dom > ap_max_domain_id) { @@ -1726,13 +1944,25 @@ static inline void ap_scan_adapter(int ap) put_device(dev); ac = NULL; } else { + /* 
handle checkstop state change */ + if (chkstop && !ac->chkstop) { + /* checkstop on */ + ac->chkstop = true; + AP_DBF_INFO("%s(%d) card dev checkstop on\n", + __func__, ap); + } else if (!chkstop && ac->chkstop) { + /* checkstop off */ + ac->chkstop = false; + AP_DBF_INFO("%s(%d) card dev checkstop off\n", + __func__, ap); + } + /* handle config state change */ if (decfg && ac->config) { ac->config = false; AP_DBF_INFO("%s(%d) card dev config off\n", __func__, ap); ap_send_config_uevent(&ac->ap_dev, ac->config); - } - if (!decfg && !ac->config) { + } else if (!decfg && !ac->config) { ac->config = true; AP_DBF_INFO("%s(%d) card dev config on\n", __func__, ap); @@ -1756,6 +1986,7 @@ static inline void ap_scan_adapter(int ap) return; } ac->config = !decfg; + ac->chkstop = chkstop; dev = &ac->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = ap_root_device; @@ -1780,6 +2011,9 @@ static inline void ap_scan_adapter(int ap) if (decfg) AP_DBF_INFO("%s(%d) new (decfg) card dev type=%d func=0x%08x created\n", __func__, ap, type, func); + else if (chkstop) + AP_DBF_INFO("%s(%d) new (chkstop) card dev type=%d func=0x%08x created\n", + __func__, ap, type, func); else AP_DBF_INFO("%s(%d) new card dev type=%d func=0x%08x created\n", __func__, ap, type, func); @@ -1792,6 +2026,25 @@ static inline void ap_scan_adapter(int ap) put_device(&ac->ap_dev.device); } +/** + * ap_get_configuration - get the host AP configuration + * + * Stores the host AP configuration information returned from the previous call + * to Query Configuration Information (QCI), then retrieves and stores the + * current AP configuration returned from QCI. + * + * Return: true if the host AP configuration changed between calls to QCI; + * otherwise, return false. 
+ */ +static bool ap_get_configuration(void) +{ + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); + ap_fetch_qci_info(ap_qci_info); + + return memcmp(ap_qci_info, ap_qci_info_old, + sizeof(struct ap_config_info)) != 0; +} + /** * ap_scan_bus(): Scan the AP bus for new devices * Runs periodically, workqueue timer (ap_config_time) @@ -1799,9 +2052,12 @@ static inline void ap_scan_adapter(int ap) */ static void ap_scan_bus(struct work_struct *unused) { - int ap; + int ap, config_changed = 0; - ap_fetch_qci_info(ap_qci_info); + /* config change notify */ + config_changed = ap_get_configuration(); + if (config_changed) + notify_config_changed(); ap_select_domain(); AP_DBF_DBG("%s running\n", __func__); @@ -1810,6 +2066,10 @@ static void ap_scan_bus(struct work_struct *unused) for (ap = 0; ap <= ap_max_adapter_id; ap++) ap_scan_adapter(ap); + /* scan complete notify */ + if (config_changed) + notify_scan_complete(); + /* check if there is at least one queue available with default domain */ if (ap_domain_index >= 0) { struct device *dev = diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 95b577754b35..8fd5a17bdf99 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -47,6 +47,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESPONSE_BUSY 0x05 #define AP_RESPONSE_INVALID_ADDRESS 0x06 #define AP_RESPONSE_OTHERWISE_CHANGED 0x07 +#define AP_RESPONSE_INVALID_GISA 0x08 #define AP_RESPONSE_Q_FULL 0x10 #define AP_RESPONSE_NO_PENDING_REPLY 0x10 #define AP_RESPONSE_INDEX_TOO_BIG 0x11 @@ -69,6 +70,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX5 11 #define AP_DEVICE_TYPE_CEX6 12 #define AP_DEVICE_TYPE_CEX7 13 +#define AP_DEVICE_TYPE_CEX8 14 /* * Known function facilities @@ -142,6 +144,19 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); + int (*in_use)(unsigned long *apm, unsigned long *aqm); 
+ /* + * Called at the start of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_config_changed)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); + /* + * Called at the end of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_scan_complete)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) @@ -164,6 +179,7 @@ struct ap_card { int id; /* AP card number. */ unsigned int maxmsgsize; /* AP msg limit for this card */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; @@ -176,6 +192,7 @@ struct ap_queue { spinlock_t lock; /* Per device lock. */ enum ap_dev_state dev_state; /* queue device state */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ bool interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ @@ -234,7 +251,9 @@ struct ap_message { struct ap_message *); }; -#define AP_MSG_FLAG_SPECIAL 1 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_SPECIAL 0x0001 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_USAGE 0x0002 /* CCA, EP11: usage (no admin) msg */ +#define AP_MSG_FLAG_ADMIN 0x0004 /* CCA, EP11: admin (=control) msg */ /** * ap_init_message() - Initialize ap_message. 
@@ -289,6 +308,9 @@ void ap_queue_init_state(struct ap_queue *aq); struct ap_card *ap_card_create(int id, int queue_depth, int raw_type, int comp_type, unsigned int functions, int ml); +#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long)) +#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long)) + struct ap_perms { unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)]; unsigned long apm[BITS_TO_LONGS(AP_DEVICES)]; diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 196325a66662..6b2170cf186e 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -174,6 +174,16 @@ static ssize_t config_store(struct device *dev, static DEVICE_ATTR_RW(config); +static ssize_t chkstop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_card *ac = to_ap_card(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->chkstop ? 1 : 0); +} + +static DEVICE_ATTR_RO(chkstop); + static ssize_t max_msg_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -194,6 +204,7 @@ static struct attribute *ap_card_dev_attrs[] = { &dev_attr_pendingq_count.attr, &dev_attr_modalias.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, &dev_attr_max_msg_size.attr, NULL }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 1901449768dd..205045cd998d 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -455,7 +455,8 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + if (aq->config && !aq->chkstop && + aq->dev_state > AP_DEV_STATE_UNINITIATED) return ap_jumptable[aq->sm_state][event](aq); else return AP_SM_WAIT_NONE; @@ -615,6 +616,20 @@ static ssize_t config_show(struct device *dev, static DEVICE_ATTR_RO(config); +static ssize_t chkstop_show(struct device *dev, + struct 
device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc; + + spin_lock_bh(&aq->lock); + rc = scnprintf(buf, PAGE_SIZE, "%d\n", aq->chkstop ? 1 : 0); + spin_unlock_bh(&aq->lock); + return rc; +} + +static DEVICE_ATTR_RO(chkstop); + #ifdef CONFIG_ZCRYPT_DEBUG static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -729,6 +744,7 @@ static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_reset.attr, &dev_attr_interrupt.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, #ifdef CONFIG_ZCRYPT_DEBUG &dev_attr_states.attr, &dev_attr_last_err_rc.attr, @@ -915,6 +931,7 @@ void ap_queue_init_state(struct ap_queue *aq) spin_lock_bh(&aq->lock); aq->dev_state = AP_DEV_STATE_OPERATING; aq->sm_state = AP_SM_STATE_RESET_START; + aq->last_err_rc = 0; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } diff --git a/drivers/s390/crypto/vfio_ap_debug.h b/drivers/s390/crypto/vfio_ap_debug.h new file mode 100644 index 000000000000..180156121421 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_debug.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2022 + * + * Author(s): Tony Krowiak + */ +#ifndef VFIO_AP_DEBUG_H +#define VFIO_AP_DEBUG_H + +#include + +#define DBF_ERR 3 /* error conditions */ +#define DBF_WARN 4 /* warning conditions */ +#define DBF_INFO 5 /* informational */ +#define DBF_DEBUG 6 /* for debugging only */ + +#define DBF_MAX_SPRINTF_ARGS 10 + +#define VFIO_AP_DBF(...) \ + debug_sprintf_event(vfio_ap_dbf_info, ##__VA_ARGS__) +#define VFIO_AP_DBF_ERR(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_ERR, ##__VA_ARGS__) +#define VFIO_AP_DBF_WARN(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_WARN, ##__VA_ARGS__) +#define VFIO_AP_DBF_INFO(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_INFO, ##__VA_ARGS__) +#define VFIO_AP_DBF_DBG(...) 
\ + debug_sprintf_event(vfio_ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__) + +extern debug_info_t *vfio_ap_dbf_info; + +#endif /* VFIO_AP_DEBUG_H */ diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index e043ae236630..29ebd54f8919 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -14,6 +14,7 @@ #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_ROOT_NAME "vfio_ap" #define VFIO_AP_DEV_NAME "matrix" @@ -26,6 +27,7 @@ MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018"); MODULE_LICENSE("GPL v2"); struct ap_matrix_dev *matrix_dev; +debug_info_t *vfio_ap_dbf_info; /* Only type 10 adapters (CEX4 and later) are supported * by the AP matrix device driver @@ -39,6 +41,8 @@ static struct ap_device_id ap_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of sibling */ }, }; @@ -250,10 +254,28 @@ static void vfio_ap_matrix_dev_destroy(void) root_device_unregister(root_device); } +static int __init vfio_ap_dbf_info_init(void) +{ + vfio_ap_dbf_info = debug_register("vfio_ap", 1, 1, + DBF_MAX_SPRINTF_ARGS * sizeof(long)); + + if (!vfio_ap_dbf_info) + return -ENOENT; + + debug_register_view(vfio_ap_dbf_info, &debug_sprintf_view); + debug_set_level(vfio_ap_dbf_info, DBF_WARN); + + return 0; +} + static int __init vfio_ap_init(void) { int ret; + ret = vfio_ap_dbf_info_init(); + if (ret) + return ret; + /* If there are no AP instructions, there is nothing to pass through. 
*/ if (!ap_instructions_available()) return -ENODEV; @@ -284,6 +306,7 @@ static void __exit vfio_ap_exit(void) vfio_ap_mdev_unregister(); ap_driver_unregister(&vfio_ap_drv); vfio_ap_matrix_dev_destroy(); + debug_unregister(vfio_ap_dbf_info); } module_init(vfio_ap_init); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index abc0b9b88386..7dc26365e29a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" @@ -183,12 +185,44 @@ end_free: return status; } +/** + * vfio_ap_validate_nib - validate a notification indicator byte (nib) address. + * + * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. + * @nib: the location for storing the nib address. + * @g_pfn: the location for storing the page frame number of the page containing + * the nib. + * + * When the PQAP(AQIC) instruction is executed, general register 2 contains the + * address of the notification indicator byte (nib) used for IRQ notification. + * This function parses the nib from gr2 and calculates the page frame + * number for the guest of the page containing the nib. The values are + * stored in @nib and @g_pfn respectively. + * + * The g_pfn of the nib is then validated to ensure the nib address is valid. + * + * Return: returns zero if the nib address is valid; otherwise, returns + * -EINVAL.
+ */ +static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, + unsigned long *g_pfn) +{ + *nib = vcpu->run->s.regs.gprs[2]; + *g_pfn = *nib >> PAGE_SHIFT; + + if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) + return -EINVAL; + + return 0; +} + /** * vfio_ap_irq_enable - Enable Interruption for a APQN * * @q: the vfio_ap_queue holding AQIC parameters * @isc: the guest ISC to register with the GIB interface - * @nib: the notification indicator byte to pin. + * @vcpu: the vcpu object containing the registers specifying the parameters + * passed to the PQAP(AQIC) instruction. * * Pin the NIB saved in *q * Register the guest ISC to GIB interface and retrieve the @@ -204,22 +238,36 @@ end_free: */ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, - unsigned long nib) + struct kvm_vcpu *vcpu) { + unsigned long nib; struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + int nisc; struct kvm *kvm; unsigned long h_nib, g_pfn, h_pfn; int ret; - g_pfn = nib >> PAGE_SHIFT; + /* Verify that the notification indicator byte address is valid */ + if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { + VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, nib, g_pfn, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_ADDRESS; + return status; + } + ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: break; default: + VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," + "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, ret, nib, g_pfn, q->apqn); + status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; } @@ -229,7 +277,17 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; - aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc); + + nisc = kvm_s390_gisc_register(kvm, 
isc); + if (nisc < 0) { + VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", + __func__, nisc, isc, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_GISA; + return status; + } + + aqic_gisa.isc = nisc; aqic_gisa.ir = 1; aqic_gisa.gisa = (uint64_t)gisa >> 4; @@ -253,9 +311,61 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, break; } + if (status.response_code != AP_RESPONSE_NORMAL) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: " + "zone=%#x, ir=%#x, gisc=%#x, f=%#x," + "gisa=%#x, isc=%#x, apqn=%#04x\n", + __func__, status.response_code, + aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc, + aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc, + q->apqn); + } + return status; } +/** + * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array + * of big endian elements that can be passed by + * value to an s390dbf sprintf event function to + * format a UUID string. + * + * @guid: the object containing the little endian guid + * @uuid: a six-element array of long values that can be passed by value as + * arguments for a formatting string specifying a UUID. + * + * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf + * event functions if the memory for the passed string is available as long as + * the debug feature exists. Since a mediated device can be removed at any + * time, its name cannot be used because %s passes the reference to the string + * in memory and the reference will go stale once the device is removed. + * + * The s390dbf string formatting function allows a maximum of 9 arguments for a + * message to be displayed in the 'sprintf' view. In order to use the bytes + * comprising the mediated device's UUID to display the mediated device name, + * they will have to be converted into an array whose elements can be passed by + * value to sprintf.
For example: + * + * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 } + * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804 + * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 } + * formatting string: "%08lx-%04lx-%04lx-%04lx-%04lx%08lx" + */ +static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) +{ + /* + * The input guid is ordered in little endian, so it needs to be + * reordered for displaying a UUID as a string. This specifies the + * guid indices in proper order. + */ + uuid[0] = le32_to_cpup((__le32 *)guid); + uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]); + uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]); + uuid[3] = *((__u16 *)&guid->b[8]); + uuid[4] = *((__u16 *)&guid->b[10]); + uuid[5] = *((__u32 *)&guid->b[12]); +} + /** * handle_pqap - PQAP instruction callback * @@ -281,37 +391,54 @@ static int handle_pqap(struct kvm_vcpu *vcpu) { uint64_t status; uint16_t apqn; + unsigned long uuid[6]; struct vfio_ap_queue *q; struct ap_queue_status qstatus = { .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; struct ap_matrix_mdev *matrix_mdev; - /* If we do not use the AIV facility just go to userland */ - if (!(vcpu->arch.sie_block->eca & ECA_AIV)) - return -EOPNOTSUPP; - apqn = vcpu->run->s.regs.gprs[0] & 0xffff; - mutex_lock(&matrix_dev->lock); - if (!vcpu->kvm->arch.crypto.pqap_hook) + /* If we do not use the AIV facility just go to userland */ + if (!(vcpu->arch.sie_block->eca & ECA_AIV)) { + VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n", + __func__, apqn, vcpu->arch.sie_block->eca); + + return -EOPNOTSUPP; + } + + mutex_lock(&matrix_dev->lock); + if (!vcpu->kvm->arch.crypto.pqap_hook) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n", + __func__, apqn); goto out_unlock; + } + matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, struct ap_matrix_mdev, pqap_hook); /* If the there is no guest using the mdev, there is nothing
to do */ - if (!matrix_mdev->kvm) + if (!matrix_mdev->kvm) { + vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid); + VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n", + __func__, uuid[0], uuid[1], uuid[2], + uuid[3], uuid[4], uuid[5], apqn); goto out_unlock; + } q = vfio_ap_get_queue(matrix_mdev, apqn); - if (!q) + if (!q) { + VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n", + __func__, AP_QID_CARD(apqn), + AP_QID_QUEUE(apqn)); goto out_unlock; + } status = vcpu->run->s.regs.gprs[1]; /* If IR bit(16) is set we enable the interrupt */ if ((status >> (63 - 16)) & 0x01) - qstatus = vfio_ap_irq_enable(q, status & 0x07, - vcpu->run->s.regs.gprs[2]); + qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu); else qstatus = vfio_ap_irq_disable(q); diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 9811ab81f3c4..80e2a306709a 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -671,7 +671,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -692,7 +692,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -714,6 +714,8 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -779,7 +781,7 @@ static long 
zcrypt_rsa_crt(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -800,7 +802,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo_crt || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -822,6 +824,8 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -872,7 +876,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, } #endif - rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain); + rc = prep_cca_ap_msg(userspace, xcRB, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -891,7 +895,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* Check for user selected CCA card */ @@ -914,9 +918,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check for device useable and eligible */ - if (!zq->online || - !zq->ops->send_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_cprb || + !zq->queue->config || zq->queue->chkstop || (tdom != AUTOSEL_DOM && tdom != AP_QID_QUEUE(zq->queue->qid))) continue; @@ -940,6 +943,8 @@ static long _zcrypt_send_cprb(bool 
userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, xcRB->user_defined, *domain); rc = -ENODEV; goto out; } @@ -1016,7 +1021,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, struct ep11_target_dev *targets; unsigned short target_num; unsigned int wgt = 0, pref_wgt = 0; - unsigned int func_code; + unsigned int func_code, domain; struct ap_message ap_msg; int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; @@ -1053,7 +1058,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, } } - rc = get_ep11cprb_fc(userspace, xcrb, &ap_msg, &func_code); + rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain); if (rc) goto out_free; @@ -1062,7 +1067,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable EP11 card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x04000000)) continue; /* Check for user selected EP11 card */ @@ -1085,9 +1090,8 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ - if (!zq->online || - !zq->ops->send_ep11_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_ep11_cprb || + !zq->queue->config || zq->queue->chkstop || (targets && !is_desired_ep11_queue(zq->queue->qid, target_num, targets))) @@ -1112,6 +1116,17 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + if (targets && target_num == 1) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, (int) targets->ap_id, + (int) targets->dom_id); + } else if (targets) { + ZCRYPT_DBF_DBG("%s no match for %d target 
addrs => ENODEV\n", + __func__, (int) target_num); + } else { + ZCRYPT_DBF_DBG("%s no match for address ff.ffff => ENODEV\n", + __func__); + } rc = -ENODEV; goto out_free; } @@ -1156,7 +1171,7 @@ static long zcrypt_rng(char *buffer) trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB); ap_init_message(&ap_msg); - rc = get_rng_fc(&ap_msg, &func_code, &domain); + rc = prep_rng_ap_msg(&ap_msg, &func_code, &domain); if (rc) goto out; @@ -1165,7 +1180,7 @@ static long zcrypt_rng(char *buffer) spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* get weight index of the card device */ @@ -1175,7 +1190,7 @@ static long zcrypt_rng(char *buffer) for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rng || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt)) continue; @@ -1188,6 +1203,8 @@ static long zcrypt_rng(char *buffer) spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 06024bbe9a58..fe5664c7589e 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright IBM Corp. 2012, 2019 + * Copyright IBM Corp. 2012, 2022 * Author(s): Holger Dengler */ @@ -36,8 +36,8 @@ #define CEX4_CLEANUP_TIME (900*HZ) MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX4/CEX5/CEX6/CEX7 Cryptographic Card device driver, " \ - "Copyright IBM Corp. 2019"); +MODULE_DESCRIPTION("CEX[45678] Cryptographic Card device driver, " \ + "Copyright IBM Corp. 
2022"); MODULE_LICENSE("GPL"); static struct ap_device_id zcrypt_cex4_card_ids[] = { @@ -49,6 +49,8 @@ static struct ap_device_id zcrypt_cex4_card_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { /* end of list */ }, }; @@ -63,6 +65,8 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of list */ }, }; @@ -395,7 +399,7 @@ static const struct attribute_group ep11_queue_attr_grp = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always + * Probe function for CEX[45678] card device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. 
@@ -414,6 +418,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 6, 9, 20, 17, 65, 438, 0, 0}; static const int CEX7A_SPEED_IDX[NUM_OPS] = { 6, 8, 17, 15, 54, 362, 0, 0}; + static const int CEX8A_SPEED_IDX[NUM_OPS] = { + 6, 8, 17, 15, 54, 362, 0, 0}; static const int CEX4C_SPEED_IDX[NUM_OPS] = { 59, 69, 308, 83, 278, 2204, 209, 40}; @@ -423,6 +429,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 16, 20, 32, 27, 77, 455, 24, 9}; static const int CEX7C_SPEED_IDX[NUM_OPS] = { 14, 16, 26, 23, 64, 376, 23, 8}; + static const int CEX8C_SPEED_IDX[NUM_OPS] = { + 14, 16, 26, 23, 64, 376, 23, 8}; static const int CEX4P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 50}; @@ -432,6 +440,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 0, 0, 0, 0, 0, 0, 0, 9}; static const int CEX7P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 8}; + static const int CEX8P_SPEED_IDX[NUM_OPS] = { + 0, 0, 0, 0, 0, 0, 0, 8}; struct ap_card *ac = to_ap_card(&ap_dev->device); struct zcrypt_card *zc; @@ -455,13 +465,20 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX6A"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6A_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7A"; + zc->speed_rating = CEX7A_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8A"; + zc->speed_rating = CEX8A_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. 
*/ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7A_SPEED_IDX; } zc->min_mod_size = CEX4A_MIN_MOD_SIZE; if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) && @@ -477,32 +494,39 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } else if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) { zc->type_string = "CEX4C"; - /* wrong user space type, must be CEX4 + zc->speed_rating = CEX4C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX4C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5C"; - /* wrong user space type, must be CEX5 + zc->speed_rating = CEX5C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX5C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6C"; - /* wrong user space type, must be CEX6 - * just keep it for cca compatibility - */ - zc->user_space_type = ZCRYPT_CEX3C; zc->speed_rating = CEX6C_SPEED_IDX; - } else { - zc->type_string = "CEX7C"; - /* wrong user space type, must be CEX7 + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { + zc->type_string = "CEX7C"; zc->speed_rating = CEX7C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; + } else { + zc->type_string = "CEX8C"; + zc->speed_rating = CEX8C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -520,13 +544,20 @@ static int zcrypt_cex4_card_probe(struct ap_device 
*ap_dev) zc->type_string = "CEX6P"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6P_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7P"; + zc->speed_rating = CEX7P_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8P"; + zc->speed_rating = CEX8P_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. */ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7P_SPEED_IDX; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -563,7 +594,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver + * This is called to remove the CEX[45678] card driver * information if an AP card device is removed. */ static void zcrypt_cex4_card_remove(struct ap_device *ap_dev) @@ -587,7 +618,7 @@ static struct ap_driver zcrypt_cex4_card_driver = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always + * Probe function for CEX[45678] queue device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. @@ -653,7 +684,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver + * This is called to remove the CEX[45678] queue driver * information if an AP queue device is removed. 
*/ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index f42e8c511184..259145aa393f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -156,7 +156,7 @@ struct type80_hdr { unsigned char reserved3[8]; } __packed; -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) { if (!mex->inputdatalength) @@ -172,7 +172,7 @@ unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) return 0; } -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) { if (!crt->inputdatalength) @@ -497,6 +497,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send me cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } @@ -542,6 +546,10 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send crt cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 66bec4f45c56..eb49f06bed29 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -20,8 +20,8 @@ #define MSGTYPE_ADJUSTMENT 0x08 /* type04 extension (not needed in type50) */ -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *, int *); -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *, int *); +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fc); +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fc); 
void zcrypt_msgtype50_init(void); void zcrypt_msgtype50_exit(void); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 8582dd0d6969..57d885158cf0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -472,6 +472,7 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, *fcode = (msg->hdr.function_code[0] << 8) | msg->hdr.function_code[1]; *dom = (unsigned short *)&msg->cprbx.domain; + /* check subfunction, US and AU need special flag with NQAP */ if (memcmp(function_code, "US", 2) == 0 || memcmp(function_code, "AU", 2) == 0) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; @@ -481,6 +482,23 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* check CPRB minor version, set info bits in ap_message flag field */ + switch (*(unsigned short *)(&msg->cprbx.func_id[0])) { + case 0x5432: /* "T2" */ + ap_msg->flags |= AP_MSG_FLAG_USAGE; + break; + case 0x5433: /* "T3" */ + case 0x5435: /* "T5" */ + case 0x5436: /* "T6" */ + case 0x5437: /* "T7" */ + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + break; + default: + ZCRYPT_DBF_DBG("%s unknown CPRB minor version '%c%c'\n", + __func__, msg->cprbx.func_id[0], + msg->cprbx.func_id[1]); + } + /* copy data block */ if (xcRB->request_data_length && z_copy_from_user(userspace, req_data, xcRB->request_data_address, @@ -492,7 +510,8 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg, struct ep11_urb *xcRB, - unsigned int *fcode) + unsigned int *fcode, + unsigned int *domain) { unsigned int lfmt; static struct type6_hdr static_type6_ep11_hdr = { @@ -568,6 +587,14 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* set info bits in ap_message flag field */ + if 
(msg->cprbx.flags & 0x80) + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + else + ap_msg->flags |= AP_MSG_FLAG_USAGE; + + *domain = msg->cprbx.target_id; + return 0; } @@ -714,17 +741,31 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq, char *data = reply->msg; /* Copy CPRB to user */ + if (xcRB->reply_control_blk_length < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s reply_control_blk_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_control_blk_length, + msg->fmt2.count1); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr, data + msg->fmt2.offset1, msg->fmt2.count1)) return -EFAULT; xcRB->reply_control_blk_length = msg->fmt2.count1; /* Copy data buffer to user */ - if (msg->fmt2.count2) + if (msg->fmt2.count2) { + if (xcRB->reply_data_length < msg->fmt2.count2) { + ZCRYPT_DBF_DBG("%s reply_data_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_data_length, + msg->fmt2.count2); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_data_addr, data + msg->fmt2.offset2, msg->fmt2.count2)) return -EFAULT; + } xcRB->reply_data_length = msg->fmt2.count2; + return 0; } @@ -744,8 +785,12 @@ static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, struct type86_fmt2_msg *msg = reply->msg; char *data = reply->msg; - if (xcRB->resp_len < msg->fmt2.count1) - return -EINVAL; + if (xcRB->resp_len < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s resp_len %u < required %u => EMSGSIZE\n", + __func__, (unsigned int)xcRB->resp_len, + msg->fmt2.count1); + return -EMSGSIZE; + } /* Copy response CPRB to user */ if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp, @@ -1113,15 +1158,17 @@ out_free: } /* - * Fetch function code from cprb. - * Extracting the fc requires to copy the cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare a CCA AP msg request. 
+ * Prepare a CCA AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB, - struct ap_message *ap_msg, - unsigned int *func_code, unsigned short **dom) +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB, + struct ap_message *ap_msg, + unsigned int *func_code, unsigned short **dom) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, @@ -1153,6 +1200,21 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, { int rc; struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct { + struct type6_hdr hdr; + struct CPRBX cprbx; + /* ... more data blocks ... */ + } __packed * msg = ap_msg->msg; + + /* + * Set the queue's reply buffer length minus 128 byte padding + * as reply limit for the card firmware. + */ + msg->hdr.FromCardLen1 = min_t(unsigned int, msg->hdr.FromCardLen1, + zq->reply.bufsize - 128); + if (msg->hdr.FromCardLen2) + msg->hdr.FromCardLen2 = + zq->reply.bufsize - msg->hdr.FromCardLen1 - 128; init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); @@ -1167,19 +1229,25 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } /* - * Fetch function code from ep11 cprb. - * Extracting the fc requires to copy the ep11 cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare an EP11 AP msg request. 
+ * Prepare an EP11 AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb, - struct ap_message *ap_msg, - unsigned int *func_code) +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *func_code, unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1195,7 +1263,8 @@ unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb, ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code); + return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, + func_code, domain); } /* @@ -1227,7 +1296,6 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * unsigned int dom_val; /* domain id */ } __packed * payload_hdr = NULL; - /* * The target domain field within the cprb body/payload block will be * replaced by the usage domain for non-management commands only. @@ -1259,6 +1327,13 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * AP_QID_QUEUE(zq->queue->qid); } + /* + * Set the queue's reply buffer length minus the two prepend headers + * as reply limit for the card firmware. + */ + msg->hdr.FromCardLen1 = zq->reply.bufsize - + sizeof(struct type86_hdr) - sizeof(struct type86_fmt2_ext); + init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); if (rc) @@ -1272,11 +1347,15 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * /* Signal pending. 
*/ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } -unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, - unsigned int *domain) +int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code, + unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 155c73514bac..9da4f4175c44 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -94,11 +94,14 @@ struct type86_fmt2_ext { unsigned int offset4; /* 0x00000000 */ } __packed; -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *, struct ap_message *, - unsigned int *, unsigned short **); -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *, struct ap_message *, - unsigned int *); -unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *); +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, + struct ap_message *ap_msg, + unsigned int *fc, unsigned short **dom); +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *fc, unsigned int *dom); +int prep_rng_ap_msg(struct ap_message *ap_msg, + int *fc, unsigned int *dom); #define LOW 10 #define MEDIUM 100 diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 0a4d93edc4c0..6daa3978d290 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -480,6 +480,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) struct file *file; struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; + int ret; bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; if (iova_limit <= bounce_size) @@ -513,10 +514,20 @@ vduse_domain_create(unsigned long iova_limit, size_t 
bounce_size) spin_lock_init(&domain->iotlb_lock); init_iova_domain(&domain->stream_iovad, PAGE_SIZE, IOVA_START_PFN); + ret = iova_domain_init_rcaches(&domain->stream_iovad); + if (ret) + goto err_iovad_stream; init_iova_domain(&domain->consistent_iovad, PAGE_SIZE, bounce_pfns); + ret = iova_domain_init_rcaches(&domain->consistent_iovad); + if (ret) + goto err_iovad_consistent; return domain; +err_iovad_consistent: + put_iova_domain(&domain->stream_iovad); +err_iovad_stream: + fput(file); err_file: vfree(domain->bounce_maps); err_map: diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c8fa79da23b3..aa42f0686591 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1718,7 +1718,7 @@ config AR7_WDT config TXX9_WDT tristate "Toshiba TXx9 Watchdog Timer" - depends on CPU_TX39XX || CPU_TX49XX || (MIPS && COMPILE_TEST) + depends on CPU_TX49XX || (MIPS && COMPILE_TEST) select WATCHDOG_CORE help Hardware driver for the built-in watchdog timer on TXx9 MIPS SoCs. diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764e1..d0f7bdd2fdf2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,11 +59,24 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr); +void *dereference_kernel_function_descriptor(void *ptr); +#else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) + +/* An address is simply the address of the function. 
*/ +typedef struct { + unsigned long addr; +} func_desc_t; #endif +static inline bool have_function_descriptors(void) +{ + return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); +} + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h new file mode 100644 index 000000000000..73f2a9324857 --- /dev/null +++ b/include/dt-bindings/clock/microchip,mpfs-clock.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Daire McNamara, + * Copyright (C) 2020 Microchip Technology Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ +#define _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ + +#define CLK_CPU 0 +#define CLK_AXI 1 +#define CLK_AHB 2 + +#define CLK_ENVM 3 +#define CLK_MAC0 4 +#define CLK_MAC1 5 +#define CLK_MMC 6 +#define CLK_TIMER 7 +#define CLK_MMUART0 8 +#define CLK_MMUART1 9 +#define CLK_MMUART2 10 +#define CLK_MMUART3 11 +#define CLK_MMUART4 12 +#define CLK_SPI0 13 +#define CLK_SPI1 14 +#define CLK_I2C0 15 +#define CLK_I2C1 16 +#define CLK_CAN0 17 +#define CLK_CAN1 18 +#define CLK_USB 19 +#define CLK_RESERVED 20 +#define CLK_RTC 21 +#define CLK_QSPI 22 +#define CLK_GPIO0 23 +#define CLK_GPIO1 24 +#define CLK_GPIO2 25 +#define CLK_DDRC 26 +#define CLK_FIC0 27 +#define CLK_FIC1 28 +#define CLK_FIC2 29 +#define CLK_FIC3 30 +#define CLK_ATHENA 31 +#define CLK_CFM 32 + +#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c7dce7883179..82e33137f917 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -166,6 +166,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, + CPUHP_AP_PERF_RISCV_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, 
CPUHP_AP_ARM_ARCH_TIMER_STARTING, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 69230fd695ea..2f9891cb3d00 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -525,12 +525,6 @@ struct context_entry { */ #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(2) - struct dmar_domain { int nid; /* node id */ @@ -548,7 +542,6 @@ struct dmar_domain { u8 iommu_snooping: 1; /* indicate snooping control feature */ struct list_head devices; /* all devices' list */ - struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -563,11 +556,6 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ - u32 default_pasid; /* - * The default pasid used for non-SVM - * traffic on mediated devices. 
- */ - struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -590,7 +578,6 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain ***domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ @@ -620,21 +607,11 @@ struct intel_iommu { void *perf_statistic; }; -/* Per subdevice private data */ -struct subdev_domain_info { - struct list_head link_phys; /* link to phys device siblings */ - struct list_head link_domain; /* link to domain siblings */ - struct device *pdev; /* physical device derived from */ - struct dmar_domain *domain; /* aux-domain */ - int users; /* user count */ -}; - /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -645,7 +622,6 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; - u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -717,7 +693,6 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); @@ -757,17 +732,12 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, 
struct device *dev); -struct dmar_domain *find_domain(struct device *dev); -struct device_domain_info *get_domain_info(struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); @@ -795,7 +765,6 @@ struct intel_svm { unsigned int flags; u32 pasid; - int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; }; #else @@ -813,6 +782,8 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +extern const struct iommu_ops intel_iommu_ops; + #ifdef CONFIG_INTEL_IOMMU extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 1b73bab7eeff..b3b125b332aa 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -25,17 +25,5 @@ * do such IOTLB flushes automatically. */ #define SVM_FLAG_SUPERVISOR_MODE BIT(0) -/* - * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest - * processes. Compared to the host bind, the primary differences are: - * 1. mm life cycle management - * 2. fault reporting - */ -#define SVM_FLAG_GUEST_MODE BIT(1) -/* - * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, - * which requires guest and host PASID translation at both directions. 
- */ -#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index de0c57a567c8..9208eca4b0d1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -37,6 +37,7 @@ struct iommu_group; struct bus_type; struct device; struct iommu_domain; +struct iommu_domain_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -88,7 +89,7 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; - const struct iommu_ops *ops; + const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ iommu_fault_handler_t handler; void *handler_token; @@ -144,7 +145,6 @@ struct iommu_resv_region { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally * enabling %IOMMU_DEV_FEAT_SVA requires @@ -157,7 +157,6 @@ struct iommu_resv_region { * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). 
*/ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; @@ -194,9 +193,75 @@ struct iommu_iotlb_gather { * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @domain_alloc: allocate iommu domain - * @domain_free: free iommu domain - * @attach_dev: attach device to an iommu domain - * @detach_dev: detach device from an iommu domain + * @probe_device: Add device to iommu driver handling + * @release_device: Remove device from iommu driver handling + * @probe_finalize: Do final setup work after the device is added to an IOMMU + * group and attached to the group's domain + * @device_group: find iommu group for a particular device + * @get_resv_regions: Request list of reserved regions for a device + * @put_resv_regions: Free list of reserved regions for a device + * @of_xlate: add OF master IDs to iommu grouping + * @is_attach_deferred: Check if domain attach should be deferred from iommu + * driver init to device driver init (default no) + * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * iommu specific features. 
+ * @dev_feat_enabled: check enabled feature + * @sva_bind: Bind process address space to device + * @sva_unbind: Unbind process address space from device + * @sva_get_pasid: Get PASID associated to a SVA handle + * @page_response: handle page request response + * @def_domain_type: device default domain type, return value: + * - IOMMU_DOMAIN_IDENTITY: must use an identity domain + * - IOMMU_DOMAIN_DMA: must use a dma domain + * - 0: use the default setting + * @default_domain_ops: the default ops for domains + * @pgsize_bitmap: bitmap of all possible supported page sizes + * @owner: Driver module providing these ops + */ +struct iommu_ops { + bool (*capable)(enum iommu_cap); + + /* Domain allocation and freeing by the iommu driver */ + struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + + struct iommu_device *(*probe_device)(struct device *dev); + void (*release_device)(struct device *dev); + void (*probe_finalize)(struct device *dev); + struct iommu_group *(*device_group)(struct device *dev); + + /* Request/Free a list of reserved regions for a device */ + void (*get_resv_regions)(struct device *dev, struct list_head *list); + void (*put_resv_regions)(struct device *dev, struct list_head *list); + + int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + bool (*is_attach_deferred)(struct device *dev); + + /* Per device IOMMU features */ + bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); + bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); + int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); + int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); + + struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, + void *drvdata); + void (*sva_unbind)(struct iommu_sva *handle); + u32 (*sva_get_pasid)(struct iommu_sva *handle); + + int (*page_response)(struct device *dev, + struct iommu_fault_event *evt, + struct iommu_page_response *msg); + + int 
(*def_domain_type)(struct device *dev); + + const struct iommu_domain_ops *default_domain_ops; + unsigned long pgsize_bitmap; + struct module *owner; +}; + +/** + * struct iommu_domain_ops - domain specific operations + * @attach_dev: attach an iommu domain to a device + * @detach_dev: detach an iommu domain from a device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. @@ -207,111 +272,39 @@ struct iommu_iotlb_gather { * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address - * @probe_device: Add device to iommu driver handling - * @release_device: Remove device from iommu driver handling - * @probe_finalize: Do final setup work after the device is added to an IOMMU - * group and attached to the groups domain - * @device_group: find iommu group for a particular device * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) - * @get_resv_regions: Request list of reserved regions for a device - * @put_resv_regions: Free list of reserved regions for a device - * @apply_resv_region: Temporary helper call-back for iova reserved ranges - * @of_xlate: add OF master IDs to iommu grouping - * @is_attach_deferred: Check if domain attach should be deferred from iommu - * driver init to device driver init (default no) - * @dev_has/enable/disable_feat: per device entries to check/enable/disable - * iommu specific features. - * @dev_feat_enabled: check enabled feature - * @aux_attach/detach_dev: aux-domain specific attach/detach entries. 
- * @aux_get_pasid: get the pasid given an aux-domain - * @sva_bind: Bind process address space to device - * @sva_unbind: Unbind process address space from device - * @sva_get_pasid: Get PASID associated to a SVA handle - * @page_response: handle page request response - * @cache_invalidate: invalidate translation caches - * @sva_bind_gpasid: bind guest pasid and mm - * @sva_unbind_gpasid: unbind guest pasid and mm - * @def_domain_type: device default domain type, return value: - * - IOMMU_DOMAIN_IDENTITY: must use an identity domain - * - IOMMU_DOMAIN_DMA: must use a dma domain - * - 0: use the default setting - * @pgsize_bitmap: bitmap of all possible supported page sizes - * @owner: Driver module providing these ops + * @free: Release the domain after use. */ -struct iommu_ops { - bool (*capable)(enum iommu_cap); - - /* Domain allocation and freeing by the iommu driver */ - struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - void (*domain_free)(struct iommu_domain *); - +struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); + void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); - 
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); - struct iommu_device *(*probe_device)(struct device *dev); - void (*release_device)(struct device *dev); - void (*probe_finalize)(struct device *dev); - struct iommu_group *(*device_group)(struct device *dev); + + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + dma_addr_t iova); + int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); - /* Request/Free a list of reserved regions for a device */ - void (*get_resv_regions)(struct device *dev, struct list_head *list); - void (*put_resv_regions)(struct device *dev, struct list_head *list); - void (*apply_resv_region)(struct device *dev, - struct iommu_domain *domain, - struct iommu_resv_region *region); - - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); - bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); - - /* Per device IOMMU features */ - bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); - bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); - int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); - int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - - /* Aux-domain specific attach/detach entries */ - int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); - - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, - void *drvdata); - void (*sva_unbind)(struct iommu_sva *handle); - u32 (*sva_get_pasid)(struct iommu_sva *handle); - - int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg); - int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info 
*inv_info); - int (*sva_bind_gpasid)(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); - - int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); - - int (*def_domain_type)(struct device *dev); - - unsigned long pgsize_bitmap; - struct module *owner; + void (*free)(struct iommu_domain *domain); }; /** @@ -403,6 +396,17 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } +static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) +{ + /* + * Assume that valid ops must be installed if iommu_probe_device() + * has succeeded. The device ops are essentially for internal use + * within the IOMMU subsystem itself, so we should be able to trust + * ourselves not to misuse the helper. + */ + return dev->iommu->iommu_dev->ops; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -421,14 +425,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - void __user *uinfo); - -extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); -extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); @@ -672,9 +668,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum 
iommu_dev_features f); -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -1019,23 +1012,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline int -iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - -static inline void -iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ -} - -static inline int -iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { @@ -1051,33 +1027,6 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int -iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t pasid) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; diff --git a/include/linux/iova.h b/include/linux/iova.h index cea79cb9f26c..320a70e40233 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -21,18 +21,8 @@ struct iova { unsigned long pfn_lo; /* Lowest allocated pfn */ }; -struct iova_magazine; -struct iova_cpu_rcache; 
-#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ - -struct iova_rcache { - spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; - struct iova_cpu_rcache __percpu *cpu_rcaches; -}; +struct iova_rcache; /* holds all the iova translations for a domain */ struct iova_domain { @@ -46,7 +36,7 @@ struct iova_domain { unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + struct iova_rcache *rcaches; struct hlist_node cpuhp_dead; }; @@ -102,6 +92,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +int iova_domain_init_rcaches(struct iova_domain *iovad); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h new file mode 100644 index 000000000000..46f9b6fe306e --- /dev/null +++ b/include/linux/perf/riscv_pmu.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 SiFive + * Copyright (C) 2018 Andes Technology Corporation + * Copyright (C) 2021 Western Digital Corporation or its affiliates. 
+ * + */ + +#ifndef _ASM_RISCV_PERF_EVENT_H +#define _ASM_RISCV_PERF_EVENT_H + +#include +#include +#include + +#ifdef CONFIG_RISCV_PMU + +/* + * The RISCV_MAX_COUNTERS parameter should be specified. + */ + +#define RISCV_MAX_COUNTERS 64 +#define RISCV_OP_UNSUPP (-EOPNOTSUPP) +#define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" + +#define RISCV_PMU_STOP_FLAG_RESET 1 + +struct cpu_hw_events { + /* number of currently enabled events */ + int n_events; + /* Counter overflow interrupt */ + int irq; + /* currently enabled events */ + struct perf_event *events[RISCV_MAX_COUNTERS]; + /* currently enabled hardware counters */ + DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled firmware counters */ + DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS); +}; + +struct riscv_pmu { + struct pmu pmu; + char *name; + + irqreturn_t (*handle_irq)(int irq_num, void *dev); + + int num_counters; + u64 (*ctr_read)(struct perf_event *event); + int (*ctr_get_idx)(struct perf_event *event); + int (*ctr_get_width)(int idx); + void (*ctr_clear_idx)(struct perf_event *event); + void (*ctr_start)(struct perf_event *event, u64 init_val); + void (*ctr_stop)(struct perf_event *event, unsigned long flag); + int (*event_map)(struct perf_event *event, u64 *config); + + struct cpu_hw_events __percpu *hw_events; + struct hlist_node node; +}; + +#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); +int riscv_pmu_event_set_period(struct perf_event *event); +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); +u64 riscv_pmu_event_update(struct perf_event *event); +#ifdef CONFIG_RISCV_PMU_LEGACY +void riscv_pmu_legacy_skip_init(void); +#else +static inline void riscv_pmu_legacy_skip_init(void) {}; +#endif +struct riscv_pmu *riscv_pmu_alloc(void); + +#endif /* CONFIG_RISCV_PMU */ + +#endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/include/uapi/linux/iommu.h 
b/include/uapi/linux/iommu.h index 59178fc229ca..65d8b0234f69 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -158,185 +158,4 @@ struct iommu_page_response { __u32 code; }; -/* defines the granularity of the invalidation */ -enum iommu_inv_granularity { - IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ - IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ - IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ - IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -}; - -/** - * struct iommu_inv_addr_info - Address Selective Invalidation Structure - * - * @flags: indicates the granularity of the address-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If ARCHID bit is set, @archid is populated and the invalidation relates - * to cache entries tagged with this architecture specific ID and matching - * the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - If neither PASID or ARCHID is set, global addr invalidation applies. - * - The LEAF flag indicates whether only the leaf PTE caching needs to be - * invalidated and other paging structure caches can be preserved. 
- * @pasid: process address space ID - * @archid: architecture-specific ID - * @addr: first stage/level input address - * @granule_size: page/block size of the mapping in bytes - * @nb_granules: number of contiguous granules to be invalidated - */ -struct iommu_inv_addr_info { -#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) - __u32 flags; - __u32 archid; - __u64 pasid; - __u64 addr; - __u64 granule_size; - __u64 nb_granules; -}; - -/** - * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure - * - * @flags: indicates the granularity of the PASID-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If the ARCHID bit is set, the @archid is populated and the invalidation - * relates to cache entries tagged with this architecture specific ID and - * matching the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - At least one of PASID or ARCHID must be set. 
- * @pasid: process address space ID - * @archid: architecture-specific ID - */ -struct iommu_inv_pasid_info { -#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) - __u32 flags; - __u32 archid; - __u64 pasid; -}; - -/** - * struct iommu_cache_invalidate_info - First level/stage invalidation - * information - * @argsz: User filled size of this data - * @version: API version of this structure - * @cache: bitfield that allows to select which caches to invalidate - * @granularity: defines the lowest granularity used for the invalidation: - * domain > PASID > addr - * @padding: reserved for future use (should be zero) - * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID - * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR - * - * Not all the combinations of cache/granularity are valid: - * - * +--------------+---------------+---------------+---------------+ - * | type / | DEV_IOTLB | IOTLB | PASID | - * | granularity | | | cache | - * +==============+===============+===============+===============+ - * | DOMAIN | N/A | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | PASID | Y | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | ADDR | Y | Y | N/A | - * +--------------+---------------+---------------+---------------+ - * - * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than - * @version and @cache. - * - * If multiple cache types are invalidated simultaneously, they all - * must support the used granularity. 
- */ -struct iommu_cache_invalidate_info { - __u32 argsz; -#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; -/* IOMMU paging structure cache */ -#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -#define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; -}; - -/** - * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest - * SVA binding. - * - * @flags: VT-d PASID table entry attributes - * @pat: Page attribute table data to compute effective memory type - * @emt: Extended memory type - * - * Only guest vIOMMU selectable and effective options are passed down to - * the host IOMMU. - */ -struct iommu_gpasid_bind_data_vtd { -#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) - __u64 flags; - __u32 pat; - __u32 emt; -}; - -#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ - IOMMU_SVA_VTD_GPASID_EMTE | \ - IOMMU_SVA_VTD_GPASID_PCD | \ - IOMMU_SVA_VTD_GPASID_PWT) - -/** - * struct iommu_gpasid_bind_data - Information about device and guest PASID binding - * @argsz: User filled size of this data - * @version: Version of this data structure - * @format: PASID table entry format - * @flags: Additional information on guest bind request - * @gpgd: Guest 
page directory base of the guest mm to bind - * @hpasid: Process address space ID used for the guest mm in host IOMMU - * @gpasid: Process address space ID used for the guest mm in guest IOMMU - * @addr_width: Guest virtual address width - * @padding: Reserved for future use (should be zero) - * @vtd: Intel VT-d specific data - * - * Guest to host PASID mapping can be an identity or non-identity, where guest - * has its own PASID space. For non-identify mapping, guest to host PASID lookup - * is needed when VM programs guest PASID into an assigned device. VMM may - * trap such PASID programming then request host IOMMU driver to convert guest - * PASID to host PASID based on this bind data. - */ -struct iommu_gpasid_bind_data { - __u32 argsz; -#define IOMMU_GPASID_BIND_VERSION_1 1 - __u32 version; -#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -#define IOMMU_PASID_FORMAT_LAST 2 - __u32 format; - __u32 addr_width; -#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ - __u64 flags; - __u64 gpgd; - __u64 hpasid; - __u64 gpasid; - __u8 padding[8]; - /* Vendor specific data */ - union { - struct iommu_gpasid_bind_data_vtd vtd; - } vendor; -}; - #endif /* _UAPI_IOMMU_H */ diff --git a/kernel/extable.c b/kernel/extable.c index b6f330f0fe74..bda5e9761541 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -3,6 +3,7 @@ Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. */ +#include #include #include #include @@ -132,12 +133,33 @@ out: } /* - * On some architectures (PPC64, IA64) function pointers + * On some architectures (PPC64, IA64, PARISC) function pointers * are actually only tokens to some data that then holds the * real function address. As a result, to find if a function * pointer is part of the kernel text, we need to do some * special dereferencing first. 
*/ +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr) +{ + func_desc_t *desc = ptr; + void *p; + + if (!get_kernel_nofault(p, (void *)&desc->addr)) + ptr = p; + return ptr; +} +EXPORT_SYMBOL_GPL(dereference_function_descriptor); + +void *dereference_kernel_function_descriptor(void *ptr) +{ + if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} +#endif + int func_ptr_is_kernel_text(void *ptr) { unsigned long addr; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f..bc475e62279d 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index be8d9d877aaa..cf95d1323d4f 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -659,6 +659,11 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) strstarts(symname, "_savevr_") || strcmp(symname, ".TOC.") == 0) return 1; + + if (info->hdr->e_machine == EM_S390) + /* Expoline thunks are linked on all kernel modules during final link of .ko */ + if (strstarts(symname, 
"__s390_indirect_jump_r")) + return 1; /* Do not ignore this symbol */ return 0; } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3a8ea5ed553d..d00504c5f530 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -261,45 +261,6 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) } } -static void s390_sort_relative_table(char *extab_image, int image_size) -{ - int i; - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) + i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4)); - /* - * 0 is a special self-relative handler value, which means that - * handler should be ignored. It is safe, because it means that - * handler field points to itself, which should never happen. - * When creating extable-relative values, keep it as 0, since - * this should never occur either: it would mean that handler - * field points to the first extable entry. - */ - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler += i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } - - qsort(extab_image, image_size / 16, 16, compare_relative_table); - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) - i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4)); - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler -= i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } -} - static int do_file(char const *const fname, void *addr) { int rc = -1; @@ -340,12 +301,10 @@ static int do_file(char const *const fname, void *addr) case EM_386: case EM_AARCH64: case EM_RISCV: + case EM_S390: case EM_X86_64: custom_sort = sort_relative_table_with_data; break; - case EM_S390: - custom_sort = s390_sort_relative_table; - break; case EM_PARISC: case EM_PPC: case EM_PPC64: diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h index a758caf689d2..b6f454130463 100644 --- 
a/sound/ppc/pmac.h +++ b/sound/ppc/pmac.h @@ -26,6 +26,7 @@ #include #include #include +#include /* maximum number of fragments */ #define PMAC_MAX_FRAGS 32 diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index a8ace5cc6301..dfddb3099bfa 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -3,8 +3,7 @@ * Mapping of DWARF debug register numbers into register names. * * Copyright IBM Corp. 2010, 2017 - * Author(s): Heiko Carstens , - * Hendrik Brueckner + * Author(s): Hendrik Brueckner * */ diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 6b36b7f5dcf9..243c781f0780 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -44,6 +44,7 @@ ACCESS_NULL WRITE_RO WRITE_RO_AFTER_INIT WRITE_KERN +WRITE_OPD REFCOUNT_INC_OVERFLOW REFCOUNT_ADD_OVERFLOW REFCOUNT_INC_NOT_ZERO_OVERFLOW diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0830e63818c1..6ba95cd19e42 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -30,8 +30,10 @@ SUB_DIRS = alignment \ eeh \ vphn \ math \ + papr_attributes \ ptrace \ - security + security \ + mce endif diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 994b11af765c..7283e8b07b75 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -13,3 +13,4 @@ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 copy_mc_64 +memmove_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 3095b1f1c02b..77594e697f2f 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -13,7 +13,8 @@ 
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ - copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ + memmove_64 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ -D COPY_LOOP=test___copy_tofrom_user_base \ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \ -o $@ $^ + +$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \ + memcpy_stubs.S + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D TEST_MEMMOVE=test_memmove \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 58c1cef3e399..003e1b3d9300 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -26,6 +26,7 @@ #define _GLOBAL(A) FUNC_START(test_ ## A) #define _GLOBAL_TOC(A) _GLOBAL(A) #define _GLOBAL_TOC_KASAN(A) _GLOBAL(A) +#define _GLOBAL_KASAN(A) _GLOBAL(A) #define PPC_MTOCRF(A, B) mtocrf A, B diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S new file mode 120000 index 000000000000..db254c9a5f5c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/mem_64.S \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S new file mode 100644 index 000000000000..d9baa832fa49 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +FUNC_START(memcpy) + b test_memcpy + +FUNC_START(backwards_memcpy) + b test_backwards_memcpy diff --git 
a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c new file mode 100644 index 000000000000..1a23218b5757 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "utils.h" + +void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n); + +#define BUF_LEN 65536 +#define MAX_OFFSET 512 + +size_t max(size_t a, size_t b) +{ + if (a >= b) + return a; + return b; +} + +static int testcase_run(void) +{ + size_t i, src_off, dst_off, len; + + char *usermap = memalign(BUF_LEN, BUF_LEN); + char *kernelmap = memalign(BUF_LEN, BUF_LEN); + + assert(usermap != NULL); + assert(kernelmap != NULL); + + memset(usermap, 0, BUF_LEN); + memset(kernelmap, 0, BUF_LEN); + + for (i = 0; i < BUF_LEN; i++) { + usermap[i] = i & 0xff; + kernelmap[i] = i & 0xff; + } + + for (src_off = 0; src_off < MAX_OFFSET; src_off++) { + for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { + for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) { + + memmove(usermap + dst_off, usermap + src_off, len); + TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len); + if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) { + printf("memmove failed at %ld %ld %ld\n", + src_off, dst_off, len); + abort(); + } + } + } + } + return 0; +} + +int main(void) +{ + return test_harness(testcase_run, "memmove"); +} diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c0f2742a3a59..c422be8a42b2 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -52,6 +52,9 @@ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TAR 0x32f /* Target Address Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) +#define SPRN_PVR 0x11F + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ 
#define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ @@ -84,6 +87,7 @@ #define TEXASR_ROT 0x0000000002000000 /* MSR register bits */ +#define MSR_HV (1ul << 60) /* Hypervisor state */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ #define MSR_TS_T_LG 34 /* Trans Mem state: Active */ diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile new file mode 100644 index 000000000000..2424513982d9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -0,0 +1,7 @@ +#SPDX-License-Identifier: GPL-2.0-or-later + +TEST_GEN_PROGS := inject-ra-err + +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c new file mode 100644 index 000000000000..94323c34d9a6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vas-api.h" +#include "utils.h" + +static bool faulted; + +static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + regs->nip += 4; +} + +static int test_ra_error(void) +{ + struct vas_tx_win_open_attr attr; + int fd, *paste_addr; + char *devname = "/dev/crypto/nx-gzip"; + struct sigaction act = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + memset(&attr, 0, sizeof(attr)); + attr.version = 1; + attr.vas_id = 0; + + SKIP_IF(access(devname, F_OK)); + + fd = open(devname, O_RDWR); + FAIL_IF(fd < 0); + FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0); + FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0); + + paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL); + + /* The 
following assignment triggers exception */ + mb(); + *paste_addr = 1; + mb(); + + FAIL_IF(!faulted); + + return 0; +} + +int main(void) +{ + return test_harness(test_ra_error, "inject-ra-err"); +} + diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h new file mode 120000 index 000000000000..1455c1bcd351 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/vas-api.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore similarity index 61% rename from arch/powerpc/kernel/vdso64/.gitignore rename to tools/testing/selftests/powerpc/papr_attributes/.gitignore index 84151a7ba31d..d5f42b6d9e99 100644 --- a/arch/powerpc/kernel/vdso64/.gitignore +++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso64.lds -vdso64.so.dbg +attr_test diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile new file mode 100644 index 000000000000..e899712d49db --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := attr_test + +top_srcdir = ../../../../.. 
+include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c new file mode 100644 index 000000000000..bab0dc06e90b --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PAPR Energy attributes sniff test + * This checks if the papr folders and contents are populated relating to + * the energy and frequency attributes + * + * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +enum energy_freq_attrs { + POWER_PERFORMANCE_MODE = 1, + IDLE_POWER_SAVER_STATUS = 2, + MIN_FREQ = 3, + STAT_FREQ = 4, + MAX_FREQ = 6, + PROC_FOLDING_STATUS = 8 +}; + +enum type { + INVALID, + STR_VAL, + NUM_VAL +}; + +int value_type(int id) +{ + int val_type; + + switch (id) { + case POWER_PERFORMANCE_MODE: + case IDLE_POWER_SAVER_STATUS: + val_type = STR_VAL; + break; + case MIN_FREQ: + case STAT_FREQ: + case MAX_FREQ: + case PROC_FOLDING_STATUS: + val_type = NUM_VAL; + break; + default: + val_type = INVALID; + } + + return val_type; +} + +int verify_energy_info(void) +{ + const char *path = "/sys/firmware/papr/energy_scale_info"; + struct dirent *entry; + struct stat s; + DIR *dirp; + + if (stat(path, &s) || !S_ISDIR(s.st_mode)) + return -1; + dirp = opendir(path); + + while ((entry = readdir(dirp)) != NULL) { + char file_name[64]; + int id, attr_type; + FILE *f; + + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + id = atoi(entry->d_name); + attr_type = value_type(id); + if (attr_type == INVALID) + return -1; + + /* Check if the files exist and have data in them */ + sprintf(file_name, "%s/%d/desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + 
sprintf(file_name, "%s/%d/value", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + if (attr_type == STR_VAL) { + sprintf(file_name, "%s/%d/value_desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + } + } + + return 0; +} + +int main(void) +{ + return test_harness(verify_energy_info, "papr_attributes"); +} diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 904672fb78dd..edbd96d3b2ab 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb +all: $(TEST_GEN_PROGS) ebb sampling_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -26,25 +26,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET 
-C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb +sampling_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c index 48e3a413b15d..0c1c1bdba081 100644 --- a/tools/testing/selftests/powerpc/pmu/event.c +++ b/tools/testing/selftests/powerpc/pmu/event.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "event.h" @@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, group_fd, flags); } -void event_init_opts(struct event *e, u64 config, int type, char *name) +static void __event_init_opts(struct event *e, u64 config, + int type, char *name, bool sampling) { memset(e, 0, sizeof(*e)); @@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name) /* This has to match the structure layout in the header */ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \ PERF_FORMAT_TOTAL_TIME_RUNNING; + if (sampling) { + e->attr.sample_period = 1000; + e->attr.sample_type = PERF_SAMPLE_REGS_INTR; + e->attr.disabled = 1; + } +} + +void event_init_opts(struct event *e, u64 config, int type, char *name) +{ + __event_init_opts(e, config, type, name, false); } void event_init_named(struct event *e, u64 config, char *name) @@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config) event_init_opts(e, config, PERF_TYPE_RAW, "event"); } +void event_init_sampling(struct event *e, u64 config) +{ + __event_init_opts(e, config, PERF_TYPE_RAW, "event", true); +} + #define PERF_CURRENT_PID 0 #define PERF_NO_PID -1 #define PERF_NO_CPU -1 diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 
302eaab51706..51aad0b6d9ad 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -22,11 +22,17 @@ struct event { u64 running; u64 enabled; } result; + /* + * mmap buffer used while recording sample. + * Accessed as "struct perf_event_mmap_page" + */ + void *mmap_buffer; }; void event_init(struct event *e, u64 config); void event_init_named(struct event *e, u64 config, char *name); void event_init_opts(struct event *e, u64 config, int type, char *name); +void event_init_sampling(struct event *e, u64 config); int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd); int event_open_with_group(struct event *e, int group_fd); int event_open_with_pid(struct event *e, pid_t pid); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore new file mode 100644 index 000000000000..0fce5a694684 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -0,0 +1,11 @@ +mmcr0_exceptionbits_test +mmcr0_cc56run_test +mmcr0_pmccext_test +mmcr0_pmcjce_test +mmcr0_fc56_pmc1ce_test +mmcr0_fc56_pmc56_test +mmcr1_comb_test +mmcr2_l2l3_test +mmcr2_fcs_fch_test +mmcr3_src_test +mmcra_thresh_marked_sample_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile new file mode 100644 index 000000000000..a785c6a173b9 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ + mmcr3_src_test mmcra_thresh_marked_sample_test + +top_srcdir = ../../../../../.. 
+include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c new file mode 100644 index 000000000000..fca054bbc094 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "misc.h" + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +/* Storage for platform version */ +int pvr; +u64 platform_extended_mask; + +/* Mask and Shift for Event code fields */ +int ev_mask_pmcxsel, ev_shift_pmcxsel; //pmcxsel field +int ev_mask_marked, ev_shift_marked; //marked field +int ev_mask_comb, ev_shift_comb; //combine field +int ev_mask_unit, ev_shift_unit; //unit field +int ev_mask_pmc, ev_shift_pmc; //pmc field +int ev_mask_cache, ev_shift_cache; //Cache sel field +int ev_mask_sample, ev_shift_sample; //Random sampling field +int ev_mask_thd_sel, ev_shift_thd_sel; //thresh_sel field +int ev_mask_thd_start, ev_shift_thd_start; //thresh_start field +int ev_mask_thd_stop, ev_shift_thd_stop; //thresh_stop field +int ev_mask_thd_cmp, ev_shift_thd_cmp; //thresh cmp field +int ev_mask_sm, ev_shift_sm; //SDAR mode field +int ev_mask_rsq, ev_shift_rsq; //radix scope qual field +int ev_mask_l2l3, ev_shift_l2l3; //l2l3 sel field +int ev_mask_mmcr3_src, ev_shift_mmcr3_src; //mmcr3 field + +static void init_ev_encodes(void) +{ + ev_mask_pmcxsel = 0xff; + ev_shift_pmcxsel = 0; + ev_mask_marked = 1; + ev_shift_marked = 8; + ev_mask_unit = 0xf; + ev_shift_unit = 12; + ev_mask_pmc = 0xf; + ev_shift_pmc = 16; + ev_mask_sample = 0x1f; + ev_shift_sample = 24; + ev_mask_thd_sel = 0x7; + ev_shift_thd_sel
= 29; + ev_mask_thd_start = 0xf; + ev_shift_thd_start = 36; + ev_mask_thd_stop = 0xf; + ev_shift_thd_stop = 32; + + switch (pvr) { + case POWER10: + ev_mask_rsq = 1; + ev_shift_rsq = 9; + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 3; + ev_shift_cache = 20; + ev_mask_sm = 0x3; + ev_shift_sm = 22; + ev_mask_l2l3 = 0x1f; + ev_shift_l2l3 = 40; + ev_mask_mmcr3_src = 0x7fff; + ev_shift_mmcr3_src = 45; + break; + case POWER9: + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 0xf; + ev_shift_cache = 20; + ev_mask_thd_cmp = 0x3ff; + ev_shift_thd_cmp = 40; + ev_mask_sm = 0x3; + ev_shift_sm = 50; + break; + default: + FAIL_IF_EXIT(1); + } +} + +/* Return the extended regs mask value */ +static u64 perf_get_platform_reg_mask(void) +{ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return PERF_POWER10_MASK; + if (have_hwcap2(PPC_FEATURE2_ARCH_3_00)) + return PERF_POWER9_MASK; + + return -1; +} + +int check_extended_regs_support(void) +{ + int fd; + struct event event; + + event_init(&event, 0x1001e); + + event.attr.type = 4; + event.attr.sample_period = 1; + event.attr.disabled = 1; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.sample_regs_intr = platform_extended_mask; + + fd = event_open(&event); + if (fd != -1) + return 0; + + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + pvr = PVR_VER(mfspr(SPRN_PVR)); + + platform_extended_mask = perf_get_platform_reg_mask(); + + /* + * Check for supported platforms + * for sampling test + */ + if ((pvr != POWER10) && (pvr != POWER9)) + goto out; + + /* + * Check PMU driver registered by looking for + * PPC_FEATURE2_EBB bit in AT_HWCAP2 + */ + if (!have_hwcap2(PPC_FEATURE2_EBB)) + goto out; + + /* check if platform supports extended regs */ + if (check_extended_regs_support()) + goto out; + + init_ev_encodes(); + return 0; +out: + printf("%s: Sampling tests un-supported\n", __func__); + return -1; +} +/* + * Allocate mmap buffer of "mmap_pages" number of + * pages. 
+ */ +void *event_sample_buf_mmap(int fd, int mmap_pages) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + size_t mmap_size; + void *buff; + + if (mmap_pages <= 0) + return NULL; + + if (fd <= 0) + return NULL; + + mmap_size = page_size * (1 + mmap_pages); + buff = mmap(NULL, mmap_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (buff == MAP_FAILED) { + perror("mmap() failed."); + return NULL; + } + return buff; +} + +/* + * Post process the mmap buffer. + * - If sample_count != NULL then return count of total + * number of samples present in the mmap buffer. + * - If sample_count == NULL then return the address + * of first sample from the mmap buffer + */ +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + struct perf_event_header *header = sample_buff + page_size; + struct perf_event_mmap_page *metadata_page = sample_buff; + unsigned long data_head, data_tail; + + /* + * PERF_RECORD_SAMPLE: + * struct { + * struct perf_event_header hdr; + * u64 data[]; + * }; + */ + + data_head = metadata_page->data_head; + /* sync memory before reading sample */ + mb(); + data_tail = metadata_page->data_tail; + + /* Check for sample_count */ + if (sample_count) + *sample_count = 0; + + while (1) { + /* + * Reads the mmap data buffer by moving + * the data_tail to know the last read data. + * data_head points to head in data buffer. + * refer "struct perf_event_mmap_page" in + * "include/uapi/linux/perf_event.h". 
+ */ + if (data_head - data_tail < sizeof(header)) + return NULL; + + data_tail += sizeof(header); + if (header->type == PERF_RECORD_SAMPLE) { + *size = (header->size - sizeof(header)); + if (!sample_count) + return sample_buff + page_size + data_tail; + data_tail += *size; + *sample_count += 1; + } else { + *size = (header->size - sizeof(header)); + if ((metadata_page->data_tail + *size) > metadata_page->data_head) + data_tail = metadata_page->data_head; + else + data_tail += *size; + } + header = (struct perf_event_header *)((void *)header + header->size); + } + return NULL; +} + +int collect_samples(void *sample_buff) +{ + u64 sample_count; + size_t size = 0; + + __event_read_samples(sample_buff, &size, &sample_count); + return sample_count; +} + +static void *perf_read_first_sample(void *sample_buff, size_t *size) +{ + return __event_read_samples(sample_buff, size, NULL); +} + +u64 *get_intr_regs(struct event *event, void *sample_buff) +{ + u64 type = event->attr.sample_type; + u64 *intr_regs; + size_t size = 0; + + if ((type ^ PERF_SAMPLE_REGS_INTR)) + return NULL; + + intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); + if (!intr_regs) + return NULL; + + /* + * First entry in the sample buffer used to specify + * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access + * interrupt registers. 
+ */ + ++intr_regs; + + return intr_regs; +} + +static const unsigned int __perf_reg_mask(const char *register_name) +{ + if (!strcmp(register_name, "R0")) + return 0; + else if (!strcmp(register_name, "R1")) + return 1; + else if (!strcmp(register_name, "R2")) + return 2; + else if (!strcmp(register_name, "R3")) + return 3; + else if (!strcmp(register_name, "R4")) + return 4; + else if (!strcmp(register_name, "R5")) + return 5; + else if (!strcmp(register_name, "R6")) + return 6; + else if (!strcmp(register_name, "R7")) + return 7; + else if (!strcmp(register_name, "R8")) + return 8; + else if (!strcmp(register_name, "R9")) + return 9; + else if (!strcmp(register_name, "R10")) + return 10; + else if (!strcmp(register_name, "R11")) + return 11; + else if (!strcmp(register_name, "R12")) + return 12; + else if (!strcmp(register_name, "R13")) + return 13; + else if (!strcmp(register_name, "R14")) + return 14; + else if (!strcmp(register_name, "R15")) + return 15; + else if (!strcmp(register_name, "R16")) + return 16; + else if (!strcmp(register_name, "R17")) + return 17; + else if (!strcmp(register_name, "R18")) + return 18; + else if (!strcmp(register_name, "R19")) + return 19; + else if (!strcmp(register_name, "R20")) + return 20; + else if (!strcmp(register_name, "R21")) + return 21; + else if (!strcmp(register_name, "R22")) + return 22; + else if (!strcmp(register_name, "R23")) + return 23; + else if (!strcmp(register_name, "R24")) + return 24; + else if (!strcmp(register_name, "R25")) + return 25; + else if (!strcmp(register_name, "R26")) + return 26; + else if (!strcmp(register_name, "R27")) + return 27; + else if (!strcmp(register_name, "R28")) + return 28; + else if (!strcmp(register_name, "R29")) + return 29; + else if (!strcmp(register_name, "R30")) + return 30; + else if (!strcmp(register_name, "R31")) + return 31; + else if (!strcmp(register_name, "NIP")) + return 32; + else if (!strcmp(register_name, "MSR")) + return 33; + else if (!strcmp(register_name, 
"ORIG_R3")) + return 34; + else if (!strcmp(register_name, "CTR")) + return 35; + else if (!strcmp(register_name, "LINK")) + return 36; + else if (!strcmp(register_name, "XER")) + return 37; + else if (!strcmp(register_name, "CCR")) + return 38; + else if (!strcmp(register_name, "SOFTE")) + return 39; + else if (!strcmp(register_name, "TRAP")) + return 40; + else if (!strcmp(register_name, "DAR")) + return 41; + else if (!strcmp(register_name, "DSISR")) + return 42; + else if (!strcmp(register_name, "SIER")) + return 43; + else if (!strcmp(register_name, "MMCRA")) + return 44; + else if (!strcmp(register_name, "MMCR0")) + return 45; + else if (!strcmp(register_name, "MMCR1")) + return 46; + else if (!strcmp(register_name, "MMCR2")) + return 47; + else if (!strcmp(register_name, "MMCR3")) + return 48; + else if (!strcmp(register_name, "SIER2")) + return 49; + else if (!strcmp(register_name, "SIER3")) + return 50; + else if (!strcmp(register_name, "PMC1")) + return 51; + else if (!strcmp(register_name, "PMC2")) + return 52; + else if (!strcmp(register_name, "PMC3")) + return 53; + else if (!strcmp(register_name, "PMC4")) + return 54; + else if (!strcmp(register_name, "PMC5")) + return 55; + else if (!strcmp(register_name, "PMC6")) + return 56; + else if (!strcmp(register_name, "SDAR")) + return 57; + else if (!strcmp(register_name, "SIAR")) + return 58; + else + return -1; +} + +u64 get_reg_value(u64 *intr_regs, char *register_name) +{ + int register_bit_position; + + register_bit_position = __perf_reg_mask(register_name); + + if (register_bit_position < 0 || (!((platform_extended_mask >> + (register_bit_position - 1)) & 1))) + return -1; + + return *(intr_regs + register_bit_position); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h new file mode 100644 index 000000000000..7675f3177725 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -0,0 +1,227 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include "../event.h" + +#define POWER10 0x80 +#define POWER9 0x4e +#define PERF_POWER9_MASK 0x7f8ffffffffffff +#define PERF_POWER10_MASK 0x7ffffffffffffff + +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ +#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ +#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ +#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ + +extern int ev_mask_pmcxsel, ev_shift_pmcxsel; +extern int ev_mask_marked, ev_shift_marked; +extern int ev_mask_comb, ev_shift_comb; +extern int ev_mask_unit, ev_shift_unit; +extern int ev_mask_pmc, ev_shift_pmc; +extern int ev_mask_cache, ev_shift_cache; +extern int ev_mask_sample, ev_shift_sample; +extern int ev_mask_thd_sel, ev_shift_thd_sel; +extern int ev_mask_thd_start, ev_shift_thd_start; +extern int ev_mask_thd_stop, ev_shift_thd_stop; +extern int ev_mask_thd_cmp, ev_shift_thd_cmp; +extern int ev_mask_sm, ev_shift_sm; +extern int ev_mask_rsq, ev_shift_rsq; +extern int ev_mask_l2l3, ev_shift_l2l3; +extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; +extern int pvr; +extern u64 platform_extended_mask; +extern int check_pvr_for_sampling_tests(void); + +/* + * Event code field extraction macro. + * Raw event code is combination of multiple + * fields. 
Macro to extract individual fields + * + * x - Raw event code value + * y - Field to extract + */ +#define EV_CODE_EXTRACT(x, y) \ + ((x >> ev_shift_##y) & ev_mask_##y) + +void *event_sample_buf_mmap(int fd, int mmap_pages); +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); +int collect_samples(void *sample_buff); +u64 *get_intr_regs(struct event *event, void *sample_buff); +u64 get_reg_value(u64 *intr_regs, char *register_name); + +static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_FC56); +} + +static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_PMCCEXT); +} + +static inline int get_mmcr0_pmao(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 7) & 0x1); +} + +static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 8) & 0x1); +} + +static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 14) & 0x1); +} + +static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 15) & 0x1); +} + +static inline int get_mmcr0_pmae(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 27) & 0x1); +} + +static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff)); +} + +static inline int get_mmcr1_unit(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf); +} + +static inline int get_mmcr1_comb(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3); +} + +static inline int get_mmcr1_cache(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> 46) & 0x3); +} + +static inline int get_mmcr1_rsq(u64 mmcr1, int pmc) +{ + return mmcr1 & MMCR1_RSQ; +} + +static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcp(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9))); +} + +static inline int 
get_mmcr2_fcpc(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fch(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcti(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcta(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) +{ + if (pvr == POWER10) + return ((mmcr2 & 0xf8) >> 3); + return 0; +} + +static inline int get_mmcr3_src(u64 mmcr3, int pmc) +{ + if (pvr != POWER10) + return 0; + return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff); +} + +static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return ((mmcra >> 45) & 0x7ff); + return ((mmcra >> 45) & 0x3ff); +} + +static inline int get_mmcra_sm(u64 mmcra, int pmc) +{ + return ((mmcra >> 42) & 0x3); +} + +static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return mmcra & BHRB_DISABLE; + return 0; +} + +static inline int get_mmcra_ifm(u64 mmcra, int pmc) +{ + return ((mmcra >> 30) & 0x3); +} + +static inline int get_mmcra_thd_sel(u64 mmcra, int pmc) +{ + return ((mmcra >> 16) & 0x7); +} + +static inline int get_mmcra_thd_start(u64 mmcra, int pmc) +{ + return ((mmcra >> 12) & 0xf); +} + +static inline int 
get_mmcra_thd_stop(u64 mmcra, int pmc) +{ + return ((mmcra >> 8) & 0xf); +} + +static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc) +{ + return ((mmcra >> 4) & 0x7); +} + +static inline int get_mmcra_sample_mode(u64 mmcra, int pmc) +{ + return ((mmcra >> 1) & 0x3); +} + +static inline int get_mmcra_marked(u64 mmcra, int pmc) +{ + return mmcra & 0x1; +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c new file mode 100644 index 000000000000..ae4172f83817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: cc56run. + */ +static int mmcr0_cc56run(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that cc56run bit is set in MMCR0 */ + FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return 
test_harness(mmcr0_cc56run, "mmcr0_cc56run"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c new file mode 100644 index 000000000000..982aa56d2171 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields : pmae, pmao. + */ +static int mmcr0_exceptionbits(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmae is cleared and pmao is set in MMCR0 */ + FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5)); + FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c new file mode 100644 index 000000000000..1c1813c182c0 --- 
/dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56, pmc1ce. + */ +static int mmcr0_fc56_pmc1ce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56, pmc1ce fields are set in MMCR0 */ + FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1)); + FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c new file mode 100644 index 000000000000..332d24b5ab9c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56_pmc56 + */ +static int mmcr0_fc56_pmc56(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56 is not set in MMCR0 when using PMC5 */ + FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c new file mode 100644 index 000000000000..dfd186cd8eec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmccext + */ +static int mmcr0_pmccext(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x4001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmccext field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmccext, "mmcr0_pmccext"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c new file mode 100644 index 000000000000..fdd8ed9bf725 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmcjce + */ +static int mmcr0_pmcjce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmcjce field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c new file mode 100644 index 000000000000..5aea6499ee9a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode 0x46880 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* + * A perf sampling test for mmcr1 + * fields : comb. 
+ */ +static int mmcr1_comb(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(10000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that comb field match with + * corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) != + get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr1_comb, "mmcr1_comb"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c new file mode 100644 index 000000000000..4e242fd61b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + */ + +#include +#include +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +static bool is_hv; + +static void sig_usr2_handler(int signum, siginfo_t *info, void *data) +{ + ucontext_t *uctx = data; + + is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV); +} + +/* + * A perf sampling test for mmcr2 + * fields : fcs, fch. 
+ */ +static int mmcr2_fcs_fch(void) +{ + struct sigaction sigact = { + .sa_sigaction = sig_usr2_handler, + .sa_flags = SA_SIGINFO + }; + struct event event; + u64 *intr_regs; + + FAIL_IF(sigaction(SIGUSR2, &sigact, NULL)); + FAIL_IF(kill(getpid(), SIGUSR2)); + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that fcs and fch field of MMCR2 match + * with corresponding modifier fields. + */ + if (is_hv) + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1)); + else + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c new file mode 100644 index 000000000000..ceca597016b2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode 0x010000046080 + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */ + +/* + * A perf sampling test for mmcr2 + * fields : l2l3 + */ +static int mmcr2_l2l3(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that l2l3 field of MMCR2 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) != + get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4)); + + event_close(&event); + free(p); + + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_l2l3, "mmcr2_l2l3"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c new file mode 100644 index 000000000000..e154e2a4cc3a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x1340000001c040 + +/* + * A perf sampling test for mmcr3 + * fields. + */ +static int mmcr3_src(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that src field of MMCR3 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) != + get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr3_src, "mmcr3_src"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c new file mode 100644 index 000000000000..022cc1655eb5 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_marked_sample(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that thresh sel/start/stop, marked, random sample + * eligibility, sdar mode and sample mode fields match with + * the corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) != + get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) != + get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) != + get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) != + get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample >> 2) != + get_mmcra_rand_samp_elig(get_reg_value(intr_regs, 
"MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample & 0x3) != + get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) != + get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample"); +} diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index 83647b8277e7..d42ca8c676c3 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void) #define PM_BR_PRED_PCACHE 0x048a0 // P9 only #define PM_BR_MPRED_PCACHE 0x048b0 // P9 only -#define SPRN_PVR 287 - int spectre_v2_test(void) { enum spectre_v2_state state; diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index da23c22d5882..ef29bc16f358 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -207,6 +207,29 @@ unsigned int yield_mod_cnt, nr_abort; "addiu " INJECT_ASM_REG ", -1\n\t" \ "bnez " INJECT_ASM_REG ", 222b\n\t" \ "333:\n\t" +#elif defined(__riscv) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "t1" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "beqz " INJECT_ASM_REG ", 333f\n\t" \ + "222:\n\t" \ + "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ + "bnez " INJECT_ASM_REG ", 222b\n\t" \ + "333:\n\t" + #else #error unsupported target diff --git a/tools/testing/selftests/rseq/rseq-riscv.h 
b/tools/testing/selftests/rseq/rseq-riscv.h new file mode 100644 index 000000000000..b86642f90d7f --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -0,0 +1,677 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike + * other architectures, the ebreak instruction has no immediate field for + * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG. + * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it + * is an uncommon instruction and will raise an illegal instruction + * exception when executed in all modes. + */ +#include + +#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) +#define RSEQ_SIG 0xf1401073 /* csrw mhartid, x0 */ +#else +#error "Currently, RSEQ only supports Little-Endian version" +#endif + +#if __riscv_xlen == 64 +#define __REG_SEL(a, b) a +#elif __riscv_xlen == 32 +#define __REG_SEL(a, b) b +#endif + +#define REG_L __REG_SEL("ld ", "lw ") +#define REG_S __REG_SEL("sd ", "sw ") + +#define RISCV_FENCE(p, s) \ + __asm__ __volatile__ ("fence " #p "," #s : : : "memory") +#define rseq_smp_mb() RISCV_FENCE(rw, rw) +#define rseq_smp_rmb() RISCV_FENCE(r, r) +#define rseq_smp_wmb() RISCV_FENCE(w, w) +#define RSEQ_ASM_TMP_REG_1 "t6" +#define RSEQ_ASM_TMP_REG_2 "t5" +#define RSEQ_ASM_TMP_REG_3 "t4" +#define RSEQ_ASM_TMP_REG_4 "t3" + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + RISCV_FENCE(r, rw) \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + RISCV_FENCE(rw, w); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_cs, \"aw\"\n" \ + ".balign 32\n" \ +
__rseq_str(label) ":\n" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ + ".quad " __rseq_str(start_ip) ", " \ + __rseq_str(post_commit_offset) ", " \ + __rseq_str(abort_ip) "\n" \ + ".popsection\n\t" \ + ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \ + ".quad " __rseq_str(label) "b\n" \ + ".popsection\n" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + ((post_commit_ip) - (start_ip)), abort_ip) + +/* + * Exit points of a rseq critical section consist of all instructions outside + * of the critical section where a critical section can either branch to or + * reach through the normal course of its execution. The abort IP and the + * post-commit IP are already part of the __rseq_cs section and should not be + * explicitly defined as additional exit points. Knowing all exit points is + * useful to assist debuggers stepping over the critical section. + */ +#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ + ".pushsection __rseq_exit_point_array, \"aw\"\n" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \ + ".popsection\n" + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ + __rseq_str(label) ":\n" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + "j 222f\n" \ + ".balign 4\n" \ + ".long " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + "j %l[" __rseq_str(abort_label) "]\n" \ + "222:\n" + +#define RSEQ_ASM_OP_STORE(value, var) \ + REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ + "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) 
"]\n" \ + "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPNE(var, expect, label) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + __rseq_str(label) "\n" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label) + +#define RSEQ_ASM_OP_R_LOAD(var) \ + REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_STORE(var) \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ + RSEQ_ASM_TMP_REG_1 "\n" \ + REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" + +#define RSEQ_ASM_OP_R_ADD(count) \ + "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + ", %[" __rseq_str(count) "]\n" + +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + "fence rw, w\n" \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \ + "beqz %[" __rseq_str(len) "], 333f\n" \ + "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \ + "mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \ + "222:\n" \ + "lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \ + "sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \ + "addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \ + "addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \ + "addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \ + "bnez 
" RSEQ_ASM_TMP_REG_1 ", 222b\n" \ + "333:\n" + +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ + "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ + RSEQ_ASM_OP_R_ADD(off) \ + REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ + RSEQ_ASM_OP_R_ADD(inc) \ + __rseq_str(post_commit_label) ":\n" + +static inline __always_inline +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef 
RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expectnot] "r" (expectnot), + [load] "m" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} +/* rseq_addv(): asm goto sequence that checks cpu against __rseq_abi.cpu_id, then applies the load(v) / add(count) / final-store(v) ops. Returns 0 on success, -1 when control reaches the abort label. With RSEQ_COMPARE_TWICE the cpu_id check is repeated and a mismatch ends in rseq_bug(). The RSEQ_ASM_* macros are defined outside this view; the description follows their names and should be confirmed. NOTE(review): operands use __rseq_abi directly while rseq.h exposes rseq_get_abi() - confirm __rseq_abi is still declared. */ +static inline __always_inline +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v]
"m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} +/* rseq_cmpeqv_trystorev_storev(): asm goto sequence that checks cpu against __rseq_abi.cpu_id, runs the CMPEQ op on v/expect with cmpfail as its branch target, then issues a store op (newv2, v2) followed by the final store op (newv, v). Returns 0 on success, -1 via abort, 1 via cmpfail. With RSEQ_COMPARE_TWICE both checks are repeated and a mismatch ends in rseq_bug(). Macro semantics are not visible here; confirm against the RSEQ_ASM_* definitions. */ +static inline __always_inline +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} +/* rseq_cmpeqv_trystorev_storev_release(): same operand list and return codes as the non-release variant (0 success, -1 abort, 1 cmpfail), but the last op is RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3); presumably a release-ordered final store - confirm against the macro definition. */ +static inline __always_inline +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ +
RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} +/* rseq_cmpeqv_cmpeqv_storev(): asm goto sequence that checks cpu against __rseq_abi.cpu_id, runs the CMPEQ op on v/expect and then on v2/expect2 (both with cmpfail as branch target), and ends with the final store op (newv, v). Returns 0 on success, -1 via abort, 1 via cmpfail. With RSEQ_COMPARE_TWICE all three checks are repeated and a mismatch ends in rseq_bug(). Macro semantics are not visible here; confirm against the RSEQ_ASM_* definitions. */ +static inline __always_inline +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") +
RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]") + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [v2] "m" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} +/* rseq_cmpeqv_trymemcpy_storev(): asm goto sequence that checks cpu against __rseq_abi.cpu_id, runs the CMPEQ op on v/expect with cmpfail as branch target, then RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len), then the final store op (newv, v). Returns 0 on success, -1 via abort, 1 via cmpfail. Declares RSEQ_ASM_TMP_REG_1..4 as clobbered. Presumably copies len bytes from src to dst before committing newv - confirm against the macro definitions, which are outside this view. */ +static inline __always_inline +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not
allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} +/* rseq_cmpeqv_trymemcpy_storev_release(): same operand list, clobbers (RSEQ_ASM_TMP_REG_1..4) and return codes as the non-release variant (0 success, -1 abort, 1 cmpfail), but the last op is RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3); presumably a release-ordered final store - confirm against the macro definition. */ +static inline __always_inline +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 +
RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + * + * Returns 0 on success, -1 when control reaches the abort label. + * With RSEQ_COMPARE_TWICE the cpu_id check is repeated and a mismatch + * ends in rseq_bug(). + * NOTE(review): inc is passed as the [inc] operand but is not an argument of + * RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3); presumably the macro references it + * by operand name - confirm against its definition. + */ +static inline __always_inline +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [ptr] "r" (ptr), + [off] "er" (off), + [inc] "er" (inc) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 9d850b290c2e..6f7513384bf5 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -93,6 +93,8 @@ static inline struct rseq_abi *rseq_get_abi(void) #include #elif defined(__s390__) #include +#elif defined(__riscv) +#include #else #error unsupported target #endif