mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Merge cda4351252 ("Merge tag 'folio-5.18d' of git://git.infradead.org/users/willy/pagecache") into android-mainline
Steps on the way to 5.18-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I1a4a3d7d261b27fc157d08f54b189a6892602e35
This commit is contained in:
1
.mailmap
1
.mailmap
@@ -213,6 +213,7 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||
Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
||||
Koushik <raghavendra.koushik@neterion.com>
|
||||
|
||||
@@ -315,11 +315,15 @@ indeed the normal API is implemented in terms of the advanced API. The
|
||||
advanced API is only available to modules with a GPL-compatible license.
|
||||
|
||||
The advanced API is based around the xa_state. This is an opaque data
|
||||
structure which you declare on the stack using the XA_STATE()
|
||||
macro. This macro initialises the xa_state ready to start walking
|
||||
around the XArray. It is used as a cursor to maintain the position
|
||||
in the XArray and let you compose various operations together without
|
||||
having to restart from the top every time.
|
||||
structure which you declare on the stack using the XA_STATE() macro.
|
||||
This macro initialises the xa_state ready to start walking around the
|
||||
XArray. It is used as a cursor to maintain the position in the XArray
|
||||
and let you compose various operations together without having to restart
|
||||
from the top every time. The contents of the xa_state are protected by
|
||||
the rcu_read_lock() or the xas_lock(). If you need to drop whichever of
|
||||
those locks is protecting your state and tree, you must call xas_pause()
|
||||
so that future calls do not rely on the parts of the state which were
|
||||
left unprotected.
|
||||
|
||||
The xa_state is also used to store errors. You can call
|
||||
xas_error() to retrieve the error. All operations check whether
|
||||
|
||||
@@ -81,4 +81,4 @@ Example:
|
||||
};
|
||||
};
|
||||
|
||||
[1]. Documentation/devicetree/bindings/arm/idle-states.yaml
|
||||
[1]. Documentation/devicetree/bindings/cpu/idle-states.yaml
|
||||
|
||||
@@ -101,7 +101,7 @@ properties:
|
||||
bindings in [1]) must specify this property.
|
||||
|
||||
[1] Kernel documentation - ARM idle states bindings
|
||||
Documentation/devicetree/bindings/arm/idle-states.yaml
|
||||
Documentation/devicetree/bindings/cpu/idle-states.yaml
|
||||
|
||||
patternProperties:
|
||||
"^power-domain-":
|
||||
|
||||
@@ -1,25 +1,30 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/idle-states.yaml#
|
||||
$id: http://devicetree.org/schemas/cpu/idle-states.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ARM idle states binding description
|
||||
title: Idle states binding description
|
||||
|
||||
maintainers:
|
||||
- Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
|
||||
- Anup Patel <anup@brainfault.org>
|
||||
|
||||
description: |+
|
||||
==========================================
|
||||
1 - Introduction
|
||||
==========================================
|
||||
|
||||
ARM systems contain HW capable of managing power consumption dynamically,
|
||||
where cores can be put in different low-power states (ranging from simple wfi
|
||||
to power gating) according to OS PM policies. The CPU states representing the
|
||||
range of dynamic idle states that a processor can enter at run-time, can be
|
||||
specified through device tree bindings representing the parameters required to
|
||||
enter/exit specific idle states on a given processor.
|
||||
ARM and RISC-V systems contain HW capable of managing power consumption
|
||||
dynamically, where cores can be put in different low-power states (ranging
|
||||
from simple wfi to power gating) according to OS PM policies. The CPU states
|
||||
representing the range of dynamic idle states that a processor can enter at
|
||||
run-time, can be specified through device tree bindings representing the
|
||||
parameters required to enter/exit specific idle states on a given processor.
|
||||
|
||||
==========================================
|
||||
2 - ARM idle states
|
||||
==========================================
|
||||
|
||||
According to the Server Base System Architecture document (SBSA, [3]), the
|
||||
power states an ARM CPU can be put into are identified by the following list:
|
||||
@@ -43,8 +48,23 @@ description: |+
|
||||
The device tree binding definition for ARM idle states is the subject of this
|
||||
document.
|
||||
|
||||
==========================================
|
||||
3 - RISC-V idle states
|
||||
==========================================
|
||||
|
||||
On RISC-V systems, the HARTs (or CPUs) [6] can be put in platform specific
|
||||
suspend (or idle) states (ranging from simple WFI, power gating, etc). The
|
||||
RISC-V SBI v0.3 (or higher) [7] hart state management extension provides a
|
||||
standard mechanism for OS to request HART state transitions.
|
||||
|
||||
The platform specific suspend (or idle) states of a hart can be either
|
||||
retentive or non-retentive in nature. A retentive suspend state will
|
||||
preserve HART registers and CSR values for all privilege modes whereas
|
||||
a non-retentive suspend state will not preserve HART registers and CSR
|
||||
values.
|
||||
|
||||
===========================================
|
||||
2 - idle-states definitions
|
||||
4 - idle-states definitions
|
||||
===========================================
|
||||
|
||||
Idle states are characterized for a specific system through a set of
|
||||
@@ -211,10 +231,10 @@ description: |+
|
||||
properties specification that is the subject of the following sections.
|
||||
|
||||
===========================================
|
||||
3 - idle-states node
|
||||
5 - idle-states node
|
||||
===========================================
|
||||
|
||||
ARM processor idle states are defined within the idle-states node, which is
|
||||
The processor idle states are defined within the idle-states node, which is
|
||||
a direct child of the cpus node [1] and provides a container where the
|
||||
processor idle states, defined as device tree nodes, are listed.
|
||||
|
||||
@@ -223,7 +243,7 @@ description: |+
|
||||
just supports idle_standby, an idle-states node is not required.
|
||||
|
||||
===========================================
|
||||
4 - References
|
||||
6 - References
|
||||
===========================================
|
||||
|
||||
[1] ARM Linux Kernel documentation - CPUs bindings
|
||||
@@ -238,9 +258,15 @@ description: |+
|
||||
[4] ARM Architecture Reference Manuals
|
||||
http://infocenter.arm.com/help/index.jsp
|
||||
|
||||
[6] ARM Linux Kernel documentation - Booting AArch64 Linux
|
||||
[5] ARM Linux Kernel documentation - Booting AArch64 Linux
|
||||
Documentation/arm64/booting.rst
|
||||
|
||||
[6] RISC-V Linux Kernel documentation - CPUs bindings
|
||||
Documentation/devicetree/bindings/riscv/cpus.yaml
|
||||
|
||||
[7] RISC-V Supervisor Binary Interface (SBI)
|
||||
http://github.com/riscv/riscv-sbi-doc/riscv-sbi.adoc
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: idle-states
|
||||
@@ -253,7 +279,7 @@ properties:
|
||||
On ARM 32-bit systems this property is optional
|
||||
|
||||
This assumes that the "enable-method" property is set to "psci" in the cpu
|
||||
node[6] that is responsible for setting up CPU idle management in the OS
|
||||
node[5] that is responsible for setting up CPU idle management in the OS
|
||||
implementation.
|
||||
const: psci
|
||||
|
||||
@@ -265,8 +291,8 @@ patternProperties:
|
||||
as follows.
|
||||
|
||||
The idle state entered by executing the wfi instruction (idle_standby
|
||||
SBSA,[3][4]) is considered standard on all ARM platforms and therefore
|
||||
must not be listed.
|
||||
SBSA,[3][4]) is considered standard on all ARM and RISC-V platforms and
|
||||
therefore must not be listed.
|
||||
|
||||
In addition to the properties listed above, a state node may require
|
||||
additional properties specific to the entry-method defined in the
|
||||
@@ -275,7 +301,27 @@ patternProperties:
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: arm,idle-state
|
||||
enum:
|
||||
- arm,idle-state
|
||||
- riscv,idle-state
|
||||
|
||||
arm,psci-suspend-param:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description: |
|
||||
power_state parameter to pass to the ARM PSCI suspend call.
|
||||
|
||||
Device tree nodes that require usage of PSCI CPU_SUSPEND function
|
||||
(i.e. idle states node with entry-method property is set to "psci")
|
||||
must specify this property.
|
||||
|
||||
riscv,sbi-suspend-param:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description: |
|
||||
suspend_type parameter to pass to the RISC-V SBI HSM suspend call.
|
||||
|
||||
This property is required in idle state nodes of device tree meant
|
||||
for RISC-V systems. For more details on the suspend_type parameter
|
||||
refer to the SBI specification v0.3 (or higher) [7].
|
||||
|
||||
local-timer-stop:
|
||||
description:
|
||||
@@ -317,6 +363,8 @@ patternProperties:
|
||||
description:
|
||||
A string used as a descriptive name for the idle state.
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- entry-latency-us
|
||||
@@ -658,4 +706,150 @@ examples:
|
||||
};
|
||||
};
|
||||
|
||||
- |
|
||||
// Example 3 (RISC-V 64-bit, 4-cpu systems, two clusters):
|
||||
|
||||
cpus {
|
||||
#size-cells = <0>;
|
||||
#address-cells = <1>;
|
||||
|
||||
cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "riscv";
|
||||
reg = <0x0>;
|
||||
riscv,isa = "rv64imafdc";
|
||||
mmu-type = "riscv,sv48";
|
||||
cpu-idle-states = <&CPU_RET_0_0 &CPU_NONRET_0_0
|
||||
&CLUSTER_RET_0 &CLUSTER_NONRET_0>;
|
||||
|
||||
cpu_intc0: interrupt-controller {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "riscv,cpu-intc";
|
||||
interrupt-controller;
|
||||
};
|
||||
};
|
||||
|
||||
cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "riscv";
|
||||
reg = <0x1>;
|
||||
riscv,isa = "rv64imafdc";
|
||||
mmu-type = "riscv,sv48";
|
||||
cpu-idle-states = <&CPU_RET_0_0 &CPU_NONRET_0_0
|
||||
&CLUSTER_RET_0 &CLUSTER_NONRET_0>;
|
||||
|
||||
cpu_intc1: interrupt-controller {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "riscv,cpu-intc";
|
||||
interrupt-controller;
|
||||
};
|
||||
};
|
||||
|
||||
cpu@10 {
|
||||
device_type = "cpu";
|
||||
compatible = "riscv";
|
||||
reg = <0x10>;
|
||||
riscv,isa = "rv64imafdc";
|
||||
mmu-type = "riscv,sv48";
|
||||
cpu-idle-states = <&CPU_RET_1_0 &CPU_NONRET_1_0
|
||||
&CLUSTER_RET_1 &CLUSTER_NONRET_1>;
|
||||
|
||||
cpu_intc10: interrupt-controller {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "riscv,cpu-intc";
|
||||
interrupt-controller;
|
||||
};
|
||||
};
|
||||
|
||||
cpu@11 {
|
||||
device_type = "cpu";
|
||||
compatible = "riscv";
|
||||
reg = <0x11>;
|
||||
riscv,isa = "rv64imafdc";
|
||||
mmu-type = "riscv,sv48";
|
||||
cpu-idle-states = <&CPU_RET_1_0 &CPU_NONRET_1_0
|
||||
&CLUSTER_RET_1 &CLUSTER_NONRET_1>;
|
||||
|
||||
cpu_intc11: interrupt-controller {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "riscv,cpu-intc";
|
||||
interrupt-controller;
|
||||
};
|
||||
};
|
||||
|
||||
idle-states {
|
||||
CPU_RET_0_0: cpu-retentive-0-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x10000000>;
|
||||
entry-latency-us = <20>;
|
||||
exit-latency-us = <40>;
|
||||
min-residency-us = <80>;
|
||||
};
|
||||
|
||||
CPU_NONRET_0_0: cpu-nonretentive-0-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x90000000>;
|
||||
entry-latency-us = <250>;
|
||||
exit-latency-us = <500>;
|
||||
min-residency-us = <950>;
|
||||
};
|
||||
|
||||
CLUSTER_RET_0: cluster-retentive-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x11000000>;
|
||||
local-timer-stop;
|
||||
entry-latency-us = <50>;
|
||||
exit-latency-us = <100>;
|
||||
min-residency-us = <250>;
|
||||
wakeup-latency-us = <130>;
|
||||
};
|
||||
|
||||
CLUSTER_NONRET_0: cluster-nonretentive-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x91000000>;
|
||||
local-timer-stop;
|
||||
entry-latency-us = <600>;
|
||||
exit-latency-us = <1100>;
|
||||
min-residency-us = <2700>;
|
||||
wakeup-latency-us = <1500>;
|
||||
};
|
||||
|
||||
CPU_RET_1_0: cpu-retentive-1-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x10000010>;
|
||||
entry-latency-us = <20>;
|
||||
exit-latency-us = <40>;
|
||||
min-residency-us = <80>;
|
||||
};
|
||||
|
||||
CPU_NONRET_1_0: cpu-nonretentive-1-0 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x90000010>;
|
||||
entry-latency-us = <250>;
|
||||
exit-latency-us = <500>;
|
||||
min-residency-us = <950>;
|
||||
};
|
||||
|
||||
CLUSTER_RET_1: cluster-retentive-1 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x11000010>;
|
||||
local-timer-stop;
|
||||
entry-latency-us = <50>;
|
||||
exit-latency-us = <100>;
|
||||
min-residency-us = <250>;
|
||||
wakeup-latency-us = <130>;
|
||||
};
|
||||
|
||||
CLUSTER_NONRET_1: cluster-nonretentive-1 {
|
||||
compatible = "riscv,idle-state";
|
||||
riscv,sbi-suspend-param = <0x91000010>;
|
||||
local-timer-stop;
|
||||
entry-latency-us = <600>;
|
||||
exit-latency-us = <1100>;
|
||||
min-residency-us = <2700>;
|
||||
wakeup-latency-us = <1500>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
...
|
||||
@@ -99,6 +99,12 @@ properties:
|
||||
- compatible
|
||||
- interrupt-controller
|
||||
|
||||
cpu-idle-states:
|
||||
$ref: '/schemas/types.yaml#/definitions/phandle-array'
|
||||
description: |
|
||||
List of phandles to idle state nodes supported
|
||||
by this hart (see ./idle-states.yaml).
|
||||
|
||||
required:
|
||||
- riscv,isa
|
||||
- interrupt-controller
|
||||
|
||||
@@ -549,7 +549,7 @@ Pagecache
|
||||
~~~~~~~~~
|
||||
|
||||
For filesystems using Linux's pagecache, the ``->readpage()`` and
|
||||
``->readpages()`` methods must be modified to verify pages before they
|
||||
``->readahead()`` methods must be modified to verify pages before they
|
||||
are marked Uptodate. Merely hooking ``->read_iter()`` would be
|
||||
insufficient, since ``->read_iter()`` is not used for memory maps.
|
||||
|
||||
@@ -611,7 +611,7 @@ workqueue, and then the workqueue work does the decryption or
|
||||
verification. Finally, pages where no decryption or verity error
|
||||
occurred are marked Uptodate, and the pages are unlocked.
|
||||
|
||||
Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
|
||||
Files on ext4 and f2fs may contain holes. Normally, ``->readahead()``
|
||||
simply zeroes holes and sets the corresponding pages Uptodate; no bios
|
||||
are issued. To prevent this case from bypassing fs-verity, these
|
||||
filesystems use fsverity_verify_page() to verify hole pages.
|
||||
@@ -778,7 +778,7 @@ weren't already directly answered in other parts of this document.
|
||||
- To prevent bypassing verification, pages must not be marked
|
||||
Uptodate until they've been verified. Currently, each
|
||||
filesystem is responsible for marking pages Uptodate via
|
||||
``->readpages()``. Therefore, currently it's not possible for
|
||||
``->readahead()``. Therefore, currently it's not possible for
|
||||
the VFS to do the verification on its own. Changing this would
|
||||
require significant changes to the VFS and all filesystems.
|
||||
|
||||
|
||||
@@ -241,8 +241,6 @@ prototypes::
|
||||
int (*writepages)(struct address_space *, struct writeback_control *);
|
||||
bool (*dirty_folio)(struct address_space *, struct folio *folio);
|
||||
void (*readahead)(struct readahead_control *);
|
||||
int (*readpages)(struct file *filp, struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages);
|
||||
int (*write_begin)(struct file *, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata);
|
||||
@@ -274,7 +272,6 @@ readpage: yes, unlocks shared
|
||||
writepages:
|
||||
dirty_folio maybe
|
||||
readahead: yes, unlocks shared
|
||||
readpages: no shared
|
||||
write_begin: locks the page exclusive
|
||||
write_end: yes, unlocks exclusive
|
||||
bmap:
|
||||
@@ -300,9 +297,6 @@ completion.
|
||||
|
||||
->readahead() unlocks the pages that I/O is attempted on like ->readpage().
|
||||
|
||||
->readpages() populates the pagecache with the passed pages and starts
|
||||
I/O against them. They come unlocked upon I/O completion.
|
||||
|
||||
->writepage() is used for two purposes: for "memory cleansing" and for
|
||||
"sync". These are quite different operations and the behaviour may differ
|
||||
depending upon the mode.
|
||||
|
||||
@@ -726,8 +726,6 @@ cache in your filesystem. The following members are defined:
|
||||
int (*writepages)(struct address_space *, struct writeback_control *);
|
||||
bool (*dirty_folio)(struct address_space *, struct folio *);
|
||||
void (*readahead)(struct readahead_control *);
|
||||
int (*readpages)(struct file *filp, struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages);
|
||||
int (*write_begin)(struct file *, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata);
|
||||
@@ -817,15 +815,6 @@ cache in your filesystem. The following members are defined:
|
||||
completes successfully. Setting PageError on any page will be
|
||||
ignored; simply unlock the page if an I/O error occurs.
|
||||
|
||||
``readpages``
|
||||
called by the VM to read pages associated with the address_space
|
||||
object. This is essentially just a vector version of readpage.
|
||||
Instead of just one page, several pages are requested.
|
||||
readpages is only used for read-ahead, so read errors are
|
||||
ignored. If anything goes wrong, feel free to give up.
|
||||
This interface is deprecated and will be removed by the end of
|
||||
2020; implement readahead instead.
|
||||
|
||||
``write_begin``
|
||||
Called by the generic buffered write code to ask the filesystem
|
||||
to prepare to write len bytes at the given offset in the file.
|
||||
|
||||
@@ -7,7 +7,6 @@ RISC-V architecture
|
||||
|
||||
boot-image-header
|
||||
vm-layout
|
||||
pmu
|
||||
patch-acceptance
|
||||
|
||||
features
|
||||
|
||||
@@ -125,7 +125,6 @@ Usage
|
||||
additional function:
|
||||
|
||||
Cull:
|
||||
-c Cull by comparing stacktrace instead of total block.
|
||||
--cull <rules>
|
||||
Specify culling rules. Culling syntax is key[,key[,...]]. Choose a
|
||||
multi-letter key from the **STANDARD FORMAT SPECIFIERS** section.
|
||||
|
||||
@@ -52,8 +52,13 @@ The infrastructure may also be able to handle other conditions that make pages
|
||||
unevictable, either by definition or by circumstance, in the future.
|
||||
|
||||
|
||||
The Unevictable Page List
|
||||
-------------------------
|
||||
The Unevictable LRU Page List
|
||||
-----------------------------
|
||||
|
||||
The Unevictable LRU page list is a lie. It was never an LRU-ordered list, but a
|
||||
companion to the LRU-ordered anonymous and file, active and inactive page lists;
|
||||
and now it is not even a page list. But following familiar convention, here in
|
||||
this document and in the source, we often imagine it as a fifth LRU page list.
|
||||
|
||||
The Unevictable LRU infrastructure consists of an additional, per-node, LRU list
|
||||
called the "unevictable" list and an associated page flag, PG_unevictable, to
|
||||
@@ -63,8 +68,8 @@ The PG_unevictable flag is analogous to, and mutually exclusive with, the
|
||||
PG_active flag in that it indicates on which LRU list a page resides when
|
||||
PG_lru is set.
|
||||
|
||||
The Unevictable LRU infrastructure maintains unevictable pages on an additional
|
||||
LRU list for a few reasons:
|
||||
The Unevictable LRU infrastructure maintains unevictable pages as if they were
|
||||
on an additional LRU list for a few reasons:
|
||||
|
||||
(1) We get to "treat unevictable pages just like we treat other pages in the
|
||||
system - which means we get to use the same code to manipulate them, the
|
||||
@@ -72,13 +77,11 @@ LRU list for a few reasons:
|
||||
of the statistics, etc..." [Rik van Riel]
|
||||
|
||||
(2) We want to be able to migrate unevictable pages between nodes for memory
|
||||
defragmentation, workload management and memory hotplug. The linux kernel
|
||||
defragmentation, workload management and memory hotplug. The Linux kernel
|
||||
can only migrate pages that it can successfully isolate from the LRU
|
||||
lists. If we were to maintain pages elsewhere than on an LRU-like list,
|
||||
where they can be found by isolate_lru_page(), we would prevent their
|
||||
migration, unless we reworked migration code to find the unevictable pages
|
||||
itself.
|
||||
|
||||
lists (or "Movable" pages: outside of consideration here). If we were to
|
||||
maintain pages elsewhere than on an LRU-like list, where they can be
|
||||
detected by isolate_lru_page(), we would prevent their migration.
|
||||
|
||||
The unevictable list does not differentiate between file-backed and anonymous,
|
||||
swap-backed pages. This differentiation is only important while the pages are,
|
||||
@@ -92,8 +95,8 @@ Memory Control Group Interaction
|
||||
--------------------------------
|
||||
|
||||
The unevictable LRU facility interacts with the memory control group [aka
|
||||
memory controller; see Documentation/admin-guide/cgroup-v1/memory.rst] by extending the
|
||||
lru_list enum.
|
||||
memory controller; see Documentation/admin-guide/cgroup-v1/memory.rst] by
|
||||
extending the lru_list enum.
|
||||
|
||||
The memory controller data structure automatically gets a per-node unevictable
|
||||
list as a result of the "arrayification" of the per-node LRU lists (one per
|
||||
@@ -143,7 +146,6 @@ These are currently used in three places in the kernel:
|
||||
and this mark remains for the life of the inode.
|
||||
|
||||
(2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
|
||||
|
||||
Note that SHM_LOCK is not required to page in the locked pages if they're
|
||||
swapped out; the application must touch the pages manually if it wants to
|
||||
ensure they're in memory.
|
||||
@@ -156,19 +158,19 @@ These are currently used in three places in the kernel:
|
||||
Detecting Unevictable Pages
|
||||
---------------------------
|
||||
|
||||
The function page_evictable() in vmscan.c determines whether a page is
|
||||
The function page_evictable() in mm/internal.h determines whether a page is
|
||||
evictable or not using the query function outlined above [see section
|
||||
:ref:`Marking address spaces unevictable <mark_addr_space_unevict>`]
|
||||
to check the AS_UNEVICTABLE flag.
|
||||
|
||||
For address spaces that are so marked after being populated (as SHM regions
|
||||
might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
|
||||
might be), the lock action (e.g. SHM_LOCK) can be lazy, and need not populate
|
||||
the page tables for the region as does, for example, mlock(), nor need it make
|
||||
any special effort to push any pages in the SHM_LOCK'd area to the unevictable
|
||||
list. Instead, vmscan will do this if and when it encounters the pages during
|
||||
a reclamation scan.
|
||||
|
||||
On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan
|
||||
On an unlock action (such as SHM_UNLOCK), the unlocker (e.g. shmctl()) must scan
|
||||
the pages in the region and "rescue" them from the unevictable list if no other
|
||||
condition is keeping them unevictable. If an unevictable region is destroyed,
|
||||
the pages are also "rescued" from the unevictable list in the process of
|
||||
@@ -176,7 +178,7 @@ freeing them.
|
||||
|
||||
page_evictable() also checks for mlocked pages by testing an additional page
|
||||
flag, PG_mlocked (as wrapped by PageMlocked()), which is set when a page is
|
||||
faulted into a VM_LOCKED vma, or found in a vma being VM_LOCKED.
|
||||
faulted into a VM_LOCKED VMA, or found in a VMA being VM_LOCKED.
|
||||
|
||||
|
||||
Vmscan's Handling of Unevictable Pages
|
||||
@@ -186,28 +188,23 @@ If unevictable pages are culled in the fault path, or moved to the unevictable
|
||||
list at mlock() or mmap() time, vmscan will not encounter the pages until they
|
||||
have become evictable again (via munlock() for example) and have been "rescued"
|
||||
from the unevictable list. However, there may be situations where we decide,
|
||||
for the sake of expediency, to leave a unevictable page on one of the regular
|
||||
for the sake of expediency, to leave an unevictable page on one of the regular
|
||||
active/inactive LRU lists for vmscan to deal with. vmscan checks for such
|
||||
pages in all of the shrink_{active|inactive|page}_list() functions and will
|
||||
"cull" such pages that it encounters: that is, it diverts those pages to the
|
||||
unevictable list for the node being scanned.
|
||||
unevictable list for the memory cgroup and node being scanned.
|
||||
|
||||
There may be situations where a page is mapped into a VM_LOCKED VMA, but the
|
||||
page is not marked as PG_mlocked. Such pages will make it all the way to
|
||||
shrink_page_list() where they will be detected when vmscan walks the reverse
|
||||
map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
|
||||
shrink_page_list() will cull the page at that point.
|
||||
shrink_active_list() or shrink_page_list() where they will be detected when
|
||||
vmscan walks the reverse map in page_referenced() or try_to_unmap(). The page
|
||||
is culled to the unevictable list when it is released by the shrinker.
|
||||
|
||||
To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
|
||||
using putback_lru_page() - the inverse operation to isolate_lru_page() - after
|
||||
dropping the page lock. Because the condition which makes the page unevictable
|
||||
may change once the page is unlocked, putback_lru_page() will recheck the
|
||||
unevictable state of a page that it places on the unevictable list. If the
|
||||
page has become unevictable, putback_lru_page() removes it from the list and
|
||||
retries, including the page_unevictable() test. Because such a race is a rare
|
||||
event and movement of pages onto the unevictable list should be rare, these
|
||||
extra evictabilty checks should not occur in the majority of calls to
|
||||
putback_lru_page().
|
||||
may change once the page is unlocked, __pagevec_lru_add_fn() will recheck the
|
||||
unevictable state of a page before placing it on the unevictable list.
|
||||
|
||||
|
||||
MLOCKED Pages
|
||||
@@ -227,16 +224,25 @@ Nick posted his patch as an alternative to a patch posted by Christoph Lameter
|
||||
to achieve the same objective: hiding mlocked pages from vmscan.
|
||||
|
||||
In Nick's patch, he used one of the struct page LRU list link fields as a count
|
||||
of VM_LOCKED VMAs that map the page. This use of the link field for a count
|
||||
prevented the management of the pages on an LRU list, and thus mlocked pages
|
||||
were not migratable as isolate_lru_page() could not find them, and the LRU list
|
||||
link field was not available to the migration subsystem.
|
||||
of VM_LOCKED VMAs that map the page (Rik van Riel had the same idea three years
|
||||
earlier). But this use of the link field for a count prevented the management
|
||||
of the pages on an LRU list, and thus mlocked pages were not migratable as
|
||||
isolate_lru_page() could not detect them, and the LRU list link field was not
|
||||
available to the migration subsystem.
|
||||
|
||||
Nick resolved this by putting mlocked pages back on the lru list before
|
||||
Nick resolved this by putting mlocked pages back on the LRU list before
|
||||
attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
|
||||
Nick's patch was integrated with the Unevictable LRU work, the count was
|
||||
replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
|
||||
mapped the page. More on this below.
|
||||
replaced by walking the reverse map when munlocking, to determine whether any
|
||||
other VM_LOCKED VMAs still mapped the page.
|
||||
|
||||
However, walking the reverse map for each page when munlocking was ugly and
|
||||
inefficient, and could lead to catastrophic contention on a file's rmap lock,
|
||||
when many processes which had it mlocked were trying to exit. In 5.18, the
|
||||
idea of keeping mlock_count in Unevictable LRU list link field was revived and
|
||||
put to work, without preventing the migration of mlocked pages. This is why
|
||||
the "Unevictable LRU list" cannot be a linked list of pages now; but there was
|
||||
no use for that linked list anyway - though its size is maintained for meminfo.
|
||||
|
||||
|
||||
Basic Management
|
||||
@@ -250,22 +256,18 @@ PageMlocked() functions.
|
||||
A PG_mlocked page will be placed on the unevictable list when it is added to
|
||||
the LRU. Such pages can be "noticed" by memory management in several places:
|
||||
|
||||
(1) in the mlock()/mlockall() system call handlers;
|
||||
(1) in the mlock()/mlock2()/mlockall() system call handlers;
|
||||
|
||||
(2) in the mmap() system call handler when mmapping a region with the
|
||||
MAP_LOCKED flag;
|
||||
|
||||
(3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
|
||||
flag
|
||||
flag;
|
||||
|
||||
(4) in the fault path, if mlocked pages are "culled" in the fault path,
|
||||
and when a VM_LOCKED stack segment is expanded; or
|
||||
(4) in the fault path and when a VM_LOCKED stack segment is expanded; or
|
||||
|
||||
(5) as mentioned above, in vmscan:shrink_page_list() when attempting to
|
||||
reclaim a page in a VM_LOCKED VMA via try_to_unmap()
|
||||
|
||||
all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
|
||||
already have it set.
|
||||
reclaim a page in a VM_LOCKED VMA by page_referenced() or try_to_unmap().
|
||||
|
||||
mlocked pages become unlocked and rescued from the unevictable list when:
|
||||
|
||||
@@ -280,51 +282,53 @@ mlocked pages become unlocked and rescued from the unevictable list when:
|
||||
(4) before a page is COW'd in a VM_LOCKED VMA.
|
||||
|
||||
|
||||
mlock()/mlockall() System Call Handling
|
||||
---------------------------------------
|
||||
mlock()/mlock2()/mlockall() System Call Handling
|
||||
------------------------------------------------
|
||||
|
||||
Both [do\_]mlock() and [do\_]mlockall() system call handlers call mlock_fixup()
|
||||
mlock(), mlock2() and mlockall() system call handlers proceed to mlock_fixup()
|
||||
for each VMA in the range specified by the call. In the case of mlockall(),
|
||||
this is the entire active address space of the task. Note that mlock_fixup()
|
||||
is used for both mlocking and munlocking a range of memory. A call to mlock()
|
||||
an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
|
||||
treated as a no-op, and mlock_fixup() simply returns.
|
||||
an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED, is
|
||||
treated as a no-op and mlock_fixup() simply returns.
|
||||
|
||||
If the VMA passes some filtering as described in "Filtering Special Vmas"
|
||||
If the VMA passes some filtering as described in "Filtering Special VMAs"
|
||||
below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
|
||||
off a subset of the VMA if the range does not cover the entire VMA. Once the
|
||||
VMA has been merged or split or neither, mlock_fixup() will call
|
||||
populate_vma_page_range() to fault in the pages via get_user_pages() and to
|
||||
mark the pages as mlocked via mlock_vma_page().
|
||||
off a subset of the VMA if the range does not cover the entire VMA. Any pages
|
||||
already present in the VMA are then marked as mlocked by mlock_page() via
|
||||
mlock_pte_range() via walk_page_range() via mlock_vma_pages_range().
|
||||
|
||||
Before returning from the system call, do_mlock() or mlockall() will call
|
||||
__mm_populate() to fault in the remaining pages via get_user_pages() and to
|
||||
mark those pages as mlocked as they are faulted.
|
||||
|
||||
Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
|
||||
get_user_pages() will be unable to fault in the pages. That's okay. If pages
|
||||
do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
|
||||
fault path or in vmscan.
|
||||
do end up getting faulted into this VM_LOCKED VMA, they will be handled in the
|
||||
fault path - which is also how mlock2()'s MLOCK_ONFAULT areas are handled.
|
||||
|
||||
Also note that a page returned by get_user_pages() could be truncated or
|
||||
migrated out from under us, while we're trying to mlock it. To detect this,
|
||||
populate_vma_page_range() checks page_mapping() after acquiring the page lock.
|
||||
If the page is still associated with its mapping, we'll go ahead and call
|
||||
mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
|
||||
In the worst case, this will result in a page mapped in a VM_LOCKED VMA
|
||||
remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
|
||||
detect and cull such pages.
|
||||
For each PTE (or PMD) being faulted into a VMA, the page add rmap function
|
||||
calls mlock_vma_page(), which calls mlock_page() when the VMA is VM_LOCKED
|
||||
(unless it is a PTE mapping of a part of a transparent huge page). Or when
|
||||
it is a newly allocated anonymous page, lru_cache_add_inactive_or_unevictable()
|
||||
calls mlock_new_page() instead: similar to mlock_page(), but can make better
|
||||
judgments, since this page is held exclusively and known not to be on LRU yet.
|
||||
|
||||
mlock_vma_page() will call TestSetPageMlocked() for each page returned by
|
||||
get_user_pages(). We use TestSetPageMlocked() because the page might already
|
||||
be mlocked by another task/VMA and we don't want to do extra work. We
|
||||
especially do not want to count an mlocked page more than once in the
|
||||
statistics. If the page was already mlocked, mlock_vma_page() need do nothing
|
||||
more.
|
||||
mlock_page() sets PageMlocked immediately, then places the page on the CPU's
|
||||
mlock pagevec, to batch up the rest of the work to be done under lru_lock by
|
||||
__mlock_page(). __mlock_page() sets PageUnevictable, initializes mlock_count
|
||||
and moves the page to unevictable state ("the unevictable LRU", but with
|
||||
mlock_count in place of LRU threading). Or if the page was already PageLRU
|
||||
and PageUnevictable and PageMlocked, it simply increments the mlock_count.
|
||||
|
||||
If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
|
||||
page from the LRU, as it is likely on the appropriate active or inactive list
|
||||
at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
|
||||
back the page - by calling putback_lru_page() - which will notice that the page
|
||||
is now mlocked and divert the page to the node's unevictable list. If
|
||||
mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
|
||||
it later if and when it attempts to reclaim the page.
|
||||
But in practice that may not work ideally: the page may not yet be on an LRU, or
|
||||
it may have been temporarily isolated from LRU. In such cases the mlock_count
|
||||
field cannot be touched, but will be set to 0 later when __pagevec_lru_add_fn()
|
||||
returns the page to "LRU". Races prohibit mlock_count from being set to 1 then:
|
||||
rather than risk stranding a page indefinitely as unevictable, always err with
|
||||
mlock_count on the low side, so that when munlocked the page will be rescued to
|
||||
an evictable LRU, then perhaps be mlocked again later if vmscan finds it in a
|
||||
VM_LOCKED VMA.
|
||||
|
||||
|
||||
Filtering Special VMAs
|
||||
@@ -339,68 +343,48 @@ mlock_fixup() filters several classes of "special" VMAs:
|
||||
so there is no sense in attempting to visit them.
|
||||
|
||||
2) VMAs mapping hugetlbfs pages are already effectively pinned into memory. We
|
||||
neither need nor want to mlock() these pages. However, to preserve the
|
||||
prior behavior of mlock() - before the unevictable/mlock changes -
|
||||
mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
|
||||
allocate the huge pages and populate the ptes.
|
||||
neither need nor want to mlock() these pages. But __mm_populate() includes
|
||||
hugetlbfs ranges, allocating the huge pages and populating the PTEs.
|
||||
|
||||
3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
|
||||
such as the VDSO page, relay channel pages, etc. These pages
|
||||
are inherently unevictable and are not managed on the LRU lists.
|
||||
mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls
|
||||
make_pages_present() to populate the ptes.
|
||||
such as the VDSO page, relay channel pages, etc. These pages are inherently
|
||||
unevictable and are not managed on the LRU lists. __mm_populate() includes
|
||||
these ranges, populating the PTEs if not already populated.
|
||||
|
||||
4) VMAs with VM_MIXEDMAP set are not marked VM_LOCKED, but __mm_populate()
|
||||
includes these ranges, populating the PTEs if not already populated.
|
||||
|
||||
Note that for all of these special VMAs, mlock_fixup() does not set the
|
||||
VM_LOCKED flag. Therefore, we won't have to deal with them later during
|
||||
munlock(), munmap() or task exit. Neither does mlock_fixup() account these
|
||||
VMAs against the task's "locked_vm".
|
||||
|
||||
.. _munlock_munlockall_handling:
|
||||
|
||||
munlock()/munlockall() System Call Handling
|
||||
-------------------------------------------
|
||||
|
||||
The munlock() and munlockall() system calls are handled by the same functions -
|
||||
do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
|
||||
lock operation indicated by an argument. So, these system calls are also
|
||||
handled by mlock_fixup(). Again, if called for an already munlocked VMA,
|
||||
mlock_fixup() simply returns. Because of the VMA filtering discussed above,
|
||||
VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
|
||||
ignored for munlock.
|
||||
The munlock() and munlockall() system calls are handled by the same
|
||||
mlock_fixup() function as mlock(), mlock2() and mlockall() system calls are.
|
||||
If called to munlock an already munlocked VMA, mlock_fixup() simply returns.
|
||||
Because of the VMA filtering discussed above, VM_LOCKED will not be set in
|
||||
any "special" VMAs. So, those VMAs will be ignored for munlock.
|
||||
|
||||
If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
|
||||
specified range. The range is then munlocked via the function
|
||||
populate_vma_page_range() - the same function used to mlock a VMA range -
|
||||
passing a flag to indicate that munlock() is being performed.
|
||||
specified range. All pages in the VMA are then munlocked by munlock_page() via
|
||||
mlock_pte_range() via walk_page_range() via mlock_vma_pages_range() - the same
|
||||
function used when mlocking a VMA range, with new flags for the VMA indicating
|
||||
that it is munlock() being performed.
|
||||
|
||||
Because the VMA access protections could have been changed to PROT_NONE after
|
||||
faulting in and mlocking pages, get_user_pages() was unreliable for visiting
|
||||
these pages for munlocking. Because we don't want to leave pages mlocked,
|
||||
get_user_pages() was enhanced to accept a flag to ignore the permissions when
|
||||
fetching the pages - all of which should be resident as a result of previous
|
||||
mlocking.
|
||||
munlock_page() uses the mlock pagevec to batch up work to be done under
|
||||
lru_lock by __munlock_page(). __munlock_page() decrements the page's
|
||||
mlock_count, and when that reaches 0 it clears PageMlocked and clears
|
||||
PageUnevictable, moving the page from unevictable state to inactive LRU.
|
||||
|
||||
For munlock(), populate_vma_page_range() unlocks individual pages by calling
|
||||
munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
|
||||
flag using TestClearPageMlocked(). As with mlock_vma_page(),
|
||||
munlock_vma_page() use the Test*PageMlocked() function to handle the case where
|
||||
the page might have already been unlocked by another task. If the page was
|
||||
mlocked, munlock_vma_page() updates that zone statistics for the number of
|
||||
mlocked pages. Note, however, that at this point we haven't checked whether
|
||||
the page is mapped by other VM_LOCKED VMAs.
|
||||
|
||||
We can't call page_mlock(), the function that walks the reverse map to
|
||||
check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
|
||||
page_mlock() is a variant of try_to_unmap() and thus requires that the page
|
||||
not be on an LRU list [more on these below]. However, the call to
|
||||
isolate_lru_page() could fail, in which case we can't call page_mlock(). So,
|
||||
we go ahead and clear PG_mlocked up front, as this might be the only chance we
|
||||
have. If we can successfully isolate the page, we go ahead and call
|
||||
page_mlock(), which will restore the PG_mlocked flag and update the zone
|
||||
page statistics if it finds another VMA holding the page mlocked. If we fail
|
||||
to isolate the page, we'll have left a potentially mlocked page on the LRU.
|
||||
This is fine, because we'll catch it later if and if vmscan tries to reclaim
|
||||
the page. This should be relatively rare.
|
||||
But in practice that may not work ideally: the page may not yet have reached
|
||||
"the unevictable LRU", or it may have been temporarily isolated from it. In
|
||||
those cases its mlock_count field is unusable and must be assumed to be 0: so
|
||||
that the page will be rescued to an evictable LRU, then perhaps be mlocked
|
||||
again later if vmscan finds it in a VM_LOCKED VMA.
|
||||
|
||||
|
||||
Migrating MLOCKED Pages
|
||||
@@ -410,33 +394,38 @@ A page that is being migrated has been isolated from the LRU lists and is held
|
||||
locked across unmapping of the page, updating the page's address space entry
|
||||
and copying the contents and state, until the page table entry has been
|
||||
replaced with an entry that refers to the new page. Linux supports migration
|
||||
of mlocked pages and other unevictable pages. This involves simply moving the
|
||||
PG_mlocked and PG_unevictable states from the old page to the new page.
|
||||
of mlocked pages and other unevictable pages. PG_mlocked is cleared from the
|
||||
old page when it is unmapped from the last VM_LOCKED VMA, and set when the
|
||||
new page is mapped in place of the migration entry in a VM_LOCKED VMA. If the page
|
||||
was unevictable because mlocked, PG_unevictable follows PG_mlocked; but if the
|
||||
page was unevictable for other reasons, PG_unevictable is copied explicitly.
|
||||
|
||||
Note that page migration can race with mlocking or munlocking of the same page.
|
||||
This has been discussed from the mlock/munlock perspective in the respective
|
||||
sections above. Both processes (migration and m[un]locking) hold the page
|
||||
locked. This provides the first level of synchronization. Page migration
|
||||
zeros out the page_mapping of the old page before unlocking it, so m[un]lock
|
||||
can skip these pages by testing the page mapping under page lock.
|
||||
There is mostly no problem since page migration requires unmapping all PTEs of
|
||||
the old page (including munlock where VM_LOCKED), then mapping in the new page
|
||||
(including mlock where VM_LOCKED). The page table locks provide sufficient
|
||||
synchronization.
|
||||
|
||||
To complete page migration, we place the new and old pages back onto the LRU
|
||||
after dropping the page lock. The "unneeded" page - old page on success, new
|
||||
page on failure - will be freed when the reference count held by the migration
|
||||
process is released. To ensure that we don't strand pages on the unevictable
|
||||
list because of a race between munlock and migration, page migration uses the
|
||||
putback_lru_page() function to add migrated pages back to the LRU.
|
||||
However, since mlock_vma_pages_range() starts by setting VM_LOCKED on a VMA,
|
||||
before mlocking any pages already present, if one of those pages were migrated
|
||||
before mlock_pte_range() reached it, it would get counted twice in mlock_count.
|
||||
To prevent that, mlock_vma_pages_range() temporarily marks the VMA as VM_IO,
|
||||
so that mlock_vma_page() will skip it.
|
||||
|
||||
To complete page migration, we place the old and new pages back onto the LRU
|
||||
afterwards. The "unneeded" page - old page on success, new page on failure -
|
||||
is freed when the reference count held by the migration process is released.
|
||||
|
||||
|
||||
Compacting MLOCKED Pages
|
||||
------------------------
|
||||
|
||||
The unevictable LRU can be scanned for compactable regions and the default
|
||||
behavior is to do so. /proc/sys/vm/compact_unevictable_allowed controls
|
||||
this behavior (see Documentation/admin-guide/sysctl/vm.rst). Once scanning of the
|
||||
unevictable LRU is enabled, the work of compaction is mostly handled by
|
||||
the page migration code and the same work flow as described in MIGRATING
|
||||
MLOCKED PAGES will apply.
|
||||
The memory map can be scanned for compactable regions and the default behavior
|
||||
is to let unevictable pages be moved. /proc/sys/vm/compact_unevictable_allowed
|
||||
controls this behavior (see Documentation/admin-guide/sysctl/vm.rst). The work
|
||||
of compaction is mostly handled by the page migration code and the same work
|
||||
flow as described in Migrating MLOCKED Pages will apply.
|
||||
|
||||
|
||||
MLOCKING Transparent Huge Pages
|
||||
-------------------------------
|
||||
@@ -445,51 +434,44 @@ A transparent huge page is represented by a single entry on an LRU list.
|
||||
Therefore, we can only make unevictable an entire compound page, not
|
||||
individual subpages.
|
||||
|
||||
If a user tries to mlock() part of a huge page, we want the rest of the
|
||||
page to be reclaimable.
|
||||
If a user tries to mlock() part of a huge page, and no user mlock()s the
|
||||
whole of the huge page, we want the rest of the page to be reclaimable.
|
||||
|
||||
We cannot just split the page on partial mlock() as split_huge_page() can
|
||||
fail and new intermittent failure mode for the syscall is undesirable.
|
||||
fail and a new intermittent failure mode for the syscall is undesirable.
|
||||
|
||||
We handle this by keeping PTE-mapped huge pages on normal LRU lists: the
|
||||
PMD on border of VM_LOCKED VMA will be split into PTE table.
|
||||
We handle this by keeping PTE-mlocked huge pages on evictable LRU lists:
|
||||
the PMD on the border of a VM_LOCKED VMA will be split into a PTE table.
|
||||
|
||||
This way the huge page is accessible for vmscan. Under memory pressure the
|
||||
This way the huge page is accessible for vmscan. Under memory pressure the
|
||||
page will be split, subpages which belong to VM_LOCKED VMAs will be moved
|
||||
to unevictable LRU and the rest can be reclaimed.
|
||||
to the unevictable LRU and the rest can be reclaimed.
|
||||
|
||||
/proc/meminfo's Unevictable and Mlocked amounts do not include those parts
|
||||
of a transparent huge page which are mapped only by PTEs in VM_LOCKED VMAs.
|
||||
|
||||
See also comment in follow_trans_huge_pmd().
|
||||
|
||||
mmap(MAP_LOCKED) System Call Handling
|
||||
-------------------------------------
|
||||
|
||||
In addition the mlock()/mlockall() system calls, an application can request
|
||||
that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
|
||||
call. There is one important and subtle difference here, though. mmap() + mlock()
|
||||
will fail if the range cannot be faulted in (e.g. because mm_populate fails)
|
||||
and returns with ENOMEM while mmap(MAP_LOCKED) will not fail. The mmaped
|
||||
area will still have properties of the locked area - aka. pages will not get
|
||||
swapped out - but major page faults to fault memory in might still happen.
|
||||
In addition to the mlock(), mlock2() and mlockall() system calls, an application
|
||||
can request that a region of memory be mlocked by supplying the MAP_LOCKED flag
|
||||
to the mmap() call. There is one important and subtle difference here, though.
|
||||
mmap() + mlock() will fail if the range cannot be faulted in (e.g. because
|
||||
mm_populate fails) and returns with ENOMEM while mmap(MAP_LOCKED) will not fail.
|
||||
The mmaped area will still have properties of the locked area - pages will not
|
||||
get swapped out - but major page faults to fault memory in might still happen.
|
||||
|
||||
Furthermore, any mmap() call or brk() call that expands the heap by a
|
||||
task that has previously called mlockall() with the MCL_FUTURE flag will result
|
||||
Furthermore, any mmap() call or brk() call that expands the heap by a task
|
||||
that has previously called mlockall() with the MCL_FUTURE flag will result
|
||||
in the newly mapped memory being mlocked. Before the unevictable/mlock
|
||||
changes, the kernel simply called make_pages_present() to allocate pages and
|
||||
populate the page table.
|
||||
changes, the kernel simply called make_pages_present() to allocate pages
|
||||
and populate the page table.
|
||||
|
||||
To mlock a range of memory under the unevictable/mlock infrastructure, the
|
||||
mmap() handler and task address space expansion functions call
|
||||
To mlock a range of memory under the unevictable/mlock infrastructure,
|
||||
the mmap() handler and task address space expansion functions call
|
||||
populate_vma_page_range() specifying the vma and the address range to mlock.
|
||||
|
||||
The callers of populate_vma_page_range() will have already added the memory range
|
||||
to be mlocked to the task's "locked_vm". To account for filtered VMAs,
|
||||
populate_vma_page_range() returns the number of pages NOT mlocked. All of the
|
||||
callers then subtract a non-negative return value from the task's locked_vm. A
|
||||
negative return value represent an error - for example, from get_user_pages()
|
||||
attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
|
||||
memory range accounted as locked_vm, as the protections could be changed later
|
||||
and pages allocated into that region.
|
||||
|
||||
|
||||
munmap()/exit()/exec() System Call Handling
|
||||
-------------------------------------------
|
||||
@@ -500,81 +482,53 @@ munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
|
||||
Before the unevictable/mlock changes, mlocking did not mark the pages in any
|
||||
way, so unmapping them required no processing.
|
||||
|
||||
To munlock a range of memory under the unevictable/mlock infrastructure, the
|
||||
munmap() handler and task address space call tear down function
|
||||
munlock_vma_pages_all(). The name reflects the observation that one always
|
||||
specifies the entire VMA range when munlock()ing during unmap of a region.
|
||||
Because of the VMA filtering when mlocking() regions, only "normal" VMAs that
|
||||
actually contain mlocked pages will be passed to munlock_vma_pages_all().
|
||||
For each PTE (or PMD) being unmapped from a VMA, page_remove_rmap() calls
|
||||
munlock_vma_page(), which calls munlock_page() when the VMA is VM_LOCKED
|
||||
(unless it was a PTE mapping of a part of a transparent huge page).
|
||||
|
||||
munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
|
||||
for the munlock case, calls __munlock_vma_pages_range() to walk the page table
|
||||
for the VMA's memory range and munlock_vma_page() each resident page mapped by
|
||||
the VMA. This effectively munlocks the page, only if this is the last
|
||||
VM_LOCKED VMA that maps the page.
|
||||
munlock_page() uses the mlock pagevec to batch up work to be done under
|
||||
lru_lock by __munlock_page(). __munlock_page() decrements the page's
|
||||
mlock_count, and when that reaches 0 it clears PageMlocked and clears
|
||||
PageUnevictable, moving the page from unevictable state to inactive LRU.
|
||||
|
||||
But in practice that may not work ideally: the page may not yet have reached
|
||||
"the unevictable LRU", or it may have been temporarily isolated from it. In
|
||||
those cases its mlock_count field is unusable and must be assumed to be 0: so
|
||||
that the page will be rescued to an evictable LRU, then perhaps be mlocked
|
||||
again later if vmscan finds it in a VM_LOCKED VMA.
|
||||
|
||||
|
||||
try_to_unmap()
|
||||
--------------
|
||||
Truncating MLOCKED Pages
|
||||
------------------------
|
||||
|
||||
Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may
|
||||
have VM_LOCKED flag set. It is possible for a page mapped into one or more
|
||||
VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
|
||||
of the active or inactive LRU lists. This could happen if, for example, a task
|
||||
in the process of munlocking the page could not isolate the page from the LRU.
|
||||
As a result, vmscan/shrink_page_list() might encounter such a page as described
|
||||
in section "vmscan's handling of unevictable pages". To handle this situation,
|
||||
try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
|
||||
map.
|
||||
File truncation or hole punching forcibly unmaps the deleted pages from
|
||||
userspace; truncation even unmaps and deletes any private anonymous pages
|
||||
which had been Copied-On-Write from the file pages now being truncated.
|
||||
|
||||
try_to_unmap() is always called, by either vmscan for reclaim or for page
|
||||
migration, with the argument page locked and isolated from the LRU. Separate
|
||||
functions handle anonymous and mapped file and KSM pages, as these types of
|
||||
pages have different reverse map lookup mechanisms, with different locking.
|
||||
In each case, whether rmap_walk_anon() or rmap_walk_file() or rmap_walk_ksm(),
|
||||
it will call try_to_unmap_one() for every VMA which might contain the page.
|
||||
Mlocked pages can be munlocked and deleted in this way: like with munmap(),
|
||||
for each PTE (or PMD) being unmapped from a VMA, page_remove_rmap() calls
|
||||
munlock_vma_page(), which calls munlock_page() when the VMA is VM_LOCKED
|
||||
(unless it was a PTE mapping of a part of a transparent huge page).
|
||||
|
||||
When trying to reclaim, if try_to_unmap_one() finds the page in a VM_LOCKED
|
||||
VMA, it will then mlock the page via mlock_vma_page() instead of unmapping it,
|
||||
and return SWAP_MLOCK to indicate that the page is unevictable: and the scan
|
||||
stops there.
|
||||
|
||||
mlock_vma_page() is called while holding the page table's lock (in addition
|
||||
to the page lock, and the rmap lock): to serialize against concurrent mlock or
|
||||
munlock or munmap system calls, mm teardown (munlock_vma_pages_all), reclaim,
|
||||
holepunching, and truncation of file pages and their anonymous COWed pages.
|
||||
|
||||
|
||||
page_mlock() Reverse Map Scan
|
||||
---------------------------------
|
||||
|
||||
When munlock_vma_page() [see section :ref:`munlock()/munlockall() System Call
|
||||
Handling <munlock_munlockall_handling>` above] tries to munlock a
|
||||
page, it needs to determine whether or not the page is mapped by any
|
||||
VM_LOCKED VMA without actually attempting to unmap all PTEs from the
|
||||
page. For this purpose, the unevictable/mlock infrastructure
|
||||
introduced a variant of try_to_unmap() called page_mlock().
|
||||
|
||||
page_mlock() walks the respective reverse maps looking for VM_LOCKED VMAs. When
|
||||
such a VMA is found the page is mlocked via mlock_vma_page(). This undoes the
|
||||
pre-clearing of the page's PG_mlocked done by munlock_vma_page.
|
||||
|
||||
Note that page_mlock()'s reverse map walk must visit every VMA in a page's
|
||||
reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
|
||||
However, the scan can terminate when it encounters a VM_LOCKED VMA.
|
||||
Although page_mlock() might be called a great many times when munlocking a
|
||||
large region or tearing down a large address space that has been mlocked via
|
||||
mlockall(), overall this is a fairly rare event.
|
||||
However, if there is a racing munlock(), since mlock_vma_pages_range() starts
|
||||
munlocking by clearing VM_LOCKED from a VMA, before munlocking all the pages
|
||||
present, if one of those pages were unmapped by truncation or hole punch before
|
||||
mlock_pte_range() reached it, it would not be recognized as mlocked by this VMA,
|
||||
and would not be counted out of mlock_count. In this rare case, a page may
|
||||
still appear as PageMlocked after it has been fully unmapped: and it is left to
|
||||
release_pages() (or __page_cache_release()) to clear it and update statistics
|
||||
before freeing (this event is counted in /proc/vmstat unevictable_pgs_cleared,
|
||||
which is usually 0).
|
||||
|
||||
|
||||
Page Reclaim in shrink_*_list()
|
||||
-------------------------------
|
||||
|
||||
shrink_active_list() culls any obviously unevictable pages - i.e.
|
||||
!page_evictable(page) - diverting these to the unevictable list.
|
||||
vmscan's shrink_active_list() culls any obviously unevictable pages -
|
||||
i.e. !page_evictable(page) pages - diverting those to the unevictable list.
|
||||
However, shrink_active_list() only sees unevictable pages that made it onto the
|
||||
active/inactive lru lists. Note that these pages do not have PageUnevictable
|
||||
set - otherwise they would be on the unevictable list and shrink_active_list
|
||||
active/inactive LRU lists. Note that these pages do not have PageUnevictable
|
||||
set - otherwise they would be on the unevictable list and shrink_active_list()
|
||||
would never see them.
|
||||
|
||||
Some examples of these unevictable pages on the LRU lists are:
|
||||
@@ -586,20 +540,15 @@ Some examples of these unevictable pages on the LRU lists are:
|
||||
when an application accesses the page the first time after SHM_LOCK'ing
|
||||
the segment.
|
||||
|
||||
(3) mlocked pages that could not be isolated from the LRU and moved to the
|
||||
unevictable list in mlock_vma_page().
|
||||
(3) pages still mapped into VM_LOCKED VMAs, which should be marked mlocked,
|
||||
but events left mlock_count too low, so they were munlocked too early.
|
||||
|
||||
shrink_inactive_list() also diverts any unevictable pages that it finds on the
|
||||
inactive lists to the appropriate node's unevictable list.
|
||||
vmscan's shrink_inactive_list() and shrink_page_list() also divert obviously
|
||||
unevictable pages found on the inactive lists to the appropriate memory cgroup
|
||||
and node unevictable list.
|
||||
|
||||
shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
|
||||
after shrink_active_list() had moved them to the inactive list, or pages mapped
|
||||
into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
|
||||
recheck via page_mlock(). shrink_inactive_list() won't notice the latter,
|
||||
but will pass on to shrink_page_list().
|
||||
|
||||
shrink_page_list() again culls obviously unevictable pages that it could
|
||||
encounter for similar reason to shrink_inactive_list(). Pages mapped into
|
||||
VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
|
||||
try_to_unmap(). shrink_page_list() will divert them to the unevictable list
|
||||
when try_to_unmap() returns SWAP_MLOCK, as discussed above.
|
||||
rmap's page_referenced_one(), called via vmscan's shrink_active_list() or
|
||||
shrink_page_list(), and rmap's try_to_unmap_one() called via shrink_page_list(),
|
||||
check for (3) pages still mapped into VM_LOCKED VMAs, and call mlock_vma_page()
|
||||
to correct them. Such pages are culled to the unevictable list when released
|
||||
by the shrinker.
|
||||
|
||||
14
MAINTAINERS
14
MAINTAINERS
@@ -5157,6 +5157,20 @@ S: Supported
|
||||
F: drivers/cpuidle/cpuidle-psci.h
|
||||
F: drivers/cpuidle/cpuidle-psci-domain.c
|
||||
|
||||
CPUIDLE DRIVER - DT IDLE PM DOMAIN
|
||||
M: Ulf Hansson <ulf.hansson@linaro.org>
|
||||
L: linux-pm@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/cpuidle/dt_idle_genpd.c
|
||||
F: drivers/cpuidle/dt_idle_genpd.h
|
||||
|
||||
CPUIDLE DRIVER - RISC-V SBI
|
||||
M: Anup Patel <anup@brainfault.org>
|
||||
L: linux-pm@vger.kernel.org
|
||||
L: linux-riscv@lists.infradead.org
|
||||
S: Maintained
|
||||
F: drivers/cpuidle/cpuidle-riscv-sbi.c
|
||||
|
||||
CRAMFS FILESYSTEM
|
||||
M: Nicolas Pitre <nico@fluxnic.net>
|
||||
S: Maintained
|
||||
|
||||
@@ -134,9 +134,9 @@
|
||||
reg = <0xb4100000 0x1000>;
|
||||
interrupts = <0 105 0x4>;
|
||||
status = "disabled";
|
||||
dmas = <&dwdma0 12 0 1>,
|
||||
<&dwdma0 13 1 0>;
|
||||
dma-names = "tx", "rx";
|
||||
dmas = <&dwdma0 13 0 1>,
|
||||
<&dwdma0 12 1 0>;
|
||||
dma-names = "rx", "tx";
|
||||
};
|
||||
|
||||
thermal@e07008c4 {
|
||||
|
||||
@@ -284,9 +284,9 @@
|
||||
#size-cells = <0>;
|
||||
interrupts = <0 31 0x4>;
|
||||
status = "disabled";
|
||||
dmas = <&dwdma0 4 0 0>,
|
||||
<&dwdma0 5 0 0>;
|
||||
dma-names = "tx", "rx";
|
||||
dmas = <&dwdma0 5 0 0>,
|
||||
<&dwdma0 4 0 0>;
|
||||
dma-names = "rx", "tx";
|
||||
};
|
||||
|
||||
rtc@e0580000 {
|
||||
|
||||
@@ -59,8 +59,13 @@ static void __init omap_optee_init_check(void)
|
||||
u32 omap_secure_dispatcher(u32 idx, u32 flag, u32 nargs, u32 arg1, u32 arg2,
|
||||
u32 arg3, u32 arg4)
|
||||
{
|
||||
static u32 buf[NR_CPUS][5];
|
||||
u32 *param;
|
||||
int cpu;
|
||||
u32 ret;
|
||||
u32 param[5];
|
||||
|
||||
cpu = get_cpu();
|
||||
param = buf[cpu];
|
||||
|
||||
param[0] = nargs;
|
||||
param[1] = arg1;
|
||||
@@ -76,6 +81,8 @@ u32 omap_secure_dispatcher(u32 idx, u32 flag, u32 nargs, u32 arg1, u32 arg2,
|
||||
outer_clean_range(__pa(param), __pa(param + 5));
|
||||
ret = omap_smc2(idx, flag, __pa(param));
|
||||
|
||||
put_cpu();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -119,8 +126,8 @@ phys_addr_t omap_secure_ram_mempool_base(void)
|
||||
#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
|
||||
u32 omap3_save_secure_ram(void __iomem *addr, int size)
|
||||
{
|
||||
static u32 param[5];
|
||||
u32 ret;
|
||||
u32 param[5];
|
||||
|
||||
if (size != OMAP3_SAVE_SECURE_RAM_SZ)
|
||||
return OMAP3_SAVE_SECURE_RAM_SZ;
|
||||
@@ -153,8 +160,8 @@ u32 omap3_save_secure_ram(void __iomem *addr, int size)
|
||||
u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
|
||||
u32 arg1, u32 arg2, u32 arg3, u32 arg4)
|
||||
{
|
||||
static u32 param[5];
|
||||
u32 ret;
|
||||
u32 param[5];
|
||||
|
||||
param[0] = nargs+1; /* RX-51 needs number of arguments + 1 */
|
||||
param[1] = arg1;
|
||||
|
||||
@@ -1,4 +1,2 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb \
|
||||
amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb \
|
||||
husky.dtb
|
||||
dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "amd-seattle-soc.dtsi"
|
||||
/include/ "amd-seattle-cpus.dtsi"
|
||||
|
||||
/ {
|
||||
model = "AMD Seattle (Rev.B0) Development Board (Overdrive)";
|
||||
@@ -36,14 +37,6 @@
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio2 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio3 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio4 {
|
||||
status = "ok";
|
||||
};
|
||||
@@ -79,10 +72,6 @@
|
||||
};
|
||||
};
|
||||
|
||||
&ipmi_kcs {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&smb0 {
|
||||
/include/ "amd-seattle-xgbe-b.dtsi"
|
||||
};
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "amd-seattle-soc.dtsi"
|
||||
/include/ "amd-seattle-cpus.dtsi"
|
||||
|
||||
/ {
|
||||
model = "AMD Seattle (Rev.B1) Development Board (Overdrive)";
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* DTS file for AMD Seattle Overdrive Development Board
|
||||
*
|
||||
* Copyright (C) 2014 Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "amd-seattle-soc.dtsi"
|
||||
|
||||
/ {
|
||||
model = "AMD Seattle Development Board (Overdrive)";
|
||||
compatible = "amd,seattle-overdrive", "amd,seattle";
|
||||
|
||||
chosen {
|
||||
stdout-path = &serial0;
|
||||
};
|
||||
};
|
||||
|
||||
&ccp0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio1 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&i2c0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&pcie0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&spi0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&spi1 {
|
||||
status = "ok";
|
||||
sdcard0: sdcard@0 {
|
||||
compatible = "mmc-spi-slot";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <20000000>;
|
||||
voltage-ranges = <3200 3400>;
|
||||
gpios = <&gpio0 7 0>;
|
||||
interrupt-parent = <&gpio0>;
|
||||
interrupts = <7 3>;
|
||||
pl022,hierarchy = <0>;
|
||||
pl022,interface = <0>;
|
||||
pl022,com-mode = <0x0>;
|
||||
pl022,rx-level-trig = <0>;
|
||||
pl022,tx-level-trig = <0>;
|
||||
};
|
||||
};
|
||||
|
||||
&v2m0 {
|
||||
arm,msi-base-spi = <64>;
|
||||
arm,msi-num-spis = <256>;
|
||||
};
|
||||
224
arch/arm64/boot/dts/amd/amd-seattle-cpus.dtsi
Normal file
224
arch/arm64/boot/dts/amd/amd-seattle-cpus.dtsi
Normal file
@@ -0,0 +1,224 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/ {
|
||||
cpus {
|
||||
#address-cells = <0x1>;
|
||||
#size-cells = <0x0>;
|
||||
|
||||
cpu-map {
|
||||
cluster0 {
|
||||
core0 {
|
||||
cpu = <&CPU0>;
|
||||
};
|
||||
core1 {
|
||||
cpu = <&CPU1>;
|
||||
};
|
||||
};
|
||||
cluster1 {
|
||||
core0 {
|
||||
cpu = <&CPU2>;
|
||||
};
|
||||
core1 {
|
||||
cpu = <&CPU3>;
|
||||
};
|
||||
};
|
||||
cluster2 {
|
||||
core0 {
|
||||
cpu = <&CPU4>;
|
||||
};
|
||||
core1 {
|
||||
cpu = <&CPU5>;
|
||||
};
|
||||
};
|
||||
cluster3 {
|
||||
core0 {
|
||||
cpu = <&CPU6>;
|
||||
};
|
||||
core1 {
|
||||
cpu = <&CPU7>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
CPU0: cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x0>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_0>;
|
||||
|
||||
};
|
||||
|
||||
CPU1: cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x1>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_0>;
|
||||
};
|
||||
|
||||
CPU2: cpu@100 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x100>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_1>;
|
||||
};
|
||||
|
||||
CPU3: cpu@101 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x101>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_1>;
|
||||
};
|
||||
|
||||
CPU4: cpu@200 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x200>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_2>;
|
||||
};
|
||||
|
||||
CPU5: cpu@201 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x201>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_2>;
|
||||
};
|
||||
|
||||
CPU6: cpu@300 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x300>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_3>;
|
||||
};
|
||||
|
||||
CPU7: cpu@301 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,cortex-a57";
|
||||
reg = <0x301>;
|
||||
enable-method = "psci";
|
||||
|
||||
i-cache-size = <0xC000>;
|
||||
i-cache-line-size = <64>;
|
||||
i-cache-sets = <256>;
|
||||
d-cache-size = <0x8000>;
|
||||
d-cache-line-size = <64>;
|
||||
d-cache-sets = <256>;
|
||||
l2-cache = <&L2_3>;
|
||||
};
|
||||
};
|
||||
|
||||
L2_0: l2-cache0 {
|
||||
cache-size = <0x100000>;
|
||||
cache-line-size = <64>;
|
||||
cache-sets = <1024>;
|
||||
cache-unified;
|
||||
next-level-cache = <&L3>;
|
||||
};
|
||||
|
||||
L2_1: l2-cache1 {
|
||||
cache-size = <0x100000>;
|
||||
cache-line-size = <64>;
|
||||
cache-sets = <1024>;
|
||||
cache-unified;
|
||||
next-level-cache = <&L3>;
|
||||
};
|
||||
|
||||
L2_2: l2-cache2 {
|
||||
cache-size = <0x100000>;
|
||||
cache-line-size = <64>;
|
||||
cache-sets = <1024>;
|
||||
cache-unified;
|
||||
next-level-cache = <&L3>;
|
||||
};
|
||||
|
||||
L2_3: l2-cache3 {
|
||||
cache-size = <0x100000>;
|
||||
cache-line-size = <64>;
|
||||
cache-sets = <1024>;
|
||||
cache-unified;
|
||||
next-level-cache = <&L3>;
|
||||
};
|
||||
|
||||
L3: l3-cache {
|
||||
cache-level = <3>;
|
||||
cache-size = <0x800000>;
|
||||
cache-line-size = <64>;
|
||||
cache-sets = <8192>;
|
||||
cache-unified;
|
||||
};
|
||||
|
||||
pmu {
|
||||
compatible = "arm,cortex-a57-pmu";
|
||||
interrupts = <0x0 0x7 0x4>,
|
||||
<0x0 0x8 0x4>,
|
||||
<0x0 0x9 0x4>,
|
||||
<0x0 0xa 0x4>,
|
||||
<0x0 0xb 0x4>,
|
||||
<0x0 0xc 0x4>,
|
||||
<0x0 0xd 0x4>,
|
||||
<0x0 0xe 0x4>;
|
||||
interrupt-affinity = <&CPU0>,
|
||||
<&CPU1>,
|
||||
<&CPU2>,
|
||||
<&CPU3>,
|
||||
<&CPU4>,
|
||||
<&CPU5>,
|
||||
<&CPU6>,
|
||||
<&CPU7>;
|
||||
};
|
||||
};
|
||||
@@ -38,18 +38,6 @@
|
||||
<1 10 0xff04>;
|
||||
};
|
||||
|
||||
pmu {
|
||||
compatible = "arm,armv8-pmuv3";
|
||||
interrupts = <0 7 4>,
|
||||
<0 8 4>,
|
||||
<0 9 4>,
|
||||
<0 10 4>,
|
||||
<0 11 4>,
|
||||
<0 12 4>,
|
||||
<0 13 4>,
|
||||
<0 14 4>;
|
||||
};
|
||||
|
||||
smb0: smb {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <2>;
|
||||
@@ -70,6 +58,7 @@
|
||||
reg = <0 0xe0300000 0 0xf0000>;
|
||||
interrupts = <0 355 4>;
|
||||
clocks = <&sataclk_333mhz>;
|
||||
iommus = <&sata0_smmu 0x0 0x1f>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
@@ -80,6 +69,27 @@
|
||||
reg = <0 0xe0d00000 0 0xf0000>;
|
||||
interrupts = <0 354 4>;
|
||||
clocks = <&sataclk_333mhz>;
|
||||
iommus = <&sata1_smmu 0x0e>,
|
||||
<&sata1_smmu 0x0f>,
|
||||
<&sata1_smmu 0x1e>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
sata0_smmu: iommu@e0200000 {
|
||||
compatible = "arm,mmu-401";
|
||||
reg = <0 0xe0200000 0 0x10000>;
|
||||
#global-interrupts = <1>;
|
||||
interrupts = <0 332 4>, <0 332 4>;
|
||||
#iommu-cells = <2>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
sata1_smmu: iommu@e0c00000 {
|
||||
compatible = "arm,mmu-401";
|
||||
reg = <0 0xe0c00000 0 0x10000>;
|
||||
#global-interrupts = <1>;
|
||||
interrupts = <0 331 4>, <0 331 4>;
|
||||
#iommu-cells = <1>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
@@ -201,6 +211,10 @@
|
||||
reg = <0 0xe0100000 0 0x10000>;
|
||||
interrupts = <0 3 4>;
|
||||
dma-coherent;
|
||||
iommus = <&sata1_smmu 0x00>,
|
||||
<&sata1_smmu 0x02>,
|
||||
<&sata1_smmu 0x40>,
|
||||
<&sata1_smmu 0x42>;
|
||||
};
|
||||
|
||||
pcie0: pcie@f0000000 {
|
||||
@@ -213,12 +227,22 @@
|
||||
msi-parent = <&v2m0>;
|
||||
reg = <0 0xf0000000 0 0x10000000>;
|
||||
|
||||
interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
|
||||
interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
|
||||
interrupt-map =
|
||||
<0x1000 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x120 0x1>,
|
||||
<0x1000 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x121 0x1>,
|
||||
<0x1000 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x122 0x1>,
|
||||
<0x1000 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x123 0x1>;
|
||||
<0x1100 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x120 0x1>,
|
||||
<0x1100 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x121 0x1>,
|
||||
<0x1100 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x122 0x1>,
|
||||
<0x1100 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x123 0x1>,
|
||||
|
||||
<0x1200 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x124 0x1>,
|
||||
<0x1200 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x125 0x1>,
|
||||
<0x1200 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x126 0x1>,
|
||||
<0x1200 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x127 0x1>,
|
||||
|
||||
<0x1300 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x128 0x1>,
|
||||
<0x1300 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x129 0x1>,
|
||||
<0x1300 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x12a 0x1>,
|
||||
<0x1300 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x12b 0x1>;
|
||||
|
||||
dma-coherent;
|
||||
dma-ranges = <0x43000000 0x0 0x0 0x0 0x0 0x100 0x0>;
|
||||
@@ -227,8 +251,18 @@
|
||||
<0x01000000 0x00 0x00000000 0x00 0xefff0000 0x00 0x00010000>,
|
||||
/* 32-bit MMIO (size=2G) */
|
||||
<0x02000000 0x00 0x40000000 0x00 0x40000000 0x00 0x80000000>,
|
||||
/* 64-bit MMIO (size= 124G) */
|
||||
/* 64-bit MMIO (size= 508G) */
|
||||
<0x03000000 0x01 0x00000000 0x01 0x00000000 0x7f 0x00000000>;
|
||||
iommu-map = <0x0 &pcie_smmu 0x0 0x10000>;
|
||||
};
|
||||
|
||||
pcie_smmu: iommu@e0a00000 {
|
||||
compatible = "arm,mmu-401";
|
||||
reg = <0 0xe0a00000 0 0x10000>;
|
||||
#global-interrupts = <1>;
|
||||
interrupts = <0 333 4>, <0 333 4>;
|
||||
#iommu-cells = <1>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
/* Perf CCN504 PMU */
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
clocks = <&xgmacclk0_dma_250mhz>, <&xgmacclk0_ptp_250mhz>;
|
||||
clock-names = "dma_clk", "ptp_clk";
|
||||
phy-mode = "xgmii";
|
||||
#stream-id-cells = <16>;
|
||||
iommus = <&xgmac0_smmu 0x00 0x17>; /* 0-7, 16-23 */
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
@@ -81,11 +81,11 @@
|
||||
clocks = <&xgmacclk1_dma_250mhz>, <&xgmacclk1_ptp_250mhz>;
|
||||
clock-names = "dma_clk", "ptp_clk";
|
||||
phy-mode = "xgmii";
|
||||
#stream-id-cells = <16>;
|
||||
iommus = <&xgmac1_smmu 0x00 0x17>; /* 0-7, 16-23 */
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
xgmac0_smmu: smmu@e0600000 {
|
||||
xgmac0_smmu: iommu@e0600000 {
|
||||
compatible = "arm,mmu-401";
|
||||
reg = <0 0xe0600000 0 0x10000>;
|
||||
#global-interrupts = <1>;
|
||||
@@ -94,14 +94,11 @@
|
||||
*/
|
||||
<0 336 4>,
|
||||
<0 336 4>;
|
||||
|
||||
mmu-masters = <&xgmac0
|
||||
0 1 2 3 4 5 6 7
|
||||
16 17 18 19 20 21 22 23
|
||||
>;
|
||||
#iommu-cells = <2>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
xgmac1_smmu: smmu@e0800000 {
|
||||
xgmac1_smmu: iommu@e0800000 {
|
||||
compatible = "arm,mmu-401";
|
||||
reg = <0 0xe0800000 0 0x10000>;
|
||||
#global-interrupts = <1>;
|
||||
@@ -110,9 +107,6 @@
|
||||
*/
|
||||
<0 335 4>,
|
||||
<0 335 4>;
|
||||
|
||||
mmu-masters = <&xgmac1
|
||||
0 1 2 3 4 5 6 7
|
||||
16 17 18 19 20 21 22 23
|
||||
>;
|
||||
#iommu-cells = <2>;
|
||||
dma-coherent;
|
||||
};
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* DTS file for AMD/Linaro 96Boards Enterprise Edition Server (Husky) Board
|
||||
* Note: Based-on AMD Seattle Rev.B0
|
||||
*
|
||||
* Copyright (C) 2015 Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "amd-seattle-soc.dtsi"
|
||||
|
||||
/ {
|
||||
model = "Linaro 96Boards Enterprise Edition Server (Husky) Board";
|
||||
compatible = "amd,seattle-overdrive", "amd,seattle";
|
||||
|
||||
chosen {
|
||||
stdout-path = &serial0;
|
||||
};
|
||||
|
||||
psci {
|
||||
compatible = "arm,psci-0.2";
|
||||
method = "smc";
|
||||
};
|
||||
};
|
||||
|
||||
&ccp0 {
|
||||
status = "ok";
|
||||
amd,zlib-support = <1>;
|
||||
};
|
||||
|
||||
/**
|
||||
* NOTE: In Rev.B, gpio0 is reserved.
|
||||
*/
|
||||
&gpio1 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio2 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio3 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&gpio4 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&i2c0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&i2c1 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&pcie0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&spi0 {
|
||||
status = "ok";
|
||||
};
|
||||
|
||||
&spi1 {
|
||||
status = "ok";
|
||||
sdcard0: sdcard@0 {
|
||||
compatible = "mmc-spi-slot";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <20000000>;
|
||||
voltage-ranges = <3200 3400>;
|
||||
pl022,hierarchy = <0>;
|
||||
pl022,interface = <0>;
|
||||
pl022,com-mode = <0x0>;
|
||||
pl022,rx-level-trig = <0>;
|
||||
pl022,tx-level-trig = <0>;
|
||||
};
|
||||
};
|
||||
|
||||
&smb0 {
|
||||
/include/ "amd-seattle-xgbe-b.dtsi"
|
||||
};
|
||||
@@ -536,9 +536,9 @@
|
||||
clock-names = "i2c";
|
||||
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
|
||||
QORIQ_CLK_PLL_DIV(1)>;
|
||||
dmas = <&edma0 1 39>,
|
||||
<&edma0 1 38>;
|
||||
dma-names = "tx", "rx";
|
||||
dmas = <&edma0 1 38>,
|
||||
<&edma0 1 39>;
|
||||
dma-names = "rx", "tx";
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
|
||||
@@ -499,9 +499,9 @@
|
||||
interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
|
||||
QORIQ_CLK_PLL_DIV(2)>;
|
||||
dmas = <&edma0 1 39>,
|
||||
<&edma0 1 38>;
|
||||
dma-names = "tx", "rx";
|
||||
dmas = <&edma0 1 38>,
|
||||
<&edma0 1 39>;
|
||||
dma-names = "rx", "tx";
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ config RISCV
|
||||
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
||||
select ARCH_HAS_BINFMT_FLAT
|
||||
select ARCH_HAS_CURRENT_STACK_POINTER
|
||||
select ARCH_HAS_DEBUG_VM_PGTABLE
|
||||
select ARCH_HAS_DEBUG_VIRTUAL if MMU
|
||||
select ARCH_HAS_DEBUG_WX
|
||||
@@ -47,6 +48,7 @@ config RISCV
|
||||
select CLONE_BACKWARDS
|
||||
select CLINT_TIMER if !MMU
|
||||
select COMMON_CLK
|
||||
select CPU_PM if CPU_IDLE
|
||||
select EDAC_SUPPORT
|
||||
select GENERIC_ARCH_TOPOLOGY if SMP
|
||||
select GENERIC_ATOMIC64 if !64BIT
|
||||
@@ -533,4 +535,10 @@ source "kernel/power/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
||||
menu "CPU Power Management"
|
||||
|
||||
source "drivers/cpuidle/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
||||
source "arch/riscv/kvm/Kconfig"
|
||||
|
||||
@@ -36,6 +36,9 @@ config SOC_VIRT
|
||||
select GOLDFISH
|
||||
select RTC_DRV_GOLDFISH if RTC_CLASS
|
||||
select SIFIVE_PLIC
|
||||
select PM_GENERIC_DOMAINS if PM
|
||||
select PM_GENERIC_DOMAINS_OF if PM && OF
|
||||
select RISCV_SBI_CPUIDLE if CPU_IDLE
|
||||
help
|
||||
This enables support for QEMU Virt Machine.
|
||||
|
||||
|
||||
@@ -203,6 +203,8 @@
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <50000000>;
|
||||
spi-tx-bus-width = <4>;
|
||||
spi-rx-bus-width = <4>;
|
||||
m25p,fast-read;
|
||||
broken-flash-reset;
|
||||
};
|
||||
|
||||
@@ -205,6 +205,8 @@
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <50000000>;
|
||||
spi-tx-bus-width = <4>;
|
||||
spi-rx-bus-width = <4>;
|
||||
m25p,fast-read;
|
||||
broken-flash-reset;
|
||||
};
|
||||
|
||||
@@ -213,6 +213,8 @@
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <50000000>;
|
||||
spi-tx-bus-width = <4>;
|
||||
spi-rx-bus-width = <4>;
|
||||
m25p,fast-read;
|
||||
broken-flash-reset;
|
||||
};
|
||||
|
||||
@@ -178,6 +178,8 @@
|
||||
compatible = "jedec,spi-nor";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <50000000>;
|
||||
spi-tx-bus-width = <4>;
|
||||
spi-rx-bus-width = <4>;
|
||||
m25p,fast-read;
|
||||
broken-flash-reset;
|
||||
};
|
||||
|
||||
@@ -15,11 +15,14 @@ CONFIG_CHECKPOINT_RESTORE=y
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
CONFIG_EXPERT=y
|
||||
# CONFIG_SYSFS_SYSCALL is not set
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_SOC_MICROCHIP_POLARFIRE=y
|
||||
CONFIG_SOC_SIFIVE=y
|
||||
CONFIG_SOC_VIRT=y
|
||||
CONFIG_SMP=y
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_PM=y
|
||||
CONFIG_CPU_IDLE=y
|
||||
CONFIG_VIRTUALIZATION=y
|
||||
CONFIG_KVM=m
|
||||
CONFIG_JUMP_LABEL=y
|
||||
@@ -64,8 +67,6 @@ CONFIG_INPUT_MOUSEDEV=y
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_SERIAL_OF_PLATFORM=y
|
||||
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
|
||||
CONFIG_HVC_RISCV_SBI=y
|
||||
CONFIG_VIRTIO_CONSOLE=y
|
||||
CONFIG_HW_RANDOM=y
|
||||
CONFIG_HW_RANDOM_VIRTIO=y
|
||||
|
||||
@@ -21,7 +21,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
|
||||
# CONFIG_AIO is not set
|
||||
# CONFIG_IO_URING is not set
|
||||
# CONFIG_ADVISE_SYSCALLS is not set
|
||||
# CONFIG_MEMBARRIER is not set
|
||||
# CONFIG_KALLSYMS is not set
|
||||
CONFIG_EMBEDDED=y
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
|
||||
@@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
|
||||
# CONFIG_AIO is not set
|
||||
# CONFIG_IO_URING is not set
|
||||
# CONFIG_ADVISE_SYSCALLS is not set
|
||||
# CONFIG_MEMBARRIER is not set
|
||||
# CONFIG_KALLSYMS is not set
|
||||
CONFIG_EMBEDDED=y
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
|
||||
@@ -19,7 +19,6 @@ CONFIG_EXPERT=y
|
||||
# CONFIG_AIO is not set
|
||||
# CONFIG_IO_URING is not set
|
||||
# CONFIG_ADVISE_SYSCALLS is not set
|
||||
# CONFIG_MEMBARRIER is not set
|
||||
# CONFIG_KALLSYMS is not set
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
# CONFIG_COMPAT_BRK is not set
|
||||
|
||||
@@ -15,11 +15,14 @@ CONFIG_CHECKPOINT_RESTORE=y
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
CONFIG_EXPERT=y
|
||||
# CONFIG_SYSFS_SYSCALL is not set
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_SOC_SIFIVE=y
|
||||
CONFIG_SOC_VIRT=y
|
||||
CONFIG_ARCH_RV32I=y
|
||||
CONFIG_SMP=y
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_PM=y
|
||||
CONFIG_CPU_IDLE=y
|
||||
CONFIG_VIRTUALIZATION=y
|
||||
CONFIG_KVM=m
|
||||
CONFIG_JUMP_LABEL=y
|
||||
@@ -62,8 +65,6 @@ CONFIG_INPUT_MOUSEDEV=y
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_SERIAL_OF_PLATFORM=y
|
||||
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
|
||||
CONFIG_HVC_RISCV_SBI=y
|
||||
CONFIG_VIRTIO_CONSOLE=y
|
||||
CONFIG_HW_RANDOM=y
|
||||
CONFIG_HW_RANDOM_VIRTIO=y
|
||||
|
||||
@@ -67,4 +67,30 @@
|
||||
#error "Unexpected __SIZEOF_SHORT__"
|
||||
#endif
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
/* Common assembly source macros */
|
||||
|
||||
#ifdef CONFIG_XIP_KERNEL
|
||||
.macro XIP_FIXUP_OFFSET reg
|
||||
REG_L t0, _xip_fixup
|
||||
add \reg, \reg, t0
|
||||
.endm
|
||||
.macro XIP_FIXUP_FLASH_OFFSET reg
|
||||
la t1, __data_loc
|
||||
REG_L t1, _xip_phys_offset
|
||||
sub \reg, \reg, t1
|
||||
add \reg, \reg, t0
|
||||
.endm
|
||||
_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
|
||||
_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET
|
||||
#else
|
||||
.macro XIP_FIXUP_OFFSET reg
|
||||
.endm
|
||||
.macro XIP_FIXUP_FLASH_OFFSET reg
|
||||
.endm
|
||||
#endif /* CONFIG_XIP_KERNEL */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_RISCV_ASM_H */
|
||||
|
||||
24
arch/riscv/include/asm/cpuidle.h
Normal file
24
arch/riscv/include/asm/cpuidle.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2021 Allwinner Ltd
|
||||
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_RISCV_CPUIDLE_H
|
||||
#define _ASM_RISCV_CPUIDLE_H
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
static inline void cpu_do_idle(void)
|
||||
{
|
||||
/*
|
||||
* Add mb() here to ensure that all
|
||||
* IO/MEM accesses are completed prior
|
||||
* to entering WFI.
|
||||
*/
|
||||
mb();
|
||||
wait_for_interrupt();
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -33,6 +33,8 @@ static __always_inline struct task_struct *get_current(void)
|
||||
|
||||
#define current get_current()
|
||||
|
||||
register unsigned long current_stack_pointer __asm__("sp");
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_RISCV_CURRENT_H */
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
/* Copyright (C) 2017 Andes Technology Corporation */
|
||||
#ifdef CONFIG_MODULE_SECTIONS
|
||||
SECTIONS {
|
||||
.plt (NOLOAD) : { BYTE(0) }
|
||||
.got (NOLOAD) : { BYTE(0) }
|
||||
.got.plt (NOLOAD) : { BYTE(0) }
|
||||
.plt : { BYTE(0) }
|
||||
.got : { BYTE(0) }
|
||||
.got.plt : { BYTE(0) }
|
||||
}
|
||||
#endif
|
||||
|
||||
36
arch/riscv/include/asm/suspend.h
Normal file
36
arch/riscv/include/asm/suspend.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_RISCV_SUSPEND_H
|
||||
#define _ASM_RISCV_SUSPEND_H
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
struct suspend_context {
|
||||
/* Saved and restored by low-level functions */
|
||||
struct pt_regs regs;
|
||||
/* Saved and restored by high-level functions */
|
||||
unsigned long scratch;
|
||||
unsigned long tvec;
|
||||
unsigned long ie;
|
||||
#ifdef CONFIG_MMU
|
||||
unsigned long satp;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Low-level CPU suspend entry function */
|
||||
int __cpu_suspend_enter(struct suspend_context *context);
|
||||
|
||||
/* High-level CPU suspend which will save context and call finish() */
|
||||
int cpu_suspend(unsigned long arg,
|
||||
int (*finish)(unsigned long arg,
|
||||
unsigned long entry,
|
||||
unsigned long context));
|
||||
|
||||
/* Low-level CPU resume entry function */
|
||||
int __cpu_resume_enter(unsigned long hartid, unsigned long context);
|
||||
|
||||
#endif
|
||||
@@ -11,11 +11,17 @@
|
||||
#include <asm/page.h>
|
||||
#include <linux/const.h>
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
#define KASAN_STACK_ORDER 1
|
||||
#else
|
||||
#define KASAN_STACK_ORDER 0
|
||||
#endif
|
||||
|
||||
/* thread information allocation */
|
||||
#ifdef CONFIG_64BIT
|
||||
#define THREAD_SIZE_ORDER (2)
|
||||
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
|
||||
#else
|
||||
#define THREAD_SIZE_ORDER (1)
|
||||
#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER)
|
||||
#endif
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
|
||||
|
||||
@@ -48,6 +48,8 @@ obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
|
||||
obj-$(CONFIG_MODULES) += module.o
|
||||
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
|
||||
|
||||
obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o
|
||||
|
||||
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
|
||||
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/cpu_ops_sbi.h>
|
||||
#include <asm/suspend.h>
|
||||
|
||||
void asm_offsets(void);
|
||||
|
||||
@@ -113,6 +114,8 @@ void asm_offsets(void)
|
||||
OFFSET(PT_BADADDR, pt_regs, badaddr);
|
||||
OFFSET(PT_CAUSE, pt_regs, cause);
|
||||
|
||||
OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs);
|
||||
|
||||
OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero);
|
||||
OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra);
|
||||
OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp);
|
||||
|
||||
@@ -69,11 +69,11 @@ int riscv_of_parent_hartid(struct device_node *node)
|
||||
.uprop = #UPROP, \
|
||||
.isa_ext_id = EXTID, \
|
||||
}
|
||||
/**
|
||||
/*
|
||||
* Here are the ordering rules of extension naming defined by RISC-V
|
||||
* specification :
|
||||
* 1. All extensions should be separated from other multi-letter extensions
|
||||
* from other multi-letter extensions by an underscore.
|
||||
* by an underscore.
|
||||
* 2. The first letter following the 'Z' conventionally indicates the most
|
||||
* closely related alphabetical extension category, IMAFDQLCBKJTPVH.
|
||||
* If multiple 'Z' extensions are named, they should be ordered first
|
||||
@@ -110,7 +110,7 @@ static void print_isa_ext(struct seq_file *f)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* These are the only valid base (single letter) ISA extensions as per the spec.
|
||||
* It also specifies the canonical order in which it appears in the spec.
|
||||
* Some of the extension may just be a place holder for now (B, K, P, J).
|
||||
|
||||
@@ -21,7 +21,7 @@ const struct cpu_operations cpu_ops_sbi;
|
||||
* be invoked from multiple threads in parallel. Define a per cpu data
|
||||
* to handle that.
|
||||
*/
|
||||
DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
|
||||
static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
|
||||
|
||||
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
|
||||
unsigned long priv)
|
||||
|
||||
@@ -16,26 +16,6 @@
|
||||
#include <asm/image.h>
|
||||
#include "efi-header.S"
|
||||
|
||||
#ifdef CONFIG_XIP_KERNEL
|
||||
.macro XIP_FIXUP_OFFSET reg
|
||||
REG_L t0, _xip_fixup
|
||||
add \reg, \reg, t0
|
||||
.endm
|
||||
.macro XIP_FIXUP_FLASH_OFFSET reg
|
||||
la t0, __data_loc
|
||||
REG_L t1, _xip_phys_offset
|
||||
sub \reg, \reg, t1
|
||||
add \reg, \reg, t0
|
||||
.endm
|
||||
_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
|
||||
_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET
|
||||
#else
|
||||
.macro XIP_FIXUP_OFFSET reg
|
||||
.endm
|
||||
.macro XIP_FIXUP_FLASH_OFFSET reg
|
||||
.endm
|
||||
#endif /* CONFIG_XIP_KERNEL */
|
||||
|
||||
__HEAD
|
||||
ENTRY(_start)
|
||||
/*
|
||||
@@ -89,7 +69,8 @@ pe_head_start:
|
||||
|
||||
.align 2
|
||||
#ifdef CONFIG_MMU
|
||||
relocate:
|
||||
.global relocate_enable_mmu
|
||||
relocate_enable_mmu:
|
||||
/* Relocate return address */
|
||||
la a1, kernel_map
|
||||
XIP_FIXUP_OFFSET a1
|
||||
@@ -184,7 +165,7 @@ secondary_start_sbi:
|
||||
/* Enable virtual memory and relocate to virtual address */
|
||||
la a0, swapper_pg_dir
|
||||
XIP_FIXUP_OFFSET a0
|
||||
call relocate
|
||||
call relocate_enable_mmu
|
||||
#endif
|
||||
call setup_trap_vector
|
||||
tail smp_callin
|
||||
@@ -328,7 +309,7 @@ clear_bss_done:
|
||||
#ifdef CONFIG_MMU
|
||||
la a0, early_pg_dir
|
||||
XIP_FIXUP_OFFSET a0
|
||||
call relocate
|
||||
call relocate_enable_mmu
|
||||
#endif /* CONFIG_MMU */
|
||||
|
||||
call setup_trap_vector
|
||||
|
||||
@@ -69,7 +69,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location,
|
||||
static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location,
|
||||
Elf_Addr v)
|
||||
{
|
||||
ptrdiff_t offset = (void *)v - (void *)location;
|
||||
@@ -301,7 +301,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
|
||||
[R_RISCV_64] = apply_r_riscv_64_rela,
|
||||
[R_RISCV_BRANCH] = apply_r_riscv_branch_rela,
|
||||
[R_RISCV_JAL] = apply_r_riscv_jal_rela,
|
||||
[R_RISCV_RVC_BRANCH] = apply_r_riscv_rcv_branch_rela,
|
||||
[R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela,
|
||||
[R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela,
|
||||
[R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela,
|
||||
[R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela,
|
||||
|
||||
@@ -68,7 +68,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
|
||||
|
||||
static bool fill_callchain(void *entry, unsigned long pc)
|
||||
{
|
||||
return perf_callchain_store(entry, pc);
|
||||
return perf_callchain_store(entry, pc) == 0;
|
||||
}
|
||||
|
||||
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <asm/string.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cpuidle.h>
|
||||
|
||||
register unsigned long gp_in_global __asm__("gp");
|
||||
|
||||
@@ -37,7 +38,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
|
||||
|
||||
void arch_cpu_idle(void)
|
||||
{
|
||||
wait_for_interrupt();
|
||||
cpu_do_idle();
|
||||
raw_local_irq_enable();
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
register unsigned long sp_in_global __asm__("sp");
|
||||
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
|
||||
void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
|
||||
@@ -30,7 +28,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
|
||||
pc = instruction_pointer(regs);
|
||||
} else if (task == NULL || task == current) {
|
||||
fp = (unsigned long)__builtin_frame_address(0);
|
||||
sp = sp_in_global;
|
||||
sp = current_stack_pointer;
|
||||
pc = (unsigned long)walk_stackframe;
|
||||
} else {
|
||||
/* task blocked in __switch_to */
|
||||
@@ -78,7 +76,7 @@ void notrace walk_stackframe(struct task_struct *task,
|
||||
sp = user_stack_pointer(regs);
|
||||
pc = instruction_pointer(regs);
|
||||
} else if (task == NULL || task == current) {
|
||||
sp = sp_in_global;
|
||||
sp = current_stack_pointer;
|
||||
pc = (unsigned long)walk_stackframe;
|
||||
} else {
|
||||
/* task blocked in __switch_to */
|
||||
|
||||
87
arch/riscv/kernel/suspend.c
Normal file
87
arch/riscv/kernel/suspend.c
Normal file
@@ -0,0 +1,87 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#include <linux/ftrace.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/suspend.h>
|
||||
|
||||
static void suspend_save_csrs(struct suspend_context *context)
|
||||
{
|
||||
context->scratch = csr_read(CSR_SCRATCH);
|
||||
context->tvec = csr_read(CSR_TVEC);
|
||||
context->ie = csr_read(CSR_IE);
|
||||
|
||||
/*
|
||||
* No need to save/restore IP CSR (i.e. MIP or SIP) because:
|
||||
*
|
||||
* 1. For no-MMU (M-mode) kernel, the bits in MIP are set by
|
||||
* external devices (such as interrupt controller, timer, etc).
|
||||
* 2. For MMU (S-mode) kernel, the bits in SIP are set by
|
||||
* M-mode firmware and external devices (such as interrupt
|
||||
* controller, etc).
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
context->satp = csr_read(CSR_SATP);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void suspend_restore_csrs(struct suspend_context *context)
|
||||
{
|
||||
csr_write(CSR_SCRATCH, context->scratch);
|
||||
csr_write(CSR_TVEC, context->tvec);
|
||||
csr_write(CSR_IE, context->ie);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
csr_write(CSR_SATP, context->satp);
|
||||
#endif
|
||||
}
|
||||
|
||||
int cpu_suspend(unsigned long arg,
|
||||
int (*finish)(unsigned long arg,
|
||||
unsigned long entry,
|
||||
unsigned long context))
|
||||
{
|
||||
int rc = 0;
|
||||
struct suspend_context context = { 0 };
|
||||
|
||||
/* Finisher should be non-NULL */
|
||||
if (!finish)
|
||||
return -EINVAL;
|
||||
|
||||
/* Save additional CSRs */
|
||||
suspend_save_csrs(&context);
|
||||
|
||||
/*
|
||||
* Function graph tracer state gets inconsistent when the kernel
|
||||
* calls functions that never return (aka finishers) hence disable
|
||||
* graph tracing during their execution.
|
||||
*/
|
||||
pause_graph_tracing();
|
||||
|
||||
/* Save context on stack */
|
||||
if (__cpu_suspend_enter(&context)) {
|
||||
/* Call the finisher */
|
||||
rc = finish(arg, __pa_symbol(__cpu_resume_enter),
|
||||
(ulong)&context);
|
||||
|
||||
/*
|
||||
* Should never reach here, unless the suspend finisher
|
||||
* fails. Successful cpu_suspend() should return from
|
||||
* __cpu_resume_enter()
|
||||
*/
|
||||
if (!rc)
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Enable function graph tracer */
|
||||
unpause_graph_tracing();
|
||||
|
||||
/* Restore additional CSRs */
|
||||
suspend_restore_csrs(&context);
|
||||
|
||||
return rc;
|
||||
}
|
||||
124
arch/riscv/kernel/suspend_entry.S
Normal file
124
arch/riscv/kernel/suspend_entry.S
Normal file
@@ -0,0 +1,124 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/csr.h>
|
||||
|
||||
.text
|
||||
.altmacro
|
||||
.option norelax
|
||||
|
||||
ENTRY(__cpu_suspend_enter)
|
||||
/* Save registers (except A0 and T0-T6) */
|
||||
REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
|
||||
REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
|
||||
REG_S gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0)
|
||||
REG_S tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0)
|
||||
REG_S s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0)
|
||||
REG_S s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0)
|
||||
REG_S a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0)
|
||||
REG_S a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0)
|
||||
REG_S a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0)
|
||||
REG_S a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0)
|
||||
REG_S a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0)
|
||||
REG_S a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0)
|
||||
REG_S a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0)
|
||||
REG_S s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0)
|
||||
REG_S s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0)
|
||||
REG_S s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0)
|
||||
REG_S s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0)
|
||||
REG_S s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0)
|
||||
REG_S s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0)
|
||||
REG_S s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0)
|
||||
REG_S s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0)
|
||||
REG_S s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0)
|
||||
REG_S s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0)
|
||||
|
||||
/* Save CSRs */
|
||||
csrr t0, CSR_EPC
|
||||
REG_S t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0)
|
||||
csrr t0, CSR_STATUS
|
||||
REG_S t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0)
|
||||
csrr t0, CSR_TVAL
|
||||
REG_S t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0)
|
||||
csrr t0, CSR_CAUSE
|
||||
REG_S t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0)
|
||||
|
||||
/* Return non-zero value */
|
||||
li a0, 1
|
||||
|
||||
/* Return to C code */
|
||||
ret
|
||||
END(__cpu_suspend_enter)
|
||||
|
||||
ENTRY(__cpu_resume_enter)
|
||||
/* Load the global pointer */
|
||||
.option push
|
||||
.option norelax
|
||||
la gp, __global_pointer$
|
||||
.option pop
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/* Save A0 and A1 */
|
||||
add t0, a0, zero
|
||||
add t1, a1, zero
|
||||
|
||||
/* Enable MMU */
|
||||
la a0, swapper_pg_dir
|
||||
XIP_FIXUP_OFFSET a0
|
||||
call relocate_enable_mmu
|
||||
|
||||
/* Restore A0 and A1 */
|
||||
add a0, t0, zero
|
||||
add a1, t1, zero
|
||||
#endif
|
||||
|
||||
/* Make A0 point to suspend context */
|
||||
add a0, a1, zero
|
||||
|
||||
/* Restore CSRs */
|
||||
REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0)
|
||||
csrw CSR_EPC, t0
|
||||
REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0)
|
||||
csrw CSR_STATUS, t0
|
||||
REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0)
|
||||
csrw CSR_TVAL, t0
|
||||
REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0)
|
||||
csrw CSR_CAUSE, t0
|
||||
|
||||
/* Restore registers (except A0 and T0-T6) */
|
||||
REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
|
||||
REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
|
||||
REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0)
|
||||
REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0)
|
||||
REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0)
|
||||
REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0)
|
||||
REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0)
|
||||
REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0)
|
||||
REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0)
|
||||
REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0)
|
||||
REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0)
|
||||
REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0)
|
||||
REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0)
|
||||
REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0)
|
||||
REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0)
|
||||
REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0)
|
||||
REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0)
|
||||
REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0)
|
||||
REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0)
|
||||
REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0)
|
||||
REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0)
|
||||
REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0)
|
||||
REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0)
|
||||
|
||||
/* Return zero value */
|
||||
add a0, zero, zero
|
||||
|
||||
/* Return to C code */
|
||||
ret
|
||||
END(__cpu_resume_enter)
|
||||
@@ -58,6 +58,7 @@ config S390
|
||||
select ALTERNATE_USER_ADDRESS_SPACE
|
||||
select ARCH_32BIT_USTAT_F_TINODE
|
||||
select ARCH_BINFMT_ELF_STATE
|
||||
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
|
||||
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
|
||||
select ARCH_ENABLE_MEMORY_HOTREMOVE
|
||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
||||
|
||||
@@ -37,9 +37,15 @@
|
||||
* a 2-byte nop if the size of the area is not divisible by 6.
|
||||
*/
|
||||
.macro alt_pad_fill bytes
|
||||
.fill ( \bytes ) / 6, 6, 0xc0040000
|
||||
.fill ( \bytes ) % 6 / 4, 4, 0x47000000
|
||||
.fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700
|
||||
.rept ( \bytes ) / 6
|
||||
brcl 0,0
|
||||
.endr
|
||||
.rept ( \bytes ) % 6 / 4
|
||||
nop
|
||||
.endr
|
||||
.rept ( \bytes ) % 6 % 4 / 2
|
||||
nopr
|
||||
.endr
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
||||
@@ -71,11 +71,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
|
||||
".if " oldinstr_pad_len(num) " > 6\n" \
|
||||
"\tjg " e_oldinstr_pad_end "f\n" \
|
||||
"6620:\n" \
|
||||
"\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \
|
||||
"\t.rept (" oldinstr_pad_len(num) " - (6620b-662b)) / 2\n" \
|
||||
"\tnopr\n" \
|
||||
".else\n" \
|
||||
"\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \
|
||||
"\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \
|
||||
"\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \
|
||||
"\t.rept " oldinstr_pad_len(num) " / 6\n" \
|
||||
"\t.brcl 0,0\n" \
|
||||
"\t.endr\n" \
|
||||
"\t.rept " oldinstr_pad_len(num) " %% 6 / 4\n" \
|
||||
"\tnop\n" \
|
||||
"\t.endr\n" \
|
||||
"\t.rept " oldinstr_pad_len(num) " %% 6 %% 4 / 2\n" \
|
||||
"\tnopr\n" \
|
||||
".endr\n" \
|
||||
".endif\n"
|
||||
|
||||
#define OLDINSTR(oldinstr, num) \
|
||||
|
||||
@@ -60,11 +60,11 @@ static inline bool ap_instructions_available(void)
|
||||
unsigned long reg1 = 0;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* qid into gr0 */
|
||||
" lghi 1,0\n" /* 0 into gr1 */
|
||||
" lghi 2,0\n" /* 0 into gr2 */
|
||||
" .long 0xb2af0000\n" /* PQAP(TAPQ) */
|
||||
"0: la %[reg1],1\n" /* 1 into reg1 */
|
||||
" lgr 0,%[reg0]\n" /* qid into gr0 */
|
||||
" lghi 1,0\n" /* 0 into gr1 */
|
||||
" lghi 2,0\n" /* 0 into gr2 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
|
||||
"0: la %[reg1],1\n" /* 1 into reg1 */
|
||||
"1:\n"
|
||||
EX_TABLE(0b, 1b)
|
||||
: [reg1] "+&d" (reg1)
|
||||
@@ -86,11 +86,11 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
|
||||
unsigned long reg2;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[qid]\n" /* qid into gr0 */
|
||||
" lghi 2,0\n" /* 0 into gr2 */
|
||||
" .long 0xb2af0000\n" /* PQAP(TAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr %[reg2],2\n" /* gr2 into reg2 */
|
||||
" lgr 0,%[qid]\n" /* qid into gr0 */
|
||||
" lghi 2,0\n" /* 0 into gr2 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr %[reg2],2\n" /* gr2 into reg2 */
|
||||
: [reg1] "=&d" (reg1), [reg2] "=&d" (reg2)
|
||||
: [qid] "d" (qid)
|
||||
: "cc", "0", "1", "2");
|
||||
@@ -128,9 +128,9 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
|
||||
struct ap_queue_status reg1;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
|
||||
" .long 0xb2af0000\n" /* PQAP(RAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
: [reg1] "=&d" (reg1)
|
||||
: [reg0] "d" (reg0)
|
||||
: "cc", "0", "1");
|
||||
@@ -149,9 +149,9 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
|
||||
struct ap_queue_status reg1;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
|
||||
" .long 0xb2af0000\n" /* PQAP(ZAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr 0,%[reg0]\n" /* qid arg into gr0 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
: [reg1] "=&d" (reg1)
|
||||
: [reg0] "d" (reg0)
|
||||
: "cc", "0", "1");
|
||||
@@ -190,10 +190,10 @@ static inline int ap_qci(struct ap_config_info *config)
|
||||
struct ap_config_info *reg2 = config;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* QCI fc into gr0 */
|
||||
" lgr 2,%[reg2]\n" /* ptr to config into gr2 */
|
||||
" .long 0xb2af0000\n" /* PQAP(QCI) */
|
||||
"0: la %[reg1],0\n" /* good case, QCI fc available */
|
||||
" lgr 0,%[reg0]\n" /* QCI fc into gr0 */
|
||||
" lgr 2,%[reg2]\n" /* ptr to config into gr2 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(QCI) */
|
||||
"0: la %[reg1],0\n" /* good case, QCI fc available */
|
||||
"1:\n"
|
||||
EX_TABLE(0b, 1b)
|
||||
: [reg1] "+&d" (reg1)
|
||||
@@ -246,11 +246,11 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
|
||||
reg1.qirqctrl = qirqctrl;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* qid param into gr0 */
|
||||
" lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
|
||||
" lgr 2,%[reg2]\n" /* ni addr into gr2 */
|
||||
" .long 0xb2af0000\n" /* PQAP(AQIC) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr 0,%[reg0]\n" /* qid param into gr0 */
|
||||
" lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
|
||||
" lgr 2,%[reg2]\n" /* ni addr into gr2 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
: [reg1] "+&d" (reg1)
|
||||
: [reg0] "d" (reg0), [reg2] "d" (reg2)
|
||||
: "cc", "0", "1", "2");
|
||||
@@ -297,11 +297,11 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
|
||||
reg1.value = apinfo->val;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[reg0]\n" /* qid param into gr0 */
|
||||
" lgr 1,%[reg1]\n" /* qact in info into gr1 */
|
||||
" .long 0xb2af0000\n" /* PQAP(QACT) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr %[reg2],2\n" /* qact out info into reg2 */
|
||||
" lgr 0,%[reg0]\n" /* qid param into gr0 */
|
||||
" lgr 1,%[reg1]\n" /* qact in info into gr1 */
|
||||
" .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */
|
||||
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
|
||||
" lgr %[reg2],2\n" /* qact out info into reg2 */
|
||||
: [reg1] "+&d" (reg1), [reg2] "=&d" (reg2)
|
||||
: [reg0] "d" (reg0)
|
||||
: "cc", "0", "1", "2");
|
||||
|
||||
@@ -74,8 +74,17 @@ static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
|
||||
__ctl_load(reg, cr, cr);
|
||||
}
|
||||
|
||||
void smp_ctl_set_bit(int cr, int bit);
|
||||
void smp_ctl_clear_bit(int cr, int bit);
|
||||
void smp_ctl_set_clear_bit(int cr, int bit, bool set);
|
||||
|
||||
static inline void ctl_set_bit(int cr, int bit)
|
||||
{
|
||||
smp_ctl_set_clear_bit(cr, bit, true);
|
||||
}
|
||||
|
||||
static inline void ctl_clear_bit(int cr, int bit)
|
||||
{
|
||||
smp_ctl_set_clear_bit(cr, bit, false);
|
||||
}
|
||||
|
||||
union ctlreg0 {
|
||||
unsigned long val;
|
||||
@@ -130,8 +139,5 @@ union ctlreg15 {
|
||||
};
|
||||
};
|
||||
|
||||
#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
|
||||
#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __ASM_CTL_REG_H */
|
||||
|
||||
@@ -319,11 +319,18 @@ extern void (*s390_base_pgm_handler_fn)(struct pt_regs *regs);
|
||||
extern int memcpy_real(void *, unsigned long, size_t);
|
||||
extern void memcpy_absolute(void *, void *, size_t);
|
||||
|
||||
#define mem_assign_absolute(dest, val) do { \
|
||||
__typeof__(dest) __tmp = (val); \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
|
||||
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
|
||||
#define put_abs_lowcore(member, x) do { \
|
||||
unsigned long __abs_address = offsetof(struct lowcore, member); \
|
||||
__typeof__(((struct lowcore *)0)->member) __tmp = (x); \
|
||||
\
|
||||
memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
|
||||
} while (0)
|
||||
|
||||
#define get_abs_lowcore(x, member) do { \
|
||||
unsigned long __abs_address = offsetof(struct lowcore, member); \
|
||||
__typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
|
||||
\
|
||||
memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
|
||||
} while (0)
|
||||
|
||||
extern int s390_isolate_bp(void);
|
||||
|
||||
@@ -78,7 +78,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
|
||||
{
|
||||
typecheck(int, lp->lock);
|
||||
asm_inline volatile(
|
||||
ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */
|
||||
ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
|
||||
" sth %1,%0\n"
|
||||
: "=R" (((unsigned short *) &lp->lock)[1])
|
||||
: "d" (0) : "cc", "memory");
|
||||
|
||||
@@ -162,4 +162,4 @@
|
||||
__diag_pop(); \
|
||||
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
||||
|
||||
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
|
||||
#endif /* _ASM_S390_SYSCALL_WRAPPER_H */
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/llist.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
@@ -36,10 +38,21 @@ struct unwind_state {
|
||||
struct pt_regs *regs;
|
||||
unsigned long sp, ip;
|
||||
int graph_idx;
|
||||
struct llist_node *kr_cur;
|
||||
bool reliable;
|
||||
bool error;
|
||||
};
|
||||
|
||||
/* Recover the return address modified by kretprobe and ftrace_graph. */
|
||||
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
|
||||
unsigned long ip)
|
||||
{
|
||||
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
|
||||
if (is_kretprobe_trampoline(ip))
|
||||
ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
|
||||
return ip;
|
||||
}
|
||||
|
||||
void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
struct pt_regs *regs, unsigned long first_frame);
|
||||
bool unwind_next_frame(struct unwind_state *state);
|
||||
|
||||
@@ -121,22 +121,22 @@ _LPP_OFFSET = __LC_LPP
|
||||
.endm
|
||||
|
||||
.macro BPOFF
|
||||
ALTERNATIVE "", ".long 0xb2e8c000", 82
|
||||
ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,12,0", 82
|
||||
.endm
|
||||
|
||||
.macro BPON
|
||||
ALTERNATIVE "", ".long 0xb2e8d000", 82
|
||||
ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,13,0", 82
|
||||
.endm
|
||||
|
||||
.macro BPENTER tif_ptr,tif_mask
|
||||
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \
|
||||
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
|
||||
"", 82
|
||||
.endm
|
||||
|
||||
.macro BPEXIT tif_ptr,tif_mask
|
||||
TSTMSK \tif_ptr,\tif_mask
|
||||
ALTERNATIVE "jz .+8; .long 0xb2e8c000", \
|
||||
"jnz .+8; .long 0xb2e8d000", 82
|
||||
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
|
||||
"jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
||||
@@ -1646,8 +1646,8 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
|
||||
|
||||
csum = (__force unsigned int)
|
||||
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
|
||||
mem_assign_absolute(S390_lowcore.ipib, ipib);
|
||||
mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
|
||||
put_abs_lowcore(ipib, ipib);
|
||||
put_abs_lowcore(ipib_checksum, csum);
|
||||
dump_run(trigger);
|
||||
}
|
||||
|
||||
|
||||
@@ -284,11 +284,11 @@ NOKPROBE_SYMBOL(pop_kprobe);
|
||||
|
||||
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
|
||||
ri->fp = NULL;
|
||||
ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
|
||||
ri->fp = (void *)regs->gprs[15];
|
||||
|
||||
/* Replace the return addr with trampoline addr */
|
||||
regs->gprs[14] = (unsigned long) &__kretprobe_trampoline;
|
||||
regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
|
||||
}
|
||||
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
|
||||
|
||||
@@ -385,7 +385,7 @@ NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
|
||||
*/
|
||||
void trampoline_probe_handler(struct pt_regs *regs)
|
||||
{
|
||||
kretprobe_trampoline_handler(regs, NULL);
|
||||
kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
|
||||
}
|
||||
NOKPROBE_SYMBOL(trampoline_probe_handler);
|
||||
|
||||
|
||||
@@ -226,7 +226,7 @@ void arch_crash_save_vmcoreinfo(void)
|
||||
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
|
||||
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
|
||||
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
|
||||
mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
|
||||
put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
|
||||
}
|
||||
|
||||
void machine_shutdown(void)
|
||||
|
||||
@@ -63,7 +63,7 @@ void __init os_info_init(void)
|
||||
os_info.version_minor = OS_INFO_VERSION_MINOR;
|
||||
os_info.magic = OS_INFO_MAGIC;
|
||||
os_info.csum = os_info_csum(&os_info);
|
||||
mem_assign_absolute(S390_lowcore.os_info, __pa(ptr));
|
||||
put_abs_lowcore(os_info, __pa(ptr));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
|
||||
@@ -481,11 +481,11 @@ static void __init setup_lowcore_dat_off(void)
|
||||
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
|
||||
|
||||
/* Setup absolute zero lowcore */
|
||||
mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
|
||||
mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
|
||||
mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
|
||||
mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
|
||||
mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
|
||||
put_abs_lowcore(restart_stack, lc->restart_stack);
|
||||
put_abs_lowcore(restart_fn, lc->restart_fn);
|
||||
put_abs_lowcore(restart_data, lc->restart_data);
|
||||
put_abs_lowcore(restart_source, lc->restart_source);
|
||||
put_abs_lowcore(restart_psw, lc->restart_psw);
|
||||
|
||||
lc->spinlock_lockval = arch_spin_lockval(0);
|
||||
lc->spinlock_index = 0;
|
||||
@@ -501,6 +501,7 @@ static void __init setup_lowcore_dat_off(void)
|
||||
static void __init setup_lowcore_dat_on(void)
|
||||
{
|
||||
struct lowcore *lc = lowcore_ptr[0];
|
||||
int cr;
|
||||
|
||||
__ctl_clear_bit(0, 28);
|
||||
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
|
||||
@@ -509,10 +510,10 @@ static void __init setup_lowcore_dat_on(void)
|
||||
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
|
||||
__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
|
||||
__ctl_set_bit(0, 28);
|
||||
mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
|
||||
mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
|
||||
memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
|
||||
sizeof(S390_lowcore.cregs_save_area));
|
||||
put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
|
||||
put_abs_lowcore(program_new_psw, lc->program_new_psw);
|
||||
for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
|
||||
put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
|
||||
}
|
||||
|
||||
static struct resource code_resource = {
|
||||
|
||||
@@ -213,7 +213,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
|
||||
if (nmi_alloc_mcesa(&lc->mcesad))
|
||||
goto out;
|
||||
lowcore_ptr[cpu] = lc;
|
||||
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
|
||||
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
|
||||
return 0;
|
||||
|
||||
out:
|
||||
@@ -326,10 +326,17 @@ static void pcpu_delegate(struct pcpu *pcpu,
|
||||
/* Stop target cpu (if func returns this stops the current cpu). */
|
||||
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
|
||||
/* Restart func on the target cpu and stop the current cpu. */
|
||||
mem_assign_absolute(lc->restart_stack, stack);
|
||||
mem_assign_absolute(lc->restart_fn, (unsigned long) func);
|
||||
mem_assign_absolute(lc->restart_data, (unsigned long) data);
|
||||
mem_assign_absolute(lc->restart_source, source_cpu);
|
||||
if (lc) {
|
||||
lc->restart_stack = stack;
|
||||
lc->restart_fn = (unsigned long)func;
|
||||
lc->restart_data = (unsigned long)data;
|
||||
lc->restart_source = source_cpu;
|
||||
} else {
|
||||
put_abs_lowcore(restart_stack, stack);
|
||||
put_abs_lowcore(restart_fn, (unsigned long)func);
|
||||
put_abs_lowcore(restart_data, (unsigned long)data);
|
||||
put_abs_lowcore(restart_source, source_cpu);
|
||||
}
|
||||
__bpon();
|
||||
asm volatile(
|
||||
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
|
||||
@@ -570,39 +577,27 @@ static void smp_ctl_bit_callback(void *info)
|
||||
}
|
||||
|
||||
static DEFINE_SPINLOCK(ctl_lock);
|
||||
static unsigned long ctlreg;
|
||||
|
||||
/*
|
||||
* Set a bit in a control register of all cpus
|
||||
*/
|
||||
void smp_ctl_set_bit(int cr, int bit)
|
||||
void smp_ctl_set_clear_bit(int cr, int bit, bool set)
|
||||
{
|
||||
struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
|
||||
struct ec_creg_mask_parms parms = { .cr = cr, };
|
||||
u64 ctlreg;
|
||||
|
||||
if (set) {
|
||||
parms.orval = 1UL << bit;
|
||||
parms.andval = -1UL;
|
||||
} else {
|
||||
parms.orval = 0;
|
||||
parms.andval = ~(1UL << bit);
|
||||
}
|
||||
spin_lock(&ctl_lock);
|
||||
memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
|
||||
__set_bit(bit, &ctlreg);
|
||||
memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
|
||||
get_abs_lowcore(ctlreg, cregs_save_area[cr]);
|
||||
ctlreg = (ctlreg & parms.andval) | parms.orval;
|
||||
put_abs_lowcore(cregs_save_area[cr], ctlreg);
|
||||
spin_unlock(&ctl_lock);
|
||||
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(smp_ctl_set_bit);
|
||||
|
||||
/*
|
||||
* Clear a bit in a control register of all cpus
|
||||
*/
|
||||
void smp_ctl_clear_bit(int cr, int bit)
|
||||
{
|
||||
struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
|
||||
|
||||
spin_lock(&ctl_lock);
|
||||
memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
|
||||
__clear_bit(bit, &ctlreg);
|
||||
memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
|
||||
spin_unlock(&ctl_lock);
|
||||
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(smp_ctl_clear_bit);
|
||||
EXPORT_SYMBOL(smp_ctl_set_clear_bit);
|
||||
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
|
||||
|
||||
@@ -141,10 +141,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
|
||||
do_trap(regs, SIGFPE, si_code, "floating point exception");
|
||||
}
|
||||
|
||||
static void translation_exception(struct pt_regs *regs)
|
||||
static void translation_specification_exception(struct pt_regs *regs)
|
||||
{
|
||||
/* May never happen. */
|
||||
panic("Translation exception");
|
||||
panic("Translation-Specification Exception");
|
||||
}
|
||||
|
||||
static void illegal_op(struct pt_regs *regs)
|
||||
@@ -368,7 +368,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
|
||||
[0x0f] = hfp_divide_exception,
|
||||
[0x10] = do_dat_exception,
|
||||
[0x11] = do_dat_exception,
|
||||
[0x12] = translation_exception,
|
||||
[0x12] = translation_specification_exception,
|
||||
[0x13] = special_op_exception,
|
||||
[0x14] = default_trap_handler,
|
||||
[0x15] = operand_exception,
|
||||
|
||||
@@ -64,8 +64,8 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
|
||||
reliable = false;
|
||||
regs = NULL;
|
||||
if (!__kernel_text_address(ip)) {
|
||||
/* skip bogus %r14 */
|
||||
/* skip bogus %r14 or if it is the same as regs->psw.addr */
|
||||
if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) {
|
||||
state->regs = NULL;
|
||||
return unwind_next_frame(state);
|
||||
}
|
||||
@@ -103,13 +103,11 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
if (sp & 0x7)
|
||||
goto out_err;
|
||||
|
||||
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
|
||||
|
||||
/* Update unwind state */
|
||||
state->sp = sp;
|
||||
state->ip = ip;
|
||||
state->regs = regs;
|
||||
state->reliable = reliable;
|
||||
state->ip = unwind_recover_ret_addr(state, ip);
|
||||
return true;
|
||||
|
||||
out_err:
|
||||
@@ -161,12 +159,10 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
|
||||
}
|
||||
|
||||
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
|
||||
|
||||
/* Update unwind state */
|
||||
state->sp = sp;
|
||||
state->ip = ip;
|
||||
state->reliable = true;
|
||||
state->ip = unwind_recover_ret_addr(state, ip);
|
||||
|
||||
if (!first_frame)
|
||||
return;
|
||||
|
||||
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
|
||||
int owner;
|
||||
|
||||
asm_inline volatile(
|
||||
ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
|
||||
ALTERNATIVE("", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
|
||||
" l %0,%1\n"
|
||||
: "=d" (owner) : "Q" (*lock) : "memory");
|
||||
return owner;
|
||||
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
|
||||
int expected = old;
|
||||
|
||||
asm_inline volatile(
|
||||
ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
|
||||
ALTERNATIVE("", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
|
||||
" cs %0,%3,%1\n"
|
||||
: "=d" (old), "=Q" (*lock)
|
||||
: "0" (old), "d" (new), "Q" (*lock)
|
||||
|
||||
@@ -47,7 +47,7 @@ static void print_backtrace(char *bt)
|
||||
static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long sp)
|
||||
{
|
||||
int frame_count, prev_is_func2, seen_func2_func1;
|
||||
int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline;
|
||||
const int max_frames = 128;
|
||||
struct unwind_state state;
|
||||
size_t bt_pos = 0;
|
||||
@@ -63,6 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
|
||||
frame_count = 0;
|
||||
prev_is_func2 = 0;
|
||||
seen_func2_func1 = 0;
|
||||
seen_kretprobe_trampoline = 0;
|
||||
unwind_for_each_frame(&state, task, regs, sp) {
|
||||
unsigned long addr = unwind_get_return_address(&state);
|
||||
char sym[KSYM_SYMBOL_LEN];
|
||||
@@ -88,6 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
|
||||
if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
|
||||
seen_func2_func1 = 1;
|
||||
prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
|
||||
if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/"))
|
||||
seen_kretprobe_trampoline = 1;
|
||||
}
|
||||
|
||||
/* Check the results. */
|
||||
@@ -103,6 +106,10 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
|
||||
kunit_err(current_test, "Maximum number of frames exceeded\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
if (seen_kretprobe_trampoline) {
|
||||
kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
if (ret || force_bt)
|
||||
print_backtrace(bt);
|
||||
kfree(bt);
|
||||
@@ -132,36 +139,50 @@ static struct unwindme *unwindme;
|
||||
#define UWM_PGM 0x40 /* Unwind from program check handler */
|
||||
#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */
|
||||
#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */
|
||||
#define UWM_KRETPROBE 0x200 /* Unwind kretprobe handlers. */
|
||||
#define UWM_KRETPROBE 0x200 /* Unwind through kretprobed function. */
|
||||
#define UWM_KRETPROBE_HANDLER 0x400 /* Unwind from kretprobe handler. */
|
||||
|
||||
static __always_inline unsigned long get_psw_addr(void)
|
||||
static __always_inline struct pt_regs fake_pt_regs(void)
|
||||
{
|
||||
unsigned long psw_addr;
|
||||
struct pt_regs regs;
|
||||
|
||||
memset(&regs, 0, sizeof(regs));
|
||||
regs.gprs[15] = current_stack_pointer();
|
||||
|
||||
asm volatile(
|
||||
"basr %[psw_addr],0\n"
|
||||
: [psw_addr] "=d" (psw_addr));
|
||||
return psw_addr;
|
||||
: [psw_addr] "=d" (regs.psw.addr));
|
||||
return regs;
|
||||
}
|
||||
|
||||
static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
struct unwindme *u = unwindme;
|
||||
|
||||
if (!(u->flags & UWM_KRETPROBE_HANDLER))
|
||||
return 0;
|
||||
|
||||
u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
|
||||
(u->flags & UWM_SP) ? u->sp : 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline notrace void test_unwind_kretprobed_func(void)
|
||||
static noinline notrace int test_unwind_kretprobed_func(struct unwindme *u)
|
||||
{
|
||||
asm volatile(" nop\n");
|
||||
struct pt_regs regs;
|
||||
|
||||
if (!(u->flags & UWM_KRETPROBE))
|
||||
return 0;
|
||||
|
||||
regs = fake_pt_regs();
|
||||
return test_unwind(NULL, (u->flags & UWM_REGS) ? &regs : NULL,
|
||||
(u->flags & UWM_SP) ? u->sp : 0);
|
||||
}
|
||||
|
||||
static noinline void test_unwind_kretprobed_func_caller(void)
|
||||
static noinline int test_unwind_kretprobed_func_caller(struct unwindme *u)
|
||||
{
|
||||
test_unwind_kretprobed_func();
|
||||
return test_unwind_kretprobed_func(u);
|
||||
}
|
||||
|
||||
static int test_unwind_kretprobe(struct unwindme *u)
|
||||
@@ -187,10 +208,12 @@ static int test_unwind_kretprobe(struct unwindme *u)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
test_unwind_kretprobed_func_caller();
|
||||
ret = test_unwind_kretprobed_func_caller(u);
|
||||
unregister_kretprobe(&my_kretprobe);
|
||||
unwindme = NULL;
|
||||
return u->ret;
|
||||
if (u->flags & UWM_KRETPROBE_HANDLER)
|
||||
ret = u->ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
@@ -304,16 +327,13 @@ static noinline int unwindme_func4(struct unwindme *u)
|
||||
return 0;
|
||||
} else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) {
|
||||
return test_unwind_kprobe(u);
|
||||
} else if (u->flags & (UWM_KRETPROBE)) {
|
||||
} else if (u->flags & (UWM_KRETPROBE | UWM_KRETPROBE_HANDLER)) {
|
||||
return test_unwind_kretprobe(u);
|
||||
} else if (u->flags & UWM_FTRACE) {
|
||||
return test_unwind_ftrace(u);
|
||||
} else {
|
||||
struct pt_regs regs;
|
||||
struct pt_regs regs = fake_pt_regs();
|
||||
|
||||
memset(&regs, 0, sizeof(regs));
|
||||
regs.psw.addr = get_psw_addr();
|
||||
regs.gprs[15] = current_stack_pointer();
|
||||
return test_unwind(NULL,
|
||||
(u->flags & UWM_REGS) ? &regs : NULL,
|
||||
(u->flags & UWM_SP) ? u->sp : 0);
|
||||
@@ -452,6 +472,10 @@ static const struct test_params param_list[] = {
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_REGS),
|
||||
TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP | UWM_REGS),
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
|
||||
list_for_each_entry(tmp, &zpci_list, entry) {
|
||||
if (tmp->fid == fid) {
|
||||
zdev = tmp;
|
||||
zpci_zdev_get(zdev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -399,7 +400,7 @@ EXPORT_SYMBOL(pci_iounmap);
|
||||
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
|
||||
int size, u32 *val)
|
||||
{
|
||||
struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
|
||||
struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
|
||||
|
||||
return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
|
||||
}
|
||||
@@ -407,7 +408,7 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
|
||||
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
|
||||
int size, u32 val)
|
||||
{
|
||||
struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
|
||||
struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
|
||||
|
||||
return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
|
||||
}
|
||||
|
||||
@@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
|
||||
void zpci_release_device(struct kref *kref);
|
||||
static inline void zpci_zdev_put(struct zpci_dev *zdev)
|
||||
{
|
||||
kref_put(&zdev->kref, zpci_release_device);
|
||||
if (zdev)
|
||||
kref_put(&zdev->kref, zpci_release_device);
|
||||
}
|
||||
|
||||
static inline void zpci_zdev_get(struct zpci_dev *zdev)
|
||||
@@ -32,8 +33,8 @@ void zpci_free_domain(int domain);
|
||||
int zpci_setup_bus_resources(struct zpci_dev *zdev,
|
||||
struct list_head *resources);
|
||||
|
||||
static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
|
||||
unsigned int devfn)
|
||||
static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
|
||||
unsigned int devfn)
|
||||
{
|
||||
struct zpci_bus *zbus = bus->sysdata;
|
||||
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
#include <asm/clp.h>
|
||||
#include <uapi/asm/clp.h>
|
||||
|
||||
#include "pci_bus.h"
|
||||
|
||||
bool zpci_unique_uid;
|
||||
|
||||
void update_uid_checking(bool new)
|
||||
@@ -404,8 +406,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
|
||||
return;
|
||||
|
||||
zdev = get_zdev_by_fid(entry->fid);
|
||||
if (!zdev)
|
||||
zpci_create_device(entry->fid, entry->fh, entry->config_state);
|
||||
if (zdev) {
|
||||
zpci_zdev_put(zdev);
|
||||
return;
|
||||
}
|
||||
zpci_create_device(entry->fid, entry->fh, entry->config_state);
|
||||
}
|
||||
|
||||
int clp_scan_pci_devices(void)
|
||||
|
||||
@@ -269,7 +269,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
|
||||
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
|
||||
|
||||
if (!pdev)
|
||||
return;
|
||||
goto no_pdev;
|
||||
|
||||
switch (ccdf->pec) {
|
||||
case 0x003a: /* Service Action or Error Recovery Successful */
|
||||
@@ -286,6 +286,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
|
||||
break;
|
||||
}
|
||||
pci_dev_put(pdev);
|
||||
no_pdev:
|
||||
zpci_zdev_put(zdev);
|
||||
}
|
||||
|
||||
void zpci_event_error(void *data)
|
||||
@@ -314,6 +316,7 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
|
||||
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
|
||||
{
|
||||
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
|
||||
bool existing_zdev = !!zdev;
|
||||
enum zpci_state state;
|
||||
|
||||
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
|
||||
@@ -378,6 +381,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (existing_zdev)
|
||||
zpci_zdev_put(zdev);
|
||||
}
|
||||
|
||||
void zpci_event_availability(void *data)
|
||||
|
||||
@@ -47,6 +47,10 @@ config CPU_IDLE_GOV_HALTPOLL
|
||||
config DT_IDLE_STATES
|
||||
bool
|
||||
|
||||
config DT_IDLE_GENPD
|
||||
depends on PM_GENERIC_DOMAINS_OF
|
||||
bool
|
||||
|
||||
menu "ARM CPU Idle Drivers"
|
||||
depends on ARM || ARM64
|
||||
source "drivers/cpuidle/Kconfig.arm"
|
||||
@@ -62,6 +66,11 @@ depends on PPC
|
||||
source "drivers/cpuidle/Kconfig.powerpc"
|
||||
endmenu
|
||||
|
||||
menu "RISC-V CPU Idle Drivers"
|
||||
depends on RISCV
|
||||
source "drivers/cpuidle/Kconfig.riscv"
|
||||
endmenu
|
||||
|
||||
config HALTPOLL_CPUIDLE
|
||||
tristate "Halt poll cpuidle driver"
|
||||
depends on X86 && KVM_GUEST
|
||||
|
||||
@@ -27,6 +27,7 @@ config ARM_PSCI_CPUIDLE_DOMAIN
|
||||
bool "PSCI CPU idle Domain"
|
||||
depends on ARM_PSCI_CPUIDLE
|
||||
depends on PM_GENERIC_DOMAINS_OF
|
||||
select DT_IDLE_GENPD
|
||||
default y
|
||||
help
|
||||
Select this to enable the PSCI based CPUidle driver to use PM domains,
|
||||
|
||||
15
drivers/cpuidle/Kconfig.riscv
Normal file
15
drivers/cpuidle/Kconfig.riscv
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# RISC-V CPU Idle drivers
|
||||
#
|
||||
|
||||
config RISCV_SBI_CPUIDLE
|
||||
bool "RISC-V SBI CPU idle Driver"
|
||||
depends on RISCV_SBI
|
||||
select DT_IDLE_STATES
|
||||
select CPU_IDLE_MULTIPLE_DRIVERS
|
||||
select DT_IDLE_GENPD if PM_GENERIC_DOMAINS_OF
|
||||
help
|
||||
Select this option to enable RISC-V SBI firmware based CPU idle
|
||||
driver for RISC-V systems. This driver also supports hierarchical
|
||||
DT based layout of the idle state.
|
||||
@@ -6,6 +6,7 @@
|
||||
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
|
||||
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
|
||||
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
|
||||
obj-$(CONFIG_DT_IDLE_GENPD) += dt_idle_genpd.o
|
||||
obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
|
||||
obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o
|
||||
|
||||
@@ -34,3 +35,7 @@ obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o
|
||||
# POWERPC drivers
|
||||
obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o
|
||||
obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o
|
||||
|
||||
###############################################################################
|
||||
# RISC-V drivers
|
||||
obj-$(CONFIG_RISCV_SBI_CPUIDLE) += cpuidle-riscv-sbi.o
|
||||
|
||||
@@ -47,73 +47,14 @@ static int psci_pd_power_off(struct generic_pm_domain *pd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psci_pd_parse_state_nodes(struct genpd_power_state *states,
|
||||
int state_count)
|
||||
{
|
||||
int i, ret;
|
||||
u32 psci_state, *psci_state_buf;
|
||||
|
||||
for (i = 0; i < state_count; i++) {
|
||||
ret = psci_dt_parse_state_node(to_of_node(states[i].fwnode),
|
||||
&psci_state);
|
||||
if (ret)
|
||||
goto free_state;
|
||||
|
||||
psci_state_buf = kmalloc(sizeof(u32), GFP_KERNEL);
|
||||
if (!psci_state_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto free_state;
|
||||
}
|
||||
*psci_state_buf = psci_state;
|
||||
states[i].data = psci_state_buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
free_state:
|
||||
i--;
|
||||
for (; i >= 0; i--)
|
||||
kfree(states[i].data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psci_pd_parse_states(struct device_node *np,
|
||||
struct genpd_power_state **states, int *state_count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Parse the domain idle states. */
|
||||
ret = of_genpd_parse_idle_states(np, states, state_count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Fill out the PSCI specifics for each found state. */
|
||||
ret = psci_pd_parse_state_nodes(*states, *state_count);
|
||||
if (ret)
|
||||
kfree(*states);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psci_pd_free_states(struct genpd_power_state *states,
|
||||
unsigned int state_count)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < state_count; i++)
|
||||
kfree(states[i].data);
|
||||
kfree(states);
|
||||
}
|
||||
|
||||
static int psci_pd_init(struct device_node *np, bool use_osi)
|
||||
{
|
||||
struct generic_pm_domain *pd;
|
||||
struct psci_pd_provider *pd_provider;
|
||||
struct dev_power_governor *pd_gov;
|
||||
struct genpd_power_state *states = NULL;
|
||||
int ret = -ENOMEM, state_count = 0;
|
||||
|
||||
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
|
||||
pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
|
||||
if (!pd)
|
||||
goto out;
|
||||
|
||||
@@ -121,22 +62,6 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
|
||||
if (!pd_provider)
|
||||
goto free_pd;
|
||||
|
||||
pd->name = kasprintf(GFP_KERNEL, "%pOF", np);
|
||||
if (!pd->name)
|
||||
goto free_pd_prov;
|
||||
|
||||
/*
|
||||
* Parse the domain idle states and let genpd manage the state selection
|
||||
* for those being compatible with "domain-idle-state".
|
||||
*/
|
||||
ret = psci_pd_parse_states(np, &states, &state_count);
|
||||
if (ret)
|
||||
goto free_name;
|
||||
|
||||
pd->free_states = psci_pd_free_states;
|
||||
pd->name = kbasename(pd->name);
|
||||
pd->states = states;
|
||||
pd->state_count = state_count;
|
||||
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
|
||||
|
||||
/* Allow power off when OSI has been successfully enabled. */
|
||||
@@ -149,10 +74,8 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
|
||||
pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
|
||||
|
||||
ret = pm_genpd_init(pd, pd_gov, false);
|
||||
if (ret) {
|
||||
psci_pd_free_states(states, state_count);
|
||||
goto free_name;
|
||||
}
|
||||
if (ret)
|
||||
goto free_pd_prov;
|
||||
|
||||
ret = of_genpd_add_provider_simple(np, pd);
|
||||
if (ret)
|
||||
@@ -166,12 +89,10 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
|
||||
|
||||
remove_pd:
|
||||
pm_genpd_remove(pd);
|
||||
free_name:
|
||||
kfree(pd->name);
|
||||
free_pd_prov:
|
||||
kfree(pd_provider);
|
||||
free_pd:
|
||||
kfree(pd);
|
||||
dt_idle_pd_free(pd);
|
||||
out:
|
||||
pr_err("failed to init PM domain ret=%d %pOF\n", ret, np);
|
||||
return ret;
|
||||
@@ -195,30 +116,6 @@ static void psci_pd_remove(void)
|
||||
}
|
||||
}
|
||||
|
||||
static int psci_pd_init_topology(struct device_node *np)
|
||||
{
|
||||
struct device_node *node;
|
||||
struct of_phandle_args child, parent;
|
||||
int ret;
|
||||
|
||||
for_each_child_of_node(np, node) {
|
||||
if (of_parse_phandle_with_args(node, "power-domains",
|
||||
"#power-domain-cells", 0, &parent))
|
||||
continue;
|
||||
|
||||
child.np = node;
|
||||
child.args_count = 0;
|
||||
ret = of_genpd_add_subdomain(&parent, &child);
|
||||
of_node_put(parent.np);
|
||||
if (ret) {
|
||||
of_node_put(node);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool psci_pd_try_set_osi_mode(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -282,7 +179,7 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
|
||||
goto no_pd;
|
||||
|
||||
/* Link genpd masters/subdomains to model the CPU topology. */
|
||||
ret = psci_pd_init_topology(np);
|
||||
ret = dt_idle_pd_init_topology(np);
|
||||
if (ret)
|
||||
goto remove_pd;
|
||||
|
||||
@@ -314,28 +211,3 @@ static int __init psci_idle_init_domains(void)
|
||||
return platform_driver_register(&psci_cpuidle_domain_driver);
|
||||
}
|
||||
subsys_initcall(psci_idle_init_domains);
|
||||
|
||||
struct device *psci_dt_attach_cpu(int cpu)
|
||||
{
|
||||
struct device *dev;
|
||||
|
||||
dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), "psci");
|
||||
if (IS_ERR_OR_NULL(dev))
|
||||
return dev;
|
||||
|
||||
pm_runtime_irq_safe(dev);
|
||||
if (cpu_online(cpu))
|
||||
pm_runtime_get_sync(dev);
|
||||
|
||||
dev_pm_syscore_device(dev, true);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
void psci_dt_detach_cpu(struct device *dev)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(dev))
|
||||
return;
|
||||
|
||||
dev_pm_domain_detach(dev, false);
|
||||
}
|
||||
|
||||
@@ -10,8 +10,19 @@ void psci_set_domain_state(u32 state);
|
||||
int psci_dt_parse_state_node(struct device_node *np, u32 *state);
|
||||
|
||||
#ifdef CONFIG_ARM_PSCI_CPUIDLE_DOMAIN
|
||||
struct device *psci_dt_attach_cpu(int cpu);
|
||||
void psci_dt_detach_cpu(struct device *dev);
|
||||
|
||||
#include "dt_idle_genpd.h"
|
||||
|
||||
static inline struct device *psci_dt_attach_cpu(int cpu)
|
||||
{
|
||||
return dt_idle_attach_cpu(cpu, "psci");
|
||||
}
|
||||
|
||||
static inline void psci_dt_detach_cpu(struct device *dev)
|
||||
{
|
||||
dt_idle_detach_cpu(dev);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline struct device *psci_dt_attach_cpu(int cpu) { return NULL; }
|
||||
static inline void psci_dt_detach_cpu(struct device *dev) { }
|
||||
|
||||
627
drivers/cpuidle/cpuidle-riscv-sbi.c
Normal file
627
drivers/cpuidle/cpuidle-riscv-sbi.c
Normal file
@@ -0,0 +1,627 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* RISC-V SBI CPU idle driver.
|
||||
*
|
||||
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "cpuidle-riscv-sbi: " fmt
|
||||
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
#include <linux/cpu_cooling.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/pm_domain.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <asm/cpuidle.h>
|
||||
#include <asm/sbi.h>
|
||||
#include <asm/suspend.h>
|
||||
|
||||
#include "dt_idle_states.h"
|
||||
#include "dt_idle_genpd.h"
|
||||
|
||||
struct sbi_cpuidle_data {
|
||||
u32 *states;
|
||||
struct device *dev;
|
||||
};
|
||||
|
||||
struct sbi_domain_state {
|
||||
bool available;
|
||||
u32 state;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_READ_MOSTLY(struct sbi_cpuidle_data, sbi_cpuidle_data);
|
||||
static DEFINE_PER_CPU(struct sbi_domain_state, domain_state);
|
||||
static bool sbi_cpuidle_use_osi;
|
||||
static bool sbi_cpuidle_use_cpuhp;
|
||||
static bool sbi_cpuidle_pd_allow_domain_state;
|
||||
|
||||
/*
 * Mark a domain state as available on the calling CPU and remember @state
 * as the suspend parameter to use for it.
 */
static inline void sbi_set_domain_state(u32 state)
{
	struct sbi_domain_state *ds = this_cpu_ptr(&domain_state);

	ds->available = true;
	ds->state = state;
}
|
||||
|
||||
static inline u32 sbi_get_domain_state(void)
|
||||
{
|
||||
struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
|
||||
|
||||
return data->state;
|
||||
}
|
||||
|
||||
static inline void sbi_clear_domain_state(void)
|
||||
{
|
||||
struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
|
||||
|
||||
data->available = false;
|
||||
}
|
||||
|
||||
static inline bool sbi_is_domain_state_available(void)
|
||||
{
|
||||
struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
|
||||
|
||||
return data->available;
|
||||
}
|
||||
|
||||
static int sbi_suspend_finisher(unsigned long suspend_type,
|
||||
unsigned long resume_addr,
|
||||
unsigned long opaque)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
|
||||
suspend_type, resume_addr, opaque, 0, 0, 0);
|
||||
|
||||
return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0;
|
||||
}
|
||||
|
||||
/*
 * Enter the SBI suspend state @state.
 *
 * States without SBI_HSM_SUSP_NON_RET_BIT are handed straight to the
 * finisher with zero resume address and opaque value; states with the bit
 * set are entered through cpu_suspend().
 */
static int sbi_suspend(u32 state)
{
	if (!(state & SBI_HSM_SUSP_NON_RET_BIT))
		return sbi_suspend_finisher(state, 0, 0);

	return cpu_suspend(state, sbi_suspend_finisher);
}
|
||||
|
||||
static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
|
||||
struct cpuidle_driver *drv, int idx)
|
||||
{
|
||||
u32 *states = __this_cpu_read(sbi_cpuidle_data.states);
|
||||
|
||||
return CPU_PM_CPU_IDLE_ENTER_PARAM(sbi_suspend, idx, states[idx]);
|
||||
}
|
||||
|
||||
static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
struct cpuidle_driver *drv, int idx,
|
||||
bool s2idle)
|
||||
{
|
||||
struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data);
|
||||
u32 *states = data->states;
|
||||
struct device *pd_dev = data->dev;
|
||||
u32 state;
|
||||
int ret;
|
||||
|
||||
ret = cpu_pm_enter();
|
||||
if (ret)
|
||||
return -1;
|
||||
|
||||
/* Do runtime PM to manage a hierarchical CPU toplogy. */
|
||||
rcu_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_suspend(pd_dev);
|
||||
else
|
||||
pm_runtime_put_sync_suspend(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
|
||||
if (sbi_is_domain_state_available())
|
||||
state = sbi_get_domain_state();
|
||||
else
|
||||
state = states[idx];
|
||||
|
||||
ret = sbi_suspend(state) ? -1 : idx;
|
||||
|
||||
rcu_irq_enter_irqson();
|
||||
if (s2idle)
|
||||
dev_pm_genpd_resume(pd_dev);
|
||||
else
|
||||
pm_runtime_get_sync(pd_dev);
|
||||
rcu_irq_exit_irqson();
|
||||
|
||||
cpu_pm_exit();
|
||||
|
||||
/* Clear the domain state to start fresh when back from idle. */
|
||||
sbi_clear_domain_state();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sbi_enter_domain_idle_state(struct cpuidle_device *dev,
|
||||
struct cpuidle_driver *drv, int idx)
|
||||
{
|
||||
return __sbi_enter_domain_idle_state(dev, drv, idx, false);
|
||||
}
|
||||
|
||||
static int sbi_enter_s2idle_domain_idle_state(struct cpuidle_device *dev,
|
||||
struct cpuidle_driver *drv,
|
||||
int idx)
|
||||
{
|
||||
return __sbi_enter_domain_idle_state(dev, drv, idx, true);
|
||||
}
|
||||
|
||||
static int sbi_cpuidle_cpuhp_up(unsigned int cpu)
|
||||
{
|
||||
struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev);
|
||||
|
||||
if (pd_dev)
|
||||
pm_runtime_get_sync(pd_dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sbi_cpuidle_cpuhp_down(unsigned int cpu)
|
||||
{
|
||||
struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev);
|
||||
|
||||
if (pd_dev) {
|
||||
pm_runtime_put_sync(pd_dev);
|
||||
/* Clear domain state to start fresh at next online. */
|
||||
sbi_clear_domain_state();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sbi_idle_init_cpuhp(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!sbi_cpuidle_use_cpuhp)
|
||||
return;
|
||||
|
||||
err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
|
||||
"cpuidle/sbi:online",
|
||||
sbi_cpuidle_cpuhp_up,
|
||||
sbi_cpuidle_cpuhp_down);
|
||||
if (err)
|
||||
pr_warn("Failed %d while setup cpuhp state\n", err);
|
||||
}
|
||||
|
||||
static const struct of_device_id sbi_cpuidle_state_match[] = {
|
||||
{ .compatible = "riscv,idle-state",
|
||||
.data = sbi_cpuidle_enter_state },
|
||||
{ },
|
||||
};
|
||||
|
||||
/*
 * Check an SBI suspend parameter.
 *
 * A value is rejected when it lies strictly between the DEFAULT and
 * PLATFORM constants of either the retentive or the non-retentive range.
 */
static bool sbi_suspend_state_is_valid(u32 state)
{
	bool in_ret_gap = state > SBI_HSM_SUSPEND_RET_DEFAULT &&
			  state < SBI_HSM_SUSPEND_RET_PLATFORM;
	bool in_non_ret_gap = state > SBI_HSM_SUSPEND_NON_RET_DEFAULT &&
			      state < SBI_HSM_SUSPEND_NON_RET_PLATFORM;

	return !in_ret_gap && !in_non_ret_gap;
}
|
||||
|
||||
/*
 * Read the "riscv,sbi-suspend-param" property of idle state node @np into
 * @state.
 *
 * Returns 0 on success, the of_property_read_u32() error when the property
 * cannot be read, or -EINVAL when the value fails validation.
 */
static int sbi_dt_parse_state_node(struct device_node *np, u32 *state)
{
	int rc;

	rc = of_property_read_u32(np, "riscv,sbi-suspend-param", state);
	if (rc) {
		pr_warn("%pOF missing riscv,sbi-suspend-param property\n", np);
		return rc;
	}

	if (sbi_suspend_state_is_valid(*state))
		return 0;

	pr_warn("Invalid SBI suspend state %#x\n", *state);
	return -EINVAL;
}
|
||||
|
||||
static int sbi_dt_cpu_init_topology(struct cpuidle_driver *drv,
|
||||
struct sbi_cpuidle_data *data,
|
||||
unsigned int state_count, int cpu)
|
||||
{
|
||||
/* Currently limit the hierarchical topology to be used in OSI mode. */
|
||||
if (!sbi_cpuidle_use_osi)
|
||||
return 0;
|
||||
|
||||
data->dev = dt_idle_attach_cpu(cpu, "sbi");
|
||||
if (IS_ERR_OR_NULL(data->dev))
|
||||
return PTR_ERR_OR_ZERO(data->dev);
|
||||
|
||||
/*
|
||||
* Using the deepest state for the CPU to trigger a potential selection
|
||||
* of a shared state for the domain, assumes the domain states are all
|
||||
* deeper states.
|
||||
*/
|
||||
drv->states[state_count - 1].enter = sbi_enter_domain_idle_state;
|
||||
drv->states[state_count - 1].enter_s2idle =
|
||||
sbi_enter_s2idle_domain_idle_state;
|
||||
sbi_cpuidle_use_cpuhp = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sbi_cpuidle_dt_init_states(struct device *dev,
|
||||
struct cpuidle_driver *drv,
|
||||
unsigned int cpu,
|
||||
unsigned int state_count)
|
||||
{
|
||||
struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu);
|
||||
struct device_node *state_node;
|
||||
struct device_node *cpu_node;
|
||||
u32 *states;
|
||||
int i, ret;
|
||||
|
||||
cpu_node = of_cpu_device_node_get(cpu);
|
||||
if (!cpu_node)
|
||||
return -ENODEV;
|
||||
|
||||
states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL);
|
||||
if (!states) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Parse SBI specific details from state DT nodes */
|
||||
for (i = 1; i < state_count; i++) {
|
||||
state_node = of_get_cpu_state_node(cpu_node, i - 1);
|
||||
if (!state_node)
|
||||
break;
|
||||
|
||||
ret = sbi_dt_parse_state_node(state_node, &states[i]);
|
||||
of_node_put(state_node);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pr_debug("sbi-state %#x index %d\n", states[i], i);
|
||||
}
|
||||
if (i != state_count) {
|
||||
ret = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Initialize optional data, used for the hierarchical topology. */
|
||||
ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Store states in the per-cpu struct. */
|
||||
data->states = states;
|
||||
|
||||
fail:
|
||||
of_node_put(cpu_node);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sbi_cpuidle_deinit_cpu(int cpu)
|
||||
{
|
||||
struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu);
|
||||
|
||||
dt_idle_detach_cpu(data->dev);
|
||||
sbi_cpuidle_use_cpuhp = false;
|
||||
}
|
||||
|
||||
static int sbi_cpuidle_init_cpu(struct device *dev, int cpu)
|
||||
{
|
||||
struct cpuidle_driver *drv;
|
||||
unsigned int state_count = 0;
|
||||
int ret = 0;
|
||||
|
||||
drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
|
||||
if (!drv)
|
||||
return -ENOMEM;
|
||||
|
||||
drv->name = "sbi_cpuidle";
|
||||
drv->owner = THIS_MODULE;
|
||||
drv->cpumask = (struct cpumask *)cpumask_of(cpu);
|
||||
|
||||
/* RISC-V architectural WFI to be represented as state index 0. */
|
||||
drv->states[0].enter = sbi_cpuidle_enter_state;
|
||||
drv->states[0].exit_latency = 1;
|
||||
drv->states[0].target_residency = 1;
|
||||
drv->states[0].power_usage = UINT_MAX;
|
||||
strcpy(drv->states[0].name, "WFI");
|
||||
strcpy(drv->states[0].desc, "RISC-V WFI");
|
||||
|
||||
/*
|
||||
* If no DT idle states are detected (ret == 0) let the driver
|
||||
* initialization fail accordingly since there is no reason to
|
||||
* initialize the idle driver if only wfi is supported, the
|
||||
* default archictectural back-end already executes wfi
|
||||
* on idle entry.
|
||||
*/
|
||||
ret = dt_init_idle_driver(drv, sbi_cpuidle_state_match, 1);
|
||||
if (ret <= 0) {
|
||||
pr_debug("HART%ld: failed to parse DT idle states\n",
|
||||
cpuid_to_hartid_map(cpu));
|
||||
return ret ? : -ENODEV;
|
||||
}
|
||||
state_count = ret + 1; /* Include WFI state as well */
|
||||
|
||||
/* Initialize idle states from DT. */
|
||||
ret = sbi_cpuidle_dt_init_states(dev, drv, cpu, state_count);
|
||||
if (ret) {
|
||||
pr_err("HART%ld: failed to init idle states\n",
|
||||
cpuid_to_hartid_map(cpu));
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = cpuidle_register(drv, NULL);
|
||||
if (ret)
|
||||
goto deinit;
|
||||
|
||||
cpuidle_cooling_register(drv);
|
||||
|
||||
return 0;
|
||||
deinit:
|
||||
sbi_cpuidle_deinit_cpu(cpu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sbi_cpuidle_domain_sync_state(struct device *dev)
|
||||
{
|
||||
/*
|
||||
* All devices have now been attached/probed to the PM domain
|
||||
* topology, hence it's fine to allow domain states to be picked.
|
||||
*/
|
||||
sbi_cpuidle_pd_allow_domain_state = true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DT_IDLE_GENPD
|
||||
|
||||
static int sbi_cpuidle_pd_power_off(struct generic_pm_domain *pd)
|
||||
{
|
||||
struct genpd_power_state *state = &pd->states[pd->state_idx];
|
||||
u32 *pd_state;
|
||||
|
||||
if (!state->data)
|
||||
return 0;
|
||||
|
||||
if (!sbi_cpuidle_pd_allow_domain_state)
|
||||
return -EBUSY;
|
||||
|
||||
/* OSI mode is enabled, set the corresponding domain state. */
|
||||
pd_state = state->data;
|
||||
sbi_set_domain_state(*pd_state);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct sbi_pd_provider {
|
||||
struct list_head link;
|
||||
struct device_node *node;
|
||||
};
|
||||
|
||||
static LIST_HEAD(sbi_pd_providers);
|
||||
|
||||
static int sbi_pd_init(struct device_node *np)
|
||||
{
|
||||
struct generic_pm_domain *pd;
|
||||
struct sbi_pd_provider *pd_provider;
|
||||
struct dev_power_governor *pd_gov;
|
||||
int ret = -ENOMEM, state_count = 0;
|
||||
|
||||
pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
|
||||
if (!pd)
|
||||
goto out;
|
||||
|
||||
pd_provider = kzalloc(sizeof(*pd_provider), GFP_KERNEL);
|
||||
if (!pd_provider)
|
||||
goto free_pd;
|
||||
|
||||
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
|
||||
|
||||
/* Allow power off when OSI is available. */
|
||||
if (sbi_cpuidle_use_osi)
|
||||
pd->power_off = sbi_cpuidle_pd_power_off;
|
||||
else
|
||||
pd->flags |= GENPD_FLAG_ALWAYS_ON;
|
||||
|
||||
/* Use governor for CPU PM domains if it has some states to manage. */
|
||||
pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
|
||||
|
||||
ret = pm_genpd_init(pd, pd_gov, false);
|
||||
if (ret)
|
||||
goto free_pd_prov;
|
||||
|
||||
ret = of_genpd_add_provider_simple(np, pd);
|
||||
if (ret)
|
||||
goto remove_pd;
|
||||
|
||||
pd_provider->node = of_node_get(np);
|
||||
list_add(&pd_provider->link, &sbi_pd_providers);
|
||||
|
||||
pr_debug("init PM domain %s\n", pd->name);
|
||||
return 0;
|
||||
|
||||
remove_pd:
|
||||
pm_genpd_remove(pd);
|
||||
free_pd_prov:
|
||||
kfree(pd_provider);
|
||||
free_pd:
|
||||
dt_idle_pd_free(pd);
|
||||
out:
|
||||
pr_err("failed to init PM domain ret=%d %pOF\n", ret, np);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sbi_pd_remove(void)
|
||||
{
|
||||
struct sbi_pd_provider *pd_provider, *it;
|
||||
struct generic_pm_domain *genpd;
|
||||
|
||||
list_for_each_entry_safe(pd_provider, it, &sbi_pd_providers, link) {
|
||||
of_genpd_del_provider(pd_provider->node);
|
||||
|
||||
genpd = of_genpd_remove_last(pd_provider->node);
|
||||
if (!IS_ERR(genpd))
|
||||
kfree(genpd);
|
||||
|
||||
of_node_put(pd_provider->node);
|
||||
list_del(&pd_provider->link);
|
||||
kfree(pd_provider);
|
||||
}
|
||||
}
|
||||
|
||||
static int sbi_genpd_probe(struct device_node *np)
|
||||
{
|
||||
struct device_node *node;
|
||||
int ret = 0, pd_count = 0;
|
||||
|
||||
if (!np)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* Parse child nodes for the "#power-domain-cells" property and
|
||||
* initialize a genpd/genpd-of-provider pair when it's found.
|
||||
*/
|
||||
for_each_child_of_node(np, node) {
|
||||
if (!of_find_property(node, "#power-domain-cells", NULL))
|
||||
continue;
|
||||
|
||||
ret = sbi_pd_init(node);
|
||||
if (ret)
|
||||
goto put_node;
|
||||
|
||||
pd_count++;
|
||||
}
|
||||
|
||||
/* Bail out if not using the hierarchical CPU topology. */
|
||||
if (!pd_count)
|
||||
goto no_pd;
|
||||
|
||||
/* Link genpd masters/subdomains to model the CPU topology. */
|
||||
ret = dt_idle_pd_init_topology(np);
|
||||
if (ret)
|
||||
goto remove_pd;
|
||||
|
||||
return 0;
|
||||
|
||||
put_node:
|
||||
of_node_put(node);
|
||||
remove_pd:
|
||||
sbi_pd_remove();
|
||||
pr_err("failed to create CPU PM domains ret=%d\n", ret);
|
||||
no_pd:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Stub used when CONFIG_DT_IDLE_GENPD is off: nothing to set up. */
static inline int sbi_genpd_probe(struct device_node *np)
{
	return 0;
}
|
||||
|
||||
#endif
|
||||
|
||||
static int sbi_cpuidle_probe(struct platform_device *pdev)
|
||||
{
|
||||
int cpu, ret;
|
||||
struct cpuidle_driver *drv;
|
||||
struct cpuidle_device *dev;
|
||||
struct device_node *np, *pds_node;
|
||||
|
||||
/* Detect OSI support based on CPU DT nodes */
|
||||
sbi_cpuidle_use_osi = true;
|
||||
for_each_possible_cpu(cpu) {
|
||||
np = of_cpu_device_node_get(cpu);
|
||||
if (np &&
|
||||
of_find_property(np, "power-domains", NULL) &&
|
||||
of_find_property(np, "power-domain-names", NULL)) {
|
||||
continue;
|
||||
} else {
|
||||
sbi_cpuidle_use_osi = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Populate generic power domains from DT nodes */
|
||||
pds_node = of_find_node_by_path("/cpus/power-domains");
|
||||
if (pds_node) {
|
||||
ret = sbi_genpd_probe(pds_node);
|
||||
of_node_put(pds_node);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Initialize CPU idle driver for each CPU */
|
||||
for_each_possible_cpu(cpu) {
|
||||
ret = sbi_cpuidle_init_cpu(&pdev->dev, cpu);
|
||||
if (ret) {
|
||||
pr_debug("HART%ld: idle driver init failed\n",
|
||||
cpuid_to_hartid_map(cpu));
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* Setup CPU hotplut notifiers */
|
||||
sbi_idle_init_cpuhp();
|
||||
|
||||
pr_info("idle driver registered for all CPUs\n");
|
||||
|
||||
return 0;
|
||||
|
||||
out_fail:
|
||||
while (--cpu >= 0) {
|
||||
dev = per_cpu(cpuidle_devices, cpu);
|
||||
drv = cpuidle_get_cpu_driver(dev);
|
||||
cpuidle_unregister(drv);
|
||||
sbi_cpuidle_deinit_cpu(cpu);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct platform_driver sbi_cpuidle_driver = {
|
||||
.probe = sbi_cpuidle_probe,
|
||||
.driver = {
|
||||
.name = "sbi-cpuidle",
|
||||
.sync_state = sbi_cpuidle_domain_sync_state,
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * sbi_cpuidle_init() - register the SBI cpuidle driver and its device.
 *
 * Return: 0 when HSM suspend is not available (nothing is registered in that
 * case) or when both the platform driver and the "sbi-cpuidle" platform
 * device were registered; otherwise the error of the step that failed.
 */
static int __init sbi_cpuidle_init(void)
{
	struct platform_device *pdev;
	bool hsm_suspend_ok;
	int ret;

	/*
	 * The SBI HSM suspend function is only available when:
	 * 1) SBI version is 0.3 or higher
	 * 2) SBI HSM extension is available
	 *
	 * The extension is only probed once the version check has passed.
	 */
	hsm_suspend_ok = sbi_spec_version >= sbi_mk_version(0, 3) &&
			 sbi_probe_extension(SBI_EXT_HSM) > 0;
	if (!hsm_suspend_ok) {
		pr_info("HSM suspend not available\n");
		return 0;
	}

	ret = platform_driver_register(&sbi_cpuidle_driver);
	if (ret)
		return ret;

	pdev = platform_device_register_simple("sbi-cpuidle", -1, NULL, 0);
	if (!IS_ERR(pdev))
		return 0;

	/* Device creation failed: take the driver registration back. */
	platform_driver_unregister(&sbi_cpuidle_driver);
	return PTR_ERR(pdev);
}
|
||||
device_initcall(sbi_cpuidle_init);
|
||||
178
drivers/cpuidle/dt_idle_genpd.c
Normal file
178
drivers/cpuidle/dt_idle_genpd.c
Normal file
@@ -0,0 +1,178 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* PM domains for CPUs via genpd.
|
||||
*
|
||||
* Copyright (C) 2019 Linaro Ltd.
|
||||
* Author: Ulf Hansson <ulf.hansson@linaro.org>
|
||||
*
|
||||
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "dt-idle-genpd: " fmt
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pm_domain.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "dt_idle_genpd.h"
|
||||
|
||||
static int pd_parse_state_nodes(
|
||||
int (*parse_state)(struct device_node *, u32 *),
|
||||
struct genpd_power_state *states, int state_count)
|
||||
{
|
||||
int i, ret;
|
||||
u32 state, *state_buf;
|
||||
|
||||
for (i = 0; i < state_count; i++) {
|
||||
ret = parse_state(to_of_node(states[i].fwnode), &state);
|
||||
if (ret)
|
||||
goto free_state;
|
||||
|
||||
state_buf = kmalloc(sizeof(u32), GFP_KERNEL);
|
||||
if (!state_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto free_state;
|
||||
}
|
||||
*state_buf = state;
|
||||
states[i].data = state_buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
free_state:
|
||||
i--;
|
||||
for (; i >= 0; i--)
|
||||
kfree(states[i].data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pd_parse_states(struct device_node *np,
|
||||
int (*parse_state)(struct device_node *, u32 *),
|
||||
struct genpd_power_state **states,
|
||||
int *state_count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Parse the domain idle states. */
|
||||
ret = of_genpd_parse_idle_states(np, states, state_count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Fill out the dt specifics for each found state. */
|
||||
ret = pd_parse_state_nodes(parse_state, *states, *state_count);
|
||||
if (ret)
|
||||
kfree(*states);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void pd_free_states(struct genpd_power_state *states,
|
||||
unsigned int state_count)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < state_count; i++)
|
||||
kfree(states[i].data);
|
||||
kfree(states);
|
||||
}
|
||||
|
||||
void dt_idle_pd_free(struct generic_pm_domain *pd)
|
||||
{
|
||||
pd_free_states(pd->states, pd->state_count);
|
||||
kfree(pd->name);
|
||||
kfree(pd);
|
||||
}
|
||||
|
||||
struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
|
||||
int (*parse_state)(struct device_node *, u32 *))
|
||||
{
|
||||
struct generic_pm_domain *pd;
|
||||
struct genpd_power_state *states = NULL;
|
||||
int ret, state_count = 0;
|
||||
|
||||
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
|
||||
if (!pd)
|
||||
goto out;
|
||||
|
||||
pd->name = kasprintf(GFP_KERNEL, "%pOF", np);
|
||||
if (!pd->name)
|
||||
goto free_pd;
|
||||
|
||||
/*
|
||||
* Parse the domain idle states and let genpd manage the state selection
|
||||
* for those being compatible with "domain-idle-state".
|
||||
*/
|
||||
ret = pd_parse_states(np, parse_state, &states, &state_count);
|
||||
if (ret)
|
||||
goto free_name;
|
||||
|
||||
pd->free_states = pd_free_states;
|
||||
pd->name = kbasename(pd->name);
|
||||
pd->states = states;
|
||||
pd->state_count = state_count;
|
||||
|
||||
pr_debug("alloc PM domain %s\n", pd->name);
|
||||
return pd;
|
||||
|
||||
free_name:
|
||||
kfree(pd->name);
|
||||
free_pd:
|
||||
kfree(pd);
|
||||
out:
|
||||
pr_err("failed to alloc PM domain %pOF\n", np);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int dt_idle_pd_init_topology(struct device_node *np)
|
||||
{
|
||||
struct device_node *node;
|
||||
struct of_phandle_args child, parent;
|
||||
int ret;
|
||||
|
||||
for_each_child_of_node(np, node) {
|
||||
if (of_parse_phandle_with_args(node, "power-domains",
|
||||
"#power-domain-cells", 0, &parent))
|
||||
continue;
|
||||
|
||||
child.np = node;
|
||||
child.args_count = 0;
|
||||
ret = of_genpd_add_subdomain(&parent, &child);
|
||||
of_node_put(parent.np);
|
||||
if (ret) {
|
||||
of_node_put(node);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct device *dt_idle_attach_cpu(int cpu, const char *name)
|
||||
{
|
||||
struct device *dev;
|
||||
|
||||
dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), name);
|
||||
if (IS_ERR_OR_NULL(dev))
|
||||
return dev;
|
||||
|
||||
pm_runtime_irq_safe(dev);
|
||||
if (cpu_online(cpu))
|
||||
pm_runtime_get_sync(dev);
|
||||
|
||||
dev_pm_syscore_device(dev, true);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
void dt_idle_detach_cpu(struct device *dev)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(dev))
|
||||
return;
|
||||
|
||||
dev_pm_domain_detach(dev, false);
|
||||
}
|
||||
50
drivers/cpuidle/dt_idle_genpd.h
Normal file
50
drivers/cpuidle/dt_idle_genpd.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __DT_IDLE_GENPD
|
||||
#define __DT_IDLE_GENPD
|
||||
|
||||
struct device_node;
|
||||
struct generic_pm_domain;
|
||||
|
||||
#ifdef CONFIG_DT_IDLE_GENPD
|
||||
|
||||
void dt_idle_pd_free(struct generic_pm_domain *pd);
|
||||
|
||||
struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
|
||||
int (*parse_state)(struct device_node *, u32 *));
|
||||
|
||||
int dt_idle_pd_init_topology(struct device_node *np);
|
||||
|
||||
struct device *dt_idle_attach_cpu(int cpu, const char *name);
|
||||
|
||||
void dt_idle_detach_cpu(struct device *dev);
|
||||
|
||||
#else
|
||||
|
||||
static inline void dt_idle_pd_free(struct generic_pm_domain *pd)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct generic_pm_domain *dt_idle_pd_alloc(
|
||||
struct device_node *np,
|
||||
int (*parse_state)(struct device_node *, u32 *))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int dt_idle_pd_init_topology(struct device_node *np)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void dt_idle_detach_cpu(struct device *dev)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -745,9 +745,7 @@ sclp_sync_wait(void)
|
||||
/* Loop until driver state indicates finished request */
|
||||
while (sclp_running_state != sclp_running_state_idle) {
|
||||
/* Check for expired request timer */
|
||||
if (timer_pending(&sclp_request_timer) &&
|
||||
get_tod_clock_fast() > timeout &&
|
||||
del_timer(&sclp_request_timer))
|
||||
if (get_tod_clock_fast() > timeout && del_timer(&sclp_request_timer))
|
||||
sclp_request_timer.function(&sclp_request_timer);
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
@@ -109,8 +109,7 @@ static void sclp_console_sync_queue(void)
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&sclp_con_lock, flags);
|
||||
if (timer_pending(&sclp_con_timer))
|
||||
del_timer(&sclp_con_timer);
|
||||
del_timer(&sclp_con_timer);
|
||||
while (sclp_con_queue_running) {
|
||||
spin_unlock_irqrestore(&sclp_con_lock, flags);
|
||||
sclp_sync_wait();
|
||||
|
||||
@@ -231,8 +231,7 @@ sclp_vt220_emit_current(void)
|
||||
list_add_tail(&sclp_vt220_current_request->list,
|
||||
&sclp_vt220_outqueue);
|
||||
sclp_vt220_current_request = NULL;
|
||||
if (timer_pending(&sclp_vt220_timer))
|
||||
del_timer(&sclp_vt220_timer);
|
||||
del_timer(&sclp_vt220_timer);
|
||||
}
|
||||
sclp_vt220_flush_later = 0;
|
||||
}
|
||||
@@ -776,8 +775,7 @@ static void __sclp_vt220_flush_buffer(void)
|
||||
|
||||
sclp_vt220_emit_current();
|
||||
spin_lock_irqsave(&sclp_vt220_lock, flags);
|
||||
if (timer_pending(&sclp_vt220_timer))
|
||||
del_timer(&sclp_vt220_timer);
|
||||
del_timer(&sclp_vt220_timer);
|
||||
while (sclp_vt220_queue_running) {
|
||||
spin_unlock_irqrestore(&sclp_vt220_lock, flags);
|
||||
sclp_sync_wait();
|
||||
|
||||
@@ -354,10 +354,10 @@ tape_34xx_unit_check(struct tape_device *device, struct tape_request *request,
|
||||
if ((
|
||||
sense[0] == SENSE_DATA_CHECK ||
|
||||
sense[0] == SENSE_EQUIPMENT_CHECK ||
|
||||
sense[0] == SENSE_EQUIPMENT_CHECK + SENSE_DEFERRED_UNIT_CHECK
|
||||
sense[0] == (SENSE_EQUIPMENT_CHECK | SENSE_DEFERRED_UNIT_CHECK)
|
||||
) && (
|
||||
sense[1] == SENSE_DRIVE_ONLINE ||
|
||||
sense[1] == SENSE_BEGINNING_OF_TAPE + SENSE_WRITE_MODE
|
||||
sense[1] == (SENSE_BEGINNING_OF_TAPE | SENSE_WRITE_MODE)
|
||||
)) {
|
||||
switch (request->op) {
|
||||
/*
|
||||
|
||||
@@ -113,16 +113,10 @@ ccw_device_timeout(struct timer_list *t)
|
||||
void
|
||||
ccw_device_set_timeout(struct ccw_device *cdev, int expires)
|
||||
{
|
||||
if (expires == 0) {
|
||||
if (expires == 0)
|
||||
del_timer(&cdev->private->timer);
|
||||
return;
|
||||
}
|
||||
if (timer_pending(&cdev->private->timer)) {
|
||||
if (mod_timer(&cdev->private->timer, jiffies + expires))
|
||||
return;
|
||||
}
|
||||
cdev->private->timer.expires = jiffies + expires;
|
||||
add_timer(&cdev->private->timer);
|
||||
else
|
||||
mod_timer(&cdev->private->timer, jiffies + expires);
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -112,16 +112,10 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires)
|
||||
{
|
||||
struct eadm_private *private = get_eadm_private(sch);
|
||||
|
||||
if (expires == 0) {
|
||||
if (expires == 0)
|
||||
del_timer(&private->timer);
|
||||
return;
|
||||
}
|
||||
if (timer_pending(&private->timer)) {
|
||||
if (mod_timer(&private->timer, jiffies + expires))
|
||||
return;
|
||||
}
|
||||
private->timer.expires = jiffies + expires;
|
||||
add_timer(&private->timer);
|
||||
else
|
||||
mod_timer(&private->timer, jiffies + expires);
|
||||
}
|
||||
|
||||
static void eadm_subchannel_irq(struct subchannel *sch)
|
||||
|
||||
@@ -315,6 +315,7 @@ struct ap_perms {
|
||||
unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)];
|
||||
unsigned long apm[BITS_TO_LONGS(AP_DEVICES)];
|
||||
unsigned long aqm[BITS_TO_LONGS(AP_DOMAINS)];
|
||||
unsigned long adm[BITS_TO_LONGS(AP_DOMAINS)];
|
||||
};
|
||||
extern struct ap_perms ap_perms;
|
||||
extern struct mutex ap_perms_mutex;
|
||||
|
||||
@@ -155,7 +155,7 @@ static int pkey_skey2pkey(const u8 *key, struct pkey_protkey *pkey)
|
||||
/*
|
||||
* The cca_xxx2protkey call may fail when a card has been
|
||||
* addressed where the master key was changed after last fetch
|
||||
* of the mkvp into the cache. Try 3 times: First witout verify
|
||||
* of the mkvp into the cache. Try 3 times: First without verify
|
||||
* then with verify and last round with verify and old master
|
||||
* key verification pattern match not ignored.
|
||||
*/
|
||||
|
||||
@@ -1189,13 +1189,6 @@ static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
|
||||
* @matrix_mdev: a mediated matrix device
|
||||
* @kvm: reference to KVM instance
|
||||
*
|
||||
* Note: The matrix_dev->lock must be taken prior to calling
|
||||
* this function; however, the lock will be temporarily released while the
|
||||
* guest's AP configuration is set to avoid a potential lockdep splat.
|
||||
* The kvm->lock is taken to set the guest's AP configuration which, under
|
||||
* certain circumstances, will result in a circular lock dependency if this is
|
||||
* done under the @matrix_mdev->lock.
|
||||
*
|
||||
* Return: 0 if no other mediated matrix device has a reference to @kvm;
|
||||
* otherwise, returns an -EPERM.
|
||||
*/
|
||||
@@ -1269,18 +1262,11 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
|
||||
* by @matrix_mdev.
|
||||
*
|
||||
* @matrix_mdev: a matrix mediated device
|
||||
* @kvm: the pointer to the kvm structure being unset.
|
||||
*
|
||||
* Note: The matrix_dev->lock must be taken prior to calling
|
||||
* this function; however, the lock will be temporarily released while the
|
||||
* guest's AP configuration is cleared to avoid a potential lockdep splat.
|
||||
* The kvm->lock is taken to clear the guest's AP configuration which, under
|
||||
* certain circumstances, will result in a circular lock dependency if this is
|
||||
* done under the @matrix_mdev->lock.
|
||||
*/
|
||||
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
|
||||
struct kvm *kvm)
|
||||
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
|
||||
{
|
||||
struct kvm *kvm = matrix_mdev->kvm;
|
||||
|
||||
if (kvm && kvm->arch.crypto.crycbd) {
|
||||
down_write(&kvm->arch.crypto.pqap_hook_rwsem);
|
||||
kvm->arch.crypto.pqap_hook = NULL;
|
||||
@@ -1311,7 +1297,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
|
||||
matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
|
||||
|
||||
if (!data)
|
||||
vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
|
||||
vfio_ap_mdev_unset_kvm(matrix_mdev);
|
||||
else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
|
||||
notify_rc = NOTIFY_DONE;
|
||||
|
||||
@@ -1448,7 +1434,7 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
|
||||
&matrix_mdev->iommu_notifier);
|
||||
vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
|
||||
&matrix_mdev->group_notifier);
|
||||
vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
|
||||
vfio_ap_mdev_unset_kvm(matrix_mdev);
|
||||
}
|
||||
|
||||
static int vfio_ap_mdev_get_device_info(unsigned long arg)
|
||||
|
||||
@@ -285,10 +285,53 @@ static ssize_t aqmask_store(struct device *dev,
|
||||
|
||||
static DEVICE_ATTR_RW(aqmask);
|
||||
|
||||
static ssize_t admask_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
int i, rc;
|
||||
struct zcdn_device *zcdndev = to_zcdn_dev(dev);
|
||||
|
||||
if (mutex_lock_interruptible(&ap_perms_mutex))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
buf[0] = '0';
|
||||
buf[1] = 'x';
|
||||
for (i = 0; i < sizeof(zcdndev->perms.adm) / sizeof(long); i++)
|
||||
snprintf(buf + 2 + 2 * i * sizeof(long),
|
||||
PAGE_SIZE - 2 - 2 * i * sizeof(long),
|
||||
"%016lx", zcdndev->perms.adm[i]);
|
||||
buf[2 + 2 * i * sizeof(long)] = '\n';
|
||||
buf[2 + 2 * i * sizeof(long) + 1] = '\0';
|
||||
rc = 2 + 2 * i * sizeof(long) + 1;
|
||||
|
||||
mutex_unlock(&ap_perms_mutex);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * admask_store() - sysfs store handler for the "admask" attribute.
 *
 * Hands @buf to ap_parse_mask_str() to update the device's adm bitmap
 * (AP_DOMAINS bits), passing ap_perms_mutex along to the parser.
 *
 * Return: @count on success, otherwise the parser's error code.
 */
static ssize_t admask_store(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct zcdn_device *zcdndev = to_zcdn_dev(dev);
	int rc = ap_parse_mask_str(buf, zcdndev->perms.adm,
				   AP_DOMAINS, &ap_perms_mutex);

	if (!rc)
		return count;

	return rc;
}
|
||||
|
||||
static DEVICE_ATTR_RW(admask);
|
||||
|
||||
static struct attribute *zcdn_dev_attrs[] = {
|
||||
&dev_attr_ioctlmask.attr,
|
||||
&dev_attr_apmask.attr,
|
||||
&dev_attr_aqmask.attr,
|
||||
&dev_attr_admask.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -880,11 +923,22 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
tdom = *domain;
|
||||
if (perms != &ap_perms && tdom < AP_DOMAINS) {
|
||||
if (ap_msg.flags & AP_MSG_FLAG_ADMIN) {
|
||||
if (!test_bit_inv(tdom, perms->adm)) {
|
||||
rc = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
} else if ((ap_msg.flags & AP_MSG_FLAG_USAGE) == 0) {
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* If a valid target domain is set and this domain is NOT a usage
|
||||
* domain but a control only domain, autoselect target domain.
|
||||
*/
|
||||
tdom = *domain;
|
||||
if (tdom < AP_DOMAINS &&
|
||||
!ap_test_config_usage_domain(tdom) &&
|
||||
ap_test_config_ctrl_domain(tdom))
|
||||
@@ -1062,6 +1116,18 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
|
||||
if (rc)
|
||||
goto out_free;
|
||||
|
||||
if (perms != &ap_perms && domain < AUTOSEL_DOM) {
|
||||
if (ap_msg.flags & AP_MSG_FLAG_ADMIN) {
|
||||
if (!test_bit_inv(domain, perms->adm)) {
|
||||
rc = -ENODEV;
|
||||
goto out_free;
|
||||
}
|
||||
} else if ((ap_msg.flags & AP_MSG_FLAG_USAGE) == 0) {
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
pref_zc = NULL;
|
||||
pref_zq = NULL;
|
||||
spin_lock(&zcrypt_list_lock);
|
||||
|
||||
@@ -90,7 +90,7 @@ static ssize_t online_store(struct device *dev,
|
||||
list_for_each_entry(zq, &zc->zqueues, list)
|
||||
maxzqs++;
|
||||
if (maxzqs > 0)
|
||||
zq_uelist = kcalloc(maxzqs + 1, sizeof(zq), GFP_ATOMIC);
|
||||
zq_uelist = kcalloc(maxzqs + 1, sizeof(*zq_uelist), GFP_ATOMIC);
|
||||
list_for_each_entry(zq, &zc->zqueues, list)
|
||||
if (zcrypt_queue_force_online(zq, online))
|
||||
if (zq_uelist) {
|
||||
|
||||
@@ -1109,7 +1109,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
|
||||
if (kb->head.type == TOKTYPE_NON_CCA &&
|
||||
kb->head.version == TOKVER_EP11_AES) {
|
||||
has_header = true;
|
||||
keysize = kb->head.len < keysize ? kb->head.len : keysize;
|
||||
keysize = min_t(size_t, kb->head.len, keysize);
|
||||
}
|
||||
|
||||
/* request cprb and payload */
|
||||
|
||||
@@ -8296,7 +8296,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
|
||||
* cover the full folio, like invalidating the last folio, we're
|
||||
* still safe to wait for ordered extent to finish.
|
||||
*/
|
||||
if (!(offset == 0 && length == PAGE_SIZE)) {
|
||||
if (!(offset == 0 && length == folio_size(folio))) {
|
||||
btrfs_releasepage(&folio->page, GFP_NOFS);
|
||||
return;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user