mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 19:08:57 +09:00
Merge 6661224e66 ("Merge tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode") into android-mainline
Steps on the way to 5.17-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I15f33e57d4d97cdfcbbd3245c05bd85e8751de20
This commit is contained in:
2
.mailmap
2
.mailmap
@@ -49,10 +49,12 @@ Andy Adamson <andros@citi.umich.edu>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
|
||||
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
|
||||
Anup Patel <anup@brainfault.org> <anup.patel@wdc.com>
|
||||
Archit Taneja <archit@ti.com>
|
||||
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
|
||||
Arnaud Patard <arnaud.patard@rtp-net.org>
|
||||
Arnd Bergmann <arnd@arndb.de>
|
||||
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
|
||||
Axel Dyks <xl@xlsigned.net>
|
||||
Axel Lin <axel.lin@gmail.com>
|
||||
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
|
||||
|
||||
@@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller
|
||||
maintainers:
|
||||
- Neil Armstrong <narmstrong@baylibre.com>
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/sound/name-prefix.yaml#
|
||||
|
||||
description: |
|
||||
The Amlogic Meson Synopsys Designware Integration is composed of
|
||||
- A Synopsys DesignWare HDMI Controller IP
|
||||
@@ -99,6 +102,8 @@ properties:
|
||||
"#sound-dai-cells":
|
||||
const: 0
|
||||
|
||||
sound-name-prefix: true
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
@@ -78,6 +78,10 @@ properties:
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
amlogic,canvas:
|
||||
description: should point to a canvas provider node
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
description: phandle to the associated power domain
|
||||
@@ -106,6 +110,7 @@ required:
|
||||
- port@1
|
||||
- "#address-cells"
|
||||
- "#size-cells"
|
||||
- amlogic,canvas
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
@@ -118,6 +123,7 @@ examples:
|
||||
interrupts = <3>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
amlogic,canvas = <&canvas>;
|
||||
|
||||
/* CVBS VDAC output port */
|
||||
port@0 {
|
||||
|
||||
@@ -146,11 +146,15 @@ examples:
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
#interrupt-cells = <1>;
|
||||
interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
|
||||
interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-names = "pcie", "msi";
|
||||
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
|
||||
interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH
|
||||
0 0 0 2 &gicv2 GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH
|
||||
0 0 0 3 &gicv2 GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH
|
||||
0 0 0 4 &gicv2 GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
|
||||
|
||||
msi-parent = <&pcie0>;
|
||||
msi-controller;
|
||||
ranges = <0x02000000 0x0 0xf8000000 0x6 0x00000000 0x0 0x04000000>;
|
||||
@@ -158,5 +162,24 @@ examples:
|
||||
<0x42000000 0x1 0x80000000 0x3 0x00000000 0x0 0x80000000>;
|
||||
brcm,enable-ssc;
|
||||
brcm,scb-sizes = <0x0000000080000000 0x0000000080000000>;
|
||||
|
||||
/* PCIe bridge, Root Port */
|
||||
pci@0,0 {
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
compatible = "pciclass,0604";
|
||||
device_type = "pci";
|
||||
vpcie3v3-supply = <&vreg7>;
|
||||
ranges;
|
||||
|
||||
/* PCIe endpoint */
|
||||
pci-ep@0,0 {
|
||||
assigned-addresses =
|
||||
<0x82010000 0x0 0xf8000000 0x6 0x00000000 0x0 0x2000>;
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
compatible = "pci14e4,1688";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -127,6 +127,12 @@ properties:
|
||||
enum: [1, 2, 3, 4]
|
||||
default: 1
|
||||
|
||||
phys:
|
||||
maxItems: 1
|
||||
|
||||
phy-names:
|
||||
const: pcie-phy
|
||||
|
||||
reset-gpio:
|
||||
description: Should specify the GPIO for controlling the PCI bus device
|
||||
reset signal. It's not polarity aware and defaults to active-low reset
|
||||
|
||||
@@ -32,8 +32,12 @@ properties:
|
||||
maxItems: 1
|
||||
|
||||
ti,syscon-pcie-mode:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
items:
|
||||
- items:
|
||||
- description: Phandle to the SYSCON entry
|
||||
- description: pcie_ctrl register offset within SYSCON
|
||||
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
@@ -65,7 +69,7 @@ examples:
|
||||
<0x5506000 0x1000>;
|
||||
reg-names = "app", "dbics", "addr_space", "atu";
|
||||
power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
|
||||
ti,syscon-pcie-mode = <&pcie0_mode>;
|
||||
ti,syscon-pcie-mode = <&scm_conf 0x4060>;
|
||||
max-link-speed = <2>;
|
||||
dma-coherent;
|
||||
interrupts = <GIC_SPI 340 IRQ_TYPE_EDGE_RISING>;
|
||||
|
||||
@@ -36,12 +36,20 @@ properties:
|
||||
maxItems: 1
|
||||
|
||||
ti,syscon-pcie-id:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
items:
|
||||
- items:
|
||||
- description: Phandle to the SYSCON entry
|
||||
- description: pcie_device_id register offset within SYSCON
|
||||
description: Phandle to the SYSCON entry required for getting PCIe device/vendor ID
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
|
||||
ti,syscon-pcie-mode:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
items:
|
||||
- items:
|
||||
- description: Phandle to the SYSCON entry
|
||||
- description: pcie_ctrl register offset within SYSCON
|
||||
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
|
||||
msi-map: true
|
||||
|
||||
@@ -87,8 +95,8 @@ examples:
|
||||
#size-cells = <2>;
|
||||
ranges = <0x81000000 0 0 0x10020000 0 0x00010000>,
|
||||
<0x82000000 0 0x10030000 0x10030000 0 0x07FD0000>;
|
||||
ti,syscon-pcie-id = <&pcie_devid>;
|
||||
ti,syscon-pcie-mode = <&pcie0_mode>;
|
||||
ti,syscon-pcie-id = <&scm_conf 0x0210>;
|
||||
ti,syscon-pcie-mode = <&scm_conf 0x4060>;
|
||||
bus-range = <0x0 0xff>;
|
||||
max-link-speed = <2>;
|
||||
dma-coherent;
|
||||
|
||||
30
Documentation/livepatch/api.rst
Normal file
30
Documentation/livepatch/api.rst
Normal file
@@ -0,0 +1,30 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
Livepatching APIs
|
||||
=================
|
||||
|
||||
Livepatch Enablement
|
||||
====================
|
||||
|
||||
.. kernel-doc:: kernel/livepatch/core.c
|
||||
:export:
|
||||
|
||||
|
||||
Shadow Variables
|
||||
================
|
||||
|
||||
.. kernel-doc:: kernel/livepatch/shadow.c
|
||||
:export:
|
||||
|
||||
System State Changes
|
||||
====================
|
||||
|
||||
.. kernel-doc:: kernel/livepatch/state.c
|
||||
:export:
|
||||
|
||||
Object Types
|
||||
============
|
||||
|
||||
.. kernel-doc:: include/linux/livepatch.h
|
||||
:identifiers: klp_patch klp_object klp_func klp_callbacks klp_state
|
||||
@@ -14,6 +14,7 @@ Kernel Livepatching
|
||||
shadow-vars
|
||||
system-state
|
||||
reliable-stacktrace
|
||||
api
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
||||
@@ -82,8 +82,8 @@ to do actions that can be done only once when a new variable is allocated.
|
||||
- call destructor function if defined
|
||||
- free shadow variable
|
||||
|
||||
* klp_shadow_free_all() - detach and free all <*, id> shadow variables
|
||||
- find and remove any <*, id> references from global hashtable
|
||||
* klp_shadow_free_all() - detach and free all <_, id> shadow variables
|
||||
- find and remove any <_, id> references from global hashtable
|
||||
|
||||
- if found
|
||||
|
||||
|
||||
@@ -52,12 +52,12 @@ struct klp_state:
|
||||
|
||||
The state can be manipulated using two functions:
|
||||
|
||||
- *klp_get_state(patch, id)*
|
||||
- klp_get_state()
|
||||
|
||||
- Get struct klp_state associated with the given livepatch
|
||||
and state id.
|
||||
|
||||
- *klp_get_prev_state(id)*
|
||||
- klp_get_prev_state()
|
||||
|
||||
- Get struct klp_state associated with the given feature id and
|
||||
already installed livepatches.
|
||||
|
||||
41
Documentation/tools/rtla/Makefile
Normal file
41
Documentation/tools/rtla/Makefile
Normal file
@@ -0,0 +1,41 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
# Based on bpftool's Documentation Makefile
|
||||
|
||||
INSTALL ?= install
|
||||
RM ?= rm -f
|
||||
RMDIR ?= rmdir --ignore-fail-on-non-empty
|
||||
|
||||
PREFIX ?= /usr/share
|
||||
MANDIR ?= $(PREFIX)/man
|
||||
MAN1DIR = $(MANDIR)/man1
|
||||
|
||||
MAN1_RST = $(wildcard rtla*.rst)
|
||||
|
||||
_DOC_MAN1 = $(patsubst %.rst,%.1,$(MAN1_RST))
|
||||
DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1))
|
||||
|
||||
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
|
||||
RST2MAN_OPTS += --verbose
|
||||
|
||||
$(OUTPUT)%.1: %.rst
|
||||
ifndef RST2MAN_DEP
|
||||
$(error "rst2man not found, but required to generate man pages")
|
||||
endif
|
||||
rst2man $(RST2MAN_OPTS) $< > $@
|
||||
|
||||
man1: $(DOC_MAN1)
|
||||
man: man1
|
||||
|
||||
clean:
|
||||
$(RM) $(DOC_MAN1)
|
||||
|
||||
install: man
|
||||
$(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)
|
||||
$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(MAN1DIR)
|
||||
|
||||
uninstall:
|
||||
$(RM) $(addprefix $(DESTDIR)$(MAN1DIR)/,$(_DOC_MAN1))
|
||||
$(RMDIR) $(DESTDIR)$(MAN1DIR)
|
||||
|
||||
.PHONY: man man1 clean install uninstall
|
||||
.DEFAULT_GOAL := man
|
||||
12
Documentation/tools/rtla/common_appendix.rst
Normal file
12
Documentation/tools/rtla/common_appendix.rst
Normal file
@@ -0,0 +1,12 @@
|
||||
REPORTING BUGS
|
||||
==============
|
||||
Report bugs to <linux-kernel@vger.kernel.org>
|
||||
|
||||
LICENSE
|
||||
=======
|
||||
**rtla** is Free Software licensed under the GNU GPLv2
|
||||
|
||||
COPYING
|
||||
=======
|
||||
Copyright \(C) 2021 Red Hat, Inc. Free use of this software is granted under
|
||||
the terms of the GNU General Public License (GPL).
|
||||
23
Documentation/tools/rtla/common_hist_options.rst
Normal file
23
Documentation/tools/rtla/common_hist_options.rst
Normal file
@@ -0,0 +1,23 @@
|
||||
**-b**, **--bucket-size** *N*
|
||||
|
||||
Set the histogram bucket size (default *1*).
|
||||
|
||||
**-e**, **--entries** *N*
|
||||
|
||||
Set the number of entries of the histogram (default 256).
|
||||
|
||||
**--no-header**
|
||||
|
||||
Do not print header.
|
||||
|
||||
**--no-summary**
|
||||
|
||||
Do not print summary.
|
||||
|
||||
**--no-index**
|
||||
|
||||
Do not print index.
|
||||
|
||||
**--with-zeros**
|
||||
|
||||
Print zero only entries.
|
||||
28
Documentation/tools/rtla/common_options.rst
Normal file
28
Documentation/tools/rtla/common_options.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
**-c**, **--cpus** *cpu-list*
|
||||
|
||||
Set the osnoise tracer to run the sample threads in the cpu-list.
|
||||
|
||||
**-d**, **--duration** *time[s|m|h|d]*
|
||||
|
||||
Set the duration of the session.
|
||||
|
||||
**-D**, **--debug**
|
||||
|
||||
Print debug info.
|
||||
|
||||
**-t**, **--trace**\[*=file*]
|
||||
|
||||
Save the stopped trace to [*file|osnoise_trace.txt*].
|
||||
|
||||
**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period*
|
||||
|
||||
Set scheduling parameters to the osnoise tracer threads. The formats to set the priority are:
|
||||
|
||||
- *o:prio* - use SCHED_OTHER with *prio*;
|
||||
- *r:prio* - use SCHED_RR with *prio*;
|
||||
- *f:prio* - use SCHED_FIFO with *prio*;
|
||||
- *d:runtime[us|ms|s]:period[us|ms|s]* - use SCHED_DEADLINE with *runtime* and *period* in nanoseconds.
|
||||
|
||||
**-h**, **--help**
|
||||
|
||||
Print help menu.
|
||||
8
Documentation/tools/rtla/common_osnoise_description.rst
Normal file
8
Documentation/tools/rtla/common_osnoise_description.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
The **rtla osnoise** tool is an interface for the *osnoise* tracer. The
|
||||
*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the
|
||||
time in a loop with preemption, softirq and IRQs enabled, thus
|
||||
allowing all the sources of operating system noise during its execution.
|
||||
The *osnoise*'s tracer threads take note of the delta between each time
|
||||
read, along with an interference counter of all sources of interference.
|
||||
At the end of each period, the *osnoise* tracer displays a summary of
|
||||
the results.
|
||||
17
Documentation/tools/rtla/common_osnoise_options.rst
Normal file
17
Documentation/tools/rtla/common_osnoise_options.rst
Normal file
@@ -0,0 +1,17 @@
|
||||
**-p**, **--period** *us*
|
||||
|
||||
Set the *osnoise* tracer period in microseconds.
|
||||
|
||||
**-r**, **--runtime** *us*
|
||||
|
||||
Set the *osnoise* tracer runtime in microseconds.
|
||||
|
||||
**-s**, **--stop** *us*
|
||||
|
||||
Stop the trace if a single sample is higher than the argument in microseconds.
|
||||
If **-t** is set, it will also save the trace to the output.
|
||||
|
||||
**-S**, **--stop-total** *us*
|
||||
|
||||
Stop the trace if the total sample is higher than the argument in microseconds.
|
||||
If **-t** is set, it will also save the trace to the output.
|
||||
10
Documentation/tools/rtla/common_timerlat_description.rst
Normal file
10
Documentation/tools/rtla/common_timerlat_description.rst
Normal file
@@ -0,0 +1,10 @@
|
||||
The **rtla timerlat** tool is an interface for the *timerlat* tracer. The
|
||||
*timerlat* tracer dispatches a kernel thread per-cpu. These threads
|
||||
set a periodic timer to wake themselves up and go back to sleep. After
|
||||
the wakeup, they collect and generate useful information for the
|
||||
debugging of operating system timer latency.
|
||||
|
||||
The *timerlat* tracer outputs information in two ways. It periodically
|
||||
prints the timer latency at the timer *IRQ* handler and the *Thread*
|
||||
handler. It also enables the trace of the most relevant information via
|
||||
**osnoise:** tracepoints.
|
||||
16
Documentation/tools/rtla/common_timerlat_options.rst
Normal file
16
Documentation/tools/rtla/common_timerlat_options.rst
Normal file
@@ -0,0 +1,16 @@
|
||||
**-p**, **--period** *us*
|
||||
|
||||
Set the *timerlat* tracer period in microseconds.
|
||||
|
||||
**-i**, **--irq** *us*
|
||||
|
||||
Stop trace if the *IRQ* latency is higher than the argument in us.
|
||||
|
||||
**-T**, **--thread** *us*
|
||||
|
||||
Stop trace if the *Thread* latency is higher than the argument in us.
|
||||
|
||||
**-s**, **--stack** *us*
|
||||
|
||||
Save the stack trace at the *IRQ* if a *Thread* latency is higher than the
|
||||
argument in us.
|
||||
3
Documentation/tools/rtla/common_top_options.rst
Normal file
3
Documentation/tools/rtla/common_top_options.rst
Normal file
@@ -0,0 +1,3 @@
|
||||
**-q**, **--quiet**
|
||||
|
||||
Print only a summary at the end of the session.
|
||||
66
Documentation/tools/rtla/rtla-osnoise-hist.rst
Normal file
66
Documentation/tools/rtla/rtla-osnoise-hist.rst
Normal file
@@ -0,0 +1,66 @@
|
||||
===================
|
||||
rtla-osnoise-hist
|
||||
===================
|
||||
------------------------------------------------------
|
||||
Display a histogram of the osnoise tracer samples
|
||||
------------------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla osnoise hist** [*OPTIONS*]
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
.. include:: common_osnoise_description.rst
|
||||
|
||||
The **rtla osnoise hist** tool collects all **osnoise:sample_threshold**
|
||||
occurrences in a histogram, displaying the results in a user-friendly way.
|
||||
The tool also allows many configurations of the *osnoise* tracer and the
|
||||
collection of the tracer output.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
.. include:: common_osnoise_options.rst
|
||||
|
||||
.. include:: common_hist_options.rst
|
||||
|
||||
.. include:: common_options.rst
|
||||
|
||||
EXAMPLE
|
||||
=======
|
||||
In the example below, *osnoise* tracer threads are set to run with real-time
|
||||
priority *FIFO:1*, on CPUs *0-11*, for *900ms* at each period (*1s* by
|
||||
default). The reason for reducing the runtime is to avoid starving the
|
||||
**rtla** tool. The tool is also set to run for *one minute*. The output
|
||||
histogram is set to group outputs in buckets of *10us* and *25* entries::
|
||||
|
||||
[root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25
|
||||
# RTLA osnoise histogram
|
||||
# Time unit is microseconds (us)
|
||||
# Duration: 0 00:01:00
|
||||
Index CPU-000 CPU-001 CPU-002 CPU-003 CPU-004 CPU-005 CPU-006 CPU-007 CPU-008 CPU-009 CPU-010 CPU-011
|
||||
0 42982 46287 51779 53740 52024 44817 49898 36500 50408 50128 49523 52377
|
||||
10 12224 8356 2912 878 2667 10155 4573 18894 4214 4836 5708 2413
|
||||
20 8 5 12 2 13 24 20 41 29 53 39 39
|
||||
30 1 1 0 0 10 3 6 19 15 31 30 38
|
||||
40 0 0 0 0 0 4 2 7 2 3 8 11
|
||||
50 0 0 0 0 0 0 0 0 0 1 1 2
|
||||
over: 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
count: 55215 54649 54703 54620 54714 55003 54499 55461 54668 55052 55309 54880
|
||||
min: 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
avg: 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
max: 30 30 20 20 30 40 40 40 40 50 50 50
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-osnoise**\(1), **rtla-osnoise-top**\(1)
|
||||
|
||||
*osnoise* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
61
Documentation/tools/rtla/rtla-osnoise-top.rst
Normal file
61
Documentation/tools/rtla/rtla-osnoise-top.rst
Normal file
@@ -0,0 +1,61 @@
|
||||
===================
|
||||
rtla-osnoise-top
|
||||
===================
|
||||
-----------------------------------------------
|
||||
Display a summary of the operating system noise
|
||||
-----------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla osnoise top** [*OPTIONS*]
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
.. include:: common_osnoise_description.rst
|
||||
|
||||
**rtla osnoise top** collects the periodic summary from the *osnoise* tracer,
|
||||
including the counters of the occurrence of the interference source,
|
||||
displaying the results in a user-friendly format.
|
||||
|
||||
The tool also allows many configurations of the *osnoise* tracer and the
|
||||
collection of the tracer output.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
.. include:: common_osnoise_options.rst
|
||||
|
||||
.. include:: common_top_options.rst
|
||||
|
||||
.. include:: common_options.rst
|
||||
|
||||
EXAMPLE
|
||||
=======
|
||||
In the example below, the **rtla osnoise top** tool is set to run with a
|
||||
real-time priority *FIFO:1*, on CPUs *0-3*, for *900ms* at each period
|
||||
(*1s* by default). The reason for reducing the runtime is to avoid starving
|
||||
the rtla tool. The tool is also set to run for *one minute* and to display
|
||||
a summary of the report at the end of the session::
|
||||
|
||||
[root@f34 ~]# rtla osnoise top -P F:1 -c 0-3 -r 900000 -d 1M -q
|
||||
Operating System Noise
|
||||
duration: 0 00:01:00 | time is in us
|
||||
CPU Period Runtime Noise % CPU Aval Max Noise Max Single HW NMI IRQ Softirq Thread
|
||||
0 #59 53100000 304896 99.42580 6978 56 549 0 53111 1590 13
|
||||
1 #59 53100000 338339 99.36282 8092 24 399 0 53130 1448 31
|
||||
2 #59 53100000 290842 99.45227 6582 39 855 0 53110 1406 12
|
||||
3 #59 53100000 204935 99.61405 6251 33 290 0 53156 1460 12
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
|
||||
**rtla-osnoise**\(1), **rtla-osnoise-hist**\(1)
|
||||
|
||||
Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
59
Documentation/tools/rtla/rtla-osnoise.rst
Normal file
59
Documentation/tools/rtla/rtla-osnoise.rst
Normal file
@@ -0,0 +1,59 @@
|
||||
===============
|
||||
rtla-osnoise
|
||||
===============
|
||||
------------------------------------------------------------------
|
||||
Measure the operating system noise
|
||||
------------------------------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla osnoise** [*MODE*] ...
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
.. include:: common_osnoise_description.rst
|
||||
|
||||
The *osnoise* tracer outputs information in two ways. It periodically prints
|
||||
a summary of the noise of the operating system, including the counters of
|
||||
the occurrence of the source of interference. It also provides information
|
||||
for each noise via the **osnoise:** tracepoints. The **rtla osnoise top**
|
||||
mode displays information about the periodic summary from the *osnoise* tracer.
|
||||
The **rtla osnoise hist** mode displays information about the noise using
|
||||
the **osnoise:** tracepoints. For further details, please refer to the
|
||||
respective man page.
|
||||
|
||||
MODES
|
||||
=====
|
||||
**top**
|
||||
|
||||
Prints the summary from osnoise tracer.
|
||||
|
||||
**hist**
|
||||
|
||||
Prints a histogram of osnoise samples.
|
||||
|
||||
If no MODE is given, the top mode is called, passing the arguments.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
|
||||
**-h**, **--help**
|
||||
|
||||
Display the help text.
|
||||
|
||||
For other options, see the man page for the corresponding mode.
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-osnoise-top**\(1), **rtla-osnoise-hist**\(1)
|
||||
|
||||
Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
106
Documentation/tools/rtla/rtla-timerlat-hist.rst
Normal file
106
Documentation/tools/rtla/rtla-timerlat-hist.rst
Normal file
@@ -0,0 +1,106 @@
|
||||
=====================
|
||||
rtla-timerlat-hist
|
||||
=====================
|
||||
------------------------------------------------
|
||||
Histograms of the operating system timer latency
|
||||
------------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla timerlat hist** [*OPTIONS*] ...
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
.. include:: common_timerlat_description.rst
|
||||
|
||||
The **rtla timerlat hist** displays a histogram of each tracer event
|
||||
occurrence. This tool uses the periodic information, and the
|
||||
**osnoise:** tracepoints are enabled when using the **-T** option.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
|
||||
.. include:: common_timerlat_options.rst
|
||||
|
||||
.. include:: common_hist_options.rst
|
||||
|
||||
.. include:: common_options.rst
|
||||
|
||||
EXAMPLE
|
||||
=======
|
||||
In the example below, **rtla timerlat hist** is set to run for *10* minutes,
|
||||
in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat
|
||||
hist** will change the priority of the *timerlat* threads to run under
|
||||
*SCHED_DEADLINE* priority, with a *100us* runtime every *1ms* period. The
|
||||
*1ms* period is also passed to the *timerlat* tracer::
|
||||
|
||||
[root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms
|
||||
# RTLA timerlat histogram
|
||||
# Time unit is microseconds (us)
|
||||
# Duration: 0 00:10:00
|
||||
Index IRQ-000 Thr-000 IRQ-001 Thr-001 IRQ-002 Thr-002 IRQ-003 Thr-003 IRQ-004 Thr-004
|
||||
0 276489 0 206089 0 466018 0 481102 0 205546 0
|
||||
1 318327 35487 388149 30024 94531 48382 83082 71078 388026 55730
|
||||
2 3282 122584 4019 126527 28231 109012 23311 89309 4568 98739
|
||||
3 940 11815 837 9863 6209 16227 6895 17196 910 9780
|
||||
4 444 17287 424 11574 2097 38443 2169 36736 462 13476
|
||||
5 206 43291 255 25581 1223 101908 1304 101137 236 28913
|
||||
6 132 101501 96 64584 635 213774 757 215471 99 73453
|
||||
7 74 169347 65 124758 350 57466 441 53639 69 148573
|
||||
8 53 85183 31 156751 229 9052 306 9026 39 139907
|
||||
9 22 10387 12 42762 161 2554 225 2689 19 26192
|
||||
10 13 1898 8 5770 114 1247 128 1405 13 3772
|
||||
11 9 560 9 924 71 686 76 765 8 713
|
||||
12 4 256 2 360 50 411 64 474 3 278
|
||||
13 2 167 2 172 43 256 53 350 4 180
|
||||
14 1 88 1 116 15 198 42 223 0 115
|
||||
15 2 63 3 94 11 139 20 150 0 58
|
||||
16 2 37 0 56 5 78 10 102 0 39
|
||||
17 0 18 0 28 4 57 8 80 0 15
|
||||
18 0 8 0 17 2 50 6 56 0 12
|
||||
19 0 9 0 5 0 19 0 48 0 18
|
||||
20 0 4 0 8 0 11 2 27 0 4
|
||||
21 0 2 0 3 1 9 1 18 0 6
|
||||
22 0 1 0 3 1 7 0 3 0 5
|
||||
23 0 2 0 4 0 2 0 7 0 2
|
||||
24 0 2 0 2 1 3 0 3 0 5
|
||||
25 0 0 0 1 0 1 0 1 0 3
|
||||
26 0 1 0 0 0 2 0 2 0 0
|
||||
27 0 0 0 3 0 1 0 0 0 1
|
||||
28 0 0 0 3 0 0 0 1 0 0
|
||||
29 0 0 0 2 0 2 0 1 0 3
|
||||
30 0 1 0 0 0 0 0 0 0 0
|
||||
31 0 1 0 0 0 0 0 2 0 2
|
||||
32 0 0 0 1 0 2 0 0 0 0
|
||||
33 0 0 0 2 0 0 0 0 0 1
|
||||
34 0 0 0 0 0 0 0 0 0 2
|
||||
35 0 1 0 1 0 0 0 0 0 1
|
||||
36 0 1 0 0 0 1 0 1 0 0
|
||||
37 0 0 0 1 0 0 0 0 0 0
|
||||
40 0 0 0 0 0 1 0 1 0 0
|
||||
41 0 0 0 0 0 0 0 0 0 1
|
||||
42 0 0 0 0 0 0 0 0 0 1
|
||||
44 0 0 0 0 0 1 0 0 0 0
|
||||
46 0 0 0 0 0 0 0 1 0 0
|
||||
47 0 0 0 0 0 0 0 0 0 1
|
||||
50 0 0 0 0 0 0 0 0 0 1
|
||||
54 0 0 0 1 0 0 0 0 0 0
|
||||
58 0 0 0 1 0 0 0 0 0 0
|
||||
over: 0 0 0 0 0 0 0 0 0 0
|
||||
count: 600002 600002 600002 600002 600002 600002 600002 600002 600002 600002
|
||||
min: 0 1 0 1 0 1 0 1 0 1
|
||||
avg: 0 5 0 5 0 4 0 4 0 5
|
||||
max: 16 36 15 58 24 44 21 46 13 50
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-timerlat**\(1), **rtla-timerlat-top**\(1)
|
||||
|
||||
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
145
Documentation/tools/rtla/rtla-timerlat-top.rst
Normal file
145
Documentation/tools/rtla/rtla-timerlat-top.rst
Normal file
@@ -0,0 +1,145 @@
|
||||
====================
|
||||
rtla-timerlat-top
|
||||
====================
|
||||
-------------------------------------------
|
||||
Measures the operating system timer latency
|
||||
-------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla timerlat top** [*OPTIONS*] ...
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
.. include:: common_timerlat_description.rst
|
||||
|
||||
The **rtla timerlat top** displays a summary of the periodic output
|
||||
from the *timerlat* tracer. It also provides information for each
|
||||
operating system noise via the **osnoise:** tracepoints that can be
|
||||
seen with the option **-T**.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
|
||||
.. include:: common_timerlat_options.rst
|
||||
|
||||
.. include:: common_top_options.rst
|
||||
|
||||
.. include:: common_options.rst
|
||||
|
||||
EXAMPLE
|
||||
=======
|
||||
|
||||
In the example below, the *timerlat* tracer is set to capture the stack trace at
|
||||
the IRQ handler, printing it to the buffer if the *Thread* timer latency is
|
||||
higher than *30 us*. It is also set to stop the session if a *Thread* timer
|
||||
latency higher than *30 us* is hit. Finally, it is set to save the trace
|
||||
buffer if the stop condition is hit::
|
||||
|
||||
[root@alien ~]# rtla timerlat top -s 30 -t 30 -T
|
||||
Timer Latency
|
||||
0 00:00:59 | IRQ Timer Latency (us) | Thread Timer Latency (us)
|
||||
CPU COUNT | cur min avg max | cur min avg max
|
||||
0 #58634 | 1 0 1 10 | 11 2 10 23
|
||||
1 #58634 | 1 0 1 9 | 12 2 9 23
|
||||
2 #58634 | 0 0 1 11 | 10 2 9 23
|
||||
3 #58634 | 1 0 1 11 | 11 2 9 24
|
||||
4 #58634 | 1 0 1 10 | 11 2 9 26
|
||||
5 #58634 | 1 0 1 8 | 10 2 9 25
|
||||
6 #58634 | 12 0 1 12 | 30 2 10 30 <--- CPU with spike
|
||||
7 #58634 | 1 0 1 9 | 11 2 9 23
|
||||
8 #58633 | 1 0 1 9 | 11 2 9 26
|
||||
9 #58633 | 1 0 1 9 | 10 2 9 26
|
||||
10 #58633 | 1 0 1 13 | 11 2 9 28
|
||||
11 #58633 | 1 0 1 13 | 12 2 9 24
|
||||
12 #58633 | 1 0 1 8 | 10 2 9 23
|
||||
13 #58633 | 1 0 1 10 | 10 2 9 22
|
||||
14 #58633 | 1 0 1 18 | 12 2 9 27
|
||||
15 #58633 | 1 0 1 10 | 11 2 9 28
|
||||
16 #58633 | 0 0 1 11 | 7 2 9 26
|
||||
17 #58633 | 1 0 1 13 | 10 2 9 24
|
||||
18 #58633 | 1 0 1 9 | 13 2 9 22
|
||||
19 #58633 | 1 0 1 10 | 11 2 9 23
|
||||
20 #58633 | 1 0 1 12 | 11 2 9 28
|
||||
21 #58633 | 1 0 1 14 | 11 2 9 24
|
||||
22 #58633 | 1 0 1 8 | 11 2 9 22
|
||||
23 #58633 | 1 0 1 10 | 11 2 9 27
|
||||
timerlat hit stop tracing
|
||||
saving trace to timerlat_trace.txt
|
||||
[root@alien bristot]# tail -60 timerlat_trace.txt
|
||||
[...]
|
||||
timerlat/5-79755 [005] ....... 426.271226: #58634 context thread timer_latency 10823 ns
|
||||
sh-109404 [006] dnLh213 426.271247: #58634 context irq timer_latency 12505 ns
|
||||
sh-109404 [006] dNLh313 426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns
|
||||
sh-109404 [006] d...313 426.271263: thread_noise: sh:109404 start 426.271245853 duration 4769 ns
|
||||
timerlat/6-79756 [006] ....... 426.271264: #58634 context thread timer_latency 30328 ns
|
||||
timerlat/6-79756 [006] ....1.. 426.271265: <stack trace>
|
||||
=> timerlat_irq
|
||||
=> __hrtimer_run_queues
|
||||
=> hrtimer_interrupt
|
||||
=> __sysvec_apic_timer_interrupt
|
||||
=> sysvec_apic_timer_interrupt
|
||||
=> asm_sysvec_apic_timer_interrupt
|
||||
=> _raw_spin_unlock_irqrestore <---- spinlock that disabled interrupt.
|
||||
=> try_to_wake_up
|
||||
=> autoremove_wake_function
|
||||
=> __wake_up_common
|
||||
=> __wake_up_common_lock
|
||||
=> ep_poll_callback
|
||||
=> __wake_up_common
|
||||
=> __wake_up_common_lock
|
||||
=> fsnotify_add_event
|
||||
=> inotify_handle_inode_event
|
||||
=> fsnotify
|
||||
=> __fsnotify_parent
|
||||
=> __fput
|
||||
=> task_work_run
|
||||
=> exit_to_user_mode_prepare
|
||||
=> syscall_exit_to_user_mode
|
||||
=> do_syscall_64
|
||||
=> entry_SYSCALL_64_after_hwframe
|
||||
=> 0x7265000001378c
|
||||
=> 0x10000cea7
|
||||
=> 0x25a00000204a
|
||||
=> 0x12e302d00000000
|
||||
=> 0x19b51010901b6
|
||||
=> 0x283ce00726500
|
||||
=> 0x61ea308872
|
||||
=> 0x00000fe3
|
||||
bash-109109 [007] d..h... 426.271265: #58634 context irq timer_latency 1211 ns
|
||||
timerlat/6-79756 [006] ....... 426.271267: timerlat_main: stop tracing hit on cpu 6
|
||||
|
||||
In the trace, it is possible to notice that the *IRQ* timer latency was
|
||||
already high, accounting *12505 ns*. The IRQ delay was caused by the
|
||||
*bash-109109* process that disabled IRQs in the wake-up path
|
||||
(*try_to_wake_up()* function). The duration of the IRQ handler that woke
|
||||
up the timerlat thread, informed with the **osnoise:irq_noise** event, was
|
||||
also high and added more *12553 ns* to the Thread latency. Finally, the
|
||||
**osnoise:thread_noise** added by the currently running thread (including
|
||||
the scheduling overhead) added more *4769 ns*. Summing up these values,
|
||||
the *Thread* timer latency accounted for *30328 ns*.
|
||||
|
||||
The primary reason for this high value is the wake-up path that was hit
|
||||
twice during this case: when the *bash-109109* was waking up a thread
|
||||
and then when the *timerlat* thread was awakened. This information can
|
||||
then be used as the starting point of a more fine-grained analysis.
|
||||
|
||||
Note that **rtla timerlat** was dispatched without changing *timerlat* tracer
|
||||
threads' priority. That is generally not needed because these threads have
|
||||
priority *FIFO:95* by default, which is a common priority used by real-time
|
||||
kernel developers to analyze scheduling delays.
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-timerlat**\(1), **rtla-timerlat-hist**\(1)
|
||||
|
||||
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
57
Documentation/tools/rtla/rtla-timerlat.rst
Normal file
57
Documentation/tools/rtla/rtla-timerlat.rst
Normal file
@@ -0,0 +1,57 @@
|
||||
================
|
||||
rtla-timerlat
|
||||
================
|
||||
-------------------------------------------
|
||||
Measures the operating system timer latency
|
||||
-------------------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla timerlat** [*MODE*] ...
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
.. include:: common_timerlat_description.rst
|
||||
|
||||
The *timerlat* tracer outputs information in two ways. It periodically
|
||||
prints the timer latency at the timer *IRQ* handler and the *Thread* handler.
|
||||
It also provides information for each noise via the **osnoise:** tracepoints.
|
||||
The **rtla timerlat top** mode displays a summary of the periodic output
|
||||
from the *timerlat* tracer. The **rtla timerlat hist** mode displays a histogram
|
||||
of each tracer event occurrence. For further details, please refer to the
|
||||
respective man page.
|
||||
|
||||
MODES
|
||||
=====
|
||||
**top**
|
||||
|
||||
Prints the summary from *timerlat* tracer.
|
||||
|
||||
**hist**
|
||||
|
||||
Prints a histogram of timerlat samples.
|
||||
|
||||
If no *MODE* is given, the top mode is called, passing the arguments.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
**-h**, **--help**
|
||||
|
||||
Display the help text.
|
||||
|
||||
For other options, see the man page for the corresponding mode.
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-timerlat-top**\(1), **rtla-timerlat-hist**\(1)
|
||||
|
||||
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
48
Documentation/tools/rtla/rtla.rst
Normal file
48
Documentation/tools/rtla/rtla.rst
Normal file
@@ -0,0 +1,48 @@
|
||||
=========
|
||||
rtla
|
||||
=========
|
||||
--------------------------------
|
||||
Real-time Linux Analysis tool
|
||||
--------------------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
**rtla** *COMMAND* [*OPTIONS*]
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
The **rtla** is a meta-tool that includes a set of commands that aim to
|
||||
analyze the real-time properties of Linux. But instead of testing Linux
|
||||
as a black box, **rtla** leverages kernel tracing capabilities to provide
|
||||
precise information about the properties and root causes of unexpected
|
||||
results.
|
||||
|
||||
COMMANDS
|
||||
========
|
||||
**osnoise**
|
||||
|
||||
Gives information about the operating system noise (osnoise).
|
||||
|
||||
**timerlat**
|
||||
|
||||
Measures the IRQ and thread timer latency.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
**-h**, **--help**
|
||||
|
||||
Display the help text.
|
||||
|
||||
For other options, see the man page for the corresponding command.
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**rtla-osnoise**\(1), **rtla-timerlat**\(1)
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
Daniel Bristot de Oliveira <bristot@kernel.org>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
|
||||
prev_comm ~ "*sh*"
|
||||
prev_comm ~ "ba*sh"
|
||||
|
||||
If the field is a pointer that points into user space (for example
|
||||
"filename" from sys_enter_openat), then you have to append ".ustring" to the
|
||||
field name::
|
||||
|
||||
filename.ustring ~ "password"
|
||||
|
||||
This is required because the kernel has to know how to retrieve the memory
|
||||
that the pointer refers to from user space.
|
||||
|
||||
5.2 Setting filters
|
||||
-------------------
|
||||
|
||||
@@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of
|
||||
the filter string; the error message should still be useful though
|
||||
even without more accurate position info.
|
||||
|
||||
5.2.1 Filter limitations
|
||||
------------------------
|
||||
|
||||
If a filter is placed on a string pointer ``(char *)`` that does not point
|
||||
to a string on the ring buffer, but instead points to kernel or user space
|
||||
memory, then, for safety reasons, at most 1024 bytes of the content is
|
||||
copied onto a temporary buffer to do the compare. If the copy of the memory
|
||||
faults (the pointer points to memory that should not be accessed), then the
|
||||
string compare will be treated as not matching.
|
||||
|
||||
5.3 Clearing filters
|
||||
--------------------
|
||||
|
||||
|
||||
@@ -371,6 +371,9 @@ The bits in the dirty bitmap are cleared before the ioctl returns, unless
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
|
||||
see the description of the capability.
|
||||
|
||||
Note that the Xen shared info page, if configured, shall always be assumed
|
||||
to be dirty. KVM will not explicitly mark it such.
|
||||
|
||||
4.9 KVM_SET_MEMORY_ALIAS
|
||||
------------------------
|
||||
|
||||
@@ -1566,6 +1569,7 @@ otherwise it will return EBUSY error.
|
||||
|
||||
struct kvm_xsave {
|
||||
__u32 region[1024];
|
||||
__u32 extra[0];
|
||||
};
|
||||
|
||||
This ioctl would copy current vcpu's xsave struct to the userspace.
|
||||
@@ -1574,7 +1578,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
|
||||
4.43 KVM_SET_XSAVE
|
||||
------------------
|
||||
|
||||
:Capability: KVM_CAP_XSAVE
|
||||
:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xsave (in)
|
||||
@@ -1585,9 +1589,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
|
||||
|
||||
struct kvm_xsave {
|
||||
__u32 region[1024];
|
||||
__u32 extra[0];
|
||||
};
|
||||
|
||||
This ioctl would copy userspace's xsave struct to the kernel.
|
||||
This ioctl would copy userspace's xsave struct to the kernel. It copies
|
||||
as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
|
||||
when invoked on the vm file descriptor. The size value returned by
|
||||
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
|
||||
Currently, it is only greater than 4096 if a dynamic feature has been
|
||||
enabled with ``arch_prctl()``, but this may change in the future.
|
||||
|
||||
The offsets of the state save areas in struct kvm_xsave follow the
|
||||
contents of CPUID leaf 0xD on the host.
|
||||
|
||||
|
||||
4.44 KVM_GET_XCRS
|
||||
@@ -1684,6 +1697,10 @@ userspace capabilities, and with user requirements (for example, the
|
||||
user may wish to constrain cpuid to emulate older hardware, or for
|
||||
feature consistency across a cluster).
|
||||
|
||||
Dynamically-enabled feature bits need to be requested with
|
||||
``arch_prctl()`` before calling this ioctl. Feature bits that have not
|
||||
been requested are excluded from the result.
|
||||
|
||||
Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
|
||||
expose cpuid features (e.g. MONITOR) which are not supported by kvm in
|
||||
its default configuration. If userspace enables such capabilities, it
|
||||
@@ -1796,6 +1813,7 @@ No flags are specified so far, the corresponding field must be set to zero.
|
||||
struct kvm_irq_routing_msi msi;
|
||||
struct kvm_irq_routing_s390_adapter adapter;
|
||||
struct kvm_irq_routing_hv_sint hv_sint;
|
||||
struct kvm_irq_routing_xen_evtchn xen_evtchn;
|
||||
__u32 pad[8];
|
||||
} u;
|
||||
};
|
||||
@@ -1805,6 +1823,7 @@ No flags are specified so far, the corresponding field must be set to zero.
|
||||
#define KVM_IRQ_ROUTING_MSI 2
|
||||
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
|
||||
#define KVM_IRQ_ROUTING_HV_SINT 4
|
||||
#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
|
||||
|
||||
flags:
|
||||
|
||||
@@ -1856,6 +1875,20 @@ address_hi must be zero.
|
||||
__u32 sint;
|
||||
};
|
||||
|
||||
struct kvm_irq_routing_xen_evtchn {
|
||||
__u32 port;
|
||||
__u32 vcpu;
|
||||
__u32 priority;
|
||||
};
|
||||
|
||||
|
||||
When KVM_CAP_XEN_HVM includes the KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL bit
|
||||
in its indication of supported features, routing to Xen event channels
|
||||
is supported. Although the priority field is present, only the value
|
||||
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL is supported, which means delivery by
|
||||
2 level event channels. FIFO event channel support may be added in
|
||||
the future.
|
||||
|
||||
|
||||
4.55 KVM_SET_TSC_KHZ
|
||||
--------------------
|
||||
@@ -3701,7 +3734,7 @@ KVM with the currently defined set of flags.
|
||||
:Architectures: s390
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_s390_skeys
|
||||
:Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
|
||||
:Returns: 0 on success, KVM_S390_GET_SKEYS_NONE if guest is not using storage
|
||||
keys, negative value on error
|
||||
|
||||
This ioctl is used to get guest storage key values on the s390
|
||||
@@ -3720,7 +3753,7 @@ you want to get.
|
||||
|
||||
The count field is the number of consecutive frames (starting from start_gfn)
|
||||
whose storage keys to get. The count field must be at least 1 and the maximum
|
||||
allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
|
||||
allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
|
||||
will cause the ioctl to return -EINVAL.
|
||||
|
||||
The skeydata_addr field is the address to a buffer large enough to hold count
|
||||
@@ -3744,7 +3777,7 @@ you want to set.
|
||||
|
||||
The count field is the number of consecutive frames (starting from start_gfn)
|
||||
whose storage keys to set. The count field must be at least 1 and the maximum
|
||||
allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
|
||||
allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
|
||||
will cause the ioctl to return -EINVAL.
|
||||
|
||||
The skeydata_addr field is the address to a buffer containing count bytes of
|
||||
@@ -5134,6 +5167,15 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
|
||||
not aware of the Xen CPU id which is used as the index into the
|
||||
vcpu_info[] array, so cannot know the correct default location.
|
||||
|
||||
Note that the shared info page may be constantly written to by KVM;
|
||||
it contains the event channel bitmap used to deliver interrupts to
|
||||
a Xen guest, amongst other things. It is exempt from dirty tracking
|
||||
mechanisms — KVM will not explicitly mark the page as dirty each
|
||||
time an event channel interrupt is delivered to the guest! Thus,
|
||||
userspace should always assume that the designated GFN is dirty if
|
||||
any vCPU has been running or any event channel interrupts can be
|
||||
routed to the guest.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
||||
Sets the exception vector used to deliver Xen event channel upcalls.
|
||||
|
||||
@@ -5503,6 +5545,34 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
|
||||
The Stats Data block contains an array of 64-bit values in the same order
|
||||
as the descriptors in Descriptors block.
|
||||
|
||||
4.42 KVM_GET_XSAVE2
|
||||
-------------------
|
||||
|
||||
:Capability: KVM_CAP_XSAVE2
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xsave (out)
|
||||
:Returns: 0 on success, -1 on error
|
||||
|
||||
|
||||
::
|
||||
|
||||
struct kvm_xsave {
|
||||
__u32 region[1024];
|
||||
__u32 extra[0];
|
||||
};
|
||||
|
||||
This ioctl would copy current vcpu's xsave struct to the userspace. It
|
||||
copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
|
||||
when invoked on the vm file descriptor. The size value returned by
|
||||
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
|
||||
Currently, it is only greater than 4096 if a dynamic feature has been
|
||||
enabled with ``arch_prctl()``, but this may change in the future.
|
||||
|
||||
The offsets of the state save areas in struct kvm_xsave follow the contents
|
||||
of CPUID leaf 0xD on the host.
|
||||
|
||||
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
|
||||
@@ -7401,6 +7471,7 @@ PVHVM guests. Valid flags are::
|
||||
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
|
||||
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
|
||||
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
|
||||
ioctl is available, for the guest to set its hypercall page.
|
||||
@@ -7420,6 +7491,10 @@ The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
|
||||
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
|
||||
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
|
||||
of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
|
||||
field set to indicate 2 level event channel delivery.
|
||||
|
||||
8.31 KVM_CAP_PPC_MULTITCE
|
||||
-------------------------
|
||||
|
||||
|
||||
@@ -161,7 +161,7 @@ Shadow pages contain the following information:
|
||||
If clear, this page corresponds to a guest page table denoted by the gfn
|
||||
field.
|
||||
role.quadrant:
|
||||
When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
|
||||
When role.has_4_byte_gpte=1, the guest uses 32-bit gptes while the host uses 64-bit
|
||||
sptes. That means a guest page table contains more ptes than the host,
|
||||
so multiple shadow pages are needed to shadow one guest page.
|
||||
For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
|
||||
@@ -177,9 +177,9 @@ Shadow pages contain the following information:
|
||||
The page is invalid and should not be used. It is a root page that is
|
||||
currently pinned (by a cpu hardware register pointing to it); once it is
|
||||
unpinned it will be destroyed.
|
||||
role.gpte_is_8_bytes:
|
||||
Reflects the size of the guest PTE for which the page is valid, i.e. '1'
|
||||
if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
|
||||
role.has_4_byte_gpte:
|
||||
Reflects the size of the guest PTE for which the page is valid, i.e. '0'
|
||||
if direct map or 64-bit gptes are in use, '1' if 32-bit gptes are in use.
|
||||
role.efer_nx:
|
||||
Contains the value of efer.nx for which the page is valid.
|
||||
role.cr0_wp:
|
||||
|
||||
22
MAINTAINERS
22
MAINTAINERS
@@ -226,6 +226,7 @@ F: drivers/net/ethernet/8390/
|
||||
M: Eric Van Hensbergen <ericvh@gmail.com>
|
||||
M: Latchesar Ionkov <lucho@ionkov.net>
|
||||
M: Dominique Martinet <asmadeus@codewreck.org>
|
||||
R: Christian Schoenebeck <linux_oss@crudebyte.com>
|
||||
L: v9fs-developer@lists.sourceforge.net
|
||||
S: Maintained
|
||||
W: http://swik.net/v9fs
|
||||
@@ -7416,7 +7417,6 @@ F: include/uapi/scsi/fc/
|
||||
|
||||
FILE LOCKING (flock() and fcntl()/lockf())
|
||||
M: Jeff Layton <jlayton@kernel.org>
|
||||
M: "J. Bruce Fields" <bfields@fieldses.org>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
S: Maintained
|
||||
F: fs/fcntl.c
|
||||
@@ -10434,12 +10434,11 @@ S: Odd Fixes
|
||||
W: http://kernelnewbies.org/KernelJanitors
|
||||
|
||||
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
|
||||
M: "J. Bruce Fields" <bfields@fieldses.org>
|
||||
M: Chuck Lever <chuck.lever@oracle.com>
|
||||
L: linux-nfs@vger.kernel.org
|
||||
S: Supported
|
||||
W: http://nfs.sourceforge.net/
|
||||
T: git git://linux-nfs.org/~bfields/linux.git
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
|
||||
F: fs/lockd/
|
||||
F: fs/nfs_common/
|
||||
F: fs/nfsd/
|
||||
@@ -10547,8 +10546,8 @@ F: arch/powerpc/kernel/kvm*
|
||||
F: arch/powerpc/kvm/
|
||||
|
||||
KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
|
||||
M: Anup Patel <anup.patel@wdc.com>
|
||||
R: Atish Patra <atish.patra@wdc.com>
|
||||
M: Anup Patel <anup@brainfault.org>
|
||||
R: Atish Patra <atishp@atishpatra.org>
|
||||
L: kvm@vger.kernel.org
|
||||
L: kvm-riscv@lists.infradead.org
|
||||
L: linux-riscv@lists.infradead.org
|
||||
@@ -14898,6 +14897,19 @@ L: linux-pci@vger.kernel.org
|
||||
S: Supported
|
||||
F: Documentation/PCI/pci-error-recovery.rst
|
||||
|
||||
PCI PEER-TO-PEER DMA (P2PDMA)
|
||||
M: Bjorn Helgaas <bhelgaas@google.com>
|
||||
M: Logan Gunthorpe <logang@deltatee.com>
|
||||
L: linux-pci@vger.kernel.org
|
||||
S: Supported
|
||||
Q: https://patchwork.kernel.org/project/linux-pci/list/
|
||||
B: https://bugzilla.kernel.org
|
||||
C: irc://irc.oftc.net/linux-pci
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
|
||||
F: Documentation/driver-api/pci/p2pdma.rst
|
||||
F: drivers/pci/p2pdma.c
|
||||
F: include/linux/pci-p2pdma.h
|
||||
|
||||
PCI MSI DRIVER FOR ALTERA MSI IP
|
||||
M: Joyce Ooi <joyce.ooi@intel.com>
|
||||
L: linux-pci@vger.kernel.org
|
||||
|
||||
@@ -180,7 +180,10 @@ void pci_ioremap_set_mem_type(int mem_type);
|
||||
static inline void pci_ioremap_set_mem_type(int mem_type) {}
|
||||
#endif
|
||||
|
||||
extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
|
||||
struct resource;
|
||||
|
||||
#define pci_remap_iospace pci_remap_iospace
|
||||
int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
|
||||
|
||||
/*
|
||||
* PCI configuration space mapping function.
|
||||
|
||||
@@ -38,6 +38,7 @@ static int num_pcie_ports;
|
||||
static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
|
||||
{
|
||||
struct pcie_port *pp;
|
||||
struct resource realio;
|
||||
|
||||
if (nr >= num_pcie_ports)
|
||||
return 0;
|
||||
@@ -53,10 +54,10 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
|
||||
|
||||
orion_pcie_setup(pp->base);
|
||||
|
||||
if (pp->index == 0)
|
||||
pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE0_IO_PHYS_BASE);
|
||||
else
|
||||
pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE1_IO_PHYS_BASE);
|
||||
realio.start = sys->busnr * SZ_64K;
|
||||
realio.end = realio.start + SZ_64K - 1;
|
||||
pci_remap_iospace(&realio, pp->index == 0 ? DOVE_PCIE0_IO_PHYS_BASE :
|
||||
DOVE_PCIE1_IO_PHYS_BASE);
|
||||
|
||||
/*
|
||||
* IORESOURCE_MEM
|
||||
|
||||
@@ -185,6 +185,7 @@ iop3xx_pci_abort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
||||
int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
|
||||
{
|
||||
struct resource *res;
|
||||
struct resource realio;
|
||||
|
||||
if (nr != 0)
|
||||
return 0;
|
||||
@@ -206,7 +207,9 @@ int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
|
||||
|
||||
pci_add_resource_offset(&sys->resources, res, sys->mem_offset);
|
||||
|
||||
pci_ioremap_io(0, IOP3XX_PCI_LOWER_IO_PA);
|
||||
realio.start = 0;
|
||||
realio.end = realio.start + SZ_64K - 1;
|
||||
pci_remap_iospace(&realio, IOP3XX_PCI_LOWER_IO_PA);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -101,6 +101,7 @@ static void __init mv78xx0_pcie_preinit(void)
|
||||
static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
|
||||
{
|
||||
struct pcie_port *pp;
|
||||
struct resource realio;
|
||||
|
||||
if (nr >= num_pcie_ports)
|
||||
return 0;
|
||||
@@ -115,7 +116,9 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
|
||||
orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
|
||||
orion_pcie_setup(pp->base);
|
||||
|
||||
pci_ioremap_io(nr * SZ_64K, MV78XX0_PCIE_IO_PHYS_BASE(nr));
|
||||
realio.start = nr * SZ_64K;
|
||||
realio.end = realio.start + SZ_64K - 1;
|
||||
pci_remap_iospace(&realio, MV78XX0_PCIE_IO_PHYS_BASE(nr));
|
||||
|
||||
pci_add_resource_offset(&sys->resources, &pp->res, sys->mem_offset);
|
||||
|
||||
|
||||
@@ -142,6 +142,7 @@ static struct pci_ops pcie_ops = {
|
||||
static int __init pcie_setup(struct pci_sys_data *sys)
|
||||
{
|
||||
struct resource *res;
|
||||
struct resource realio;
|
||||
int dev;
|
||||
|
||||
/*
|
||||
@@ -164,7 +165,9 @@ static int __init pcie_setup(struct pci_sys_data *sys)
|
||||
pcie_ops.read = pcie_rd_conf_wa;
|
||||
}
|
||||
|
||||
pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCIE_IO_PHYS_BASE);
|
||||
realio.start = sys->busnr * SZ_64K;
|
||||
realio.end = realio.start + SZ_64K - 1;
|
||||
pci_remap_iospace(&realio, ORION5X_PCIE_IO_PHYS_BASE);
|
||||
|
||||
/*
|
||||
* Request resources.
|
||||
@@ -466,6 +469,7 @@ static void __init orion5x_setup_pci_wins(void)
|
||||
static int __init pci_setup(struct pci_sys_data *sys)
|
||||
{
|
||||
struct resource *res;
|
||||
struct resource realio;
|
||||
|
||||
/*
|
||||
* Point PCI unit MBUS decode windows to DRAM space.
|
||||
@@ -482,7 +486,9 @@ static int __init pci_setup(struct pci_sys_data *sys)
|
||||
*/
|
||||
orion5x_setbits(PCI_CMD, PCI_CMD_HOST_REORDER);
|
||||
|
||||
pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCI_IO_PHYS_BASE);
|
||||
realio.start = sys->busnr * SZ_64K;
|
||||
realio.end = realio.start + SZ_64K - 1;
|
||||
pci_remap_iospace(&realio, ORION5X_PCI_IO_PHYS_BASE);
|
||||
|
||||
/*
|
||||
* Request resources
|
||||
|
||||
@@ -459,16 +459,20 @@ void pci_ioremap_set_mem_type(int mem_type)
|
||||
pci_ioremap_mem_type = mem_type;
|
||||
}
|
||||
|
||||
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
|
||||
int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
|
||||
{
|
||||
BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
|
||||
unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
|
||||
|
||||
return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
|
||||
PCI_IO_VIRT_BASE + offset + SZ_64K,
|
||||
phys_addr,
|
||||
if (!(res->flags & IORESOURCE_IO))
|
||||
return -EINVAL;
|
||||
|
||||
if (res->end > IO_SPACE_LIMIT)
|
||||
return -EINVAL;
|
||||
|
||||
return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
|
||||
__pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pci_ioremap_io);
|
||||
EXPORT_SYMBOL(pci_remap_iospace);
|
||||
|
||||
void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
|
||||
{
|
||||
|
||||
@@ -64,6 +64,15 @@
|
||||
#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
|
||||
#define HV_REGISTER_STIMER0_COUNT 0x000B0001
|
||||
|
||||
union hv_msi_entry {
|
||||
u64 as_uint64[2];
|
||||
struct {
|
||||
u64 address;
|
||||
u32 data;
|
||||
u32 reserved;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#include <asm-generic/hyperv-tlfs.h>
|
||||
|
||||
#endif
|
||||
|
||||
@@ -63,6 +63,7 @@ enum __kvm_host_smccc_func {
|
||||
|
||||
/* Hypercalls available after pKVM finalisation */
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
|
||||
|
||||
@@ -41,6 +41,8 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu);
|
||||
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
|
||||
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
|
||||
|
||||
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !(vcpu->arch.hcr_el2 & HCR_RW);
|
||||
@@ -386,7 +388,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
|
||||
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
|
||||
} else {
|
||||
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
|
||||
sctlr |= (1 << 25);
|
||||
sctlr |= SCTLR_ELx_EE;
|
||||
vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/kvm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
||||
|
||||
@@ -298,9 +297,6 @@ struct kvm_vcpu_arch {
|
||||
/* Exception Information */
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
|
||||
/* State of various workarounds, see kvm_asm.h for bit assignment */
|
||||
u64 workaround_flags;
|
||||
|
||||
/* Miscellaneous vcpu state flags */
|
||||
u64 flags;
|
||||
|
||||
@@ -321,8 +317,8 @@ struct kvm_vcpu_arch {
|
||||
struct kvm_guest_debug_arch vcpu_debug_state;
|
||||
struct kvm_guest_debug_arch external_debug_state;
|
||||
|
||||
struct thread_info *host_thread_info; /* hyp VA */
|
||||
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
|
||||
struct task_struct *parent_task;
|
||||
|
||||
struct {
|
||||
/* {Break,watch}point registers */
|
||||
@@ -367,9 +363,6 @@ struct kvm_vcpu_arch {
|
||||
int target;
|
||||
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
/* Detect first run of a vcpu */
|
||||
bool has_run_once;
|
||||
|
||||
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
|
||||
u64 vsesr_el2;
|
||||
|
||||
@@ -411,20 +404,17 @@ struct kvm_vcpu_arch {
|
||||
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
|
||||
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
|
||||
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
|
||||
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
|
||||
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
|
||||
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
|
||||
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
|
||||
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
|
||||
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
|
||||
/*
|
||||
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
|
||||
* set together with an exception...
|
||||
*/
|
||||
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
|
||||
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_USE_HW | \
|
||||
KVM_GUESTDBG_SINGLESTEP)
|
||||
/*
|
||||
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
|
||||
* take the following values:
|
||||
@@ -442,11 +432,14 @@ struct kvm_vcpu_arch {
|
||||
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
|
||||
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
|
||||
|
||||
/*
|
||||
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
|
||||
* set together with an exception...
|
||||
*/
|
||||
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
|
||||
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_USE_HW | \
|
||||
KVM_GUESTDBG_SINGLESTEP)
|
||||
|
||||
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
|
||||
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
|
||||
@@ -606,6 +599,8 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
void kvm_arm_halt_guest(struct kvm *kvm);
|
||||
void kvm_arm_resume_guest(struct kvm *kvm);
|
||||
|
||||
#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
#define kvm_call_hyp_nvhe(f, ...) \
|
||||
({ \
|
||||
@@ -724,7 +719,6 @@ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
void kvm_arm_init_debug(void);
|
||||
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
|
||||
@@ -744,8 +738,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
|
||||
/* Guest/host FPSIMD coordination helpers */
|
||||
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
|
||||
{
|
||||
@@ -756,12 +752,7 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
|
||||
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
|
||||
|
||||
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
|
||||
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_arch_vcpu_run_map_fp(vcpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
|
||||
void kvm_clr_pmu_events(u32 clr);
|
||||
|
||||
|
||||
@@ -90,7 +90,6 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
|
||||
|
||||
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
|
||||
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
|
||||
void __sve_save_state(void *sve_pffr, u32 *fpsr);
|
||||
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
|
||||
@@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
int kvm_share_hyp(void *from, void *to);
|
||||
void kvm_unshare_hyp(void *from, void *to);
|
||||
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
|
||||
void __iomem **kaddr,
|
||||
|
||||
@@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
|
||||
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
||||
enum kvm_pgtable_prot prot);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
|
||||
* @addr: Virtual address from which to remove the mapping.
|
||||
* @size: Size of the mapping.
|
||||
*
|
||||
* The offset of @addr within a page is ignored and @size is rounded-up to
|
||||
* the next page boundary.
|
||||
*
|
||||
* TLB invalidation is performed for each page-table entry cleared during the
|
||||
* unmapping operation and the reference count for the page-table page
|
||||
* containing the cleared entry is decremented, with unreferenced pages being
|
||||
* freed. The unmapping operation will stop early if it encounters either an
|
||||
* invalid page-table entry or a valid block mapping which maps beyond the range
|
||||
* being unmapped.
|
||||
*
|
||||
* Return: Number of bytes unmapped, which may be 0.
|
||||
*/
|
||||
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
|
||||
/**
|
||||
* kvm_get_vtcr() - Helper to construct VTCR_EL2
|
||||
* @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
|
||||
@@ -270,8 +291,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
|
||||
/**
|
||||
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
* @arch: Arch-specific KVM structure representing the guest virtual
|
||||
* machine.
|
||||
* @mmu: S2 MMU context for this S2 translation
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @flags: Stage-2 configuration flags.
|
||||
* @force_pte_cb: Function that returns true if page level mappings must
|
||||
@@ -279,13 +299,13 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags,
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb);
|
||||
|
||||
#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
|
||||
__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
|
||||
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
|
||||
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
|
||||
|
||||
71
arch/arm64/include/asm/kvm_pkvm.h
Normal file
71
arch/arm64/include/asm/kvm_pkvm.h
Normal file
@@ -0,0 +1,71 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 - Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
#ifndef __ARM64_KVM_PKVM_H__
|
||||
#define __ARM64_KVM_PKVM_H__
|
||||
|
||||
#include <linux/memblock.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
#define HYP_MEMBLOCK_REGIONS 128
|
||||
|
||||
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
|
||||
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
|
||||
|
||||
/*
* Upper bound on the number of page-table pages needed to map @nr_pages
* pages: for each of the KVM_PGTABLE_MAX_LEVELS levels, count one table per
* PTRS_PER_PTE entries of the level below (rounded up) and return the sum.
*/
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
|
||||
/* Provision the worst case scenario */
|
||||
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
|
||||
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
|
||||
total += nr_pages;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/*
* Sum __hyp_pgtable_max_pages() over the first hyp_memblock_nr entries of
* hyp_memory[], i.e. the worst-case number of page-table pages needed to map
* every listed memory region at page granularity.
*/
static inline unsigned long __hyp_pgtable_total_pages(void)
|
||||
{
|
||||
unsigned long res = 0, i;
|
||||
|
||||
/* Cover all of memory with page-granularity */
|
||||
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
|
||||
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
|
||||
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
* Page-table page budget: full page-granularity coverage of hyp_memory[] plus
* the worst case for a further 1 GiB of mappings.
* NOTE(review): the name suggests this sizes the hypervisor stage-1
* page-table pool; the callers are not visible here -- confirm.
*/
static inline unsigned long hyp_s1_pgtable_pages(void)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
res = __hyp_pgtable_total_pages();
|
||||
|
||||
/* Allow 1 GiB for private mappings */
|
||||
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
* Page-table page budget: full page-granularity coverage of hyp_memory[],
* 16 extra pages for concatenated pgds, plus the worst case for a further
* 1 GiB of mappings.
* NOTE(review): the name suggests this sizes the host stage-2 page-table
* pool; the callers are not visible here -- confirm.
*/
static inline unsigned long host_s2_pgtable_pages(void)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
/*
|
||||
* Include an extra 16 pages to safely upper-bound the worst case of
|
||||
* concatenated pgds.
|
||||
*/
|
||||
res = __hyp_pgtable_total_pages() + 16;
|
||||
|
||||
/* Allow 1 GiB for MMIO mappings */
|
||||
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_PKVM_H__ */
|
||||
@@ -15,6 +15,7 @@
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/refcount.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
typedef struct {
|
||||
atomic64_t id;
|
||||
|
||||
@@ -953,6 +953,7 @@
|
||||
#define ID_AA64DFR0_PMUVER_8_1 0x4
|
||||
#define ID_AA64DFR0_PMUVER_8_4 0x5
|
||||
#define ID_AA64DFR0_PMUVER_8_5 0x6
|
||||
#define ID_AA64DFR0_PMUVER_8_7 0x7
|
||||
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
|
||||
|
||||
#define ID_AA64DFR0_PMSVER_8_2 0x1
|
||||
|
||||
@@ -111,7 +111,6 @@ int main(void)
|
||||
#ifdef CONFIG_KVM
|
||||
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
|
||||
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
|
||||
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
|
||||
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
|
||||
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
|
||||
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
|
||||
|
||||
@@ -79,7 +79,11 @@
|
||||
* indicate whether or not the userland FPSIMD state of the current task is
|
||||
* present in the registers. The flag is set unless the FPSIMD registers of this
|
||||
* CPU currently contain the most recent userland FPSIMD state of the current
|
||||
* task.
|
||||
* task. If the task is behaving as a VMM, then this will be managed by
|
||||
* KVM which will clear it to indicate that the vcpu FPSIMD state is currently
|
||||
* loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
|
||||
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
|
||||
* flag the register state as invalid.
|
||||
*
|
||||
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
|
||||
* save the task's FPSIMD context back to task_struct from softirq context.
|
||||
|
||||
2
arch/arm64/kvm/.gitignore
vendored
Normal file
2
arch/arm64/kvm/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
hyp_constants.h
|
||||
@@ -40,6 +40,7 @@ menuconfig KVM
|
||||
select HAVE_KVM_VCPU_RUN_PID_CHANGE
|
||||
select SCHED_INFO
|
||||
select GUEST_PERF_EVENTS if PERF_EVENTS
|
||||
select INTERVAL_TREE
|
||||
help
|
||||
Support hosting virtualized guest machines.
|
||||
|
||||
|
||||
@@ -5,17 +5,15 @@
|
||||
|
||||
ccflags-y += -I $(srctree)/$(src)
|
||||
|
||||
KVM=../../../virt/kvm
|
||||
include $(srctree)/virt/kvm/Makefile.kvm
|
||||
|
||||
obj-$(CONFIG_KVM) += kvm.o
|
||||
obj-$(CONFIG_KVM) += hyp/
|
||||
|
||||
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
||||
$(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
|
||||
arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
|
||||
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
|
||||
inject_fault.o va_layout.o handle_exit.o \
|
||||
guest.o debug.o reset.o sys_regs.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pmu.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
|
||||
arch_timer.o trng.o\
|
||||
vgic/vgic.o vgic/vgic-init.o \
|
||||
vgic/vgic-irqfd.o vgic/vgic-v2.o \
|
||||
@@ -25,3 +23,19 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
||||
vgic/vgic-its.o vgic/vgic-debug.o
|
||||
|
||||
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
|
||||
|
||||
always-y := hyp_constants.h hyp-constants.s
|
||||
|
||||
define rule_gen_hyp_constants
|
||||
$(call filechk,offsets,__HYP_CONSTANTS_H__)
|
||||
endef
|
||||
|
||||
CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
|
||||
$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
|
||||
$(call if_changed_dep,cc_s_c)
|
||||
|
||||
$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE
|
||||
$(call if_changed_rule,gen_hyp_constants)
|
||||
|
||||
obj-kvm := $(addprefix $(obj)/, $(kvm-y))
|
||||
$(obj-kvm): $(obj)/hyp_constants.h
|
||||
|
||||
@@ -467,7 +467,7 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* Schedule the background timer before calling kvm_vcpu_block, so that this
|
||||
* Schedule the background timer before calling kvm_vcpu_halt, so that this
|
||||
* thread is removed from its waitqueue and made runnable when there's a timer
|
||||
* interrupt to handle.
|
||||
*/
|
||||
@@ -649,7 +649,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
|
||||
struct timer_map map;
|
||||
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
|
||||
|
||||
if (unlikely(!timer->enabled))
|
||||
return;
|
||||
@@ -672,7 +671,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
if (map.emul_ptimer)
|
||||
soft_timer_cancel(&map.emul_ptimer->hrtimer);
|
||||
|
||||
if (rcuwait_active(wait))
|
||||
if (kvm_vcpu_is_blocking(vcpu))
|
||||
kvm_timer_blocking(vcpu);
|
||||
|
||||
/*
|
||||
@@ -750,7 +749,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
/* Make the updates of cntvoff for all vtimer contexts atomic */
|
||||
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu *tmp;
|
||||
|
||||
@@ -1189,8 +1188,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
|
||||
|
||||
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int vtimer_irq, ptimer_irq;
|
||||
int i, ret;
|
||||
int vtimer_irq, ptimer_irq, ret;
|
||||
unsigned long i;
|
||||
|
||||
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
|
||||
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
|
||||
@@ -1297,7 +1296,7 @@ void kvm_timer_init_vhe(void)
|
||||
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
unsigned long i;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
|
||||
|
||||
@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
|
||||
ret = kvm_share_hyp(kvm, kvm + 1);
|
||||
if (ret)
|
||||
goto out_free_stage2_pgd;
|
||||
|
||||
@@ -175,19 +175,13 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
|
||||
*/
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
|
||||
bitmap_free(kvm->arch.pmu_filter);
|
||||
|
||||
kvm_vgic_destroy(kvm);
|
||||
|
||||
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
|
||||
if (kvm->vcpus[i]) {
|
||||
kvm_vcpu_destroy(kvm->vcpus[i]);
|
||||
kvm->vcpus[i] = NULL;
|
||||
}
|
||||
}
|
||||
atomic_set(&kvm->online_vcpus, 0);
|
||||
kvm_destroy_vcpus(kvm);
|
||||
|
||||
kvm_unshare_hyp(kvm, kvm + 1);
|
||||
}
|
||||
|
||||
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
@@ -342,7 +336,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
|
||||
return kvm_share_hyp(vcpu, vcpu + 1);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
@@ -351,7 +345,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
|
||||
if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
|
||||
static_branch_dec(&userspace_irqchip_in_use);
|
||||
|
||||
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
|
||||
@@ -368,27 +362,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* If we're about to block (most likely because we've just hit a
|
||||
* WFI), we need to sync back the state of the GIC CPU interface
|
||||
* so that we have the latest PMR and group enables. This ensures
|
||||
* that kvm_arch_vcpu_runnable has up-to-date data to decide
|
||||
* whether we have pending interrupts.
|
||||
*
|
||||
* For the same reason, we want to tell GICv4 that we need
|
||||
* doorbells to be signalled, should an interrupt become pending.
|
||||
*/
|
||||
preempt_disable();
|
||||
kvm_vgic_vmcr_sync(vcpu);
|
||||
vgic_v4_put(vcpu, true);
|
||||
preempt_enable();
|
||||
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
vgic_v4_load(vcpu);
|
||||
preempt_enable();
|
||||
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
@@ -591,18 +570,33 @@ static void update_vmid(struct kvm_vmid *vmid)
|
||||
spin_unlock(&kvm_vmid_lock);
|
||||
}
|
||||
|
||||
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
|
||||
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.target >= 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle both the initialisation that is being done when the vcpu is
|
||||
* run for the first time, as well as the updates that must be
|
||||
* performed each time we get a new thread dealing with this vcpu.
|
||||
*/
|
||||
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
if (likely(vcpu->arch.has_run_once))
|
||||
return 0;
|
||||
if (!kvm_vcpu_initialized(vcpu))
|
||||
return -ENOEXEC;
|
||||
|
||||
if (!kvm_arm_vcpu_is_finalized(vcpu))
|
||||
return -EPERM;
|
||||
|
||||
vcpu->arch.has_run_once = true;
|
||||
ret = kvm_arch_vcpu_run_map_fp(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (likely(vcpu_has_run_once(vcpu)))
|
||||
return 0;
|
||||
|
||||
kvm_arm_vcpu_init_debug(vcpu);
|
||||
|
||||
@@ -614,12 +608,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
|
||||
ret = kvm_vgic_map_resources(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
/*
|
||||
* Tell the rest of the code that there are userspace irqchip
|
||||
* VMs in the wild.
|
||||
*/
|
||||
static_branch_inc(&userspace_irqchip_in_use);
|
||||
}
|
||||
|
||||
ret = kvm_timer_enable(vcpu);
|
||||
@@ -627,6 +615,16 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
|
||||
ret = kvm_arm_pmu_v3_enable(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!irqchip_in_kernel(kvm)) {
|
||||
/*
|
||||
* Tell the rest of the code that there are userspace irqchip
|
||||
* VMs in the wild.
|
||||
*/
|
||||
static_branch_inc(&userspace_irqchip_in_use);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize traps for protected VMs.
|
||||
@@ -646,7 +644,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
|
||||
|
||||
void kvm_arm_halt_guest(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
@@ -656,12 +654,12 @@ void kvm_arm_halt_guest(struct kvm *kvm)
|
||||
|
||||
void kvm_arm_resume_guest(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.pause = false;
|
||||
rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
|
||||
__kvm_vcpu_wake_up(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -686,9 +684,37 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
|
||||
smp_rmb();
|
||||
}
|
||||
|
||||
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
|
||||
/**
|
||||
* kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
|
||||
* @vcpu: The VCPU pointer
|
||||
*
|
||||
* Suspend execution of a vCPU until a valid wake event is detected, i.e. until
|
||||
* the vCPU is runnable. The vCPU may or may not be scheduled out, depending
|
||||
* on when a wake event arrives, e.g. there may already be a pending wake event.
|
||||
*/
|
||||
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.target >= 0;
|
||||
/*
|
||||
* Sync back the state of the GIC CPU interface so that we have
|
||||
* the latest PMR and group enables. This ensures that
|
||||
* kvm_arch_vcpu_runnable has up-to-date data to decide whether
|
||||
* we have pending interrupts, e.g. when determining if the
|
||||
* vCPU should block.
|
||||
*
|
||||
* For the same reason, we want to tell GICv4 that we need
|
||||
* doorbells to be signalled, should an interrupt become pending.
|
||||
*/
|
||||
preempt_disable();
|
||||
kvm_vgic_vmcr_sync(vcpu);
|
||||
vgic_v4_put(vcpu, true);
|
||||
preempt_enable();
|
||||
|
||||
kvm_vcpu_halt(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
|
||||
preempt_disable();
|
||||
vgic_v4_load(vcpu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
@@ -786,13 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
struct kvm_run *run = vcpu->run;
|
||||
int ret;
|
||||
|
||||
if (unlikely(!kvm_vcpu_initialized(vcpu)))
|
||||
return -ENOEXEC;
|
||||
|
||||
ret = kvm_vcpu_first_run_init(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_MMIO) {
|
||||
ret = kvm_handle_mmio_return(vcpu);
|
||||
if (ret)
|
||||
@@ -856,6 +875,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
kvm_arm_setup_debug(vcpu);
|
||||
kvm_arch_vcpu_ctxflush_fp(vcpu);
|
||||
|
||||
/**************************************************************
|
||||
* Enter the guest
|
||||
@@ -1130,7 +1150,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
|
||||
* need to invalidate the I-cache though, as FWB does *not*
|
||||
* imply CTR_EL0.DIC.
|
||||
*/
|
||||
if (vcpu->arch.has_run_once) {
|
||||
if (vcpu_has_run_once(vcpu)) {
|
||||
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
|
||||
stage2_unmap_vm(vcpu->kvm);
|
||||
else
|
||||
@@ -2043,7 +2063,7 @@ static int finalize_hyp_mode(void)
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
unsigned long i;
|
||||
|
||||
mpidr &= MPIDR_HWID_BITMASK;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
*/
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
@@ -15,6 +14,19 @@
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
/*
* Undo the sharing of the parent task's FPSIMD state with the hypervisor.
*
* Does nothing unless protected KVM is enabled and vcpu->arch.parent_task is
* set. Otherwise unshares the task's user_fpsimd_state and drops the task
* reference with put_task_struct().
*/
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct task_struct *p = vcpu->arch.parent_task;
|
||||
struct user_fpsimd_state *fpsimd;
|
||||
|
||||
/* Nothing was pinned/shared for this vcpu */
|
||||
if (!is_protected_kvm_enabled() || !p)
|
||||
return;
|
||||
|
||||
fpsimd = &p->thread.uw.fpsimd_state;
|
||||
kvm_unshare_hyp(fpsimd, fpsimd + 1);
|
||||
/* The task no longer needs to stay pinned once its data is unshared */
|
||||
put_task_struct(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called on entry to KVM_RUN unless this vcpu previously ran at least
|
||||
* once and the most recent prior KVM_RUN for this vcpu was called from
|
||||
@@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
struct thread_info *ti = &current->thread_info;
|
||||
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
|
||||
|
||||
kvm_vcpu_unshare_task_fp(vcpu);
|
||||
|
||||
/* Make sure the host task fpsimd state is visible to hyp: */
|
||||
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
|
||||
|
||||
/*
|
||||
* Make sure the host task thread flags and fpsimd state are
|
||||
* visible to hyp:
|
||||
* We need to keep current's task_struct pinned until its data has been
|
||||
* unshared with the hypervisor to make sure it is not re-used by the
|
||||
* kernel and donated to someone else while already shared -- see
|
||||
* kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
|
||||
*/
|
||||
ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
if (vcpu->arch.sve_state) {
|
||||
void *sve_end;
|
||||
|
||||
sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
|
||||
|
||||
ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
|
||||
PAGE_HYP);
|
||||
if (ret)
|
||||
goto error;
|
||||
if (is_protected_kvm_enabled()) {
|
||||
get_task_struct(current);
|
||||
vcpu->arch.parent_task = current;
|
||||
}
|
||||
|
||||
vcpu->arch.host_thread_info = kern_hyp_va(ti);
|
||||
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
|
||||
error:
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -66,26 +71,27 @@ error:
|
||||
*
|
||||
* Here, we just set the correct metadata to indicate that the FPSIMD
|
||||
* state in the cpu regs (if any) belongs to current on the host.
|
||||
*
|
||||
* TIF_SVE is backed up here, since it may get clobbered with guest state.
|
||||
* This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
|
||||
*/
|
||||
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
BUG_ON(!current->mm);
|
||||
BUG_ON(test_thread_flag(TIF_SVE));
|
||||
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
|
||||
KVM_ARM64_HOST_SVE_IN_USE |
|
||||
KVM_ARM64_HOST_SVE_ENABLED);
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
|
||||
|
||||
if (test_thread_flag(TIF_SVE))
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
|
||||
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
|
||||
}
|
||||
|
||||
/*
* Mirror the current thread's TIF_FOREIGN_FPSTATE flag into
* KVM_ARM64_FP_FOREIGN_FPSTATE in vcpu->arch.flags: the vcpu flag is set when
* the thread flag is set and cleared otherwise.
*/
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
|
||||
else
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the guest FPSIMD state was loaded, update the host's context
|
||||
* tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
|
||||
@@ -115,13 +121,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool host_has_sve = system_supports_sve();
|
||||
bool guest_has_sve = vcpu_has_sve(vcpu);
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
|
||||
if (guest_has_sve) {
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
|
||||
|
||||
/* Restore the VL that was saved when bound to the CPU */
|
||||
@@ -131,7 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
} else if (has_vhe() && host_has_sve) {
|
||||
} else if (has_vhe() && system_supports_sve()) {
|
||||
/*
|
||||
* The FPSIMD/SVE state in the CPU has not been touched, and we
|
||||
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
|
||||
@@ -145,8 +149,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
|
||||
}
|
||||
|
||||
update_thread_flag(TIF_SVE,
|
||||
vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
|
||||
update_thread_flag(TIF_SVE, 0);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
|
||||
*
|
||||
* WFE: Yield the CPU and come back to this vcpu when the scheduler
|
||||
* decides to.
|
||||
* WFI: Simply call kvm_vcpu_block(), which will halt execution of
|
||||
* WFI: Simply call kvm_vcpu_halt(), which will halt execution of
|
||||
* world-switches and schedule other host processes until there is an
|
||||
* incoming IRQ or FIQ to the VM.
|
||||
*/
|
||||
@@ -95,8 +95,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
|
||||
} else {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
kvm_vcpu_wfi(vcpu);
|
||||
}
|
||||
|
||||
kvm_incr_pc(vcpu);
|
||||
|
||||
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
|
||||
-DDISABLE_BRANCH_PROFILING \
|
||||
$(DISABLE_STACKLEAK_PLUGIN)
|
||||
|
||||
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
|
||||
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
|
||||
|
||||
@@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state)
|
||||
sve_load 0, x1, x2, 3
|
||||
ret
|
||||
SYM_FUNC_END(__sve_restore_state)
|
||||
|
||||
SYM_FUNC_START(__sve_save_state)
|
||||
mov x2, #1
|
||||
sve_save 0, x1, x2, 3
|
||||
ret
|
||||
SYM_FUNC_END(__sve_save_state)
|
||||
|
||||
10
arch/arm64/kvm/hyp/hyp-constants.c
Normal file
10
arch/arm64/kvm/hyp/hyp-constants.c
Normal file
@@ -0,0 +1,10 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <nvhe/memory.h>
|
||||
|
||||
/*
* Constant generator: each DEFINE() below names a value to be exported.
* NOTE(review): presumably compiled to assembly by the KVM Makefile and
* turned into hyp_constants.h -- confirm against the build rules.
*/
int main(void)
|
||||
{
|
||||
/* Export sizeof(struct hyp_page) as STRUCT_HYP_PAGE_SIZE */
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
|
||||
return 0;
|
||||
}
|
||||
@@ -29,7 +29,6 @@
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
struct kvm_exception_table_entry {
|
||||
int insn, fixup;
|
||||
@@ -49,7 +48,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
|
||||
* trap the accesses.
|
||||
*/
|
||||
if (!system_supports_fpsimd() ||
|
||||
vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
|
||||
vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
|
||||
KVM_ARM64_FP_HOST);
|
||||
|
||||
@@ -143,16 +142,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
|
||||
}
|
||||
|
||||
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct thread_struct *thread;
|
||||
|
||||
thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
|
||||
uw.fpsimd_state);
|
||||
|
||||
__sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
|
||||
}
|
||||
|
||||
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
|
||||
@@ -169,21 +158,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
bool sve_guest, sve_host;
|
||||
bool sve_guest;
|
||||
u8 esr_ec;
|
||||
u64 reg;
|
||||
|
||||
if (!system_supports_fpsimd())
|
||||
return false;
|
||||
|
||||
if (system_supports_sve()) {
|
||||
sve_guest = vcpu_has_sve(vcpu);
|
||||
sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
|
||||
} else {
|
||||
sve_guest = false;
|
||||
sve_host = false;
|
||||
}
|
||||
|
||||
sve_guest = vcpu_has_sve(vcpu);
|
||||
esr_ec = kvm_vcpu_trap_get_class(vcpu);
|
||||
|
||||
/* Don't handle SVE traps for non-SVE vcpus here: */
|
||||
@@ -207,11 +189,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
isb();
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
|
||||
if (sve_host)
|
||||
__hyp_sve_save_host(vcpu);
|
||||
else
|
||||
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
|
||||
|
||||
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,11 @@ enum pkvm_page_state {
|
||||
PKVM_PAGE_OWNED = 0ULL,
|
||||
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
|
||||
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
|
||||
__PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
|
||||
KVM_PGTABLE_PROT_SW1,
|
||||
|
||||
/* Meta-states which aren't encoded directly in the PTE's SW bits */
|
||||
PKVM_NOPAGE,
|
||||
};
|
||||
|
||||
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
|
||||
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_host_share_hyp(u64 pfn);
|
||||
int __pkvm_host_unshare_hyp(u64 pfn);
|
||||
|
||||
bool addr_is_memory(phys_addr_t phys);
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
|
||||
|
||||
@@ -10,13 +10,8 @@
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
#define HYP_MEMBLOCK_REGIONS 128
|
||||
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
|
||||
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
|
||||
extern struct kvm_pgtable pkvm_pgtable;
|
||||
extern hyp_spinlock_t pkvm_pgd_lock;
|
||||
extern struct hyp_pool hpool;
|
||||
extern u64 __io_map_base;
|
||||
|
||||
int hyp_create_idmap(u32 hyp_va_bits);
|
||||
int hyp_map_vectors(void);
|
||||
@@ -39,58 +34,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
|
||||
*end = ALIGN(*end, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
|
||||
/* Provision the worst case scenario */
|
||||
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
|
||||
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
|
||||
total += nr_pages;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline unsigned long __hyp_pgtable_total_pages(void)
|
||||
{
|
||||
unsigned long res = 0, i;
|
||||
|
||||
/* Cover all of memory with page-granularity */
|
||||
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
|
||||
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
|
||||
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long hyp_s1_pgtable_pages(void)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
res = __hyp_pgtable_total_pages();
|
||||
|
||||
/* Allow 1 GiB for private mappings */
|
||||
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long host_s2_pgtable_pages(void)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
/*
|
||||
* Include an extra 16 pages to safely upper-bound the worst case of
|
||||
* concatenated pgds.
|
||||
*/
|
||||
res = __hyp_pgtable_total_pages() + 16;
|
||||
|
||||
/* Allow 1 GiB for MMIO mappings */
|
||||
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif /* __KVM_HYP_MM_H */
|
||||
|
||||
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
|
||||
return hyp_early_alloc_contig(1);
|
||||
}
|
||||
|
||||
/*
* No-op page reference callbacks: both functions deliberately do nothing
* with @addr.
* NOTE(review): presumably installed as the get_page/put_page hooks of
* hyp_early_alloc_mm_ops -- confirm in hyp_early_alloc_init().
*/
static void hyp_early_alloc_get_page(void *addr) { }
|
||||
static void hyp_early_alloc_put_page(void *addr) { }
|
||||
|
||||
void hyp_early_alloc_init(void *virt, unsigned long size)
|
||||
{
|
||||
base = cur = (unsigned long)virt;
|
||||
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
|
||||
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
|
||||
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
|
||||
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
|
||||
hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
|
||||
hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
|
||||
}
|
||||
|
||||
@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
|
||||
}
|
||||
|
||||
/*
* Host hypercall handler for __pkvm_host_unshare_hyp().
* Presumably DECLARE_REG() fetches the pfn argument from host context
* register 1 -- confirm against the macro definition. The result of the call
* is written back through cpu_reg(host_ctxt, 1).
*/
static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, pfn, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
|
||||
}
|
||||
|
||||
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__pkvm_host_unshare_hyp),
|
||||
HANDLE_FUNC(__kvm_adjust_pc),
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
#include <hyp/fault.h>
|
||||
@@ -27,6 +28,26 @@ static struct hyp_pool host_s2_pool;
|
||||
|
||||
const u8 pkvm_hyp_id = 1;
|
||||
|
||||
/* Acquire host_kvm.lock, the spinlock for the host component. */
static void host_lock_component(void)
|
||||
{
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
}
|
||||
|
||||
/* Release host_kvm.lock, the spinlock for the host component. */
static void host_unlock_component(void)
|
||||
{
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
}
|
||||
|
||||
/* Acquire pkvm_pgd_lock, the spinlock for the hypervisor component. */
static void hyp_lock_component(void)
|
||||
{
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
}
|
||||
|
||||
/* Release pkvm_pgd_lock, the spinlock for the hypervisor component. */
static void hyp_unlock_component(void)
|
||||
{
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
}
|
||||
|
||||
static void *host_s2_zalloc_pages_exact(size_t size)
|
||||
{
|
||||
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
|
||||
@@ -103,19 +124,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
|
||||
|
||||
prepare_host_vtcr();
|
||||
hyp_spin_lock_init(&host_kvm.lock);
|
||||
mmu->arch = &host_kvm.arch;
|
||||
|
||||
ret = prepare_s2_pool(pgt_pool_base);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
|
||||
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
|
||||
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
|
||||
host_stage2_force_pte_cb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
|
||||
mmu->arch = &host_kvm.arch;
|
||||
mmu->pgt = &host_kvm.pgt;
|
||||
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
|
||||
WRITE_ONCE(mmu->vmid.vmid, 0);
|
||||
@@ -338,102 +359,14 @@ static int host_stage2_idmap(u64 addr)
|
||||
|
||||
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
host_lock_component();
|
||||
ret = host_stage2_adjust_range(addr, &range);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
|
||||
unlock:
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool check_prot(enum kvm_pgtable_prot prot,
|
||||
enum kvm_pgtable_prot required,
|
||||
enum kvm_pgtable_prot denied)
|
||||
{
|
||||
return (prot & (required | denied)) == required;
|
||||
}
|
||||
|
||||
int __pkvm_host_share_hyp(u64 pfn)
|
||||
{
|
||||
phys_addr_t addr = hyp_pfn_to_phys(pfn);
|
||||
enum kvm_pgtable_prot prot, cur;
|
||||
void *virt = __hyp_va(addr);
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte;
|
||||
int ret;
|
||||
|
||||
if (!addr_is_memory(addr))
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
if (!pte)
|
||||
goto map_shared;
|
||||
|
||||
/*
|
||||
* Check attributes in the host stage-2 PTE. We need the page to be:
|
||||
* - mapped RWX as we're sharing memory;
|
||||
* - not borrowed, as that implies absence of ownership.
|
||||
* Otherwise, we can't let it got through
|
||||
*/
|
||||
cur = kvm_pgtable_stage2_pte_prot(pte);
|
||||
prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
|
||||
if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
state = pkvm_getstate(cur);
|
||||
if (state == PKVM_PAGE_OWNED)
|
||||
goto map_shared;
|
||||
|
||||
/*
|
||||
* Tolerate double-sharing the same page, but this requires
|
||||
* cross-checking the hypervisor stage-1.
|
||||
*/
|
||||
if (state != PKVM_PAGE_SHARED_OWNED) {
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* If the page has been shared with the hypervisor, it must be
|
||||
* already mapped as SHARED_BORROWED in its stage-1.
|
||||
*/
|
||||
cur = kvm_pgtable_hyp_pte_prot(pte);
|
||||
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
|
||||
if (!check_prot(cur, prot, ~prot))
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
|
||||
map_shared:
|
||||
/*
|
||||
* If the page is not yet shared, adjust mappings in both page-tables
|
||||
* while both locks are held.
|
||||
*/
|
||||
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
|
||||
ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
|
||||
BUG_ON(ret);
|
||||
|
||||
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
|
||||
ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
|
||||
BUG_ON(ret);
|
||||
|
||||
unlock:
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -451,3 +384,421 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
ret = host_stage2_idmap(addr);
|
||||
BUG_ON(ret && ret != -EAGAIN);
|
||||
}
|
||||
|
||||
/*
 * Identifiers of the components taking part in a memory transition.
 * The numeric order corresponds to the locking order.
 */
enum pkvm_component_id {
	PKVM_ID_HOST = 0,
	PKVM_ID_HYP = 1,
};
|
||||
|
||||
struct pkvm_mem_transition {
|
||||
u64 nr_pages;
|
||||
|
||||
struct {
|
||||
enum pkvm_component_id id;
|
||||
/* Address in the initiator's address space */
|
||||
u64 addr;
|
||||
|
||||
union {
|
||||
struct {
|
||||
/* Address in the completer's address space */
|
||||
u64 completer_addr;
|
||||
} host;
|
||||
};
|
||||
} initiator;
|
||||
|
||||
struct {
|
||||
enum pkvm_component_id id;
|
||||
} completer;
|
||||
};
|
||||
|
||||
struct pkvm_mem_share {
|
||||
const struct pkvm_mem_transition tx;
|
||||
const enum kvm_pgtable_prot completer_prot;
|
||||
};
|
||||
|
||||
struct check_walk_data {
|
||||
enum pkvm_page_state desired;
|
||||
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
|
||||
};
|
||||
|
||||
static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
struct check_walk_data *d = arg;
|
||||
kvm_pte_t pte = *ptep;
|
||||
|
||||
if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
|
||||
return -EINVAL;
|
||||
|
||||
return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
|
||||
}
|
||||
|
||||
/*
 * Walk the leaf entries of @pgt covering [@addr, @addr + @size) and run
 * __check_page_state_visitor() on each of them with @data as argument.
 * Returns whatever kvm_pgtable_walk() returns.
 */
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker state_walker = {
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= data,
		.cb	= __check_page_state_visitor,
	};

	return kvm_pgtable_walk(pgt, addr, size, &state_walker);
}
|
||||
|
||||
/*
 * Decode the page state from a host stage-2 PTE.
 *
 * A non-zero but invalid PTE is reported as PKVM_NOPAGE; any other PTE
 * (including an all-zero one) has its state extracted from the stage-2
 * protection attributes.
 */
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot;

	if (pte && !kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	prot = kvm_pgtable_stage2_pte_prot(pte);
	return pkvm_getstate(prot);
}
|
||||
|
||||
/*
 * Check that every page of [@addr, @addr + @size) is in @state in the
 * host stage-2 page-table. Asserts that host_kvm.lock is held.
 */
static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	struct check_walk_data walk = {
		.get_page_state	= host_get_page_state,
		.desired	= state,
	};

	hyp_assert_lock_held(&host_kvm.lock);

	return check_page_state_range(&host_kvm.pgt, addr, size, &walk);
}
|
||||
|
||||
/*
 * Identity-map [@addr, @addr + @size) in the host stage-2 with the
 * default host memory permissions tagged with @state.
 */
static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	return host_stage2_idmap_locked(addr, size,
					pkvm_mkstate(PKVM_HOST_MEM_PROT, state));
}
|
||||
|
||||
/*
 * Host-initiated request: report the completer address through
 * @completer_addr and check that the host currently owns every page of
 * the transition (PKVM_PAGE_OWNED).
 */
static int host_request_owned_transition(u64 *completer_addr,
					 const struct pkvm_mem_transition *tx)
{
	*completer_addr = tx->initiator.host.completer_addr;

	return __host_check_page_state_range(tx->initiator.addr,
					     tx->nr_pages * PAGE_SIZE,
					     PKVM_PAGE_OWNED);
}
|
||||
|
||||
/*
 * Host-initiated unshare request: report the completer address through
 * @completer_addr and check that every page of the transition is
 * currently PKVM_PAGE_SHARED_OWNED in the host stage-2.
 */
static int host_request_unshare(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	*completer_addr = tx->initiator.host.completer_addr;

	return __host_check_page_state_range(tx->initiator.addr,
					     tx->nr_pages * PAGE_SIZE,
					     PKVM_PAGE_SHARED_OWNED);
}
|
||||
|
||||
/*
 * Host side of a share: report the completer address through
 * @completer_addr and mark the pages PKVM_PAGE_SHARED_OWNED in the host
 * stage-2.
 */
static int host_initiate_share(u64 *completer_addr,
			       const struct pkvm_mem_transition *tx)
{
	*completer_addr = tx->initiator.host.completer_addr;

	return __host_set_page_state_range(tx->initiator.addr,
					   tx->nr_pages * PAGE_SIZE,
					   PKVM_PAGE_SHARED_OWNED);
}
|
||||
|
||||
/*
 * Host side of an unshare: report the completer address through
 * @completer_addr and mark the pages PKVM_PAGE_OWNED again in the host
 * stage-2.
 */
static int host_initiate_unshare(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	*completer_addr = tx->initiator.host.completer_addr;

	return __host_set_page_state_range(tx->initiator.addr,
					   tx->nr_pages * PAGE_SIZE,
					   PKVM_PAGE_OWNED);
}
|
||||
|
||||
/*
 * Decode the page state from a hypervisor stage-1 PTE.
 *
 * An invalid PTE is reported as PKVM_NOPAGE. A valid one is decoded with
 * the hypervisor (stage-1) accessor: this callback is only used to walk
 * pkvm_pgtable, so the stage-2 accessor is the wrong helper for it.
 */
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}
|
||||
|
||||
/*
 * Check that every page of [@addr, @addr + @size) is in @state in the
 * hypervisor page-table. Asserts that pkvm_pgd_lock is held.
 */
static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data walk = {
		.get_page_state	= hyp_get_page_state,
		.desired	= state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);

	return check_page_state_range(&pkvm_pgtable, addr, size, &walk);
}
|
||||
|
||||
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
|
||||
{
|
||||
return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
|
||||
tx->initiator.id != PKVM_ID_HOST);
|
||||
}
|
||||
|
||||
/*
 * Hypervisor acknowledgement of a share.
 *
 * Only PAGE_HYP permissions are accepted (-EPERM otherwise). Unless the
 * page-table check is skipped, the target range must be PKVM_NOPAGE in
 * the hypervisor page-table.
 */
static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
			 enum kvm_pgtable_prot perms)
{
	if (perms != PAGE_HYP)
		return -EPERM;

	if (!__hyp_ack_skip_pgtable_check(tx))
		return __hyp_check_page_state_range(addr,
						    tx->nr_pages * PAGE_SIZE,
						    PKVM_NOPAGE);

	return 0;
}
|
||||
|
||||
/*
 * Hypervisor acknowledgement of an unshare: unless the page-table check
 * is skipped, the range must be PKVM_PAGE_SHARED_BORROWED in the
 * hypervisor page-table.
 */
static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	if (!__hyp_ack_skip_pgtable_check(tx))
		return __hyp_check_page_state_range(addr,
						    tx->nr_pages * PAGE_SIZE,
						    PKVM_PAGE_SHARED_BORROWED);

	return 0;
}
|
||||
|
||||
/*
 * Hypervisor side of a share: map the range starting at @addr in the
 * hypervisor page-table with @perms, tagged PKVM_PAGE_SHARED_BORROWED.
 */
static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
			      enum kvm_pgtable_prot perms)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	void *from = (void *)addr;
	void *to = from + size;

	return pkvm_create_mappings_locked(from, to,
			pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED));
}
|
||||
|
||||
/*
 * Hypervisor side of an unshare: unmap the range from the hypervisor
 * page-table.
 *
 * Returns 0 when the whole range was unmapped, -EFAULT otherwise.
 */
static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	/*
	 * kvm_pgtable_hyp_unmap() returns the number of bytes unmapped as a
	 * u64; keep the full width so the comparison below stays correct for
	 * ranges of 2GiB and more.
	 */
	u64 unmapped = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);

	return (unmapped != size) ? -EFAULT : 0;
}
|
||||
|
||||
static int check_share(struct pkvm_mem_share *share)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &share->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_request_owned_transition(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __do_share(struct pkvm_mem_share *share)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &share->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_initiate_share(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * do_share():
 *
 * The page owner grants access to another component with a given set
 * of permissions.
 *
 * Initiator: OWNED	=> SHARED_OWNED
 * Completer: NOPAGE	=> SHARED_BORROWED
 *
 * The request is validated with check_share() first; a failure of the
 * actual transition after a successful check triggers a WARN.
 */
static int do_share(struct pkvm_mem_share *share)
{
	int err = check_share(share);

	if (err)
		return err;

	err = __do_share(share);
	return WARN_ON(err);
}
|
||||
|
||||
static int check_unshare(struct pkvm_mem_share *share)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &share->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_request_unshare(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_ack_unshare(completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __do_unshare(struct pkvm_mem_share *share)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &share->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_initiate_unshare(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_complete_unshare(completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * do_unshare():
 *
 * The page owner revokes access from another component for a range of
 * pages which were previously shared using do_share().
 *
 * Initiator: SHARED_OWNED	=> OWNED
 * Completer: SHARED_BORROWED	=> NOPAGE
 *
 * The request is validated with check_unshare() first; a failure of the
 * actual transition after a successful check triggers a WARN.
 */
static int do_unshare(struct pkvm_mem_share *share)
{
	int err = check_unshare(share);

	if (err)
		return err;

	err = __do_unshare(share);
	return WARN_ON(err);
}
|
||||
|
||||
/*
 * Share one host page, identified by @pfn, with the hypervisor.
 *
 * The page is described as a single-page host => hyp transition mapped
 * with PAGE_HYP at its hypervisor VA. The host lock is taken before the
 * hyp lock and the two are released in the reverse order.
 *
 * Returns the result of do_share().
 */
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.completer_prot	= PAGE_HYP,
		.tx = {
			.nr_pages	= 1,
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr = hyp_addr,
				},
			},
		},
	};
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = do_share(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}
|
||||
|
||||
/*
 * Revoke the hypervisor's access to one host page, identified by @pfn.
 *
 * The page is described as a single-page host => hyp transition at its
 * hypervisor VA. The host lock is taken before the hyp lock and the two
 * are released in the reverse order.
 *
 * Returns the result of do_unshare().
 */
int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.completer_prot	= PAGE_HYP,
		.tx = {
			.nr_pages	= 1,
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr = hyp_addr,
				},
			},
		},
	};
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = do_unshare(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
#include <asm/spectre.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
@@ -18,11 +19,12 @@
|
||||
|
||||
struct kvm_pgtable pkvm_pgtable;
|
||||
hyp_spinlock_t pkvm_pgd_lock;
|
||||
u64 __io_map_base;
|
||||
|
||||
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
|
||||
unsigned int hyp_memblock_nr;
|
||||
|
||||
static u64 __io_map_base;
|
||||
|
||||
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
|
||||
@@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
|
||||
int i;
|
||||
|
||||
hyp_spin_lock_init(&pool->lock);
|
||||
pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
|
||||
pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
|
||||
for (i = 0; i < pool->max_order; i++)
|
||||
INIT_LIST_HEAD(&pool->free_area[i]);
|
||||
pool->range_start = phys;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/fixed_config.h>
|
||||
@@ -17,7 +18,6 @@
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
struct hyp_pool hpool;
|
||||
unsigned long hyp_nr_cpus;
|
||||
|
||||
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
|
||||
@@ -27,6 +27,7 @@ static void *vmemmap_base;
|
||||
static void *hyp_pgt_base;
|
||||
static void *host_s2_pgt_base;
|
||||
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
|
||||
static struct hyp_pool hpool;
|
||||
|
||||
static int divide_memory_pool(void *virt, unsigned long size)
|
||||
{
|
||||
@@ -165,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = arg;
|
||||
enum kvm_pgtable_prot prot;
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte = *ptep;
|
||||
@@ -173,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
if (!kvm_pte_valid(pte))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Fix-up the refcount for the page-table pages as the early allocator
|
||||
* was unable to access the hyp_vmemmap and so the buddy allocator has
|
||||
* initialised the refcount to '1'.
|
||||
*/
|
||||
mm_ops->get_page(ptep);
|
||||
if (flag != KVM_PGTABLE_WALK_LEAF)
|
||||
return 0;
|
||||
|
||||
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
return -EINVAL;
|
||||
|
||||
@@ -205,7 +216,8 @@ static int finalize_host_mappings(void)
|
||||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = finalize_host_mappings_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pkvm_pgtable.mm_ops,
|
||||
};
|
||||
int i, ret;
|
||||
|
||||
@@ -240,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = finalize_host_mappings();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_page = hyp_zalloc_hyp_page,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.get_page = hpool_get_page,
|
||||
.put_page = hpool_put_page,
|
||||
.page_count = hyp_page_count,
|
||||
};
|
||||
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
|
||||
|
||||
ret = finalize_host_mappings();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
out:
|
||||
/*
|
||||
* We tail-called to here from handle___pkvm_init() and will not return,
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#include <nvhe/fixed_config.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
|
||||
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
|
||||
return prot;
|
||||
}
|
||||
|
||||
/*
 * Decide whether @old has to be replaced by @new.
 *
 * Recreating the exact same mapping needs no update. An invalid old entry
 * always needs one. For a valid old entry, only a change confined to the
 * software bits (KVM_PTE_LEAF_ATTR_HI_SW) is tolerated; anything else
 * triggers a WARN and is reported as "no update".
 */
static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
	kvm_pte_t changed = old ^ new;

	if (!changed)
		return false;

	if (!kvm_pte_valid(old))
		return true;

	if (WARN_ON(changed & ~KVM_PTE_LEAF_ATTR_HI_SW))
		return false;

	return true;
}
|
||||
|
||||
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep, struct hyp_map_data *data)
|
||||
{
|
||||
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
return false;
|
||||
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (hyp_pte_needs_update(old, new))
|
||||
smp_store_release(ptep, new);
|
||||
|
||||
data->phys += granule;
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (old == new)
|
||||
return true;
|
||||
if (!kvm_pte_valid(old))
|
||||
data->mm_ops->get_page(ptep);
|
||||
else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
|
||||
return false;
|
||||
|
||||
smp_store_release(ptep, new);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
return -ENOMEM;
|
||||
|
||||
kvm_set_table_pte(ptep, childp, mm_ops);
|
||||
mm_ops->get_page(ptep);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct hyp_unmap_data {
|
||||
u64 unmapped;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
};
|
||||
|
||||
/*
 * Walker callback that tears down hypervisor mappings.
 *
 * Invalid entries are rejected with -EINVAL. A leaf is cleared only when
 * the remaining range covers its whole granule (-EINVAL otherwise) and
 * its size is added to the unmapped total. A table entry is cleared only
 * once the child table's page count has dropped to 1; otherwise it is
 * left alone. After clearing, the TLB entry is invalidated and the
 * references on the PTE's page (and on the child table, if any) are
 * dropped.
 */
static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			    enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct hyp_unmap_data *unmap = arg;
	struct kvm_pgtable_mm_ops *ops = unmap->mm_ops;
	u64 granule = kvm_granule_size(level);
	kvm_pte_t *child = NULL;
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return -EINVAL;

	if (!kvm_pte_table(pte, level)) {
		/* Leaf: refuse to unmap only part of a mapping */
		if (end - addr < granule)
			return -EINVAL;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
		unmap->unmapped += granule;
	} else {
		child = kvm_pte_follow(pte, ops);

		/* Table still referenced by live entries: keep it */
		if (ops->page_count(child) != 1)
			return 0;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
	}

	dsb(ish);
	isb();
	ops->put_page(ptep);

	if (child)
		ops->put_page(child);

	return 0;
}
|
||||
|
||||
/*
 * Unmap [@addr, @addr + @size) from the hypervisor page-table @pgt.
 *
 * Returns the number of bytes unmapped, as accumulated by
 * hyp_unmap_walker(); the walk's own return value is not propagated.
 * Returns 0 without walking when the page-table's mm_ops provide no
 * page_count callback.
 */
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct hyp_unmap_data result = {
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker unmap_walker = {
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= &result,
		.cb	= hyp_unmap_walker,
	};

	if (pgt->mm_ops->page_count)
		kvm_pgtable_walk(pgt, addr, size, &unmap_walker);

	return result.unmapped;
}
|
||||
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag, void * const arg)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = arg;
|
||||
kvm_pte_t pte = *ptep;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
return 0;
|
||||
|
||||
mm_ops->put_page(ptep);
|
||||
|
||||
if (kvm_pte_table(pte, level))
|
||||
mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
|
||||
|
||||
mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
|
||||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = hyp_free_walker,
|
||||
.flags = KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pgt->mm_ops,
|
||||
};
|
||||
|
||||
@@ -1116,13 +1178,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
}
|
||||
|
||||
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags,
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb)
|
||||
{
|
||||
size_t pgd_sz;
|
||||
u64 vtcr = arch->vtcr;
|
||||
u64 vtcr = mmu->arch->vtcr;
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
@@ -1135,7 +1197,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
pgt->ia_bits = ia_bits;
|
||||
pgt->start_level = start_level;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = &arch->mmu;
|
||||
pgt->mmu = mmu;
|
||||
pgt->flags = flags;
|
||||
pgt->force_pte_cb = force_pte_cb;
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
/* VHE specific context */
|
||||
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
|
||||
|
||||
@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int idx;
|
||||
int idx, bkt;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
kvm_for_each_memslot(memslot, bkt, slots)
|
||||
stage2_flush_memslot(kvm, memslot);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
@@ -239,6 +239,9 @@ void free_hyp_pgds(void)
|
||||
|
||||
static bool kvm_host_owns_hyp_mappings(void)
|
||||
{
|
||||
if (is_kernel_in_hyp_mode())
|
||||
return false;
|
||||
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
return false;
|
||||
|
||||
@@ -281,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
|
||||
}
|
||||
}
|
||||
|
||||
static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
|
||||
struct hyp_shared_pfn {
|
||||
u64 pfn;
|
||||
int count;
|
||||
struct rb_node node;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(hyp_shared_pfns_lock);
|
||||
static struct rb_root hyp_shared_pfns = RB_ROOT;
|
||||
|
||||
/*
 * Look up @pfn in the hyp_shared_pfns tree.
 *
 * Returns the matching entry, or NULL when @pfn is not tracked. In both
 * cases *@node and *@parent are set to the link and parent reached by the
 * search, so that on a miss they can be passed to rb_link_node() to
 * insert a new entry.
 */
static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
					      struct rb_node **parent)
{
	struct rb_node **link = &hyp_shared_pfns.rb_node;
	struct hyp_shared_pfn *found = NULL;
	struct rb_node *up = NULL;

	while (*link) {
		struct hyp_shared_pfn *entry;

		entry = container_of(*link, struct hyp_shared_pfn, node);
		up = *link;
		if (entry->pfn < pfn) {
			link = &up->rb_left;
		} else if (entry->pfn > pfn) {
			link = &up->rb_right;
		} else {
			found = entry;
			break;
		}
	}

	*node = link;
	*parent = up;

	return found;
}
|
||||
|
||||
/*
 * Share @pfn with the hypervisor, keeping a per-pfn share count.
 *
 * If the pfn is already tracked, only its count is incremented. Otherwise
 * a tracking entry is allocated, the share hypercall is issued and, only
 * if it succeeds, the entry is inserted into the tree. A failed hypercall
 * therefore leaves no entry behind that would make a later call report a
 * share that never happened.
 *
 * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or the
 * hypercall's error.
 */
static int share_pfn_hyp(u64 pfn)
{
	struct rb_node **node, *parent;
	struct hyp_shared_pfn *this;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	this = find_shared_pfn(pfn, &node, &parent);
	if (this) {
		this->count++;
		goto unlock;
	}

	/* Allocate before the hypercall so a share never needs undoing on OOM */
	this = kzalloc(sizeof(*this), GFP_KERNEL);
	if (!this) {
		ret = -ENOMEM;
		goto unlock;
	}

	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
	if (ret) {
		kfree(this);
		goto unlock;
	}

	/* The mutex is still held, so node/parent from the lookup are valid */
	this->pfn = pfn;
	this->count = 1;
	rb_link_node(&this->node, parent, node);
	rb_insert_color(&this->node, &hyp_shared_pfns);
unlock:
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}
|
||||
|
||||
/*
 * Drop one share of @pfn.
 *
 * Warns and returns -ENOENT if the pfn is not tracked. When the last
 * share is dropped, the tracking entry is removed and freed and the
 * unshare hypercall is issued; its result is returned.
 */
static int unshare_pfn_hyp(u64 pfn)
{
	struct rb_node **link, *up;
	struct hyp_shared_pfn *entry;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	entry = find_shared_pfn(pfn, &link, &up);
	if (WARN_ON(!entry)) {
		ret = -ENOENT;
	} else if (--entry->count == 0) {
		rb_erase(&entry->node, &hyp_shared_pfns);
		kfree(entry);
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
	}
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}
|
||||
|
||||
int kvm_share_hyp(void *from, void *to)
|
||||
{
|
||||
phys_addr_t start, end, cur;
|
||||
u64 pfn;
|
||||
int ret;
|
||||
|
||||
for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
|
||||
__phys_to_pfn(addr));
|
||||
if (is_kernel_in_hyp_mode())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* The share hcall maps things in the 'fixed-offset' region of the hyp
|
||||
* VA space, so we can only share physically contiguous data-structures
|
||||
* for now.
|
||||
*/
|
||||
if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_host_owns_hyp_mappings())
|
||||
return create_hyp_mappings(from, to, PAGE_HYP);
|
||||
|
||||
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
|
||||
end = PAGE_ALIGN(__pa(to));
|
||||
for (cur = start; cur < end; cur += PAGE_SIZE) {
|
||||
pfn = __phys_to_pfn(cur);
|
||||
ret = share_pfn_hyp(pfn);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@@ -296,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_unshare_hyp(void *from, void *to)
|
||||
{
|
||||
phys_addr_t start, end, cur;
|
||||
u64 pfn;
|
||||
|
||||
if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
|
||||
return;
|
||||
|
||||
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
|
||||
end = PAGE_ALIGN(__pa(to));
|
||||
for (cur = start; cur < end; cur += PAGE_SIZE) {
|
||||
pfn = __phys_to_pfn(cur);
|
||||
WARN_ON(unshare_pfn_hyp(pfn));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
|
||||
* @from: The virtual kernel start address of the range
|
||||
@@ -316,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
if (is_kernel_in_hyp_mode())
|
||||
return 0;
|
||||
|
||||
if (!kvm_host_owns_hyp_mappings()) {
|
||||
if (WARN_ON(prot != PAGE_HYP))
|
||||
return -EPERM;
|
||||
return pkvm_share_hyp(kvm_kaddr_to_phys(from),
|
||||
kvm_kaddr_to_phys(to));
|
||||
}
|
||||
if (!kvm_host_owns_hyp_mappings())
|
||||
return -EPERM;
|
||||
|
||||
start = start & PAGE_MASK;
|
||||
end = PAGE_ALIGN(end);
|
||||
@@ -407,6 +525,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
|
||||
unsigned long addr;
|
||||
int ret;
|
||||
|
||||
if (is_protected_kvm_enabled())
|
||||
return -EPERM;
|
||||
|
||||
*kaddr = ioremap(phys_addr, size);
|
||||
if (!*kaddr)
|
||||
return -ENOMEM;
|
||||
@@ -516,7 +637,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
||||
if (!pgt)
|
||||
return -ENOMEM;
|
||||
|
||||
err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
|
||||
mmu->arch = &kvm->arch;
|
||||
err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
|
||||
if (err)
|
||||
goto out_free_pgtable;
|
||||
|
||||
@@ -529,7 +651,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
||||
for_each_possible_cpu(cpu)
|
||||
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
|
||||
|
||||
mmu->arch = &kvm->arch;
|
||||
mmu->pgt = pgt;
|
||||
mmu->pgd_phys = __pa(pgt->pgd);
|
||||
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
|
||||
@@ -595,14 +716,14 @@ void stage2_unmap_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int idx;
|
||||
int idx, bkt;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
mmap_read_lock(current->mm);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
kvm_for_each_memslot(memslot, bkt, slots)
|
||||
stage2_unmap_memslot(kvm, memslot);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
@@ -650,6 +771,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
KVM_PGTABLE_PROT_R |
|
||||
(writable ? KVM_PGTABLE_PROT_W : 0);
|
||||
|
||||
if (is_protected_kvm_enabled())
|
||||
return -EPERM;
|
||||
|
||||
size += offset_in_page(guest_ipa);
|
||||
guest_ipa &= PAGE_MASK;
|
||||
|
||||
@@ -1463,7 +1587,6 @@ out:
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
@@ -1473,25 +1596,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
* allocated dirty_bitmap[], dirty pages will be tracked while the
|
||||
* memory slot is write protected.
|
||||
*/
|
||||
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
/*
|
||||
* If we're with initial-all-set, we don't need to write
|
||||
* protect any pages because they're all reported as dirty.
|
||||
* Huge pages and normal pages will be write protected gradually.
|
||||
*/
|
||||
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
|
||||
kvm_mmu_wp_memory_region(kvm, mem->slot);
|
||||
kvm_mmu_wp_memory_region(kvm, new->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
hva_t hva = mem->userspace_addr;
|
||||
hva_t reg_end = hva + mem->memory_size;
|
||||
hva_t hva, reg_end;
|
||||
int ret = 0;
|
||||
|
||||
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
|
||||
@@ -1502,9 +1624,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
* Prevent userspace from creating a memory region outside of the IPA
|
||||
* space addressable by the KVM guest IPA space.
|
||||
*/
|
||||
if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
|
||||
if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
|
||||
return -EFAULT;
|
||||
|
||||
hva = new->userspace_addr;
|
||||
reg_end = hva + (new->npages << PAGE_SHIFT);
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
/*
|
||||
* A memory region could potentially cover multiple VMAs, and any holes
|
||||
@@ -1536,7 +1661,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
|
||||
if (vma->vm_flags & VM_PFNMAP) {
|
||||
/* IO region dirty page logging not allowed */
|
||||
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -8,10 +8,9 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include "hyp_constants.h"
|
||||
|
||||
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
|
||||
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
|
||||
@@ -82,7 +81,8 @@ void __init kvm_hyp_reserve(void)
|
||||
do {
|
||||
prev = nr_pages;
|
||||
nr_pages = hyp_mem_pages + prev;
|
||||
nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
|
||||
nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
|
||||
PAGE_SIZE);
|
||||
nr_pages += __hyp_pgtable_max_pages(nr_pages);
|
||||
} while (nr_pages != prev);
|
||||
hyp_mem_pages += nr_pages;
|
||||
@@ -30,6 +30,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
case ID_AA64DFR0_PMUVER_8_1:
|
||||
case ID_AA64DFR0_PMUVER_8_4:
|
||||
case ID_AA64DFR0_PMUVER_8_5:
|
||||
case ID_AA64DFR0_PMUVER_8_7:
|
||||
return GENMASK(15, 0);
|
||||
default: /* Shouldn't be here, just for sanity */
|
||||
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
|
||||
@@ -902,7 +903,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
|
||||
@@ -46,7 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
|
||||
* specification (ARM DEN 0022A). This means all suspend states
|
||||
* for KVM will preserve the register state.
|
||||
*/
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
|
||||
return PSCI_RET_SUCCESS;
|
||||
@@ -109,7 +109,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
|
||||
/*
|
||||
* Make sure the reset request is observed if the change to
|
||||
* power_state is observed.
|
||||
* power_off is observed.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
@@ -121,8 +121,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
|
||||
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i, matching_cpus = 0;
|
||||
unsigned long mpidr;
|
||||
int matching_cpus = 0;
|
||||
unsigned long i, mpidr;
|
||||
unsigned long target_affinity;
|
||||
unsigned long target_affinity_mask;
|
||||
unsigned long lowest_affinity_level;
|
||||
@@ -164,7 +164,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *tmp;
|
||||
|
||||
/*
|
||||
|
||||
@@ -94,22 +94,31 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
void *buf;
|
||||
unsigned int vl;
|
||||
size_t reg_sz;
|
||||
int ret;
|
||||
|
||||
vl = vcpu->arch.sve_max_vl;
|
||||
|
||||
/*
|
||||
* Responsibility for these properties is shared between
|
||||
* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
|
||||
* kvm_arm_init_sve(), kvm_vcpu_enable_sve() and
|
||||
* set_sve_vls(). Double-check here just to be sure:
|
||||
*/
|
||||
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
|
||||
vl > VL_ARCH_MAX))
|
||||
return -EIO;
|
||||
|
||||
buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
|
||||
reg_sz = vcpu_sve_state_size(vcpu);
|
||||
buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kvm_share_hyp(buf, buf + reg_sz);
|
||||
if (ret) {
|
||||
kfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
vcpu->arch.sve_state = buf;
|
||||
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
|
||||
return 0;
|
||||
@@ -141,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kfree(vcpu->arch.sve_state);
|
||||
void *sve_state = vcpu->arch.sve_state;
|
||||
|
||||
kvm_vcpu_unshare_task_fp(vcpu);
|
||||
kvm_unshare_hyp(vcpu, vcpu + 1);
|
||||
if (sve_state)
|
||||
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
|
||||
kfree(sve_state);
|
||||
}
|
||||
|
||||
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
|
||||
@@ -170,7 +185,7 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu *tmp;
|
||||
bool is32bit;
|
||||
int i;
|
||||
unsigned long i;
|
||||
|
||||
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
|
||||
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
|
||||
@@ -193,10 +208,9 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
|
||||
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
|
||||
* @vcpu: The VCPU pointer
|
||||
*
|
||||
* This function finds the right table above and sets the registers on
|
||||
* the virtual CPU struct to their architecturally defined reset
|
||||
* values, except for registers whose reset is deferred until
|
||||
* kvm_arm_vcpu_finalize().
|
||||
* This function sets the registers on the virtual CPU struct to their
|
||||
* architecturally defined reset values, except for registers whose reset is
|
||||
* deferred until kvm_arm_vcpu_finalize().
|
||||
*
|
||||
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
|
||||
* ioctl or as part of handling a request issued by another VCPU in the PSCI
|
||||
|
||||
@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
|
||||
*/
|
||||
int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
{
|
||||
int i, ret;
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
if (irqchip_in_kernel(kvm))
|
||||
return -EEXIST;
|
||||
@@ -91,7 +92,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
return ret;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (vcpu->arch.has_run_once)
|
||||
if (vcpu_has_run_once(vcpu))
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = 0;
|
||||
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int ret = 0, i, idx;
|
||||
int ret = 0, i;
|
||||
unsigned long idx;
|
||||
|
||||
if (vgic_initialized(kvm))
|
||||
return 0;
|
||||
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_for_each_vcpu(idx, vcpu, kvm)
|
||||
kvm_vgic_vcpu_enable(vcpu);
|
||||
|
||||
ret = kvm_vgic_setup_default_irq_routing(kvm);
|
||||
@@ -370,7 +372,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
static void __kvm_vgic_destroy(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
unsigned long i;
|
||||
|
||||
vgic_debug_destroy(kvm);
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ void unlock_all_vcpus(struct kvm *kvm)
|
||||
bool lock_all_vcpus(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *tmp_vcpu;
|
||||
int c;
|
||||
unsigned long c;
|
||||
|
||||
/*
|
||||
* Any time a vcpu is run, vcpu_load is called which tries to grab the
|
||||
|
||||
@@ -113,9 +113,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
|
||||
int intid = val & 0xf;
|
||||
int targets = (val >> 16) & 0xff;
|
||||
int mode = (val >> 24) & 0x03;
|
||||
int c;
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long flags;
|
||||
unsigned long flags, c;
|
||||
|
||||
switch (mode) {
|
||||
case 0x0: /* as specified by targets */
|
||||
|
||||
@@ -754,7 +754,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
|
||||
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int c, ret = 0;
|
||||
unsigned long c;
|
||||
int ret = 0;
|
||||
|
||||
kvm_for_each_vcpu(c, vcpu, kvm) {
|
||||
ret = vgic_register_redist_iodev(vcpu);
|
||||
@@ -763,10 +764,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
/* The current c failed, so we start with the previous one. */
|
||||
/* The current c failed, so iterate over the previous ones. */
|
||||
int i;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
for (c--; c >= 0; c--) {
|
||||
vcpu = kvm_get_vcpu(kvm, c);
|
||||
for (i = 0; i < c; i++) {
|
||||
vcpu = kvm_get_vcpu(kvm, i);
|
||||
vgic_unregister_redist_iodev(vcpu);
|
||||
}
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
@@ -995,10 +998,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
|
||||
struct kvm_vcpu *c_vcpu;
|
||||
u16 target_cpus;
|
||||
u64 mpidr;
|
||||
int sgi, c;
|
||||
int sgi;
|
||||
int vcpu_id = vcpu->vcpu_id;
|
||||
bool broadcast;
|
||||
unsigned long flags;
|
||||
unsigned long c, flags;
|
||||
|
||||
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
|
||||
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
|
||||
|
||||
@@ -1050,7 +1050,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct kvm_io_device_ops kvm_io_gic_ops = {
|
||||
const struct kvm_io_device_ops kvm_io_gic_ops = {
|
||||
.read = dispatch_mmio_read,
|
||||
.write = dispatch_mmio_write,
|
||||
};
|
||||
|
||||
@@ -34,7 +34,7 @@ struct vgic_register_region {
|
||||
};
|
||||
};
|
||||
|
||||
extern struct kvm_io_device_ops kvm_io_gic_ops;
|
||||
extern const struct kvm_io_device_ops kvm_io_gic_ops;
|
||||
|
||||
#define VGIC_ACCESS_8bit 1
|
||||
#define VGIC_ACCESS_32bit 2
|
||||
|
||||
@@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
|
||||
|
||||
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
|
||||
IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
|
||||
kvm_err("Need to set vgic cpu and dist addresses first\n");
|
||||
kvm_debug("Need to set vgic cpu and dist addresses first\n");
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
|
||||
kvm_err("VGIC CPU and dist frames overlap\n");
|
||||
kvm_debug("VGIC CPU and dist frames overlap\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
|
||||
int ret;
|
||||
u32 vtr;
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
kvm_err("GICv2 not supported in protected mode\n");
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (!info->vctrl.start) {
|
||||
kvm_err("GICH not present in the firmware table\n");
|
||||
return -ENXIO;
|
||||
|
||||
@@ -542,24 +542,24 @@ int vgic_v3_map_resources(struct kvm *kvm)
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int ret = 0;
|
||||
int c;
|
||||
unsigned long c;
|
||||
|
||||
kvm_for_each_vcpu(c, vcpu, kvm) {
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
|
||||
if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
|
||||
kvm_debug("vcpu %d redistributor base not set\n", c);
|
||||
kvm_debug("vcpu %ld redistributor base not set\n", c);
|
||||
return -ENXIO;
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
|
||||
kvm_err("Need to set vgic distributor addresses first\n");
|
||||
kvm_debug("Need to set vgic distributor addresses first\n");
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (!vgic_v3_check_base(kvm)) {
|
||||
kvm_err("VGIC redist and dist frames overlap\n");
|
||||
kvm_debug("VGIC redist and dist frames overlap\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -651,7 +651,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
|
||||
pr_warn("GICV physical address 0x%llx not page aligned\n",
|
||||
(unsigned long long)info->vcpu.start);
|
||||
} else {
|
||||
} else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
|
||||
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
|
||||
kvm_vgic_global_state.can_emulate_gicv2 = true;
|
||||
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
|
||||
@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
unsigned long i;
|
||||
|
||||
kvm_arm_halt_guest(kvm);
|
||||
|
||||
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i, nr_vcpus, ret;
|
||||
int nr_vcpus, ret;
|
||||
unsigned long i;
|
||||
|
||||
if (!kvm_vgic_global_state.has_gicv4)
|
||||
return 0; /* Nothing to see here... move along. */
|
||||
|
||||
@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
|
||||
void vgic_kick_vcpus(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int c;
|
||||
unsigned long c;
|
||||
|
||||
/*
|
||||
* We've injected an interrupt, time to find out who deserves
|
||||
|
||||
@@ -898,7 +898,6 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
int kvm_arch_flush_remote_tlb(struct kvm *kvm);
|
||||
|
||||
@@ -27,6 +27,7 @@ config KVM
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
select SRCU
|
||||
select INTERVAL_TREE
|
||||
help
|
||||
Support for hosting Guest kernels.
|
||||
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
# Makefile for KVM support for MIPS
|
||||
#
|
||||
|
||||
include $(srctree)/virt/kvm/Makefile.kvm
|
||||
|
||||
ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
|
||||
|
||||
kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
|
||||
kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
|
||||
|
||||
kvm-y += mips.o emulate.o entry.o \
|
||||
|
||||
@@ -952,7 +952,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
|
||||
if (!vcpu->arch.pending_exceptions) {
|
||||
kvm_vz_lose_htimer(vcpu);
|
||||
vcpu->arch.wait = 1;
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
|
||||
/*
|
||||
* If we are runnable, then definitely go off to user space to
|
||||
|
||||
@@ -120,7 +120,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
|
||||
s->status |= data;
|
||||
irq.cpu = id;
|
||||
irq.irq = 6;
|
||||
kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
|
||||
kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
|
||||
break;
|
||||
|
||||
case CORE0_CLEAR_OFF:
|
||||
@@ -128,7 +128,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
|
||||
if (!s->status) {
|
||||
irq.cpu = id;
|
||||
irq.irq = -6;
|
||||
kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
|
||||
kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
@@ -171,25 +171,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_mips_free_vcpus(struct kvm *kvm)
|
||||
{
|
||||
unsigned int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
kvm_vcpu_destroy(vcpu);
|
||||
}
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
|
||||
kvm->vcpus[i] = NULL;
|
||||
|
||||
atomic_set(&kvm->online_vcpus, 0);
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
static void kvm_mips_free_gpa_pt(struct kvm *kvm)
|
||||
{
|
||||
/* It should always be safe to remove after flushing the whole range */
|
||||
@@ -199,7 +180,7 @@ static void kvm_mips_free_gpa_pt(struct kvm *kvm)
|
||||
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
kvm_mips_free_vcpus(kvm);
|
||||
kvm_destroy_vcpus(kvm);
|
||||
kvm_mips_free_gpa_pt(kvm);
|
||||
}
|
||||
|
||||
@@ -233,25 +214,20 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
}
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
int needs_flush;
|
||||
|
||||
kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
|
||||
__func__, kvm, mem->slot, mem->guest_phys_addr,
|
||||
mem->memory_size, mem->userspace_addr);
|
||||
|
||||
/*
|
||||
* If dirty page logging is enabled, write protect all pages in the slot
|
||||
* ready for dirty logging.
|
||||
@@ -498,7 +474,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
||||
if (irq->cpu == -1)
|
||||
dvcpu = vcpu;
|
||||
else
|
||||
dvcpu = vcpu->kvm->vcpus[irq->cpu];
|
||||
dvcpu = kvm_get_vcpu(vcpu->kvm, irq->cpu);
|
||||
|
||||
if (intr == 2 || intr == 3 || intr == 4 || intr == 6) {
|
||||
kvm_mips_callbacks->queue_io_int(dvcpu, irq);
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sys_soc.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/bug.h>
|
||||
|
||||
#include <asm/bootinfo.h>
|
||||
#include <asm/mipsregs.h>
|
||||
@@ -22,6 +24,35 @@
|
||||
|
||||
static void *detect_magic __initdata = detect_memory_region;
|
||||
|
||||
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
|
||||
{
|
||||
struct resource_entry *entry;
|
||||
resource_size_t mask;
|
||||
|
||||
entry = resource_list_first_type(&bridge->windows, IORESOURCE_MEM);
|
||||
if (!entry) {
|
||||
pr_err("Cannot get memory resource\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mips_cps_numiocu(0)) {
|
||||
/*
|
||||
* Hardware doesn't accept mask values with 1s after
|
||||
* 0s (e.g. 0xffef), so warn if that's happen
|
||||
*/
|
||||
mask = ~(entry->res->end - entry->res->start) & CM_GCR_REGn_MASK_ADDRMASK;
|
||||
WARN_ON(mask && BIT(ffz(~mask)) - 1 != ~mask);
|
||||
|
||||
write_gcr_reg1_base(entry->res->start);
|
||||
write_gcr_reg1_mask(mask | CM_GCR_REGn_MASK_CMTGT_IOCU0);
|
||||
pr_info("PCI coherence region base: 0x%08llx, mask/settings: 0x%08llx\n",
|
||||
(unsigned long long)read_gcr_reg1_base(),
|
||||
(unsigned long long)read_gcr_reg1_mask());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
phys_addr_t mips_cpc_default_phys_base(void)
|
||||
{
|
||||
panic("Cannot detect cpc address");
|
||||
|
||||
@@ -752,6 +752,7 @@ struct kvm_vcpu_arch {
|
||||
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
|
||||
u32 last_inst;
|
||||
|
||||
struct rcuwait wait;
|
||||
struct rcuwait *waitp;
|
||||
struct kvmppc_vcore *vcore;
|
||||
int ret;
|
||||
@@ -867,6 +868,5 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_exit(void) {}
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#endif /* __POWERPC_KVM_HOST_H__ */
|
||||
|
||||
@@ -200,12 +200,11 @@ extern void kvmppc_core_destroy_vm(struct kvm *kvm);
|
||||
extern void kvmppc_core_free_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot);
|
||||
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
|
||||
@@ -274,12 +273,11 @@ struct kvmppc_ops {
|
||||
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
|
||||
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
|
||||
int (*prepare_memory_region)(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
void (*commit_memory_region)(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
|
||||
@@ -26,6 +26,7 @@ config KVM
|
||||
select KVM_VFIO
|
||||
select IRQ_BYPASS_MANAGER
|
||||
select HAVE_KVM_IRQ_BYPASS
|
||||
select INTERVAL_TREE
|
||||
|
||||
config KVM_BOOK3S_HANDLER
|
||||
bool
|
||||
|
||||
@@ -4,11 +4,8 @@
|
||||
#
|
||||
|
||||
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
|
||||
KVM := ../../../virt/kvm
|
||||
|
||||
common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
|
||||
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
|
||||
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
|
||||
include $(srctree)/virt/kvm/Makefile.kvm
|
||||
|
||||
common-objs-y += powerpc.o emulate_loadstore.o
|
||||
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
|
||||
@@ -125,9 +122,8 @@ kvm-book3s_32-objs := \
|
||||
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
|
||||
|
||||
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
|
||||
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
|
||||
|
||||
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
|
||||
kvm-y += $(kvm-objs-m) $(kvm-objs-y)
|
||||
|
||||
obj-$(CONFIG_KVM_E500V2) += kvm.o
|
||||
obj-$(CONFIG_KVM_E500MC) += kvm.o
|
||||
|
||||
@@ -847,21 +847,19 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
}
|
||||
|
||||
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
|
||||
change);
|
||||
return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
|
||||
}
|
||||
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
|
||||
}
|
||||
|
||||
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
|
||||
@@ -337,7 +337,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
|
||||
|
||||
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
|
||||
{
|
||||
int i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *v;
|
||||
|
||||
/* flush this VA on all cpus */
|
||||
|
||||
@@ -530,7 +530,7 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
|
||||
bool large)
|
||||
{
|
||||
u64 mask = 0xFFFFFFFFFULL;
|
||||
long i;
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *v;
|
||||
|
||||
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user