Merge 6661224e66 ("Merge tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode") into android-mainline

Steps on the way to 5.17-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I15f33e57d4d97cdfcbbd3245c05bd85e8751de20
This commit is contained in:
Greg Kroah-Hartman
2022-02-01 15:12:40 +01:00
527 changed files with 20456 additions and 6756 deletions

View File

@@ -49,10 +49,12 @@ Andy Adamson <andros@citi.umich.edu>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
Anup Patel <anup@brainfault.org> <anup.patel@wdc.com>
Archit Taneja <archit@ti.com>
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>

View File

@@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller
maintainers:
- Neil Armstrong <narmstrong@baylibre.com>
allOf:
- $ref: /schemas/sound/name-prefix.yaml#
description: |
The Amlogic Meson Synopsys Designware Integration is composed of
- A Synopsys DesignWare HDMI Controller IP
@@ -99,6 +102,8 @@ properties:
"#sound-dai-cells":
const: 0
sound-name-prefix: true
required:
- compatible
- reg

View File

@@ -78,6 +78,10 @@ properties:
interrupts:
maxItems: 1
amlogic,canvas:
description: should point to a canvas provider node
$ref: /schemas/types.yaml#/definitions/phandle
power-domains:
maxItems: 1
description: phandle to the associated power domain
@@ -106,6 +110,7 @@ required:
- port@1
- "#address-cells"
- "#size-cells"
- amlogic,canvas
additionalProperties: false
@@ -118,6 +123,7 @@ examples:
interrupts = <3>;
#address-cells = <1>;
#size-cells = <0>;
amlogic,canvas = <&canvas>;
/* CVBS VDAC output port */
port@0 {

View File

@@ -146,11 +146,15 @@ examples:
#address-cells = <3>;
#size-cells = <2>;
#interrupt-cells = <1>;
interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pcie", "msi";
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH
0 0 0 2 &gicv2 GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH
0 0 0 3 &gicv2 GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH
0 0 0 4 &gicv2 GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
msi-parent = <&pcie0>;
msi-controller;
ranges = <0x02000000 0x0 0xf8000000 0x6 0x00000000 0x0 0x04000000>;
@@ -158,5 +162,24 @@ examples:
<0x42000000 0x1 0x80000000 0x3 0x00000000 0x0 0x80000000>;
brcm,enable-ssc;
brcm,scb-sizes = <0x0000000080000000 0x0000000080000000>;
/* PCIe bridge, Root Port */
pci@0,0 {
#address-cells = <3>;
#size-cells = <2>;
reg = <0x0 0x0 0x0 0x0 0x0>;
compatible = "pciclass,0604";
device_type = "pci";
vpcie3v3-supply = <&vreg7>;
ranges;
/* PCIe endpoint */
pci-ep@0,0 {
assigned-addresses =
<0x82010000 0x0 0xf8000000 0x6 0x00000000 0x0 0x2000>;
reg = <0x0 0x0 0x0 0x0 0x0>;
compatible = "pci14e4,1688";
};
};
};
};

View File

@@ -127,6 +127,12 @@ properties:
enum: [1, 2, 3, 4]
default: 1
phys:
maxItems: 1
phy-names:
const: pcie-phy
reset-gpio:
description: Should specify the GPIO for controlling the PCI bus device
reset signal. It's not polarity aware and defaults to active-low reset

View File

@@ -32,8 +32,12 @@ properties:
maxItems: 1
ti,syscon-pcie-mode:
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- items:
- description: Phandle to the SYSCON entry
- description: pcie_ctrl register offset within SYSCON
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
$ref: /schemas/types.yaml#/definitions/phandle
interrupts:
minItems: 1
@@ -65,7 +69,7 @@ examples:
<0x5506000 0x1000>;
reg-names = "app", "dbics", "addr_space", "atu";
power-domains = <&k3_pds 120 TI_SCI_PD_EXCLUSIVE>;
ti,syscon-pcie-mode = <&pcie0_mode>;
ti,syscon-pcie-mode = <&scm_conf 0x4060>;
max-link-speed = <2>;
dma-coherent;
interrupts = <GIC_SPI 340 IRQ_TYPE_EDGE_RISING>;

View File

@@ -36,12 +36,20 @@ properties:
maxItems: 1
ti,syscon-pcie-id:
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- items:
- description: Phandle to the SYSCON entry
- description: pcie_device_id register offset within SYSCON
description: Phandle to the SYSCON entry required for getting PCIe device/vendor ID
$ref: /schemas/types.yaml#/definitions/phandle
ti,syscon-pcie-mode:
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- items:
- description: Phandle to the SYSCON entry
- description: pcie_ctrl register offset within SYSCON
description: Phandle to the SYSCON entry required for configuring PCIe in RC or EP mode.
$ref: /schemas/types.yaml#/definitions/phandle
msi-map: true
@@ -87,8 +95,8 @@ examples:
#size-cells = <2>;
ranges = <0x81000000 0 0 0x10020000 0 0x00010000>,
<0x82000000 0 0x10030000 0x10030000 0 0x07FD0000>;
ti,syscon-pcie-id = <&pcie_devid>;
ti,syscon-pcie-mode = <&pcie0_mode>;
ti,syscon-pcie-id = <&scm_conf 0x0210>;
ti,syscon-pcie-mode = <&scm_conf 0x4060>;
bus-range = <0x0 0xff>;
max-link-speed = <2>;
dma-coherent;

View File

@@ -0,0 +1,30 @@
.. SPDX-License-Identifier: GPL-2.0
=================
Livepatching APIs
=================
Livepatch Enablement
====================
.. kernel-doc:: kernel/livepatch/core.c
:export:
Shadow Variables
================
.. kernel-doc:: kernel/livepatch/shadow.c
:export:
System State Changes
====================
.. kernel-doc:: kernel/livepatch/state.c
:export:
Object Types
============
.. kernel-doc:: include/linux/livepatch.h
:identifiers: klp_patch klp_object klp_func klp_callbacks klp_state

View File

@@ -14,6 +14,7 @@ Kernel Livepatching
shadow-vars
system-state
reliable-stacktrace
api
.. only:: subproject and html

View File

@@ -82,8 +82,8 @@ to do actions that can be done only once when a new variable is allocated.
- call destructor function if defined
- free shadow variable
* klp_shadow_free_all() - detach and free all <*, id> shadow variables
- find and remove any <*, id> references from global hashtable
* klp_shadow_free_all() - detach and free all <_, id> shadow variables
- find and remove any <_, id> references from global hashtable
- if found

View File

@@ -52,12 +52,12 @@ struct klp_state:
The state can be manipulated using two functions:
- *klp_get_state(patch, id)*
- klp_get_state()
- Get struct klp_state associated with the given livepatch
and state id.
- *klp_get_prev_state(id)*
- klp_get_prev_state()
- Get struct klp_state associated with the given feature id and
already installed livepatches.

View File

@@ -0,0 +1,41 @@
# SPDX-License-Identifier: GPL-2.0-only
# Based on bpftool's Documentation Makefile
INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
PREFIX ?= /usr/share
MANDIR ?= $(PREFIX)/man
MAN1DIR = $(MANDIR)/man1
MAN1_RST = $(wildcard rtla*.rst)
_DOC_MAN1 = $(patsubst %.rst,%.1,$(MAN1_RST))
DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1))
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
RST2MAN_OPTS += --verbose
$(OUTPUT)%.1: %.rst
ifndef RST2MAN_DEP
$(error "rst2man not found, but required to generate man pages")
endif
rst2man $(RST2MAN_OPTS) $< > $@
man1: $(DOC_MAN1)
man: man1
clean:
$(RM) $(DOC_MAN1)
install: man
$(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)
$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(MAN1DIR)
uninstall:
$(RM) $(addprefix $(DESTDIR)$(MAN1DIR)/,$(_DOC_MAN1))
$(RMDIR) $(DESTDIR)$(MAN1DIR)
.PHONY: man man1 clean install uninstall
.DEFAULT_GOAL := man

View File

@@ -0,0 +1,12 @@
REPORTING BUGS
==============
Report bugs to <lkml@vger.kernel.org>
LICENSE
=======
**rtla** is Free Software licensed under the GNU GPLv2
COPYING
=======
Copyright \(C) 2021 Red Hat, Inc. Free use of this software is granted under
the terms of the GNU Public License (GPL).

View File

@@ -0,0 +1,23 @@
**-b**, **--bucket-size** *N*
Set the histogram bucket size (default *1*).
**-e**, **--entries** *N*
Set the number of entries of the histogram (default 256).
**--no-header**
Do not print header.
**--no-summary**
Do not print summary.
**--no-index**
Do not print index.
**--with-zeros**
Print zero only entries.

View File

@@ -0,0 +1,28 @@
**-c**, **--cpus** *cpu-list*
Set the osnoise tracer to run the sample threads in the cpu-list.
**-d**, **--duration** *time[s|m|h|d]*
Set the duration of the session.
**-D**, **--debug**
Print debug info.
**-t**, **--trace**\[*=file*]
Save the stopped trace to [*file|osnoise_trace.txt*].
**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period*
Set scheduling parameters to the osnoise tracer threads, the format to set the priority are:
- *o:prio* - use SCHED_OTHER with *prio*;
- *r:prio* - use SCHED_RR with *prio*;
- *f:prio* - use SCHED_FIFO with *prio*;
- *d:runtime[us|ms|s]:period[us|ms|s]* - use SCHED_DEADLINE with *runtime* and *period* in nanoseconds.
**-h**, **--help**
Print help menu.

View File

@@ -0,0 +1,8 @@
The **rtla osnoise** tool is an interface for the *osnoise* tracer. The
*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the
time in a loop while with preemption, softirq and IRQs enabled, thus
allowing all the sources of operating systme noise during its execution.
The *osnoise*'s tracer threads take note of the delta between each time
read, along with an interference counter of all sources of interference.
At the end of each period, the *osnoise* tracer displays a summary of
the results.

View File

@@ -0,0 +1,17 @@
**-p**, **--period** *us*
Set the *osnoise* tracer period in microseconds.
**-r**, **--runtime** *us*
Set the *osnoise* tracer runtime in microseconds.
**-s**, **--stop** *us*
Stop the trace if a single sample is higher than the argument in microseconds.
If **-T** is set, it will also save the trace to the output.
**-S**, **--stop-total** *us*
Stop the trace if the total sample is higher than the argument in microseconds.
If **-T** is set, it will also save the trace to the output.

View File

@@ -0,0 +1,10 @@
The **rtla timerlat** tool is an interface for the *timerlat* tracer. The
*timerlat* tracer dispatches a kernel thread per-cpu. These threads
set a periodic timer to wake themselves up and go back to sleep. After
the wakeup, they collect and generate useful information for the
debugging of operating system timer latency.
The *timerlat* tracer outputs information in two ways. It periodically
prints the timer latency at the timer *IRQ* handler and the *Thread*
handler. It also enable the trace of the most relevant information via
**osnoise:** tracepoints.

View File

@@ -0,0 +1,16 @@
**-p**, **--period** *us*
Set the *timerlat* tracer period in microseconds.
**-i**, **--irq** *us*
Stop trace if the *IRQ* latency is higher than the argument in us.
**-T**, **--thread** *us*
Stop trace if the *Thread* latency is higher than the argument in us.
**-s**, **--stack** *us*
Save the stack trace at the *IRQ* if a *Thread* latency is higher than the
argument in us.

View File

@@ -0,0 +1,3 @@
**-q**, **--quiet**
Print only a summary at the end of the session.

View File

@@ -0,0 +1,66 @@
===================
rtla-osnoise-hist
===================
------------------------------------------------------
Display a histogram of the osnoise tracer samples
------------------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla osnoise hist** [*OPTIONS*]
DESCRIPTION
===========
.. include:: common_osnoise_description.rst
The **rtla osnoise hist** tool collects all **osnoise:sample_threshold**
occurrence in a histogram, displaying the results in a user-friendly way.
The tool also allows many configurations of the *osnoise* tracer and the
collection of the tracer output.
OPTIONS
=======
.. include:: common_osnoise_options.rst
.. include:: common_hist_options.rst
.. include:: common_options.rst
EXAMPLE
=======
In the example below, *osnoise* tracer threads are set to run with real-time
priority *FIFO:1*, on CPUs *0-11*, for *900ms* at each period (*1s* by
default). The reason for reducing the runtime is to avoid starving the
**rtla** tool. The tool is also set to run for *one minute*. The output
histogram is set to group outputs in buckets of *10us* and *25* entries::
[root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25
# RTLA osnoise histogram
# Time unit is microseconds (us)
# Duration: 0 00:01:00
Index CPU-000 CPU-001 CPU-002 CPU-003 CPU-004 CPU-005 CPU-006 CPU-007 CPU-008 CPU-009 CPU-010 CPU-011
0 42982 46287 51779 53740 52024 44817 49898 36500 50408 50128 49523 52377
10 12224 8356 2912 878 2667 10155 4573 18894 4214 4836 5708 2413
20 8 5 12 2 13 24 20 41 29 53 39 39
30 1 1 0 0 10 3 6 19 15 31 30 38
40 0 0 0 0 0 4 2 7 2 3 8 11
50 0 0 0 0 0 0 0 0 0 1 1 2
over: 0 0 0 0 0 0 0 0 0 0 0 0
count: 55215 54649 54703 54620 54714 55003 54499 55461 54668 55052 55309 54880
min: 0 0 0 0 0 0 0 0 0 0 0 0
avg: 0 0 0 0 0 0 0 0 0 0 0 0
max: 30 30 20 20 30 40 40 40 40 50 50 50
SEE ALSO
========
**rtla-osnoise**\(1), **rtla-osnoise-top**\(1)
*osnoise* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
AUTHOR
======
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -0,0 +1,61 @@
===================
rtla-osnoise-top
===================
-----------------------------------------------
Display a summary of the operating system noise
-----------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla osnoise top** [*OPTIONS*]
DESCRIPTION
===========
.. include:: common_osnoise_description.rst
**rtla osnoise top** collects the periodic summary from the *osnoise* tracer,
including the counters of the occurrence of the interference source,
displaying the results in a user-friendly format.
The tool also allows many configurations of the *osnoise* tracer and the
collection of the tracer output.
OPTIONS
=======
.. include:: common_osnoise_options.rst
.. include:: common_top_options.rst
.. include:: common_options.rst
EXAMPLE
=======
In the example below, the **rtla osnoise top** tool is set to run with a
real-time priority *FIFO:1*, on CPUs *0-3*, for *900ms* at each period
(*1s* by default). The reason for reducing the runtime is to avoid starving
the rtla tool. The tool is also set to run for *one minute* and to display
a summary of the report at the end of the session::
[root@f34 ~]# rtla osnoise top -P F:1 -c 0-3 -r 900000 -d 1M -q
Operating System Noise
duration: 0 00:01:00 | time is in us
CPU Period Runtime Noise % CPU Aval Max Noise Max Single HW NMI IRQ Softirq Thread
0 #59 53100000 304896 99.42580 6978 56 549 0 53111 1590 13
1 #59 53100000 338339 99.36282 8092 24 399 0 53130 1448 31
2 #59 53100000 290842 99.45227 6582 39 855 0 53110 1406 12
3 #59 53100000 204935 99.61405 6251 33 290 0 53156 1460 12
SEE ALSO
========
**rtla-osnoise**\(1), **rtla-osnoise-hist**\(1)
Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
AUTHOR
======
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -0,0 +1,59 @@
===============
rtla-osnoise
===============
------------------------------------------------------------------
Measure the operating system noise
------------------------------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla osnoise** [*MODE*] ...
DESCRIPTION
===========
.. include:: common_osnoise_description.rst
The *osnoise* tracer outputs information in two ways. It periodically prints
a summary of the noise of the operating system, including the counters of
the occurrence of the source of interference. It also provides information
for each noise via the **osnoise:** tracepoints. The **rtla osnoise top**
mode displays information about the periodic summary from the *osnoise* tracer.
The **rtla osnoise hist** mode displays information about the noise using
the **osnoise:** tracepoints. For further details, please refer to the
respective man page.
MODES
=====
**top**
Prints the summary from osnoise tracer.
**hist**
Prints a histogram of osnoise samples.
If no MODE is given, the top mode is called, passing the arguments.
OPTIONS
=======
**-h**, **--help**
Display the help text.
For other options, see the man page for the corresponding mode.
SEE ALSO
========
**rtla-osnoise-top**\(1), **rtla-osnoise-hist**\(1)
Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
AUTHOR
======
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -0,0 +1,106 @@
=====================
rtla-timerlat-hist
=====================
------------------------------------------------
Histograms of the operating system timer latency
------------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla timerlat hist** [*OPTIONS*] ...
DESCRIPTION
===========
.. include:: common_timerlat_description.rst
The **rtla timerlat hist** displays a histogram of each tracer event
occurrence. This tool uses the periodic information, and the
**osnoise:** tracepoints are enabled when using the **-T** option.
OPTIONS
=======
.. include:: common_timerlat_options.rst
.. include:: common_hist_options.rst
.. include:: common_options.rst
EXAMPLE
=======
In the example below, **rtla timerlat hist** is set to run for *10* minutes,
in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat
hist** will change the priority of the *timelat* threads to run under
*SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The
*1ms* period is also passed to the *timerlat* tracer::
[root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms
# RTLA timerlat histogram
# Time unit is microseconds (us)
# Duration: 0 00:10:00
Index IRQ-000 Thr-000 IRQ-001 Thr-001 IRQ-002 Thr-002 IRQ-003 Thr-003 IRQ-004 Thr-004
0 276489 0 206089 0 466018 0 481102 0 205546 0
1 318327 35487 388149 30024 94531 48382 83082 71078 388026 55730
2 3282 122584 4019 126527 28231 109012 23311 89309 4568 98739
3 940 11815 837 9863 6209 16227 6895 17196 910 9780
4 444 17287 424 11574 2097 38443 2169 36736 462 13476
5 206 43291 255 25581 1223 101908 1304 101137 236 28913
6 132 101501 96 64584 635 213774 757 215471 99 73453
7 74 169347 65 124758 350 57466 441 53639 69 148573
8 53 85183 31 156751 229 9052 306 9026 39 139907
9 22 10387 12 42762 161 2554 225 2689 19 26192
10 13 1898 8 5770 114 1247 128 1405 13 3772
11 9 560 9 924 71 686 76 765 8 713
12 4 256 2 360 50 411 64 474 3 278
13 2 167 2 172 43 256 53 350 4 180
14 1 88 1 116 15 198 42 223 0 115
15 2 63 3 94 11 139 20 150 0 58
16 2 37 0 56 5 78 10 102 0 39
17 0 18 0 28 4 57 8 80 0 15
18 0 8 0 17 2 50 6 56 0 12
19 0 9 0 5 0 19 0 48 0 18
20 0 4 0 8 0 11 2 27 0 4
21 0 2 0 3 1 9 1 18 0 6
22 0 1 0 3 1 7 0 3 0 5
23 0 2 0 4 0 2 0 7 0 2
24 0 2 0 2 1 3 0 3 0 5
25 0 0 0 1 0 1 0 1 0 3
26 0 1 0 0 0 2 0 2 0 0
27 0 0 0 3 0 1 0 0 0 1
28 0 0 0 3 0 0 0 1 0 0
29 0 0 0 2 0 2 0 1 0 3
30 0 1 0 0 0 0 0 0 0 0
31 0 1 0 0 0 0 0 2 0 2
32 0 0 0 1 0 2 0 0 0 0
33 0 0 0 2 0 0 0 0 0 1
34 0 0 0 0 0 0 0 0 0 2
35 0 1 0 1 0 0 0 0 0 1
36 0 1 0 0 0 1 0 1 0 0
37 0 0 0 1 0 0 0 0 0 0
40 0 0 0 0 0 1 0 1 0 0
41 0 0 0 0 0 0 0 0 0 1
42 0 0 0 0 0 0 0 0 0 1
44 0 0 0 0 0 1 0 0 0 0
46 0 0 0 0 0 0 0 1 0 0
47 0 0 0 0 0 0 0 0 0 1
50 0 0 0 0 0 0 0 0 0 1
54 0 0 0 1 0 0 0 0 0 0
58 0 0 0 1 0 0 0 0 0 0
over: 0 0 0 0 0 0 0 0 0 0
count: 600002 600002 600002 600002 600002 600002 600002 600002 600002 600002
min: 0 1 0 1 0 1 0 1 0 1
avg: 0 5 0 5 0 4 0 4 0 5
max: 16 36 15 58 24 44 21 46 13 50
SEE ALSO
========
**rtla-timerlat**\(1), **rtla-timerlat-top**\(1)
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
AUTHOR
======
Written by Daniel Bristot de Oliveira <bristot@kernel.org>

View File

@@ -0,0 +1,145 @@
====================
rtla-timerlat-top
====================
-------------------------------------------
Measures the operating system timer latency
-------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla timerlat top** [*OPTIONS*] ...
DESCRIPTION
===========
.. include:: common_timerlat_description.rst
The **rtla timerlat top** displays a summary of the periodic output
from the *timerlat* tracer. It also provides information for each
operating system noise via the **osnoise:** tracepoints that can be
seem with the option **-T**.
OPTIONS
=======
.. include:: common_timerlat_options.rst
.. include:: common_top_options.rst
.. include:: common_options.rst
EXAMPLE
=======
In the example below, the *timerlat* tracer is set to capture the stack trace at
the IRQ handler, printing it to the buffer if the *Thread* timer latency is
higher than *30 us*. It is also set to stop the session if a *Thread* timer
latency higher than *30 us* is hit. Finally, it is set to save the trace
buffer if the stop condition is hit::
[root@alien ~]# rtla timerlat top -s 30 -t 30 -T
Timer Latency
0 00:00:59 | IRQ Timer Latency (us) | Thread Timer Latency (us)
CPU COUNT | cur min avg max | cur min avg max
0 #58634 | 1 0 1 10 | 11 2 10 23
1 #58634 | 1 0 1 9 | 12 2 9 23
2 #58634 | 0 0 1 11 | 10 2 9 23
3 #58634 | 1 0 1 11 | 11 2 9 24
4 #58634 | 1 0 1 10 | 11 2 9 26
5 #58634 | 1 0 1 8 | 10 2 9 25
6 #58634 | 12 0 1 12 | 30 2 10 30 <--- CPU with spike
7 #58634 | 1 0 1 9 | 11 2 9 23
8 #58633 | 1 0 1 9 | 11 2 9 26
9 #58633 | 1 0 1 9 | 10 2 9 26
10 #58633 | 1 0 1 13 | 11 2 9 28
11 #58633 | 1 0 1 13 | 12 2 9 24
12 #58633 | 1 0 1 8 | 10 2 9 23
13 #58633 | 1 0 1 10 | 10 2 9 22
14 #58633 | 1 0 1 18 | 12 2 9 27
15 #58633 | 1 0 1 10 | 11 2 9 28
16 #58633 | 0 0 1 11 | 7 2 9 26
17 #58633 | 1 0 1 13 | 10 2 9 24
18 #58633 | 1 0 1 9 | 13 2 9 22
19 #58633 | 1 0 1 10 | 11 2 9 23
20 #58633 | 1 0 1 12 | 11 2 9 28
21 #58633 | 1 0 1 14 | 11 2 9 24
22 #58633 | 1 0 1 8 | 11 2 9 22
23 #58633 | 1 0 1 10 | 11 2 9 27
timerlat hit stop tracing
saving trace to timerlat_trace.txt
[root@alien bristot]# tail -60 timerlat_trace.txt
[...]
timerlat/5-79755 [005] ....... 426.271226: #58634 context thread timer_latency 10823 ns
sh-109404 [006] dnLh213 426.271247: #58634 context irq timer_latency 12505 ns
sh-109404 [006] dNLh313 426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns
sh-109404 [006] d...313 426.271263: thread_noise: sh:109404 start 426.271245853 duration 4769 ns
timerlat/6-79756 [006] ....... 426.271264: #58634 context thread timer_latency 30328 ns
timerlat/6-79756 [006] ....1.. 426.271265: <stack trace>
=> timerlat_irq
=> __hrtimer_run_queues
=> hrtimer_interrupt
=> __sysvec_apic_timer_interrupt
=> sysvec_apic_timer_interrupt
=> asm_sysvec_apic_timer_interrupt
=> _raw_spin_unlock_irqrestore <---- spinlock that disabled interrupt.
=> try_to_wake_up
=> autoremove_wake_function
=> __wake_up_common
=> __wake_up_common_lock
=> ep_poll_callback
=> __wake_up_common
=> __wake_up_common_lock
=> fsnotify_add_event
=> inotify_handle_inode_event
=> fsnotify
=> __fsnotify_parent
=> __fput
=> task_work_run
=> exit_to_user_mode_prepare
=> syscall_exit_to_user_mode
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
=> 0x7265000001378c
=> 0x10000cea7
=> 0x25a00000204a
=> 0x12e302d00000000
=> 0x19b51010901b6
=> 0x283ce00726500
=> 0x61ea308872
=> 0x00000fe3
bash-109109 [007] d..h... 426.271265: #58634 context irq timer_latency 1211 ns
timerlat/6-79756 [006] ....... 426.271267: timerlat_main: stop tracing hit on cpu 6
In the trace, it is possible the notice that the *IRQ* timer latency was
already high, accounting *12505 ns*. The IRQ delay was caused by the
*bash-109109* process that disabled IRQs in the wake-up path
(*_try_to_wake_up()* function). The duration of the IRQ handler that woke
up the timerlat thread, informed with the **osnoise:irq_noise** event, was
also high and added more *12553 ns* to the Thread latency. Finally, the
**osnoise:thread_noise** added by the currently running thread (including
the scheduling overhead) added more *4769 ns*. Summing up these values,
the *Thread* timer latency accounted for *30328 ns*.
The primary reason for this high value is the wake-up path that was hit
twice during this case: when the *bash-109109* was waking up a thread
and then when the *timerlat* thread was awakened. This information can
then be used as the starting point of a more fine-grained analysis.
Note that **rtla timerlat** was dispatched without changing *timerlat* tracer
threads' priority. That is generally not needed because these threads hava
priority *FIFO:95* by default, which is a common priority used by real-time
kernel developers to analyze scheduling delays.
SEE ALSO
--------
**rtla-timerlat**\(1), **rtla-timerlat-hist**\(1)
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
AUTHOR
------
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -0,0 +1,57 @@
================
rtla-timerlat
================
-------------------------------------------
Measures the operating system timer latency
-------------------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla timerlat** [*MODE*] ...
DESCRIPTION
===========
.. include:: common_timerlat_description.rst
The *timerlat* tracer outputs information in two ways. It periodically
prints the timer latency at the timer *IRQ* handler and the *Thread* handler.
It also provides information for each noise via the **osnoise:** tracepoints.
The **rtla timerlat top** mode displays a summary of the periodic output
from the *timerlat* tracer. The **rtla hist hist** mode displays a histogram
of each tracer event occurrence. For further details, please refer to the
respective man page.
MODES
=====
**top**
Prints the summary from *timerlat* tracer.
**hist**
Prints a histogram of timerlat samples.
If no *MODE* is given, the top mode is called, passing the arguments.
OPTIONS
=======
**-h**, **--help**
Display the help text.
For other options, see the man page for the corresponding mode.
SEE ALSO
========
**rtla-timerlat-top**\(1), **rtla-timerlat-hist**\(1)
*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html>
AUTHOR
======
Written by Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -0,0 +1,48 @@
=========
rtla
=========
--------------------------------
Real-time Linux Analysis tool
--------------------------------
:Manual section: 1
SYNOPSIS
========
**rtla** *COMMAND* [*OPTIONS*]
DESCRIPTION
===========
The **rtla** is a meta-tool that includes a set of commands that aims to
analyze the real-time properties of Linux. But instead of testing Linux
as a black box, **rtla** leverages kernel tracing capabilities to provide
precise information about the properties and root causes of unexpected
results.
COMMANDS
========
**osnoise**
Gives information about the operating system noise (osnoise).
**timerlat**
Measures the IRQ and thread timer latency.
OPTIONS
=======
**-h**, **--help**
Display the help text.
For other options, see the man page for the corresponding command.
SEE ALSO
========
**rtla-osnoise**\(1), **rtla-timerlat**\(1)
AUTHOR
======
Daniel Bristot de Oliveira <bristot@kernel.org>
.. include:: common_appendix.rst

View File

@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
prev_comm ~ "*sh*"
prev_comm ~ "ba*sh"
If the field is a pointer that points into user space (for example
"filename" from sys_enter_openat), then you have to append ".ustring" to the
field name::
filename.ustring ~ "password"
As the kernel will have to know how to retrieve the memory that the pointer
is at from user space.
5.2 Setting filters
-------------------
@@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of
the filter string; the error message should still be useful though
even without more accurate position info.
5.2.1 Filter limitations
------------------------
If a filter is placed on a string pointer ``(char *)`` that does not point
to a string on the ring buffer, but instead points to kernel or user space
memory, then, for safety reasons, at most 1024 bytes of the content is
copied onto a temporary buffer to do the compare. If the copy of the memory
faults (the pointer points to memory that should not be accessed), then the
string compare will be treated as not matching.
5.3 Clearing filters
--------------------

View File

@@ -371,6 +371,9 @@ The bits in the dirty bitmap are cleared before the ioctl returns, unless
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
see the description of the capability.
Note that the Xen shared info page, if configured, shall always be assumed
to be dirty. KVM will not explicitly mark it such.
4.9 KVM_SET_MEMORY_ALIAS
------------------------
@@ -1566,6 +1569,7 @@ otherwise it will return EBUSY error.
struct kvm_xsave {
__u32 region[1024];
__u32 extra[0];
};
This ioctl would copy current vcpu's xsave struct to the userspace.
@@ -1574,7 +1578,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
4.43 KVM_SET_XSAVE
------------------
:Capability: KVM_CAP_XSAVE
:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (in)
@@ -1585,9 +1589,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
struct kvm_xsave {
__u32 region[1024];
__u32 extra[0];
};
This ioctl would copy userspace's xsave struct to the kernel.
This ioctl would copy userspace's xsave struct to the kernel. It copies
as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
when invoked on the vm file descriptor. The size value returned by
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
Currently, it is only greater than 4096 if a dynamic feature has been
enabled with ``arch_prctl()``, but this may change in the future.
The offsets of the state save areas in struct kvm_xsave follow the
contents of CPUID leaf 0xD on the host.
4.44 KVM_GET_XCRS
@@ -1684,6 +1697,10 @@ userspace capabilities, and with user requirements (for example, the
user may wish to constrain cpuid to emulate older hardware, or for
feature consistency across a cluster).
Dynamically-enabled feature bits need to be requested with
``arch_prctl()`` before calling this ioctl. Feature bits that have not
been requested are excluded from the result.
Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
expose cpuid features (e.g. MONITOR) which are not supported by kvm in
its default configuration. If userspace enables such capabilities, it
@@ -1796,6 +1813,7 @@ No flags are specified so far, the corresponding field must be set to zero.
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8];
} u;
};
@@ -1805,6 +1823,7 @@ No flags are specified so far, the corresponding field must be set to zero.
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
flags:
@@ -1856,6 +1875,20 @@ address_hi must be zero.
__u32 sint;
};
struct kvm_irq_routing_xen_evtchn {
__u32 port;
__u32 vcpu;
__u32 priority;
};
When KVM_CAP_XEN_HVM includes the KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL bit
in its indication of supported features, routing to Xen event channels
is supported. Although the priority field is present, only the value
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL is supported, which means delivery by
2 level event channels. FIFO event channel support may be added in
the future.
4.55 KVM_SET_TSC_KHZ
--------------------
@@ -3701,7 +3734,7 @@ KVM with the currently defined set of flags.
:Architectures: s390
:Type: vm ioctl
:Parameters: struct kvm_s390_skeys
:Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
:Returns: 0 on success, KVM_S390_GET_SKEYS_NONE if guest is not using storage
keys, negative value on error
This ioctl is used to get guest storage key values on the s390
@@ -3720,7 +3753,7 @@ you want to get.
The count field is the number of consecutive frames (starting from start_gfn)
whose storage keys to get. The count field must be at least 1 and the maximum
allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
will cause the ioctl to return -EINVAL.
The skeydata_addr field is the address to a buffer large enough to hold count
@@ -3744,7 +3777,7 @@ you want to set.
The count field is the number of consecutive frames (starting from start_gfn)
whose storage keys to get. The count field must be at least 1 and the maximum
allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
allowed value is defined as KVM_S390_SKEYS_MAX. Values outside this range
will cause the ioctl to return -EINVAL.
The skeydata_addr field is the address to a buffer containing count bytes of
@@ -5134,6 +5167,15 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
not aware of the Xen CPU id which is used as the index into the
vcpu_info[] array, so cannot know the correct default location.
Note that the shared info page may be constantly written to by KVM;
it contains the event channel bitmap used to deliver interrupts to
a Xen guest, amongst other things. It is exempt from dirty tracking
mechanisms — KVM will not explicitly mark the page as dirty each
time an event channel interrupt is delivered to the guest! Thus,
userspace should always assume that the designated GFN is dirty if
any vCPU has been running or any event channel interrupts can be
routed to the guest.
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
Sets the exception vector used to deliver Xen event channel upcalls.
@@ -5503,6 +5545,34 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
The Stats Data block contains an array of 64-bit values in the same order
as the descriptors in Descriptors block.
4.42 KVM_GET_XSAVE2
------------------
:Capability: KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (out)
:Returns: 0 on success, -1 on error
::
struct kvm_xsave {
__u32 region[1024];
__u32 extra[0];
};
This ioctl would copy current vcpu's xsave struct to the userspace. It
copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
when invoked on the vm file descriptor. The size value returned by
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
Currently, it is only greater than 4096 if a dynamic feature has been
enabled with ``arch_prctl()``, but this may change in the future.
The offsets of the state save areas in struct kvm_xsave follow the contents
of CPUID leaf 0xD on the host.
5. The kvm_run structure
========================
@@ -7401,6 +7471,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 3)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -7420,6 +7491,10 @@ The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
field set to indicate 2 level event channel delivery.
8.31 KVM_CAP_PPC_MULTITCE
-------------------------

View File

@@ -161,7 +161,7 @@ Shadow pages contain the following information:
If clear, this page corresponds to a guest page table denoted by the gfn
field.
role.quadrant:
When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
When role.has_4_byte_gpte=1, the guest uses 32-bit gptes while the host uses 64-bit
sptes. That means a guest page table contains more ptes than the host,
so multiple shadow pages are needed to shadow one guest page.
For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
@@ -177,9 +177,9 @@ Shadow pages contain the following information:
The page is invalid and should not be used. It is a root page that is
currently pinned (by a cpu hardware register pointing to it); once it is
unpinned it will be destroyed.
role.gpte_is_8_bytes:
Reflects the size of the guest PTE for which the page is valid, i.e. '1'
if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
role.has_4_byte_gpte:
Reflects the size of the guest PTE for which the page is valid, i.e. '0'
if direct map or 64-bit gptes are in use, '1' if 32-bit gptes are in use.
role.efer_nx:
Contains the value of efer.nx for which the page is valid.
role.cr0_wp:

View File

@@ -226,6 +226,7 @@ F: drivers/net/ethernet/8390/
M: Eric Van Hensbergen <ericvh@gmail.com>
M: Latchesar Ionkov <lucho@ionkov.net>
M: Dominique Martinet <asmadeus@codewreck.org>
R: Christian Schoenebeck <linux_oss@crudebyte.com>
L: v9fs-developer@lists.sourceforge.net
S: Maintained
W: http://swik.net/v9fs
@@ -7416,7 +7417,6 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
M: "J. Bruce Fields" <bfields@fieldses.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
@@ -10434,12 +10434,11 @@ S: Odd Fixes
W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: "J. Bruce Fields" <bfields@fieldses.org>
M: Chuck Lever <chuck.lever@oracle.com>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
T: git git://linux-nfs.org/~bfields/linux.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
F: fs/lockd/
F: fs/nfs_common/
F: fs/nfsd/
@@ -10547,8 +10546,8 @@ F: arch/powerpc/kernel/kvm*
F: arch/powerpc/kvm/
KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv)
M: Anup Patel <anup.patel@wdc.com>
R: Atish Patra <atish.patra@wdc.com>
M: Anup Patel <anup@brainfault.org>
R: Atish Patra <atishp@atishpatra.org>
L: kvm@vger.kernel.org
L: kvm-riscv@lists.infradead.org
L: linux-riscv@lists.infradead.org
@@ -14898,6 +14897,19 @@ L: linux-pci@vger.kernel.org
S: Supported
F: Documentation/PCI/pci-error-recovery.rst
PCI PEER-TO-PEER DMA (P2PDMA)
M: Bjorn Helgaas <bhelgaas@google.com>
M: Logan Gunthorpe <logang@deltatee.com>
L: linux-pci@vger.kernel.org
S: Supported
Q: https://patchwork.kernel.org/project/linux-pci/list/
B: https://bugzilla.kernel.org
C: irc://irc.oftc.net/linux-pci
T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
F: Documentation/driver-api/pci/p2pdma.rst
F: drivers/pci/p2pdma.c
F: include/linux/pci-p2pdma.h
PCI MSI DRIVER FOR ALTERA MSI IP
M: Joyce Ooi <joyce.ooi@intel.com>
L: linux-pci@vger.kernel.org

View File

@@ -180,7 +180,10 @@ void pci_ioremap_set_mem_type(int mem_type);
static inline void pci_ioremap_set_mem_type(int mem_type) {}
#endif
extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
struct resource;
#define pci_remap_iospace pci_remap_iospace
int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
/*
* PCI configuration space mapping function.

View File

@@ -38,6 +38,7 @@ static int num_pcie_ports;
static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
{
struct pcie_port *pp;
struct resource realio;
if (nr >= num_pcie_ports)
return 0;
@@ -53,10 +54,10 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
orion_pcie_setup(pp->base);
if (pp->index == 0)
pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE0_IO_PHYS_BASE);
else
pci_ioremap_io(sys->busnr * SZ_64K, DOVE_PCIE1_IO_PHYS_BASE);
realio.start = sys->busnr * SZ_64K;
realio.end = realio.start + SZ_64K - 1;
pci_remap_iospace(&realio, pp->index == 0 ? DOVE_PCIE0_IO_PHYS_BASE :
DOVE_PCIE1_IO_PHYS_BASE);
/*
* IORESOURCE_MEM

View File

@@ -185,6 +185,7 @@ iop3xx_pci_abort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
{
struct resource *res;
struct resource realio;
if (nr != 0)
return 0;
@@ -206,7 +207,9 @@ int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
pci_add_resource_offset(&sys->resources, res, sys->mem_offset);
pci_ioremap_io(0, IOP3XX_PCI_LOWER_IO_PA);
realio.start = 0;
realio.end = realio.start + SZ_64K - 1;
pci_remap_iospace(&realio, IOP3XX_PCI_LOWER_IO_PA);
return 1;
}

View File

@@ -101,6 +101,7 @@ static void __init mv78xx0_pcie_preinit(void)
static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
{
struct pcie_port *pp;
struct resource realio;
if (nr >= num_pcie_ports)
return 0;
@@ -115,7 +116,9 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
orion_pcie_setup(pp->base);
pci_ioremap_io(nr * SZ_64K, MV78XX0_PCIE_IO_PHYS_BASE(nr));
realio.start = nr * SZ_64K;
realio.end = realio.start + SZ_64K - 1;
pci_remap_iospace(&realio, MV78XX0_PCIE_IO_PHYS_BASE(nr));
pci_add_resource_offset(&sys->resources, &pp->res, sys->mem_offset);

View File

@@ -142,6 +142,7 @@ static struct pci_ops pcie_ops = {
static int __init pcie_setup(struct pci_sys_data *sys)
{
struct resource *res;
struct resource realio;
int dev;
/*
@@ -164,7 +165,9 @@ static int __init pcie_setup(struct pci_sys_data *sys)
pcie_ops.read = pcie_rd_conf_wa;
}
pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCIE_IO_PHYS_BASE);
realio.start = sys->busnr * SZ_64K;
realio.end = realio.start + SZ_64K - 1;
pci_remap_iospace(&realio, ORION5X_PCIE_IO_PHYS_BASE);
/*
* Request resources.
@@ -466,6 +469,7 @@ static void __init orion5x_setup_pci_wins(void)
static int __init pci_setup(struct pci_sys_data *sys)
{
struct resource *res;
struct resource realio;
/*
* Point PCI unit MBUS decode windows to DRAM space.
@@ -482,7 +486,9 @@ static int __init pci_setup(struct pci_sys_data *sys)
*/
orion5x_setbits(PCI_CMD, PCI_CMD_HOST_REORDER);
pci_ioremap_io(sys->busnr * SZ_64K, ORION5X_PCI_IO_PHYS_BASE);
realio.start = sys->busnr * SZ_64K;
realio.end = realio.start + SZ_64K - 1;
pci_remap_iospace(&realio, ORION5X_PCI_IO_PHYS_BASE);
/*
* Request resources

View File

@@ -459,16 +459,20 @@ void pci_ioremap_set_mem_type(int mem_type)
pci_ioremap_mem_type = mem_type;
}
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
{
BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
PCI_IO_VIRT_BASE + offset + SZ_64K,
phys_addr,
if (!(res->flags & IORESOURCE_IO))
return -EINVAL;
if (res->end > IO_SPACE_LIMIT)
return -EINVAL;
return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
__pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
}
EXPORT_SYMBOL_GPL(pci_ioremap_io);
EXPORT_SYMBOL(pci_remap_iospace);
void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
{

View File

@@ -64,6 +64,15 @@
#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
#define HV_REGISTER_STIMER0_COUNT 0x000B0001
union hv_msi_entry {
u64 as_uint64[2];
struct {
u64 address;
u32 data;
u32 reserved;
} __packed;
};
#include <asm-generic/hyperv-tlfs.h>
#endif

View File

@@ -63,6 +63,7 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,

View File

@@ -41,6 +41,8 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
@@ -386,7 +388,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
} else {
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
sctlr |= (1 << 25);
sctlr |= SCTLR_ELx_EE;
vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
}
}

View File

@@ -26,7 +26,6 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -298,9 +297,6 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
/* State of various workarounds, see kvm_asm.h for bit assignment */
u64 workaround_flags;
/* Miscellaneous vcpu state flags */
u64 flags;
@@ -321,8 +317,8 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
struct thread_info *host_thread_info; /* hyp VA */
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
struct task_struct *parent_task;
struct {
/* {Break,watch}point registers */
@@ -367,9 +363,6 @@ struct kvm_vcpu_arch {
int target;
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
/* Detect first run of a vcpu */
bool has_run_once;
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -411,20 +404,17 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
/*
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
* set together with an exception...
*/
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
/*
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
* take the following values:
@@ -442,11 +432,14 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
/*
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
* set together with an exception...
*/
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
@@ -606,6 +599,8 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe(f, ...) \
({ \
@@ -724,7 +719,6 @@ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
void kvm_arm_init_debug(void);
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
@@ -744,8 +738,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
@@ -756,12 +752,7 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
return kvm_arch_vcpu_run_map_fp(vcpu);
}
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);

View File

@@ -90,7 +90,6 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
void __sve_save_state(void *sve_pffr, u32 *fpsr);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
#ifndef __KVM_NVHE_HYPERVISOR__

View File

@@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
int kvm_share_hyp(void *from, void *to);
void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,

View File

@@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot);
/**
* kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
* @addr: Virtual address from which to remove the mapping.
* @size: Size of the mapping.
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
* boundary.
*
* TLB invalidation is performed for each page-table entry cleared during the
* unmapping operation and the reference count for the page-table page
* containing the cleared entry is decremented, with unreferenced pages being
* freed. The unmapping operation will stop early if it encounters either an
* invalid page-table entry or a valid block mapping which maps beyond the range
* being unmapped.
*
* Return: Number of bytes unmapped, which may be 0.
*/
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
* kvm_get_vtcr() - Helper to construct VTCR_EL2
* @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
@@ -270,8 +291,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @arch: Arch-specific KVM structure representing the guest virtual
* machine.
* @mmu: S2 MMU context for this S2 translation
* @mm_ops: Memory management callbacks.
* @flags: Stage-2 configuration flags.
* @force_pte_cb: Function that returns true if page level mappings must
@@ -279,13 +299,13 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
*
* Return: 0 on success, negative error code on failure.
*/
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.

View File

@@ -0,0 +1,71 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
*/
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
#include <linux/memblock.h>
#include <asm/kvm_pgtable.h>
#define HYP_MEMBLOCK_REGIONS 128
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0, i;
/* Provision the worst case scenario */
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
return total;
}
static inline unsigned long __hyp_pgtable_total_pages(void)
{
unsigned long res = 0, i;
/* Cover all of memory with page-granularity */
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
}
return res;
}
static inline unsigned long hyp_s1_pgtable_pages(void)
{
unsigned long res;
res = __hyp_pgtable_total_pages();
/* Allow 1 GiB for private mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
static inline unsigned long host_s2_pgtable_pages(void)
{
unsigned long res;
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
res = __hyp_pgtable_total_pages() + 16;
/* Allow 1 GiB for MMIO mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
#endif /* __ARM64_KVM_PKVM_H__ */

View File

@@ -15,6 +15,7 @@
#ifndef __ASSEMBLY__
#include <linux/refcount.h>
#include <asm/cpufeature.h>
typedef struct {
atomic64_t id;

View File

@@ -953,6 +953,7 @@
#define ID_AA64DFR0_PMUVER_8_1 0x4
#define ID_AA64DFR0_PMUVER_8_4 0x5
#define ID_AA64DFR0_PMUVER_8_5 0x6
#define ID_AA64DFR0_PMUVER_8_7 0x7
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
#define ID_AA64DFR0_PMSVER_8_2 0x1

View File

@@ -111,7 +111,6 @@ int main(void)
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));

View File

@@ -79,7 +79,11 @@
* indicate whether or not the userland FPSIMD state of the current task is
* present in the registers. The flag is set unless the FPSIMD registers of this
* CPU currently contain the most recent userland FPSIMD state of the current
* task.
* task. If the task is behaving as a VMM, then this is will be managed by
* KVM which will clear it to indicate that the vcpu FPSIMD state is currently
* loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
* save the task's FPSIMD context back to task_struct from softirq context.

2
arch/arm64/kvm/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
hyp_constants.h

View File

@@ -40,6 +40,7 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
select INTERVAL_TREE
help
Support hosting virtualized guest machines.

View File

@@ -5,17 +5,15 @@
ccflags-y += -I $(srctree)/$(src)
KVM=../../../virt/kvm
include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
arch_timer.o trng.o\
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
@@ -25,3 +23,19 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
always-y := hyp_constants.h hyp-constants.s
define rule_gen_hyp_constants
$(call filechk,offsets,__HYP_CONSTANTS_H__)
endef
CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
$(call if_changed_dep,cc_s_c)
$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE
$(call if_changed_rule,gen_hyp_constants)
obj-kvm := $(addprefix $(obj)/, $(kvm-y))
$(obj-kvm): $(obj)/hyp_constants.h

View File

@@ -467,7 +467,7 @@ out:
}
/*
* Schedule the background timer before calling kvm_vcpu_block, so that this
* Schedule the background timer before calling kvm_vcpu_halt, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
* interrupt to handle.
*/
@@ -649,7 +649,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -672,7 +671,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
if (rcuwait_active(wait))
if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
/*
@@ -750,7 +749,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
/* Make the updates of cntvoff for all vtimer contexts atomic */
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
{
int i;
unsigned long i;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
@@ -1189,8 +1188,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
int vtimer_irq, ptimer_irq;
int i, ret;
int vtimer_irq, ptimer_irq, ret;
unsigned long i;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
@@ -1297,7 +1296,7 @@ void kvm_timer_init_vhe(void)
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
int i;
unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;

View File

@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
@@ -175,19 +175,13 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
*/
void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
bitmap_free(kvm->arch.pmu_filter);
kvm_vgic_destroy(kvm);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_vcpu_destroy(kvm->vcpus[i]);
kvm->vcpus[i] = NULL;
}
}
atomic_set(&kvm->online_vcpus, 0);
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -342,7 +336,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
return kvm_share_hyp(vcpu, vcpu + 1);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -351,7 +345,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -368,27 +362,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
/*
* If we're about to block (most likely because we've just hit a
* WFI), we need to sync back the state of the GIC CPU interface
* so that we have the latest PMR and group enables. This ensures
* that kvm_arch_vcpu_runnable has up-to-date data to decide
* whether we have pending interrupts.
*
* For the same reason, we want to tell GICv4 that we need
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
vgic_v4_put(vcpu, true);
preempt_enable();
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
preempt_disable();
vgic_v4_load(vcpu);
preempt_enable();
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -591,18 +570,33 @@ static void update_vmid(struct kvm_vmid *vmid)
spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
return vcpu->arch.target >= 0;
}
/*
* Handle both the initialisation that is being done when the vcpu is
* run for the first time, as well as the updates that must be
* performed each time we get a new thread dealing with this vcpu.
*/
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
int ret = 0;
int ret;
if (likely(vcpu->arch.has_run_once))
return 0;
if (!kvm_vcpu_initialized(vcpu))
return -ENOEXEC;
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
vcpu->arch.has_run_once = true;
ret = kvm_arch_vcpu_run_map_fp(vcpu);
if (ret)
return ret;
if (likely(vcpu_has_run_once(vcpu)))
return 0;
kvm_arm_vcpu_init_debug(vcpu);
@@ -614,12 +608,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
} else {
/*
* Tell the rest of the code that there are userspace irqchip
* VMs in the wild.
*/
static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -627,6 +615,16 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return ret;
ret = kvm_arm_pmu_v3_enable(vcpu);
if (ret)
return ret;
if (!irqchip_in_kernel(kvm)) {
/*
* Tell the rest of the code that there are userspace irqchip
* VMs in the wild.
*/
static_branch_inc(&userspace_irqchip_in_use);
}
/*
* Initialize traps for protected VMs.
@@ -646,7 +644,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
void kvm_arm_halt_guest(struct kvm *kvm)
{
int i;
unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -656,12 +654,12 @@ void kvm_arm_halt_guest(struct kvm *kvm)
void kvm_arm_resume_guest(struct kvm *kvm)
{
int i;
unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
__kvm_vcpu_wake_up(vcpu);
}
}
@@ -686,9 +684,37 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
smp_rmb();
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
/**
* kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
* @vcpu: The VCPU pointer
*
* Suspend execution of a vCPU until a valid wake event is detected, i.e. until
* the vCPU is runnable. The vCPU may or may not be scheduled out, depending
* on when a wake event arrives, e.g. there may already be a pending wake event.
*/
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
return vcpu->arch.target >= 0;
/*
* Sync back the state of the GIC CPU interface so that we have
* the latest PMR and group enables. This ensures that
* kvm_arch_vcpu_runnable has up-to-date data to decide whether
* we have pending interrupts, e.g. when determining if the
* vCPU should block.
*
* For the same reason, we want to tell GICv4 that we need
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
vgic_v4_put(vcpu, true);
preempt_enable();
kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
vgic_v4_load(vcpu);
preempt_enable();
}
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
@@ -786,13 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int ret;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
if (ret)
@@ -856,6 +875,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
kvm_arm_setup_debug(vcpu);
kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
* Enter the guest
@@ -1130,7 +1150,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* need to invalidate the I-cache though, as FWB does *not*
* imply CTR_EL0.DIC.
*/
if (vcpu->arch.has_run_once) {
if (vcpu_has_run_once(vcpu)) {
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
@@ -2043,7 +2063,7 @@ static int finalize_hyp_mode(void)
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
int i;
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
kvm_for_each_vcpu(i, vcpu, kvm) {

View File

@@ -7,7 +7,6 @@
*/
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/kvm_host.h>
#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
@@ -15,6 +14,19 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
{
struct task_struct *p = vcpu->arch.parent_task;
struct user_fpsimd_state *fpsimd;
if (!is_protected_kvm_enabled() || !p)
return;
fpsimd = &p->thread.uw.fpsimd_state;
kvm_unshare_hyp(fpsimd, fpsimd + 1);
put_task_struct(p);
}
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
int ret;
struct thread_info *ti = &current->thread_info;
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
kvm_vcpu_unshare_task_fp(vcpu);
/* Make sure the host task fpsimd state is visible to hyp: */
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
return ret;
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
/*
* Make sure the host task thread flags and fpsimd state are
* visible to hyp:
* We need to keep current's task_struct pinned until its data has been
* unshared with the hypervisor to make sure it is not re-used by the
* kernel and donated to someone else while already shared -- see
* kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
*/
ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
if (ret)
goto error;
ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
if (ret)
goto error;
if (vcpu->arch.sve_state) {
void *sve_end;
sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
PAGE_HYP);
if (ret)
goto error;
if (is_protected_kvm_enabled()) {
get_task_struct(current);
vcpu->arch.parent_task = current;
}
vcpu->arch.host_thread_info = kern_hyp_va(ti);
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
error:
return ret;
return 0;
}
/*
@@ -66,26 +71,27 @@ error:
*
* Here, we just set the correct metadata to indicate that the FPSIMD
* state in the cpu regs (if any) belongs to current on the host.
*
* TIF_SVE is backed up here, since it may get clobbered with guest state.
* This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
*/
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
BUG_ON(test_thread_flag(TIF_SVE));
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_HOST_SVE_IN_USE |
KVM_ARM64_HOST_SVE_ENABLED);
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
if (test_thread_flag(TIF_SVE))
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
else
vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
}
/*
* If the guest FPSIMD state was loaded, update the host's context
* tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
@@ -115,13 +121,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
bool host_has_sve = system_supports_sve();
bool guest_has_sve = vcpu_has_sve(vcpu);
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
if (guest_has_sve) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
/* Restore the VL that was saved when bound to the CPU */
@@ -131,7 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
}
fpsimd_save_and_flush_cpu_state();
} else if (has_vhe() && host_has_sve) {
} else if (has_vhe() && system_supports_sve()) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
@@ -145,8 +149,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
update_thread_flag(TIF_SVE,
vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
update_thread_flag(TIF_SVE, 0);
local_irq_restore(flags);
}

View File

@@ -82,7 +82,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*
* WFE: Yield the CPU and come back to this vcpu when the scheduler
* decides to.
* WFI: Simply call kvm_vcpu_block(), which will halt execution of
* WFI: Simply call kvm_vcpu_halt(), which will halt execution of
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
@@ -95,8 +95,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
kvm_vcpu_wfi(vcpu);
}
kvm_incr_pc(vcpu);

View File

@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o

View File

@@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
SYM_FUNC_START(__sve_save_state)
mov x2, #1
sve_save 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_save_state)

View File

@@ -0,0 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kbuild.h>
#include <nvhe/memory.h>
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
return 0;
}

View File

@@ -29,7 +29,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -49,7 +48,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
@@ -143,16 +142,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
{
struct thread_struct *thread;
thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
uw.fpsimd_state);
__sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
}
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -169,21 +158,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
*/
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest, sve_host;
bool sve_guest;
u8 esr_ec;
u64 reg;
if (!system_supports_fpsimd())
return false;
if (system_supports_sve()) {
sve_guest = vcpu_has_sve(vcpu);
sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
} else {
sve_guest = false;
sve_host = false;
}
sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
/* Don't handle SVE traps for non-SVE vcpus here: */
@@ -207,11 +189,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
if (sve_host)
__hyp_sve_save_host(vcpu);
else
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}

View File

@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
__PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
KVM_PGTABLE_PROT_SW1,
/* Meta-states which aren't encoded directly in the PTE's SW bits */
PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);

View File

@@ -10,13 +10,8 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
#define HYP_MEMBLOCK_REGIONS 128
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
extern struct hyp_pool hpool;
extern u64 __io_map_base;
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
@@ -39,58 +34,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
*end = ALIGN(*end, PAGE_SIZE);
}
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0, i;
/* Provision the worst case scenario */
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
return total;
}
static inline unsigned long __hyp_pgtable_total_pages(void)
{
unsigned long res = 0, i;
/* Cover all of memory with page-granularity */
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
}
return res;
}
static inline unsigned long hyp_s1_pgtable_pages(void)
{
unsigned long res;
res = __hyp_pgtable_total_pages();
/* Allow 1 GiB for private mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
static inline unsigned long host_s2_pgtable_pages(void)
{
unsigned long res;
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
res = __hyp_pgtable_total_pages() + 16;
/* Allow 1 GiB for MMIO mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
#endif /* __KVM_HYP_MM_H */

View File

@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
static void hyp_early_alloc_get_page(void *addr) { }
static void hyp_early_alloc_put_page(void *addr) { }
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}

View File

@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
}
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),

View File

@@ -9,6 +9,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>
#include <hyp/fault.h>
@@ -27,6 +28,26 @@ static struct hyp_pool host_s2_pool;
const u8 pkvm_hyp_id = 1;
static void host_lock_component(void)
{
hyp_spin_lock(&host_kvm.lock);
}
static void host_unlock_component(void)
{
hyp_spin_unlock(&host_kvm.lock);
}
static void hyp_lock_component(void)
{
hyp_spin_lock(&pkvm_pgd_lock);
}
static void hyp_unlock_component(void)
{
hyp_spin_unlock(&pkvm_pgd_lock);
}
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -103,19 +124,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
mmu->arch = &host_kvm.arch;
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
if (ret)
return ret;
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
mmu->arch = &host_kvm.arch;
mmu->pgt = &host_kvm.pgt;
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
WRITE_ONCE(mmu->vmid.vmid, 0);
@@ -338,102 +359,14 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
hyp_spin_lock(&host_kvm.lock);
host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
return ret;
}
static inline bool check_prot(enum kvm_pgtable_prot prot,
enum kvm_pgtable_prot required,
enum kvm_pgtable_prot denied)
{
return (prot & (required | denied)) == required;
}
int __pkvm_host_share_hyp(u64 pfn)
{
phys_addr_t addr = hyp_pfn_to_phys(pfn);
enum kvm_pgtable_prot prot, cur;
void *virt = __hyp_va(addr);
enum pkvm_page_state state;
kvm_pte_t pte;
int ret;
if (!addr_is_memory(addr))
return -EINVAL;
hyp_spin_lock(&host_kvm.lock);
hyp_spin_lock(&pkvm_pgd_lock);
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
goto unlock;
if (!pte)
goto map_shared;
/*
* Check attributes in the host stage-2 PTE. We need the page to be:
* - mapped RWX as we're sharing memory;
* - not borrowed, as that implies absence of ownership.
* Otherwise, we can't let it got through
*/
cur = kvm_pgtable_stage2_pte_prot(pte);
prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
ret = -EPERM;
goto unlock;
}
state = pkvm_getstate(cur);
if (state == PKVM_PAGE_OWNED)
goto map_shared;
/*
* Tolerate double-sharing the same page, but this requires
* cross-checking the hypervisor stage-1.
*/
if (state != PKVM_PAGE_SHARED_OWNED) {
ret = -EPERM;
goto unlock;
}
ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
goto unlock;
/*
* If the page has been shared with the hypervisor, it must be
* already mapped as SHARED_BORROWED in its stage-1.
*/
cur = kvm_pgtable_hyp_pte_prot(pte);
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
if (!check_prot(cur, prot, ~prot))
ret = -EPERM;
goto unlock;
map_shared:
/*
* If the page is not yet shared, adjust mappings in both page-tables
* while both locks are held.
*/
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
BUG_ON(ret);
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
BUG_ON(ret);
unlock:
hyp_spin_unlock(&pkvm_pgd_lock);
hyp_spin_unlock(&host_kvm.lock);
host_unlock_component();
return ret;
}
@@ -451,3 +384,421 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
/* This corresponds to locking order */
enum pkvm_component_id {
PKVM_ID_HOST,
PKVM_ID_HYP,
};
struct pkvm_mem_transition {
u64 nr_pages;
struct {
enum pkvm_component_id id;
/* Address in the initiator's address space */
u64 addr;
union {
struct {
/* Address in the completer's address space */
u64 completer_addr;
} host;
};
} initiator;
struct {
enum pkvm_component_id id;
} completer;
};
struct pkvm_mem_share {
const struct pkvm_mem_transition tx;
const enum kvm_pgtable_prot completer_prot;
};
struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
};
static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct check_walk_data *d = arg;
kvm_pte_t pte = *ptep;
if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
}
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct check_walk_data *data)
{
struct kvm_pgtable_walker walker = {
.cb = __check_page_state_visitor,
.arg = data,
.flags = KVM_PGTABLE_WALK_LEAF,
};
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
{
if (!kvm_pte_valid(pte) && pte)
return PKVM_NOPAGE;
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
struct check_walk_data d = {
.desired = state,
.get_page_state = host_get_page_state,
};
hyp_assert_lock_held(&host_kvm.lock);
return check_page_state_range(&host_kvm.pgt, addr, size, &d);
}
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
return host_stage2_idmap_locked(addr, size, prot);
}
static int host_request_owned_transition(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}
static int host_request_unshare(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}
static int host_initiate_share(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}
static int host_initiate_unshare(u64 *completer_addr,
const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
u64 addr = tx->initiator.addr;
*completer_addr = tx->initiator.host.completer_addr;
return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
}
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
{
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
static int __hyp_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
struct check_walk_data d = {
.desired = state,
.get_page_state = hyp_get_page_state,
};
hyp_assert_lock_held(&pkvm_pgd_lock);
return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
tx->initiator.id != PKVM_ID_HOST);
}
static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
enum kvm_pgtable_prot perms)
{
u64 size = tx->nr_pages * PAGE_SIZE;
if (perms != PAGE_HYP)
return -EPERM;
if (__hyp_ack_skip_pgtable_check(tx))
return 0;
return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}
static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
if (__hyp_ack_skip_pgtable_check(tx))
return 0;
return __hyp_check_page_state_range(addr, size,
PKVM_PAGE_SHARED_BORROWED);
}
static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
enum kvm_pgtable_prot perms)
{
void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
enum kvm_pgtable_prot prot;
prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
return pkvm_create_mappings_locked(start, end, prot);
}
static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
u64 size = tx->nr_pages * PAGE_SIZE;
int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
return (ret != size) ? -EFAULT : 0;
}
static int check_share(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_request_owned_transition(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
break;
default:
ret = -EINVAL;
}
return ret;
}
static int __do_share(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_initiate_share(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
break;
default:
ret = -EINVAL;
}
return ret;
}
/*
* do_share():
*
* The page owner grants access to another component with a given set
* of permissions.
*
* Initiator: OWNED => SHARED_OWNED
* Completer: NOPAGE => SHARED_BORROWED
*/
static int do_share(struct pkvm_mem_share *share)
{
int ret;
ret = check_share(share);
if (ret)
return ret;
return WARN_ON(__do_share(share));
}
static int check_unshare(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_request_unshare(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_ack_unshare(completer_addr, tx);
break;
default:
ret = -EINVAL;
}
return ret;
}
static int __do_unshare(struct pkvm_mem_share *share)
{
const struct pkvm_mem_transition *tx = &share->tx;
u64 completer_addr;
int ret;
switch (tx->initiator.id) {
case PKVM_ID_HOST:
ret = host_initiate_unshare(&completer_addr, tx);
break;
default:
ret = -EINVAL;
}
if (ret)
return ret;
switch (tx->completer.id) {
case PKVM_ID_HYP:
ret = hyp_complete_unshare(completer_addr, tx);
break;
default:
ret = -EINVAL;
}
return ret;
}
/*
* do_unshare():
*
* The page owner revokes access from another component for a range of
* pages which were previously shared using do_share().
*
* Initiator: SHARED_OWNED => OWNED
* Completer: SHARED_BORROWED => NOPAGE
*/
static int do_unshare(struct pkvm_mem_share *share)
{
int ret;
ret = check_unshare(share);
if (ret)
return ret;
return WARN_ON(__do_unshare(share));
}
int __pkvm_host_share_hyp(u64 pfn)
{
int ret;
u64 host_addr = hyp_pfn_to_phys(pfn);
u64 hyp_addr = (u64)__hyp_va(host_addr);
struct pkvm_mem_share share = {
.tx = {
.nr_pages = 1,
.initiator = {
.id = PKVM_ID_HOST,
.addr = host_addr,
.host = {
.completer_addr = hyp_addr,
},
},
.completer = {
.id = PKVM_ID_HYP,
},
},
.completer_prot = PAGE_HYP,
};
host_lock_component();
hyp_lock_component();
ret = do_share(&share);
hyp_unlock_component();
host_unlock_component();
return ret;
}
int __pkvm_host_unshare_hyp(u64 pfn)
{
int ret;
u64 host_addr = hyp_pfn_to_phys(pfn);
u64 hyp_addr = (u64)__hyp_va(host_addr);
struct pkvm_mem_share share = {
.tx = {
.nr_pages = 1,
.initiator = {
.id = PKVM_ID_HOST,
.addr = host_addr,
.host = {
.completer_addr = hyp_addr,
},
},
.completer = {
.id = PKVM_ID_HYP,
},
},
.completer_prot = PAGE_HYP,
};
host_lock_component();
hyp_lock_component();
ret = do_unshare(&share);
hyp_unlock_component();
host_unlock_component();
return ret;
}

View File

@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>
#include <nvhe/early_alloc.h>
@@ -18,11 +19,12 @@
struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;
u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
static u64 __io_map_base;
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{

View File

@@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;

View File

@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
@@ -17,7 +18,6 @@
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
struct hyp_pool hpool;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +27,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -165,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -173,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
/*
* Fix-up the refcount for the page-table pages as the early allocator
* was unable to access the hyp_vmemmap and so the buddy allocator has
* initialised the refcount to '1'.
*/
mm_ops->get_page(ptep);
if (flag != KVM_PGTABLE_WALK_LEAF)
return 0;
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -205,7 +216,8 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pkvm_pgtable.mm_ops,
};
int i, ret;
@@ -240,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
ret = finalize_host_mappings();
if (ret)
goto out;
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
.page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
ret = finalize_host_mappings();
if (ret)
goto out;
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,

View File

@@ -25,7 +25,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>

View File

@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
/*
* Tolerate KVM recreating the exact same mapping, or changing software
* bits if the existing mapping was valid.
*/
if (old == new)
return false;
if (!kvm_pte_valid(old))
return true;
return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
}
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
if (hyp_pte_needs_update(old, new))
smp_store_release(ptep, new);
data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
if (old == new)
return true;
if (!kvm_pte_valid(old))
data->mm_ops->get_page(ptep);
else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
return false;
smp_store_release(ptep, new);
return true;
}
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
mm_ops->get_page(ptep);
return 0;
}
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
struct hyp_unmap_data {
u64 unmapped;
struct kvm_pgtable_mm_ops *mm_ops;
};
static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
kvm_pte_t pte = *ptep, *childp = NULL;
u64 granule = kvm_granule_size(level);
struct hyp_unmap_data *data = arg;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_pte_valid(pte))
return -EINVAL;
if (kvm_pte_table(pte, level)) {
childp = kvm_pte_follow(pte, mm_ops);
if (mm_ops->page_count(childp) != 1)
return 0;
kvm_clear_pte(ptep);
dsb(ishst);
__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
} else {
if (end - addr < granule)
return -EINVAL;
kvm_clear_pte(ptep);
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
data->unmapped += granule;
}
dsb(ish);
isb();
mm_ops->put_page(ptep);
if (childp)
mm_ops->put_page(childp);
return 0;
}
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
struct hyp_unmap_data unmap_data = {
.mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = hyp_unmap_walker,
.arg = &unmap_data,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
if (!pgt->mm_ops->page_count)
return 0;
kvm_pgtable_walk(pgt, addr, size, &walker);
return unmap_data.unmapped;
}
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
kvm_pte_t pte = *ptep;
if (!kvm_pte_valid(pte))
return 0;
mm_ops->put_page(ptep);
if (kvm_pte_table(pte, level))
mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
.flags = KVM_PGTABLE_WALK_TABLE_POST,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
@@ -1116,13 +1178,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
u64 vtcr = arch->vtcr;
u64 vtcr = mmu->arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -1135,7 +1197,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
pgt->mmu = &arch->mmu;
pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;

View File

@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);

View File

@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int idx;
int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -239,6 +239,9 @@ void free_hyp_pgds(void)
static bool kvm_host_owns_hyp_mappings(void)
{
if (is_kernel_in_hyp_mode())
return false;
if (static_branch_likely(&kvm_protected_mode_initialized))
return false;
@@ -281,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
struct hyp_shared_pfn {
u64 pfn;
int count;
struct rb_node node;
};
static DEFINE_MUTEX(hyp_shared_pfns_lock);
static struct rb_root hyp_shared_pfns = RB_ROOT;
static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
struct rb_node **parent)
{
phys_addr_t addr;
struct hyp_shared_pfn *this;
*node = &hyp_shared_pfns.rb_node;
*parent = NULL;
while (**node) {
this = container_of(**node, struct hyp_shared_pfn, node);
*parent = **node;
if (this->pfn < pfn)
*node = &((**node)->rb_left);
else if (this->pfn > pfn)
*node = &((**node)->rb_right);
else
return this;
}
return NULL;
}
static int share_pfn_hyp(u64 pfn)
{
struct rb_node **node, *parent;
struct hyp_shared_pfn *this;
int ret = 0;
mutex_lock(&hyp_shared_pfns_lock);
this = find_shared_pfn(pfn, &node, &parent);
if (this) {
this->count++;
goto unlock;
}
this = kzalloc(sizeof(*this), GFP_KERNEL);
if (!this) {
ret = -ENOMEM;
goto unlock;
}
this->pfn = pfn;
this->count = 1;
rb_link_node(&this->node, parent, node);
rb_insert_color(&this->node, &hyp_shared_pfns);
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
return ret;
}
static int unshare_pfn_hyp(u64 pfn)
{
struct rb_node **node, *parent;
struct hyp_shared_pfn *this;
int ret = 0;
mutex_lock(&hyp_shared_pfns_lock);
this = find_shared_pfn(pfn, &node, &parent);
if (WARN_ON(!this)) {
ret = -ENOENT;
goto unlock;
}
this->count--;
if (this->count)
goto unlock;
rb_erase(&this->node, &hyp_shared_pfns);
kfree(this);
ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
return ret;
}
int kvm_share_hyp(void *from, void *to)
{
phys_addr_t start, end, cur;
u64 pfn;
int ret;
for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
__phys_to_pfn(addr));
if (is_kernel_in_hyp_mode())
return 0;
/*
* The share hcall maps things in the 'fixed-offset' region of the hyp
* VA space, so we can only share physically contiguous data-structures
* for now.
*/
if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
return -EINVAL;
if (kvm_host_owns_hyp_mappings())
return create_hyp_mappings(from, to, PAGE_HYP);
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
end = PAGE_ALIGN(__pa(to));
for (cur = start; cur < end; cur += PAGE_SIZE) {
pfn = __phys_to_pfn(cur);
ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
@@ -296,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
void kvm_unshare_hyp(void *from, void *to)
{
phys_addr_t start, end, cur;
u64 pfn;
if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
return;
start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
end = PAGE_ALIGN(__pa(to));
for (cur = start; cur < end; cur += PAGE_SIZE) {
pfn = __phys_to_pfn(cur);
WARN_ON(unshare_pfn_hyp(pfn));
}
}
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -316,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
if (!kvm_host_owns_hyp_mappings()) {
if (WARN_ON(prot != PAGE_HYP))
return -EPERM;
return pkvm_share_hyp(kvm_kaddr_to_phys(from),
kvm_kaddr_to_phys(to));
}
if (!kvm_host_owns_hyp_mappings())
return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -407,6 +525,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
unsigned long addr;
int ret;
if (is_protected_kvm_enabled())
return -EPERM;
*kaddr = ioremap(phys_addr, size);
if (!*kaddr)
return -ENOMEM;
@@ -516,7 +637,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
if (!pgt)
return -ENOMEM;
err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
mmu->arch = &kvm->arch;
err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -529,7 +651,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
@@ -595,14 +716,14 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int idx;
int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -650,6 +771,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
if (is_protected_kvm_enabled())
return -EPERM;
size += offset_in_page(guest_ipa);
guest_ipa &= PAGE_MASK;
@@ -1463,7 +1587,6 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -1473,25 +1596,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
kvm_mmu_wp_memory_region(kvm, mem->slot);
kvm_mmu_wp_memory_region(kvm, new->id);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
hva_t hva, reg_end;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1624,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
hva = new->userspace_addr;
reg_end = hva + (new->npages << PAGE_SHIFT);
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1661,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
break;
}

View File

@@ -8,10 +8,9 @@
#include <linux/memblock.h>
#include <linux/sort.h>
#include <asm/kvm_host.h>
#include <asm/kvm_pkvm.h>
#include <nvhe/memory.h>
#include <nvhe/mm.h>
#include "hyp_constants.h"
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
@@ -82,7 +81,8 @@ void __init kvm_hyp_reserve(void)
do {
prev = nr_pages;
nr_pages = hyp_mem_pages + prev;
nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
PAGE_SIZE);
nr_pages += __hyp_pgtable_max_pages(nr_pages);
} while (nr_pages != prev);
hyp_mem_pages += nr_pages;

View File

@@ -30,6 +30,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
case ID_AA64DFR0_PMUVER_8_1:
case ID_AA64DFR0_PMUVER_8_4:
case ID_AA64DFR0_PMUVER_8_5:
case ID_AA64DFR0_PMUVER_8_7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
@@ -902,7 +903,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
*/
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
int i;
unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {

View File

@@ -46,7 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
kvm_vcpu_block(vcpu);
kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
@@ -109,7 +109,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
/*
* Make sure the reset request is observed if the change to
* power_state is observed.
* power_off is observed.
*/
smp_wmb();
@@ -121,8 +121,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
{
int i, matching_cpus = 0;
unsigned long mpidr;
int matching_cpus = 0;
unsigned long i, mpidr;
unsigned long target_affinity;
unsigned long target_affinity_mask;
unsigned long lowest_affinity_level;
@@ -164,7 +164,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
{
int i;
unsigned long i;
struct kvm_vcpu *tmp;
/*

View File

@@ -94,22 +94,31 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
void *buf;
unsigned int vl;
size_t reg_sz;
int ret;
vl = vcpu->arch.sve_max_vl;
/*
* Responsibility for these properties is shared between
* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
* kvm_arm_init_sve(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
vl > VL_ARCH_MAX))
return -EIO;
buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
reg_sz = vcpu_sve_state_size(vcpu);
buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
ret = kvm_share_hyp(buf, buf + reg_sz);
if (ret) {
kfree(buf);
return ret;
}
vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
return 0;
@@ -141,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kfree(vcpu->arch.sve_state);
void *sve_state = vcpu->arch.sve_state;
kvm_vcpu_unshare_task_fp(vcpu);
kvm_unshare_hyp(vcpu, vcpu + 1);
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -170,7 +185,7 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *tmp;
bool is32bit;
int i;
unsigned long i;
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
@@ -193,10 +208,9 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
*
* This function finds the right table above and sets the registers on
* the virtual CPU struct to their architecturally defined reset
* values, except for registers whose reset is deferred until
* kvm_arm_vcpu_finalize().
* This function sets the registers on the virtual CPU struct to their
* architecturally defined reset values, except for registers whose reset is
* deferred until kvm_arm_vcpu_finalize().
*
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
* ioctl or as part of handling a request issued by another VCPU in the PSCI

View File

@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
int i, ret;
struct kvm_vcpu *vcpu;
unsigned long i;
int ret;
if (irqchip_in_kernel(kvm))
return -EEXIST;
@@ -91,7 +92,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.has_run_once)
if (vcpu_has_run_once(vcpu))
goto out_unlock;
}
ret = 0;
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0, i, idx;
int ret = 0, i;
unsigned long idx;
if (vgic_initialized(kvm))
return 0;
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
goto out;
}
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
@@ -370,7 +372,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
unsigned long i;
vgic_debug_destroy(kvm);

View File

@@ -325,7 +325,7 @@ void unlock_all_vcpus(struct kvm *kvm)
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
int c;
unsigned long c;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the

View File

@@ -113,9 +113,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
int intid = val & 0xf;
int targets = (val >> 16) & 0xff;
int mode = (val >> 24) & 0x03;
int c;
struct kvm_vcpu *vcpu;
unsigned long flags;
unsigned long flags, c;
switch (mode) {
case 0x0: /* as specified by targets */

View File

@@ -754,7 +754,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int c, ret = 0;
unsigned long c;
int ret = 0;
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
@@ -763,10 +764,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
}
if (ret) {
/* The current c failed, so we start with the previous one. */
/* The current c failed, so iterate over the previous ones. */
int i;
mutex_lock(&kvm->slots_lock);
for (c--; c >= 0; c--) {
vcpu = kvm_get_vcpu(kvm, c);
for (i = 0; i < c; i++) {
vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
@@ -995,10 +998,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
struct kvm_vcpu *c_vcpu;
u16 target_cpus;
u64 mpidr;
int sgi, c;
int sgi;
int vcpu_id = vcpu->vcpu_id;
bool broadcast;
unsigned long flags;
unsigned long c, flags;
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);

View File

@@ -1050,7 +1050,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
return 0;
}
struct kvm_io_device_ops kvm_io_gic_ops = {
const struct kvm_io_device_ops kvm_io_gic_ops = {
.read = dispatch_mmio_read,
.write = dispatch_mmio_write,
};

View File

@@ -34,7 +34,7 @@ struct vgic_register_region {
};
};
extern struct kvm_io_device_ops kvm_io_gic_ops;
extern const struct kvm_io_device_ops kvm_io_gic_ops;
#define VGIC_ACCESS_8bit 1
#define VGIC_ACCESS_32bit 2

View File

@@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
kvm_err("Need to set vgic cpu and dist addresses first\n");
kvm_debug("Need to set vgic cpu and dist addresses first\n");
return -ENXIO;
}
if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
kvm_err("VGIC CPU and dist frames overlap\n");
kvm_debug("VGIC CPU and dist frames overlap\n");
return -EINVAL;
}
@@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
int ret;
u32 vtr;
if (is_protected_kvm_enabled()) {
kvm_err("GICv2 not supported in protected mode\n");
return -ENXIO;
}
if (!info->vctrl.start) {
kvm_err("GICH not present in the firmware table\n");
return -ENXIO;

View File

@@ -542,24 +542,24 @@ int vgic_v3_map_resources(struct kvm *kvm)
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
int c;
unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
kvm_debug("vcpu %d redistributor base not set\n", c);
kvm_debug("vcpu %ld redistributor base not set\n", c);
return -ENXIO;
}
}
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
kvm_err("Need to set vgic distributor addresses first\n");
kvm_debug("Need to set vgic distributor addresses first\n");
return -ENXIO;
}
if (!vgic_v3_check_base(kvm)) {
kvm_err("VGIC redist and dist frames overlap\n");
kvm_debug("VGIC redist and dist frames overlap\n");
return -EINVAL;
}
@@ -651,7 +651,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)info->vcpu.start);
} else {
} else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.can_emulate_gicv2 = true;
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);

View File

@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int i;
unsigned long i;
kvm_arm_halt_guest(kvm);
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int i, nr_vcpus, ret;
int nr_vcpus, ret;
unsigned long i;
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */

View File

@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int c;
unsigned long c;
/*
* We've injected an interrupt, time to find out who deserves

View File

@@ -898,7 +898,6 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
int kvm_arch_flush_remote_tlb(struct kvm *kvm);

View File

@@ -27,6 +27,7 @@ config KVM
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
select INTERVAL_TREE
help
Support for hosting Guest kernels.

View File

@@ -2,9 +2,10 @@
# Makefile for KVM support for MIPS
#
include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
kvm-y += mips.o emulate.o entry.o \

View File

@@ -952,7 +952,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
if (!vcpu->arch.pending_exceptions) {
kvm_vz_lose_htimer(vcpu);
vcpu->arch.wait = 1;
kvm_vcpu_block(vcpu);
kvm_vcpu_halt(vcpu);
/*
* We we are runnable, then definitely go off to user space to

View File

@@ -120,7 +120,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
s->status |= data;
irq.cpu = id;
irq.irq = 6;
kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
break;
case CORE0_CLEAR_OFF:
@@ -128,7 +128,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
if (!s->status) {
irq.cpu = id;
irq.irq = -6;
kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
}
break;

View File

@@ -171,25 +171,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
}
void kvm_mips_free_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_vcpu_destroy(vcpu);
}
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
static void kvm_mips_free_gpa_pt(struct kvm *kvm)
{
/* It should always be safe to remove after flushing the whole range */
@@ -199,7 +180,7 @@ static void kvm_mips_free_gpa_pt(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_mips_free_vcpus(kvm);
kvm_destroy_vcpus(kvm);
kvm_mips_free_gpa_pt(kvm);
}
@@ -233,25 +214,20 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
int needs_flush;
kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
__func__, kvm, mem->slot, mem->guest_phys_addr,
mem->memory_size, mem->userspace_addr);
/*
* If dirty page logging is enabled, write protect all pages in the slot
* ready for dirty logging.
@@ -498,7 +474,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
if (irq->cpu == -1)
dvcpu = vcpu;
else
dvcpu = vcpu->kvm->vcpus[irq->cpu];
dvcpu = kvm_get_vcpu(vcpu->kvm, irq->cpu);
if (intr == 2 || intr == 3 || intr == 4 || intr == 6) {
kvm_mips_callbacks->queue_io_int(dvcpu, irq);

View File

@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <linux/sys_soc.h>
#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/bug.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
@@ -22,6 +24,35 @@
static void *detect_magic __initdata = detect_memory_region;
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
struct resource_entry *entry;
resource_size_t mask;
entry = resource_list_first_type(&bridge->windows, IORESOURCE_MEM);
if (!entry) {
pr_err("Cannot get memory resource\n");
return -EINVAL;
}
if (mips_cps_numiocu(0)) {
/*
* Hardware doesn't accept mask values with 1s after
* 0s (e.g. 0xffef), so warn if that's happen
*/
mask = ~(entry->res->end - entry->res->start) & CM_GCR_REGn_MASK_ADDRMASK;
WARN_ON(mask && BIT(ffz(~mask)) - 1 != ~mask);
write_gcr_reg1_base(entry->res->start);
write_gcr_reg1_mask(mask | CM_GCR_REGn_MASK_CMTGT_IOCU0);
pr_info("PCI coherence region base: 0x%08llx, mask/settings: 0x%08llx\n",
(unsigned long long)read_gcr_reg1_base(),
(unsigned long long)read_gcr_reg1_mask());
}
return 0;
}
phys_addr_t mips_cpc_default_phys_base(void)
{
panic("Cannot detect cpc address");

View File

@@ -752,6 +752,7 @@ struct kvm_vcpu_arch {
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
struct rcuwait wait;
struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
@@ -867,6 +868,5 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_exit(void) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#endif /* __POWERPC_KVM_HOST_H__ */

View File

@@ -200,12 +200,11 @@ extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
@@ -274,12 +273,11 @@ struct kvmppc_ops {
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
int (*prepare_memory_region)(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change);
void (*commit_memory_region)(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change);
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);

View File

@@ -26,6 +26,7 @@ config KVM
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select INTERVAL_TREE
config KVM_BOOK3S_HANDLER
bool

View File

@@ -4,11 +4,8 @@
#
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
include $(srctree)/virt/kvm/Makefile.kvm
common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
@@ -125,9 +122,8 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
kvm-y += $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_E500V2) += kvm.o
obj-$(CONFIG_KVM_E500MC) += kvm.o

View File

@@ -847,21 +847,19 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
const struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
change);
return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
}
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)

View File

@@ -337,7 +337,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
int i;
unsigned long i;
struct kvm_vcpu *v;
/* flush this VA on all cpus */

View File

@@ -530,7 +530,7 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
bool large)
{
u64 mask = 0xFFFFFFFFFULL;
long i;
unsigned long i;
struct kvm_vcpu *v;
dprintk("KVM MMU: tlbie(0x%lx)\n", va);

Some files were not shown because too many files have changed in this diff Show More