mirror of
https://github.com/hardkernel/linux.git
synced 2026-03-24 19:40:21 +09:00
Merge tag 'asoc-fix-v5.18-rc3' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v5.18 A collection of fixes that came in since the merge window, plus one new device ID for an x86 laptop. Nothing that really stands out with particularly big impact outside of the affected device.
This commit is contained in:
12
.mailmap
12
.mailmap
@@ -70,6 +70,8 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
|
||||
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
||||
Brian Avery <b.avery@hp.com>
|
||||
Brian King <brking@us.ibm.com>
|
||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
|
||||
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
|
||||
@@ -79,6 +81,9 @@ Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
|
||||
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
|
||||
Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
|
||||
Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
|
||||
Christian Brauner <brauner@kernel.org> <christian@brauner.io>
|
||||
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
|
||||
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||
Christophe Ricard <christophe.ricard@gmail.com>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
|
||||
@@ -183,6 +188,8 @@ Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
|
||||
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
|
||||
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
|
||||
Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
|
||||
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
|
||||
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
|
||||
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
||||
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
||||
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
|
||||
@@ -206,12 +213,14 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||
Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
||||
Koushik <raghavendra.koushik@neterion.com>
|
||||
Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
|
||||
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
|
||||
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
|
||||
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
|
||||
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
|
||||
@@ -329,6 +338,9 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
|
||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
|
||||
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
|
||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
|
||||
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
|
||||
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
|
||||
Rudolf Marek <R.Marek@sh.cvut.cz>
|
||||
Rui Saraiva <rmps@joel.ist.utl.pt>
|
||||
|
||||
6
CREDITS
6
CREDITS
@@ -895,6 +895,12 @@ S: 3000 FORE Drive
|
||||
S: Warrendale, Pennsylvania 15086
|
||||
S: USA
|
||||
|
||||
N: Ludovic Desroches
|
||||
E: ludovic.desroches@microchip.com
|
||||
D: Maintainer for ARM/Microchip (AT91) SoC support
|
||||
D: Author of ADC, pinctrl, XDMA and SDHCI drivers for this platform
|
||||
S: France
|
||||
|
||||
N: Martin Devera
|
||||
E: devik@cdi.cz
|
||||
W: http://luxik.cdi.cz/~devik/qos/
|
||||
|
||||
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
@@ -0,0 +1,10 @@
|
||||
What: /proc/i8k
|
||||
Date: November 2001
|
||||
KernelVersion: 2.4.14
|
||||
Contact: Pali Rohár <pali@kernel.org>
|
||||
Description: Legacy interface for getting/setting sensor information like
|
||||
fan speed, temperature, serial number, hotkey status etc
|
||||
on Dell Laptops.
|
||||
Since the driver is now using the standard hwmon sysfs interface,
|
||||
the procfs interface is deprecated.
|
||||
Users: https://github.com/vitorafsr/i8kutils
|
||||
37
Documentation/ABI/removed/sysfs-mce
Normal file
37
Documentation/ABI/removed/sysfs-mce
Normal file
@@ -0,0 +1,37 @@
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||
Contact: Borislav Petkov <bp@suse.de>
|
||||
Date: Dec, 2021
|
||||
Description:
|
||||
Unused and obsolete after the advent of recoverable machine
|
||||
checks (see last sentence below) and those are present since
|
||||
2010 (Nehalem).
|
||||
|
||||
Original description:
|
||||
|
||||
The entries appear for each CPU, but they are truly shared
|
||||
between all CPUs.
|
||||
|
||||
Tolerance level. When a machine check exception occurs for a
|
||||
non corrected machine check the kernel can take different
|
||||
actions.
|
||||
|
||||
Since machine check exceptions can happen any time it is
|
||||
sometimes risky for the kernel to kill a process because it
|
||||
defies normal kernel locking rules. The tolerance level
|
||||
configures how hard the kernel tries to recover even at some
|
||||
risk of deadlock. Higher tolerant values trade potentially
|
||||
better uptime with the risk of a crash or even corruption
|
||||
(for tolerant >= 3).
|
||||
|
||||
== ===========================================================
|
||||
0 always panic on uncorrected errors, log corrected errors
|
||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||
3 never panic or SIGBUS, log all errors (for testing only)
|
||||
== ===========================================================
|
||||
|
||||
Default: 1
|
||||
|
||||
Note this only makes a difference if the CPU allows recovery
|
||||
from a machine check exception. Current x86 CPUs generally
|
||||
do not.
|
||||
@@ -155,6 +155,55 @@ Description:
|
||||
last zone of the device which may be smaller.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
The presence of this subdirectory of /sys/block/<disk>/queue/
|
||||
indicates that the device supports inline encryption. This
|
||||
subdirectory contains files which describe the inline encryption
|
||||
capabilities of the device. For more information about inline
|
||||
encryption, refer to Documentation/block/inline-encryption.rst.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/max_dun_bits
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] This file shows the maximum length, in bits, of data unit
|
||||
numbers accepted by the device in inline encryption requests.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/modes/<mode>
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] For each crypto mode (i.e., encryption/decryption
|
||||
algorithm) the device supports with inline encryption, a file
|
||||
will exist at this location. It will contain a hexadecimal
|
||||
number that is a bitmask of the supported data unit sizes, in
|
||||
bytes, for that crypto mode.
|
||||
|
||||
Currently, the crypto modes that may be supported are:
|
||||
|
||||
* AES-256-XTS
|
||||
* AES-128-CBC-ESSIV
|
||||
* Adiantum
|
||||
|
||||
For example, if a device supports AES-256-XTS inline encryption
|
||||
with data unit sizes of 512 and 4096 bytes, the file
|
||||
/sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
|
||||
will contain "0x1200".
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/num_keyslots
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] This file shows the number of keyslots the device has for
|
||||
use with inline encryption.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/dax
|
||||
Date: June 2016
|
||||
Contact: linux-block@vger.kernel.org
|
||||
|
||||
@@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus
|
||||
Description: internal kernel map of CPUs within the same die.
|
||||
Values: hexadecimal bitmask.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/topology/ppin
|
||||
Description: per-socket protected processor inventory number
|
||||
Values: hexadecimal.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
|
||||
Description: human-readable list of CPUs within the same die.
|
||||
The format is like 0-3, 8-11, 14,17.
|
||||
|
||||
@@ -113,3 +113,144 @@ Description:
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||
Description:
|
||||
This sysfs interface allows the user to configure features at
|
||||
runtime. The user can enable or disable features running at
|
||||
firmware as well as the user can configure the parameters of
|
||||
the features at runtime. The supported features are over
|
||||
temperature and external watchdog. Here, the external watchdog
|
||||
is completely different than the /dev/watchdog as the external
|
||||
watchdog is running on the firmware and it is used to monitor
|
||||
the health of firmware not APU(Linux). Also, the external
|
||||
watchdog is interfaced outside of the zynqmp soc.
|
||||
|
||||
The supported config IDs for the feature configuration are:
|
||||
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||
disable the over temperature feature.
|
||||
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||
over temperature limit in Degree Celsius.
|
||||
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||
the external watchdog feature.
|
||||
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||
external watchdog feature.
|
||||
|
||||
Usage:
|
||||
|
||||
Select over temperature config ID to enable/disable feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check over temperature config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 1.
|
||||
|
||||
Select over temperature config ID to configure OT limit
|
||||
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check over temperature config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 2.
|
||||
|
||||
Select external watchdog config ID to enable/disable feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check external watchdog config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 3.
|
||||
|
||||
Select external watchdog config ID to configure time interval
|
||||
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check external watchdog config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 4.
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||
Description:
|
||||
This sysfs interface allows the user to configure features at runtime.
|
||||
The user can enable or disable features running at firmware.
|
||||
Also, the user can configure the parameters of the features
|
||||
at runtime. The supported features are over temperature and
|
||||
external watchdog. Here, the external watchdog is completely
|
||||
different than the /dev/watchdog as the external watchdog is
|
||||
running on the firmware and it is used to monitor the health
|
||||
of firmware not APU(Linux). Also, the external watchdog is
|
||||
interfaced outside of the zynqmp soc.
|
||||
|
||||
By default the features are disabled in the firmware. The user
|
||||
can enable features by querying appropriate config id of the
|
||||
features.
|
||||
|
||||
The default limit for the over temperature is 90 Degree Celsius.
|
||||
The default timer interval for the external watchdog is 570ms.
|
||||
|
||||
The supported config IDs for the feature configuration are:
|
||||
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||
disable the over temperature feature.
|
||||
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||
over temperature limit in Degree Celsius.
|
||||
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||
the external watchdog feature.
|
||||
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||
external watchdog feature.
|
||||
|
||||
Usage:
|
||||
|
||||
Enable over temperature feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature feature is enabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 1.
|
||||
|
||||
Disable over temperature feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature feature is disabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 0.
|
||||
|
||||
Configure over temperature limit to 50 Degree Celsius
|
||||
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 50 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature limit is configured or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 50.
|
||||
|
||||
Enable external watchdog feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog feature is enabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 1.
|
||||
|
||||
Disable external watchdog feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog feature is disabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 0.
|
||||
|
||||
Configure external watchdog timer interval to 500ms
|
||||
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 500 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog timer interval is configured or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 500.
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
@@ -6,7 +6,7 @@ Description:
|
||||
|
||||
===================== =======================================
|
||||
c_chmask capture channel mask
|
||||
c_srate capture sampling rate
|
||||
c_srate list of capture sampling rates (comma-separated)
|
||||
c_ssize capture sample size (bytes)
|
||||
c_mute_present capture mute control enable
|
||||
c_volume_present capture volume control enable
|
||||
@@ -17,7 +17,7 @@ Description:
|
||||
c_volume_res capture volume control resolution
|
||||
(in 1/256 dB)
|
||||
p_chmask playback channel mask
|
||||
p_srate playback sampling rate
|
||||
p_srate list of playback sampling rates (comma-separated)
|
||||
p_ssize playback sample size (bytes)
|
||||
p_mute_present playback mute control enable
|
||||
p_volume_present playback volume control enable
|
||||
@@ -29,4 +29,5 @@ Description:
|
||||
(in 1/256 dB)
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
===================== =======================================
|
||||
|
||||
@@ -6,8 +6,9 @@ Description:
|
||||
|
||||
===================== =======================================
|
||||
c_chmask capture channel mask
|
||||
c_srate capture sampling rate
|
||||
c_srate list of capture sampling rates (comma-separated)
|
||||
c_ssize capture sample size (bytes)
|
||||
c_hs_bint capture bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||
c_sync capture synchronization type
|
||||
(async/adaptive)
|
||||
c_mute_present capture mute control enable
|
||||
@@ -20,8 +21,9 @@ Description:
|
||||
(in 1/256 dB)
|
||||
fb_max maximum extra bandwidth in async mode
|
||||
p_chmask playback channel mask
|
||||
p_srate playback sampling rate
|
||||
p_srate list of playback sampling rates (comma-separated)
|
||||
p_ssize playback sample size (bytes)
|
||||
p_hs_bint playback bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||
p_mute_present playback mute control enable
|
||||
p_volume_present playback volume control enable
|
||||
p_volume_min playback volume control min value
|
||||
@@ -32,4 +34,5 @@ Description:
|
||||
(in 1/256 dB)
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
===================== =======================================
|
||||
|
||||
@@ -12,24 +12,7 @@ What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
|
||||
Date: May 2020
|
||||
KernelVersion: 5.8
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Allow the root user to disable/enable in runtime the clock
|
||||
gating mechanism in Gaudi. Due to how Gaudi is built, the
|
||||
clock gating needs to be disabled in order to access the
|
||||
registers of the TPC and MME engines. This is sometimes needed
|
||||
during debug by the user and hence the user needs this option.
|
||||
The user can supply a bitmask value, each bit represents
|
||||
a different engine to disable/enable its clock gating feature.
|
||||
The bitmask is composed of 20 bits:
|
||||
|
||||
======= ============
|
||||
0 - 7 DMA channels
|
||||
8 - 11 MME engines
|
||||
12 - 19 TPC engines
|
||||
======= ============
|
||||
|
||||
The bit's location of a specific engine can be determined
|
||||
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
|
||||
are defined in uapi habanalabs.h file in enum gaudi_engine_id
|
||||
Description: This setting is now deprecated as clock gating is handled solely by the f/w
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
||||
Date: Jan 2019
|
||||
@@ -239,6 +222,7 @@ KernelVersion: 5.6
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the stop-on-error option for the device engines. Value of
|
||||
"0" is for disable, otherwise enable.
|
||||
Relevant only for GOYA and GAUDI.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
||||
Date: Sep 2021
|
||||
|
||||
@@ -1,140 +1,150 @@
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE cluster.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE cluster.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Write the HPRE core selection in the cluster into this file,
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Write the HPRE core selection in the cluster into this file,
|
||||
and then we can read the debug information of the core.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: HPRE cores debug registers read clear control. 1 means enable
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: HPRE cores debug registers read clear control. 1 means enable
|
||||
register read clear, otherwise 0. Writing to this file has no
|
||||
functional effect, only enable or disable counters clear after
|
||||
reading of these registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One HPRE controller has one PF and multiple VFs, each function
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One HPRE controller has one PF and multiple VFs, each function
|
||||
has a QM. Select the QM which below qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
The HPRE driver supports configuring each function's QoS; the driver
|
||||
supports writing a <bdf> value to alg_qos in the host, such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reads alg_qos to
|
||||
get related QoS in the host and VM, such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the HPRE.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the QM.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump debug registers from the QM.
|
||||
Available for PF and VF in host. VF in guest currently only
|
||||
has one debug register.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM may contain multiple queues. Select specific queue to
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM may contain multiple queues. Select specific queue to
|
||||
show its debug registers in above regs.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: QM debug registers(regs) read clear control. 1 means enable
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: QM debug registers(regs) read clear control. 1 means enable
|
||||
register read clear, otherwise 0.
|
||||
Writing to this file has no functional effect, only enable or
|
||||
disable counters clear after reading of these registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
QM task completion.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
Four states: initiated, started, stopped and closed.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests sent
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests sent
|
||||
with returning busy.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of completed but error requests.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of completed but error requests.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of invalid requests being received.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of invalid requests being received.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Set the threshold time for counting the request which is
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Set the threshold time for counting the request which is
|
||||
processed longer than the threshold.
|
||||
0: disable(default), 1: 1 microsecond.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of time out requests.
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of time out requests.
|
||||
Available for both PF and VF, and take no other effect on HPRE.
|
||||
|
||||
@@ -1,113 +1,123 @@
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
the SEC debug registers.
|
||||
0: disable, 1: enable.
|
||||
Only available for PF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One SEC controller has one PF and multiple VFs, each function
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One SEC controller has one PF and multiple VFs, each function
|
||||
has a QM. This file can be used to select the QM which below
|
||||
qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of QM related debug registers.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
SEC driver supports to configure each function's QoS, the driver
|
||||
supports to write <bdf> value to alg_qos in the host. Such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reading alg_qos to
|
||||
get related QoS in the host and VM, Such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of QM related debug registers.
|
||||
Available for PF and VF in host. VF in guest currently only
|
||||
has one debug register.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM of SEC may contain multiple queues. Select specific
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM of SEC may contain multiple queues. Select specific
|
||||
queue to show its debug registers in above 'regs'.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Enabling/disabling of clear action after reading
|
||||
the SEC's QM debug registers.
|
||||
0: disable, 1: enable.
|
||||
Only available for PF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
QM task completion.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
Four states: initiated, started, stopped and closed.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests sent with returning busy.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests sent with returning busy.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of BD type error requests
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of BD type error requests
|
||||
to be received.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of invalid requests being received.
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of invalid requests being received.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of completed but marked error requests
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of completed but marked error requests
|
||||
to be received.
|
||||
Available for both PF and VF, and take no other effect on SEC.
|
||||
|
||||
@@ -1,114 +1,124 @@
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of compression cores related debug registers.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of compression cores related debug registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of decompression cores related debug registers.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of decompression cores related debug registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Compression/decompression core debug registers read clear
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Compression/decompression core debug registers read clear
|
||||
control. 1 means enable register read clear, otherwise 0.
|
||||
Writing to this file has no functional effect, only enable or
|
||||
disable counters clear after reading of these registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/current_qm
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One ZIP controller has one PF and multiple VFs, each function
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/current_qm
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One ZIP controller has one PF and multiple VFs, each function
|
||||
has a QM. Select the QM which below qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of QM related debug registers.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
ZIP driver supports to configure each function's QoS, the driver
|
||||
supports to write <bdf> value to alg_qos in the host. Such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reading alg_qos to
|
||||
get related QoS in the host and VM, Such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump of QM related debug registers.
|
||||
Available for PF and VF in host. VF in guest currently only
|
||||
has one debug register.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM may contain multiple queues. Select specific queue to
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: One QM may contain multiple queues. Select specific queue to
|
||||
show its debug registers in above regs.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: QM debug registers(regs) read clear control. 1 means enable
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: QM debug registers(regs) read clear control. 1 means enable
|
||||
register read clear, otherwise 0.
|
||||
Writing to this file has no functional effect, only enable or
|
||||
disable counters clear after reading of these registers.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of invalid interrupts for
|
||||
QM task completion.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of QM async event queue interrupts.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of interrupts for QM abnormal event.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of queue allocation errors.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the number of failed QM mailbox commands.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the status of the QM.
|
||||
Four states: initiated, started, stopped and closed.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of sent requests.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of received requests.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests received
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of requests received
|
||||
with returning busy.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of BD type error requests
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
|
||||
Date: Apr 2020
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: Dump the total number of BD type error requests
|
||||
to be received.
|
||||
Available for both PF and VF, and take no other effect on ZIP.
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
What: /sys/bus/cxl/flush
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(WO) If userspace manually unbinds a port the kernel schedules
|
||||
all descendant memdevs for unbind. Writing '1' to this attribute
|
||||
flushes that work.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/firmware_version
|
||||
Date: December, 2020
|
||||
KernelVersion: v5.12
|
||||
@@ -25,6 +34,24 @@ Description:
|
||||
identically named field in the Identify Memory Device Output
|
||||
Payload in the CXL-2.0 specification.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/serial
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(RO) 64-bit serial number per the PCIe Device Serial Number
|
||||
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
|
||||
Memory Device PCIe Capabilities and Extended Capabilities.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/numa_node
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(RO) If NUMA is enabled and the platform has affinitized the
|
||||
host PCI device for this memory device, emit the CPU node
|
||||
affinity for this device.
|
||||
|
||||
What: /sys/bus/cxl/devices/*/devtype
|
||||
Date: June, 2021
|
||||
KernelVersion: v5.14
|
||||
@@ -34,6 +61,15 @@ Description:
|
||||
the same value communicated in the DEVTYPE environment variable
|
||||
for uevents for devices on the "cxl" bus.
|
||||
|
||||
What: /sys/bus/cxl/devices/*/modalias
|
||||
Date: December, 2021
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
CXL device objects export the modalias attribute which mirrors
|
||||
the same value communicated in the MODALIAS environment variable
|
||||
for uevents for devices on the "cxl" bus.
|
||||
|
||||
What: /sys/bus/cxl/devices/portX/uport
|
||||
Date: June, 2021
|
||||
KernelVersion: v5.14
|
||||
|
||||
@@ -476,6 +476,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||
@@ -1213,6 +1214,32 @@ Description:
|
||||
number or direction is not specified, applies to all channels of
|
||||
this type.
|
||||
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Similar to in_accel_mag[_y][_rising|_falling]_en, but the event
|
||||
value is relative to a reference magnitude. The reference magnitude
|
||||
includes the gravitational acceleration.
|
||||
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_value
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value to which the reference magnitude of the channel is
|
||||
compared. If the axis is not specified, it applies to all channels
|
||||
of this type.
|
||||
|
||||
What: /sys/.../events/in_steps_change_en
|
||||
KernelVersion: 4.0
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
@@ -1252,6 +1279,10 @@ Description:
|
||||
Actually start the buffer capture up. Will start trigger
|
||||
if first device and appropriate.
|
||||
|
||||
Note that it might be impossible to configure other attributes,
|
||||
(e.g.: events, scale, sampling rate) if they impact the currently
|
||||
active buffer capture session.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/bufferY
|
||||
KernelVersion: 5.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
|
||||
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
@@ -0,0 +1,13 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_en
|
||||
KernelVersion: 5.14
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Used to enable an output for balancing cells for time
|
||||
controlled via in_voltageY-voltageZ_balance_switch_timer.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_timer
|
||||
KernelVersion: 5.14
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Time in seconds for which balance switch will be turned on.
|
||||
Multiple of 71.5 seconds.
|
||||
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
@@ -0,0 +1,86 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Dither enable. Write 1 to enable dither or 0 to disable it. This is useful
|
||||
for changing the dither parameters. The way it should be done is:
|
||||
|
||||
- disable dither operation;
|
||||
- change dither parameters (eg: frequency, phase...);
|
||||
- enable dither operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This raw, unscaled value refers to the dither signal amplitude.
|
||||
The same scale as in out_voltageY_raw applies. However, the
|
||||
offset might be different as it's always 0 for this attribute.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Available range for dither raw amplitude values.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_offset
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Offset applied to out_voltageY_dither_raw. Read only attribute
|
||||
always set to 0.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sets the dither signal frequency. Units are in Hz.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither frequency.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sets the dither signal phase. Units are in Radians.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither phase.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
|
||||
useful when one wants to change the DAC output codes. The way it should
|
||||
be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_raw0 and out_voltageY_raw1;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
It has the same meaning as out_voltageY_raw. This attribute is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
|
||||
as in out_voltageY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW toggle. This attribute is specific to toggle
|
||||
enabled channels and allows to toggle between out_voltageY_raw0
|
||||
and out_voltageY_raw1 through software. Writing 0 will select
|
||||
out_voltageY_raw0 while 1 selects out_voltageY_raw1.
|
||||
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
@@ -0,0 +1,23 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_coarse
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_I) with coarse steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_coarse
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_Q) with coarse steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_fine
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_I) with fine steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_fine
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_Q) with fine steps.
|
||||
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
@@ -0,0 +1,28 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity<id>_setup
|
||||
Date: November 2021
|
||||
KernelVersion: 5.17
|
||||
Contact: Gwendal Grignou <gwendal@chromium.org>
|
||||
Description:
|
||||
SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
|
||||
defines if the input is
|
||||
+ not connected (HZ),
|
||||
+ grounded (GD),
|
||||
+ connected to an antenna where it can act as a base
|
||||
(DS - data shield), or measured input (MI).
|
||||
|
||||
The sensor rotates measurement across 4 phases
|
||||
(PH0, PH1, PH2, PH3), where the inputs are configured
|
||||
and then measured.
|
||||
|
||||
By default, during the first phase, [PH0], CS0 is measured,
|
||||
while CS1 and CS2 are used as shields.
|
||||
`cat in_proximity0_setup` returns "MI,DS,DS".
|
||||
[PH1], CS1 is measured, CS0 and CS2 are shield:
|
||||
`cat in_proximity1_setup` returns "DS,MI,DS".
|
||||
[PH2], CS2 is measured, CS0 and CS1 are shield:
|
||||
`cat in_proximity2_setup` returns "DS,DS,MI".
|
||||
[PH3], CS1 and CS2 are measured (combo mode):
|
||||
`cat in_proximity3_setup` returns "DS,MI,MI".
|
||||
|
||||
Note, these are the chip defaults. Hardware layout will most
|
||||
likely dictate different output. The entry is read-only.
|
||||
@@ -6,3 +6,38 @@ Description:
|
||||
|
||||
The libnvdimm sub-system implements a common sysfs interface for
|
||||
platform nvdimm resources. See Documentation/driver-api/nvdimm/.
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/format
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) Attribute group to describe the magic bits
|
||||
that go into perf_event_attr.config for a particular pmu.
|
||||
(See ABI/testing/sysfs-bus-event_source-devices-format).
|
||||
|
||||
Each attribute under this group defines a bit range of the
|
||||
perf_event_attr.config. Supported attribute is listed
|
||||
below::
|
||||
event = "config:0-4" - event ID
|
||||
|
||||
For example::
|
||||
ctl_res_cnt = "event=0x1"
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/events
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) Attribute group to describe performance monitoring events
|
||||
for the nvdimm memory device. Each attribute in this group
|
||||
describes a single performance monitoring event supported by
|
||||
this nvdimm pmu. The name of the file is the name of the event.
|
||||
(See ABI/testing/sysfs-bus-event_source-devices-events). A
|
||||
listing of the events supported by a given nvdimm provider type
|
||||
can be found in Documentation/driver-api/nvdimm/$provider.
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/cpumask
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) This sysfs file exposes the cpumask which is designated to
|
||||
retrieve nvdimm pmu event counter data.
|
||||
|
||||
@@ -61,3 +61,15 @@ Description:
|
||||
* "CchRHCnt" : Cache Read Hit Count
|
||||
* "CchWHCnt" : Cache Write Hit Count
|
||||
* "FastWCnt" : Fast Write Count
|
||||
|
||||
What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject
|
||||
Date: Jan, 2022
|
||||
KernelVersion: v5.17
|
||||
Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
|
||||
Description:
|
||||
(RO) Reports the health bitmap inject bitmap that is applied to
|
||||
bitmap received from PowerVM via the H_SCM_HEALTH. This is used
|
||||
to forcibly set specific bits returned from Hcall. These are then
|
||||
used to simulate various health or shutdown states for an nvdimm
|
||||
and are set by user-space tools like ndctl by issuing a PAPR DSM.
|
||||
|
||||
|
||||
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
@@ -0,0 +1,16 @@
|
||||
What: /sys/bus/peci/rescan
|
||||
Date: July 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||
Description:
|
||||
Writing a non-zero value to this attribute will
|
||||
initiate scan for PECI devices on all PECI controllers
|
||||
in the system.
|
||||
|
||||
What: /sys/bus/peci/devices/<controller_id>-<device_addr>/remove
|
||||
Date: July 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||
Description:
|
||||
Writing a non-zero value to this attribute will
|
||||
remove the PECI device and any of its children.
|
||||
@@ -246,6 +246,51 @@ Description:
|
||||
that is being referenced (e.g hdd0, hdd1 etc)
|
||||
This attribute defaults to device 0.
|
||||
|
||||
certificate:
|
||||
signature:
|
||||
save_signature:
|
||||
These attributes are used for certificate based authentication. This is
|
||||
used in conjunction with a signing server as an alternative to password
|
||||
based authentication.
|
||||
The user writes to the attribute(s) with a BASE64 encoded string obtained
|
||||
from the signing server.
|
||||
The attributes can be displayed to check the stored value.
|
||||
|
||||
Some usage examples:
|
||||
Installing a certificate to enable feature:
|
||||
echo <supervisor password> > authentication/Admin/current_password
|
||||
echo <signed certificate> > authentication/Admin/certificate
|
||||
|
||||
Updating the installed certificate:
|
||||
echo <signature> > authentication/Admin/signature
|
||||
echo <signed certificate> > authentication/Admin/certificate
|
||||
|
||||
Removing the installed certificate:
|
||||
echo <signature> > authentication/Admin/signature
|
||||
echo '' > authentication/Admin/certificate
|
||||
|
||||
Changing a BIOS setting:
|
||||
echo <signature> > authentication/Admin/signature
|
||||
echo <save signature> > authentication/Admin/save_signature
|
||||
echo Enable > attribute/PasswordBeep/current_value
|
||||
|
||||
You cannot enable certificate authentication if a supervisor password
|
||||
has not been set.
|
||||
Clearing the certificate results in no bios-admin authentication method
|
||||
being configured allowing anyone to make changes.
|
||||
After any of these operations the system must reboot for the changes to
|
||||
take effect.
|
||||
|
||||
certificate_thumbprint:
|
||||
Read only attribute used to display the MD5, SHA1 and SHA256 thumbprints
|
||||
for the certificate installed in the BIOS.
|
||||
|
||||
certificate_to_password:
|
||||
Write only attribute used to switch from certificate based authentication
|
||||
back to password based.
|
||||
Usage:
|
||||
echo <signature> > authentication/Admin/signature
|
||||
echo <password> > authentication/Admin/certificate_to_password
|
||||
|
||||
|
||||
What: /sys/class/firmware-attributes/*/attributes/pending_reboot
|
||||
|
||||
@@ -9,6 +9,14 @@ Description:
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/hwmon/hwmonX/label
|
||||
Description:
|
||||
A descriptive label that allows uniquely identifying a
|
||||
device within the system.
|
||||
The contents of the label are free-form.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/hwmon/hwmonX/update_interval
|
||||
Description:
|
||||
The interval at which the chip will update readings.
|
||||
|
||||
@@ -380,13 +380,17 @@ Description:
|
||||
algorithm to adjust the charge rate dynamically, without
|
||||
any user configuration required. "Custom" means that the charger
|
||||
uses the charge_control_* properties as configuration for some
|
||||
different algorithm.
|
||||
different algorithm. "Long Life" means the charger reduces its
|
||||
charging rate in order to prolong the battery health. "Bypass"
|
||||
means the charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall adaptor to
|
||||
perform the power conversion externally.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
Valid values:
|
||||
"Unknown", "N/A", "Trickle", "Fast", "Standard",
|
||||
"Adaptive", "Custom"
|
||||
"Adaptive", "Custom", "Long Life", "Bypass"
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/charge_term_current
|
||||
Date: July 2014
|
||||
@@ -468,6 +472,7 @@ Description:
|
||||
auto: Charge normally, respect thresholds
|
||||
inhibit-charge: Do not charge while AC is attached
|
||||
force-discharge: Force discharge while AC is attached
|
||||
================ ====================================
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/technology
|
||||
Date: May 2007
|
||||
|
||||
@@ -203,7 +203,7 @@ Description:
|
||||
|
||||
- for generic ACPI: should be "Fan", "Processor" or "LCD"
|
||||
- for memory controller device on intel_menlow platform:
|
||||
should be "Memory controller".
|
||||
should be "Memory controller".
|
||||
|
||||
RO, Required
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id
|
||||
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
|
||||
/sys/devices/system/cpu/cpuX/topology/ppin
|
||||
Date: December 2008
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: CPU topology files that describe a logical CPU's relationship
|
||||
@@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship
|
||||
thread_siblings_list: human-readable list of cpuX's hardware
|
||||
threads within the same core as cpuX
|
||||
|
||||
ppin: human-readable Protected Processor Identification
|
||||
Number of the socket the cpu# belongs to. There should be
|
||||
one per physical_package_id. File is readable only to
|
||||
admin.
|
||||
|
||||
See Documentation/admin-guide/cputopology.rst for more information.
|
||||
|
||||
|
||||
@@ -662,6 +668,7 @@ Description: Preferred MTE tag checking mode
|
||||
|
||||
================ ==============================================
|
||||
"sync" Prefer synchronous mode
|
||||
"asymm" Prefer asymmetric mode
|
||||
"async" Prefer asynchronous mode
|
||||
================ ==============================================
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/*/uart*
|
||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/uart\*
|
||||
Date: September 2021
|
||||
Contact: Oskar Senft <osk@google.com>
|
||||
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
||||
@@ -9,7 +9,7 @@ Description: Selects the RX source of the UARTx device.
|
||||
depends on the selected file.
|
||||
|
||||
e.g.
|
||||
cat /sys/bus/platform/drivers/aspeed-uart-routing/*.uart_routing/uart1
|
||||
cat /sys/bus/platform/drivers/aspeed-uart-routing/\*.uart_routing/uart1
|
||||
[io1] io2 io3 io4 uart2 uart3 uart4 io6
|
||||
|
||||
In this case, UART1 gets its input from IO1 (physical serial port 1).
|
||||
@@ -17,7 +17,7 @@ Description: Selects the RX source of the UARTx device.
|
||||
Users: OpenBMC. Proposed changes should be mailed to
|
||||
openbmc@lists.ozlabs.org
|
||||
|
||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/*/io*
|
||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/io\*
|
||||
Date: September 2021
|
||||
Contact: Oskar Senft <osk@google.com>
|
||||
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
||||
|
||||
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
@@ -0,0 +1,9 @@
|
||||
What: /sys/bus/platform/drivers/eud/.../enable
|
||||
Date: February 2022
|
||||
Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
|
||||
Description:
|
||||
The Enable/Disable sysfs interface for Embedded
|
||||
USB Debugger(EUD). This enables and disables the
|
||||
EUD based on a 1 or a 0 value. By enabling EUD,
|
||||
the user is able to activate the mini-usb hub of
|
||||
EUD for debug and trace capabilities.
|
||||
@@ -69,6 +69,12 @@ KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the device's version from the eFuse
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/fw_os_ver
|
||||
Date: Dec 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the firmware OS running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
@@ -115,7 +121,7 @@ What: /sys/class/habanalabs/hl<n>/infineon_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's power supply F/W code
|
||||
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/max_power
|
||||
Date: Jan 2019
|
||||
@@ -220,4 +226,10 @@ What: /sys/class/habanalabs/hl<n>/uboot_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the u-boot running on the device's CPU
|
||||
Description: Version of the u-boot running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/vrm_ver
|
||||
Date: Jan 2022
|
||||
KernelVersion: not yet upstreamed
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
|
||||
|
||||
77
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
77
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
@@ -0,0 +1,77 @@
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
This directory contains interface files for accessing Intel
|
||||
Software Defined Silicon (SDSi) features on a CPU. X
|
||||
represents the socket instance (though not the socket ID).
|
||||
The socket ID is determined by reading the registers file
|
||||
and decoding it per the specification.
|
||||
|
||||
Some files communicate with SDSi hardware through a mailbox.
|
||||
Should the operation fail, one of the following error codes
|
||||
may be returned:
|
||||
|
||||
Error Code Cause
|
||||
---------- -----
|
||||
EIO General mailbox failure. Log may indicate cause.
|
||||
EBUSY Mailbox is owned by another agent.
|
||||
EPERM SDSi capability is not enabled in hardware.
|
||||
EPROTO Failure in mailbox protocol detected by driver.
|
||||
See log for details.
|
||||
EOVERFLOW For provision commands, the size of the data
|
||||
exceeds what may be written.
|
||||
ESPIPE Seeking is not allowed.
|
||||
ETIMEDOUT Failure to complete mailbox transaction in time.
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) The GUID for the registers file. The GUID identifies
|
||||
the layout of the registers file in this directory.
|
||||
Information about the register layouts for a particular GUID
|
||||
is available at http://github.com/intel/intel-sdsi
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/registers
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) Contains information needed by applications to provision
|
||||
a CPU and monitor status information. The layout of this file
|
||||
is determined by the GUID in this directory. Information about
|
||||
the layout for a particular GUID is available at
|
||||
http://github.com/intel/intel-sdsi
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_akc
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(WO) Used to write an Authentication Key Certificate (AKC) to
|
||||
the SDSi NVRAM for the CPU. The AKC is used to authenticate a
|
||||
Capability Activation Payload. Mailbox command.
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_cap
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(WO) Used to write a Capability Activation Payload (CAP) to the
|
||||
SDSi NVRAM for the CPU. CAPs are used to activate a given CPU
|
||||
feature. A CAP is validated by SDSi hardware using a previously
|
||||
provisioned AKC file. Upon successful authentication, the CPU
|
||||
configuration is updated. A cold reboot is required to fully
|
||||
activate the feature. Mailbox command.
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/state_certificate
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) Used to read back the current State Certificate for the CPU
|
||||
from SDSi hardware. The State Certificate contains information
|
||||
about the current licenses on the CPU. Mailbox command.
|
||||
@@ -0,0 +1,29 @@
|
||||
What: /sys/firmware/papr/energy_scale_info
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Directory hosting a set of platform attributes like
|
||||
energy/frequency on Linux running as a PAPR guest.
|
||||
|
||||
Each file in a directory contains a platform
|
||||
attribute hierarchy pertaining to performance/
|
||||
energy-savings mode and processor frequency.
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Energy, frequency attributes directory for POWERVM servers
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/desc
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: String description of the energy attribute of <id>
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/value
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Numeric value of the energy attribute of <id>
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/value_desc
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: String value of the energy attribute of <id>
|
||||
@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
|
||||
0x04 F2FS_IPU_UTIL
|
||||
0x08 F2FS_IPU_SSR_UTIL
|
||||
0x10 F2FS_IPU_FSYNC
|
||||
0x20 F2FS_IPU_ASYNC,
|
||||
0x20 F2FS_IPU_ASYNC
|
||||
0x40 F2FS_IPU_NOCACHE
|
||||
0x80 F2FS_IPU_HONOR_OPU_WRITE
|
||||
==== =================
|
||||
|
||||
Refer segment.h for details.
|
||||
@@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small
|
||||
checkpoint is triggered, and issued during the checkpoint.
|
||||
By default, it is disabled with 0.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_discard_request
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the number of discards a thread will issue at a time.
|
||||
Higher number will allow the discard thread to finish its work
|
||||
faster, at the cost of higher latency for incoming I/O.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/min_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait between
|
||||
issuing discard requests when there are discards to be issued and
|
||||
no I/O aware interruptions occur.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait between
|
||||
issuing discard requests when there are discards to be issued and
|
||||
an I/O aware interruption occurs.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait when there are
|
||||
no discard operations to be issued.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/discard_granularity
|
||||
Date: July 2017
|
||||
Contact: "Chao Yu" <yuchao0@huawei.com>
|
||||
@@ -269,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
|
||||
What: /sys/fs/f2fs/<disk>/gc_urgent
|
||||
Date: August 2017
|
||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||
Description: Do background GC aggressively when set. When gc_urgent = 1,
|
||||
background thread starts to do GC by given gc_urgent_sleep_time
|
||||
interval. When gc_urgent = 2, F2FS will lower the bar of
|
||||
checking idle in order to process outstanding discard commands
|
||||
and GC a little bit aggressively. It is set to 0 by default.
|
||||
Description: Do background GC aggressively when set. Set to 0 by default.
|
||||
gc urgent high(1): does GC forcibly in a period of given
|
||||
gc_urgent_sleep_time and ignores I/O idling check. uses greedy
|
||||
GC approach and turns SSR mode on.
|
||||
gc urgent low(2): lowers the bar of checking I/O idling in
|
||||
order to process outstanding discard commands and GC a
|
||||
little bit aggressively. uses cost benefit GC approach.
|
||||
gc urgent mid(3): does GC forcibly in a period of given
|
||||
gc_urgent_sleep_time and executes a mid level of I/O idling check.
|
||||
uses cost benefit GC approach.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
|
||||
Date: August 2017
|
||||
@@ -430,6 +463,7 @@ Description: Show status of f2fs superblock in real time.
|
||||
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
|
||||
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
|
||||
0x2000 SBI_IS_RESIZEFS resizefs is in process
|
||||
0x4000 SBI_IS_FREEZING freezefs is in process
|
||||
====== ===================== =================================
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
|
||||
@@ -503,7 +537,7 @@ Date: July 2021
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Show how many segments have been reclaimed by GC during a specific
|
||||
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
|
||||
3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
|
||||
3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
|
||||
You can re-initialize this value to "0".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_segment_mode
|
||||
@@ -540,3 +574,9 @@ Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: You can set the trial count limit for GC urgent high mode with this value.
|
||||
If GC thread gets to the limit, the mode will turn back to GC normal mode.
|
||||
By default, the value is zero, which means there is no limit like before.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
|
||||
Date: January 2022
|
||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||
Description: Controls max # of node block writes to be used for roll forward
|
||||
recovery. This can limit the roll forward recovery time.
|
||||
|
||||
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
@@ -0,0 +1,274 @@
|
||||
What: /sys/kernel/mm/damon/
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Interface for Data Access MONitoring (DAMON). Contains files
|
||||
for controlling DAMON. For more details on DAMON itself,
|
||||
please refer to Documentation/admin-guide/mm/damon/index.rst.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Interface for privileged users of DAMON. Contains files for
|
||||
controlling DAMON that are aimed to be used by privileged users.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON worker thread (kdamond)
|
||||
named '0' to 'N-1' under the kdamonds/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/state
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing 'on' or 'off' to this file makes the kdamond start or
|
||||
stop, respectively. Reading the file returns the keywords
|
||||
based on the current status. Writing 'update_schemes_stats' to
|
||||
the file updates contents of schemes stats files of the
|
||||
kdamond.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the pid of the kdamond if it is
|
||||
running.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON context named '0' to
|
||||
'N-1' under the contexts/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a keyword for a monitoring operations set ('vaddr' for
|
||||
virtual address spaces monitoring, and 'paddr' for the physical
|
||||
address space monitoring) to this file makes the context use
|
||||
the operations set. Reading the file returns the keyword for
|
||||
the operations set the context is set to use.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the sampling interval of the
|
||||
DAMON context in microseconds as the value. Reading this file
|
||||
returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the aggregation interval of
|
||||
the DAMON context in microseconds as the value. Reading this
|
||||
file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the update interval of the
|
||||
DAMON context in microseconds as the value. Reading this file
|
||||
returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
|
||||
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the minimum number of
|
||||
monitoring regions of the DAMON context as the value. Reading
|
||||
this file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the maximum number of
|
||||
monitoring regions of the DAMON context as the value. Reading
|
||||
this file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON target of the context
|
||||
named '0' to 'N-1' under the targets/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the pid of
|
||||
the target process if the context is for virtual address spaces
|
||||
monitoring, respectively.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for setting each DAMON target memory region of the
|
||||
context named '0' to 'N-1' under the regions/ directory. In
|
||||
case of the virtual address space monitoring, DAMON
|
||||
automatically sets the target memory region based on the target
|
||||
processes' mappings.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the start
|
||||
address of the monitoring region.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the end
|
||||
address of the monitoring region.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON-based operation scheme
|
||||
of the context named '0' to 'N-1' under the schemes/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the action
|
||||
of the scheme.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
size of the scheme's target regions in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
size of the scheme's target regions in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
'nr_accesses' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
'nr_accesses' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
'age' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
'age' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the time
|
||||
quota of the scheme in milliseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the size
|
||||
quota of the scheme in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the quotas
|
||||
charge reset interval of the scheme in milliseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for 'size' in
|
||||
permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for
|
||||
'nr_accesses' in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for 'age' in
|
||||
permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the metric
|
||||
of the watermarks for the scheme. The writable/readable
|
||||
keywords for this file are 'none' for disabling the watermarks
|
||||
feature, or 'free_mem_rate' for the system's global free memory
|
||||
rate in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the metric
|
||||
check interval of the watermarks for the scheme in
|
||||
microseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the high
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the mid
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the low
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of regions that the action
|
||||
of the scheme has tried to be applied.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of regions that the
|
||||
action of the scheme has tried to be applied in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of regions that the action
|
||||
of the scheme has successfully applied.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of regions that the
|
||||
action of the scheme has successfully applied in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of the exceed events of
|
||||
the scheme's quotas.
|
||||
@@ -53,38 +53,6 @@ Description:
|
||||
(but some corrected errors might be still reported
|
||||
in other ways)
|
||||
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||
Contact: Andi Kleen <ak@linux.intel.com>
|
||||
Date: Feb, 2007
|
||||
Description:
|
||||
The entries appear for each CPU, but they are truly shared
|
||||
between all CPUs.
|
||||
|
||||
Tolerance level. When a machine check exception occurs for a
|
||||
non corrected machine check the kernel can take different
|
||||
actions.
|
||||
|
||||
Since machine check exceptions can happen any time it is
|
||||
sometimes risky for the kernel to kill a process because it
|
||||
defies normal kernel locking rules. The tolerance level
|
||||
configures how hard the kernel tries to recover even at some
|
||||
risk of deadlock. Higher tolerant values trade potentially
|
||||
better uptime with the risk of a crash or even corruption
|
||||
(for tolerant >= 3).
|
||||
|
||||
== ===========================================================
|
||||
0 always panic on uncorrected errors, log corrected errors
|
||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||
3 never panic or SIGBUS, log all errors (for testing only)
|
||||
== ===========================================================
|
||||
|
||||
Default: 1
|
||||
|
||||
Note this only makes a difference if the CPU allows recovery
|
||||
from a machine check exception. Current x86 CPUs generally
|
||||
do not.
|
||||
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
||||
Contact: Andi Kleen <ak@linux.intel.com>
|
||||
Date: Feb, 2007
|
||||
|
||||
@@ -17,6 +17,7 @@ Date: October 2018
|
||||
KernelVersion: 4.20
|
||||
Contact: "Matan Ziv-Av" <matan@svgalib.org>
|
||||
Description:
|
||||
Deprecated, use /sys/class/power_supply/CMB0/charge_control_end_threshold instead.
|
||||
Maximal battery charge level. Accepted values are 80 or 100.
|
||||
|
||||
What: /sys/devices/platform/lg-laptop/fan_mode
|
||||
|
||||
@@ -37,8 +37,15 @@ Description: (RO) Set of available destinations (sinks) for a SMA
|
||||
PPS2 signal is sent to the PPS2 selector
|
||||
TS1 signal is sent to timestamper 1
|
||||
TS2 signal is sent to timestamper 2
|
||||
TS3 signal is sent to timestamper 3
|
||||
TS4 signal is sent to timestamper 4
|
||||
IRIG signal is sent to the IRIG-B module
|
||||
DCF signal is sent to the DCF module
|
||||
FREQ1 signal is sent to frequency counter 1
|
||||
FREQ2 signal is sent to frequency counter 2
|
||||
FREQ3 signal is sent to frequency counter 3
|
||||
FREQ4 signal is sent to frequency counter 4
|
||||
None signal input is disabled
|
||||
===== ================================================
|
||||
|
||||
What: /sys/class/timecard/ocpN/available_sma_outputs
|
||||
@@ -50,10 +57,16 @@ Description: (RO) Set of available sources for a SMA output signal.
|
||||
10Mhz output is from the 10Mhz reference clock
|
||||
PHC output PPS is from the PHC clock
|
||||
MAC output PPS is from the Miniature Atomic Clock
|
||||
GNSS output PPS is from the GNSS module
|
||||
GNSS1 output PPS is from the first GNSS module
|
||||
GNSS2 output PPS is from the second GNSS module
|
||||
IRIG output is from the PHC, in IRIG-B format
|
||||
DCF output is from the PHC, in DCF format
|
||||
GEN1 output is from frequency generator 1
|
||||
GEN2 output is from frequency generator 2
|
||||
GEN3 output is from frequency generator 3
|
||||
GEN4 output is from frequency generator 4
|
||||
GND output is GND
|
||||
VCC output is VCC
|
||||
===== ================================================
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_source
|
||||
@@ -63,6 +76,97 @@ Description: (RW) Contains the current synchronization source used by
|
||||
the PHC. May be changed by writing one of the listed
|
||||
values from the available_clock_sources attribute set.
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_status_drift
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the current drift value used by the firmware
|
||||
for internal disciplining of the atomic clock.
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_status_offset
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the current offset value used by the firmware
|
||||
for internal disciplining of the atomic clock.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Optional directory containing the sysfs nodes for
|
||||
frequency counter <X>.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX/frequency
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the measured frequency over the specified
|
||||
measurement period.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX/seconds
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) Specifies the number of seconds from 0-255 that the
|
||||
frequency should be measured over. Write 0 to disable.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Optional directory containing the sysfs nodes for
|
||||
frequency generator <X>.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/duty
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal duty cycle as a percentage from 1-99.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/period
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal period in nanoseconds.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/phase
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal phase offset in nanoseconds.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/polarity
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal polarity, either 1 or 0.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/running
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Either 0 or 1, showing if the signal generator is running.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/start
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Shows the time in <sec>.<nsec> that the signal generator
|
||||
started running.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/signal
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) Used to start the signal generator, and summarize
|
||||
the current status.
|
||||
|
||||
The signal generator may be started by writing the signal
|
||||
period, followed by the optional signal values. If the
|
||||
optional values are not provided, they default to the current
|
||||
settings, which may be obtained from the other sysfs nodes.
|
||||
|
||||
period [duty [phase [polarity]]]
|
||||
|
||||
echo 500000000 > signal # 1/2 second period
|
||||
echo 1000000 40 100 > signal
|
||||
echo 0 > signal # turn off generator
|
||||
|
||||
Period and phase are specified in nanoseconds. Duty cycle is
|
||||
a percentage from 1-99. Polarity is 1 or 0.
|
||||
|
||||
Reading this node will return:
|
||||
|
||||
period duty phase polarity start_time
|
||||
|
||||
What: /sys/class/timecard/ocpN/gnss_sync
|
||||
Date: September 2021
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
@@ -126,6 +230,16 @@ Description: (RW) These attributes specify the direction of the signal
|
||||
The 10Mhz reference clock input is currently only valid
|
||||
on SMA1 and may not be combined with other destination sinks.
|
||||
|
||||
What: /sys/class/timecard/ocpN/tod_correction
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) The incoming GNSS signal is in UTC time, and the NMEA
|
||||
format messages do not provide a TAI offset. This sets the
|
||||
correction value for the incoming time.
|
||||
|
||||
If UBX_LS is enabled, this should be 0, and the offset is
|
||||
taken from the UBX-NAV-TIMELS message.
|
||||
|
||||
What: /sys/class/timecard/ocpN/ts_window_adjust
|
||||
Date: September 2021
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
|
||||
@@ -26,7 +26,7 @@ SPHINX_CONF = conf.py
|
||||
PAPER =
|
||||
BUILDDIR = $(obj)/output
|
||||
PDFLATEX = xelatex
|
||||
LATEXOPTS = -interaction=batchmode
|
||||
LATEXOPTS = -interaction=batchmode -no-shell-escape
|
||||
|
||||
ifeq ($(KBUILD_VERBOSE),0)
|
||||
SPHINXOPTS += "-q"
|
||||
|
||||
@@ -278,20 +278,20 @@ appropriate parameters. In general this allows more efficient DMA
|
||||
on systems where System RAM exists above 4G _physical_ address.
|
||||
|
||||
Drivers for all PCI-X and PCIe compliant devices must call
|
||||
pci_set_dma_mask() as they are 64-bit DMA devices.
|
||||
dma_set_mask() as they are 64-bit DMA devices.
|
||||
|
||||
Similarly, drivers must also "register" this capability if the device
|
||||
can directly address "consistent memory" in System RAM above 4G physical
|
||||
address by calling pci_set_consistent_dma_mask().
|
||||
can directly address "coherent memory" in System RAM above 4G physical
|
||||
address by calling dma_set_coherent_mask().
|
||||
Again, this includes drivers for all PCI-X and PCIe compliant devices.
|
||||
Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
|
||||
64-bit DMA capable for payload ("streaming") data but not control
|
||||
("consistent") data.
|
||||
("coherent") data.
|
||||
|
||||
|
||||
Setup shared control data
|
||||
-------------------------
|
||||
Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
|
||||
Once the DMA masks are set, the driver can allocate "coherent" (a.k.a. shared)
|
||||
memory. See Documentation/core-api/dma-api.rst for a full description of
|
||||
the DMA APIs. This section is just a reminder that it needs to be done
|
||||
before enabling DMA on the device.
|
||||
@@ -367,7 +367,7 @@ steps need to be performed:
|
||||
- Disable the device from generating IRQs
|
||||
- Release the IRQ (free_irq())
|
||||
- Stop all DMA activity
|
||||
- Release DMA buffers (both streaming and consistent)
|
||||
- Release DMA buffers (both streaming and coherent)
|
||||
- Unregister from other subsystems (e.g. scsi or netdev)
|
||||
- Disable device from responding to MMIO/IO Port addresses
|
||||
- Release MMIO/IO Port resource(s)
|
||||
@@ -420,7 +420,7 @@ Once DMA is stopped, clean up streaming DMA first.
|
||||
I.e. unmap data buffers and return buffers to "upstream"
|
||||
owners if there is one.
|
||||
|
||||
Then clean up "consistent" buffers which contain the control data.
|
||||
Then clean up "coherent" buffers which contain the control data.
|
||||
|
||||
See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
|
||||
|
||||
|
||||
@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
|
||||
for the same psi metric can be specified. However for each trigger a separate
|
||||
file descriptor is required to be able to poll it separately from others,
|
||||
therefore for each trigger a separate open() syscall should be made even
|
||||
when opening the same psi interface file.
|
||||
when opening the same psi interface file. Write operations to a file descriptor
|
||||
with an already existing psi trigger will fail with EBUSY.
|
||||
|
||||
Monitors activate only when system enters stall state for the monitored
|
||||
psi metric and deactivate upon exit from the stall state. While system is
|
||||
|
||||
@@ -60,3 +60,31 @@ For example::
|
||||
|
||||
When a given field is not populated or its value provided by the platform
|
||||
firmware is invalid, the "not-defined" string is shown instead of the value.
|
||||
|
||||
ACPI Fan Fine Grain Control
|
||||
=============================
|
||||
|
||||
When _FIF object specifies support for fine grain control, then fan speed
|
||||
can be set from 0 to 100% with the recommended minimum "step size" via
|
||||
_FSL object. User can adjust fan speed using thermal sysfs cooling device.
|
||||
|
||||
Here user can look at fan performance states for a reference speed (speed_rpm)
|
||||
and set it by changing cooling device cur_state. If the fine grain control
|
||||
is supported then user can also adjust to some other speeds which are
|
||||
not defined in the performance states.
|
||||
|
||||
The support of fine grain control is presented via sysfs attribute
|
||||
"fine_grain_control". If fine grain control is present, this attribute
|
||||
will show "1" otherwise "0".
|
||||
|
||||
This sysfs attribute is presented in the same directory as performance states.
|
||||
|
||||
ACPI Fan Performance Feedback
|
||||
=============================
|
||||
|
||||
The optional _FST object provides status information for the fan device.
|
||||
This includes a field to provide the current fan speed in revolutions per minute
|
||||
at which the fan is rotating.
|
||||
|
||||
This speed is presented in the sysfs using the attribute "fan_speed_rpm",
|
||||
in the same directory as performance states.
|
||||
|
||||
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
|
||||
|
||||
echo /dev/sda5 > /sys/block/zramX/backing_dev
|
||||
|
||||
before disksize setting. It supports only partition at this moment.
|
||||
If admin wants to use incompressible page writeback, they could do via::
|
||||
before disksize setting. It supports only partitions at this moment.
|
||||
If admin wants to use incompressible page writeback, they could do it via::
|
||||
|
||||
echo huge > /sys/block/zramX/writeback
|
||||
|
||||
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
|
||||
|
||||
echo idle > /sys/block/zramX/writeback
|
||||
|
||||
With the command, zram writeback idle pages from memory to the storage.
|
||||
With the command, zram will writeback idle pages from memory to the storage.
|
||||
|
||||
If admin want to write a specific page in zram device to backing device,
|
||||
If an admin wants to write a specific page in zram device to the backing device,
|
||||
they could write a page index into the interface.
|
||||
|
||||
echo "page_index=1251" > /sys/block/zramX/writeback
|
||||
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
|
||||
|
||||
To overcome the concern, zram supports "writeback_limit" feature.
|
||||
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
|
||||
any writeback. IOW, if admin wants to apply writeback budget, he should
|
||||
any writeback. IOW, if admin wants to apply writeback budget, they should
|
||||
enable writeback_limit_enable via::
|
||||
|
||||
$ echo 1 > /sys/block/zramX/writeback_limit_enable
|
||||
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
|
||||
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
|
||||
assigned via /sys/block/zramX/writeback_limit is meaningless.)
|
||||
|
||||
If admin want to limit writeback as per-day 400M, he could do it
|
||||
If admin wants to limit writeback as per-day 400M, they could do it
|
||||
like below::
|
||||
|
||||
$ MB_SHIFT=20
|
||||
@@ -375,16 +375,16 @@ like below::
|
||||
$ echo 1 > /sys/block/zram0/writeback_limit_enable
|
||||
|
||||
If admins want to allow further write again once the budget is exhausted,
|
||||
he could do it like below::
|
||||
they could do it like below::
|
||||
|
||||
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
||||
/sys/block/zram0/writeback_limit
|
||||
|
||||
If admin wants to see remaining writeback budget since last set::
|
||||
If an admin wants to see the remaining writeback budget since last set::
|
||||
|
||||
$ cat /sys/block/zramX/writeback_limit
|
||||
|
||||
If admin want to disable writeback limit, he could do::
|
||||
If an admin wants to disable writeback limit, they could do::
|
||||
|
||||
$ echo 0 > /sys/block/zramX/writeback_limit_enable
|
||||
|
||||
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
|
||||
writeback happened until you reset the zram to allocate extra writeback
|
||||
budget in next setting is user's job.
|
||||
|
||||
If admin wants to measure writeback count in a certain period, he could
|
||||
If admin wants to measure writeback count in a certain period, they could
|
||||
know it via /sys/block/zram0/bd_stat's 3rd column.
|
||||
|
||||
memory tracking
|
||||
|
||||
@@ -64,6 +64,7 @@ Brief summary of control files.
|
||||
threads
|
||||
cgroup.procs show list of processes
|
||||
cgroup.event_control an interface for event_fd()
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
memory.usage_in_bytes show current usage for memory
|
||||
(See 5.5 for details)
|
||||
memory.memsw.usage_in_bytes show current usage for memory+Swap
|
||||
@@ -75,6 +76,7 @@ Brief summary of control files.
|
||||
memory.max_usage_in_bytes show max memory usage recorded
|
||||
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
||||
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
memory.stat show various statistics
|
||||
memory.use_hierarchy set/show hierarchical account enabled
|
||||
This knob is deprecated and shouldn't be
|
||||
|
||||
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
|
||||
Amount of memory used to cache filesystem data,
|
||||
including tmpfs and shared memory.
|
||||
|
||||
kernel (npn)
|
||||
Amount of total kernel memory, including
|
||||
(kernel_stack, pagetables, percpu, vmalloc, slab) in
|
||||
addition to other kernel memory use cases.
|
||||
|
||||
kernel_stack
|
||||
Amount of memory allocated to kernel stacks.
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ gpio
|
||||
gpio-aggregator
|
||||
sysfs
|
||||
gpio-mockup
|
||||
gpio-sim
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
||||
@@ -60,8 +60,8 @@ privileged data touched during the speculative execution.
|
||||
Spectre variant 1 attacks take advantage of speculative execution of
|
||||
conditional branches, while Spectre variant 2 attacks use speculative
|
||||
execution of indirect branches to leak privileged memory.
|
||||
See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
|
||||
:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
|
||||
See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
|
||||
:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
|
||||
|
||||
Spectre variant 1 (Bounds Check Bypass)
|
||||
---------------------------------------
|
||||
@@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the
|
||||
speculative execution's side effects left in level 1 cache to infer the
|
||||
victim's data.
|
||||
|
||||
Yet another variant 2 attack vector is for the attacker to poison the
|
||||
Branch History Buffer (BHB) to speculatively steer an indirect branch
|
||||
to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
|
||||
associated with the source address of the indirect branch. Specifically,
|
||||
the BHB might be shared across privilege levels even in the presence of
|
||||
Enhanced IBRS.
|
||||
|
||||
Currently the only known real-world BHB attack vector is via
|
||||
unprivileged eBPF. Therefore, it's highly recommended to not enable
|
||||
unprivileged eBPF, especially when eIBRS is used (without retpolines).
|
||||
For a full mitigation against BHB attacks, it's recommended to use
|
||||
retpolines (or eIBRS combined with retpolines).
|
||||
|
||||
Attack scenarios
|
||||
----------------
|
||||
|
||||
@@ -364,13 +377,15 @@ The possible values in this file are:
|
||||
|
||||
- Kernel status:
|
||||
|
||||
==================================== =================================
|
||||
'Not affected' The processor is not vulnerable
|
||||
'Vulnerable' Vulnerable, no mitigation
|
||||
'Mitigation: Full generic retpoline' Software-focused mitigation
|
||||
'Mitigation: Full AMD retpoline' AMD-specific software mitigation
|
||||
'Mitigation: Enhanced IBRS' Hardware-focused mitigation
|
||||
==================================== =================================
|
||||
======================================== =================================
|
||||
'Not affected' The processor is not vulnerable
|
||||
'Mitigation: None' Vulnerable, no mitigation
|
||||
'Mitigation: Retpolines' Use Retpoline thunks
|
||||
'Mitigation: LFENCE' Use LFENCE instructions
|
||||
'Mitigation: Enhanced IBRS' Hardware-focused mitigation
|
||||
'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines
|
||||
'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE
|
||||
======================================== =================================
|
||||
|
||||
- Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
|
||||
used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
|
||||
@@ -583,12 +598,13 @@ kernel command line.
|
||||
|
||||
Specific mitigations can also be selected manually:
|
||||
|
||||
retpoline
|
||||
replace indirect branches
|
||||
retpoline,generic
|
||||
google's original retpoline
|
||||
retpoline,amd
|
||||
AMD-specific minimal thunk
|
||||
retpoline auto pick between generic,lfence
|
||||
retpoline,generic Retpolines
|
||||
retpoline,lfence LFENCE; indirect branch
|
||||
retpoline,amd alias for retpoline,lfence
|
||||
eibrs enhanced IBRS
|
||||
eibrs,retpoline enhanced IBRS + Retpolines
|
||||
eibrs,lfence enhanced IBRS + LFENCE
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2=auto.
|
||||
@@ -599,7 +615,7 @@ kernel command line.
|
||||
spectre_v2=off. Spectre variant 1 mitigations
|
||||
cannot be disabled.
|
||||
|
||||
For spectre_v2_user see :doc:`/admin-guide/kernel-parameters`.
|
||||
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
|
||||
|
||||
Mitigation selection guide
|
||||
--------------------------
|
||||
@@ -681,7 +697,7 @@ AMD white papers:
|
||||
|
||||
.. _spec_ref6:
|
||||
|
||||
[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
|
||||
[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
|
||||
|
||||
ARM white papers:
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ problems and bugs in particular.
|
||||
:maxdepth: 1
|
||||
|
||||
reporting-issues
|
||||
reporting-regressions
|
||||
security-bugs
|
||||
bug-hunting
|
||||
bug-bisect
|
||||
|
||||
@@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long)
|
||||
|
||||
Field 4 -- # of milliseconds spent reading (unsigned int)
|
||||
This is the total number of milliseconds spent by all reads (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 5 -- # of writes completed (unsigned long)
|
||||
This is the total number of writes completed successfully.
|
||||
@@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long)
|
||||
|
||||
Field 8 -- # of milliseconds spent writing (unsigned int)
|
||||
This is the total number of milliseconds spent by all writes (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 9 -- # of I/Os currently in progress (unsigned int)
|
||||
The only field that should go to zero. Incremented as requests are
|
||||
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
|
||||
|
||||
Field 15 -- # of milliseconds spent discarding (unsigned int)
|
||||
This is the total number of milliseconds spent by all discards (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 16 -- # of flush requests completed
|
||||
This is the total number of flush requests completed successfully.
|
||||
|
||||
@@ -146,9 +146,9 @@ System kernel config options
|
||||
CONFIG_SYSFS=y
|
||||
|
||||
Note that "sysfs file system support" might not appear in the "Pseudo
|
||||
filesystems" menu if "Configure standard kernel features (for small
|
||||
systems)" is not enabled in "General Setup." In this case, check the
|
||||
.config file itself to ensure that sysfs is turned on, as follows::
|
||||
filesystems" menu if "Configure standard kernel features (expert users)"
|
||||
is not enabled in "General Setup." In this case, check the .config file
|
||||
itself to ensure that sysfs is turned on, as follows::
|
||||
|
||||
grep 'CONFIG_SYSFS' .config
|
||||
|
||||
@@ -533,6 +533,10 @@ the following command::
|
||||
|
||||
cp /proc/vmcore <dump-file>
|
||||
|
||||
or use scp to write out the dump file between hosts on a network, e.g::
|
||||
|
||||
scp /proc/vmcore remote_username@remote_ip:<dump-file>
|
||||
|
||||
You can also use makedumpfile utility to write out the dump file
|
||||
with specified options to filter out unwanted contents, e.g::
|
||||
|
||||
|
||||
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
|
||||
addresses in the higher VA range (refer to ARMv8 ARM document for
|
||||
more details).
|
||||
|
||||
MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
Used to get the correct ranges:
|
||||
MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
|
||||
VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
|
||||
VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
|
||||
|
||||
arm
|
||||
===
|
||||
|
||||
|
||||
@@ -724,6 +724,12 @@
|
||||
hvc<n> Use the hypervisor console device <n>. This is for
|
||||
both Xen and PowerPC hypervisors.
|
||||
|
||||
{ null | "" }
|
||||
Use to disable console output, i.e., to have kernel
|
||||
console messages discarded.
|
||||
This must be the only console= parameter used on the
|
||||
kernel command line.
|
||||
|
||||
If the device connected to the port is not a TTY but a braille
|
||||
device, prepend "brl," before the device type, for instance
|
||||
console=brl,ttyS0
|
||||
@@ -944,6 +950,30 @@
|
||||
dump out devices still on the deferred probe list after
|
||||
retrying.
|
||||
|
||||
dell_smm_hwmon.ignore_dmi=
|
||||
[HW] Continue probing hardware even if DMI data
|
||||
indicates that the driver is running on unsupported
|
||||
hardware.
|
||||
|
||||
dell_smm_hwmon.force=
|
||||
[HW] Activate driver even if SMM BIOS signature does
|
||||
not match list of supported models and enable otherwise
|
||||
blacklisted features.
|
||||
|
||||
dell_smm_hwmon.power_status=
|
||||
[HW] Report power status in /proc/i8k
|
||||
(disabled by default).
|
||||
|
||||
dell_smm_hwmon.restricted=
|
||||
[HW] Allow controlling fans only if SYS_ADMIN
|
||||
capability is set.
|
||||
|
||||
dell_smm_hwmon.fan_mult=
|
||||
[HW] Factor to multiply fan speed with.
|
||||
|
||||
dell_smm_hwmon.fan_max=
|
||||
[HW] Maximum configurable fan speed.
|
||||
|
||||
dfltcc= [HW,S390]
|
||||
Format: { on | off | def_only | inf_only | always }
|
||||
on: s390 zlib hardware support for compression on
|
||||
@@ -1435,6 +1465,14 @@
|
||||
as early as possible in order to facilitate early
|
||||
boot debugging.
|
||||
|
||||
ftrace_boot_snapshot
|
||||
[FTRACE] On boot up, a snapshot will be taken of the
|
||||
ftrace ring buffer that can be read at:
|
||||
/sys/kernel/tracing/snapshot.
|
||||
This is useful if you need tracing information from kernel
|
||||
boot up that is likely to be overridden by user space
|
||||
start up functionality.
|
||||
|
||||
ftrace_dump_on_oops[=orig_cpu]
|
||||
[FTRACE] will dump the trace buffers on oops.
|
||||
If no parameter is passed, ftrace will dump
|
||||
@@ -1625,7 +1663,7 @@
|
||||
[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
|
||||
enabled.
|
||||
Allows heavy hugetlb users to free up some more
|
||||
memory (6 * PAGE_SIZE for each 2MB hugetlb page).
|
||||
memory (7 * PAGE_SIZE for each 2MB hugetlb page).
|
||||
Format: { on | off (default) }
|
||||
|
||||
on: enable the feature
|
||||
@@ -1703,17 +1741,6 @@
|
||||
|
||||
i810= [HW,DRM]
|
||||
|
||||
i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
|
||||
indicates that the driver is running on unsupported
|
||||
hardware.
|
||||
i8k.force [HW] Activate i8k driver even if SMM BIOS signature
|
||||
does not match list of supported models.
|
||||
i8k.power_status
|
||||
[HW] Report power status in /proc/i8k
|
||||
(disabled by default)
|
||||
i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
|
||||
capability is set.
|
||||
|
||||
i915.invert_brightness=
|
||||
[DRM] Invert the sense of the variable that is used to
|
||||
set the brightness of the panel backlight. Normally a
|
||||
@@ -2339,13 +2366,35 @@
|
||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||
Default is 0 (don't ignore, but inject #GP)
|
||||
|
||||
kvm.eager_page_split=
|
||||
[KVM,X86] Controls whether or not KVM will try to
|
||||
proactively split all huge pages during dirty logging.
|
||||
Eager page splitting reduces interruptions to vCPU
|
||||
execution by eliminating the write-protection faults
|
||||
and MMU lock contention that would otherwise be
|
||||
required to split huge pages lazily.
|
||||
|
||||
VM workloads that rarely perform writes or that write
|
||||
only to a small region of VM memory may benefit from
|
||||
disabling eager page splitting to allow huge pages to
|
||||
still be used for reads.
|
||||
|
||||
The behavior of eager page splitting depends on whether
|
||||
KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
|
||||
disabled, all huge pages in a memslot will be eagerly
|
||||
split when dirty logging is enabled on that memslot. If
|
||||
enabled, eager page splitting will be performed during
|
||||
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
||||
cleared.
|
||||
|
||||
Eager page splitting currently only supports splitting
|
||||
huge pages mapped by the TDP MMU.
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
kvm.mmu_audit= [KVM] This is a R/W parameter which allows auditing the
|
||||
KVM MMU at runtime.
|
||||
Default is 0 (off)
|
||||
|
||||
kvm.nx_huge_pages=
|
||||
[KVM] Controls the software workaround for the
|
||||
X86_BUG_ITLB_MULTIHIT bug.
|
||||
@@ -2827,6 +2876,9 @@
|
||||
|
||||
For details see: Documentation/admin-guide/hw-vuln/mds.rst
|
||||
|
||||
mem=nn[KMG] [HEXAGON] Set the memory size.
|
||||
Must be specified, otherwise memory size will be 0.
|
||||
|
||||
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
|
||||
Amount of memory to be used in cases as follows:
|
||||
|
||||
@@ -2834,6 +2886,13 @@
|
||||
2 when the kernel is not able to see the whole system memory;
|
||||
3 memory that lies after 'mem=' boundary is excluded from
|
||||
the hypervisor, then assigned to KVM guests.
|
||||
4 to limit the memory available for kdump kernel.
|
||||
|
||||
[ARC,MICROBLAZE] - the limit applies only to low memory,
|
||||
high memory is not affected.
|
||||
|
||||
[ARM64] - only limits memory covered by the linear
|
||||
mapping. The NOMAP regions are not affected.
|
||||
|
||||
[X86] Work as limiting max address. Use together
|
||||
with memmap= to avoid physical address space collisions.
|
||||
@@ -2844,6 +2903,14 @@
|
||||
in above case 3, memory may need be hot added after boot
|
||||
if system memory of hypervisor is not sufficient.
|
||||
|
||||
mem=nn[KMG]@ss[KMG]
|
||||
[ARM,MIPS] - override the memory layout reported by
|
||||
firmware.
|
||||
Define a memory region of size nn[KMG] starting at
|
||||
ss[KMG].
|
||||
Multiple different regions can be specified with
|
||||
multiple mem= parameters on the command line.
|
||||
|
||||
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
||||
memory.
|
||||
|
||||
@@ -3485,8 +3552,7 @@
|
||||
difficult since unequal pointers can no longer be
|
||||
compared. However, if this command-line option is
|
||||
specified, then all normal pointers will have their true
|
||||
value printed. Pointers printed via %pK may still be
|
||||
hashed. This option should only be specified when
|
||||
value printed. This option should only be specified when
|
||||
debugging the kernel. Please do not use on production
|
||||
kernels.
|
||||
|
||||
@@ -3726,6 +3792,11 @@
|
||||
bit 3: print locks info if CONFIG_LOCKDEP is on
|
||||
bit 4: print ftrace buffer
|
||||
bit 5: print all printk messages in buffer
|
||||
bit 6: print all CPUs backtrace (if available in the arch)
|
||||
*Be aware* that this option may print a _lot_ of lines,
|
||||
so there are risks of losing older messages in the log.
|
||||
Use this option carefully, maybe worth to setup a
|
||||
bigger log buffer with "log_buf_len" along with this.
|
||||
|
||||
panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
|
||||
Format: <hex>[,nousertaint]
|
||||
@@ -4356,6 +4427,12 @@
|
||||
fully seed the kernel's CRNG. Default is controlled
|
||||
by CONFIG_RANDOM_TRUST_CPU.
|
||||
|
||||
random.trust_bootloader={on,off}
|
||||
[KNL] Enable or disable trusting the use of a
|
||||
seed passed by the bootloader (if available) to
|
||||
fully seed the kernel's CRNG. Default is controlled
|
||||
by CONFIG_RANDOM_TRUST_BOOTLOADER.
|
||||
|
||||
randomize_kstack_offset=
|
||||
[KNL] Enable or disable kernel stack offset
|
||||
randomization, which provides roughly 5 bits of
|
||||
@@ -4504,6 +4581,8 @@
|
||||
(the least-favored priority). Otherwise, when
|
||||
RCU_BOOST is not set, valid values are 0-99 and
|
||||
the default is zero (non-realtime operation).
|
||||
When RCU_NOCB_CPU is set, also adjust the
|
||||
priority of NOCB callback kthreads.
|
||||
|
||||
rcutree.rcu_nocb_gp_stride= [KNL]
|
||||
Set the number of NOCB callback kthreads in
|
||||
@@ -5361,8 +5440,12 @@
|
||||
Specific mitigations can also be selected manually:
|
||||
|
||||
retpoline - replace indirect branches
|
||||
retpoline,generic - google's original retpoline
|
||||
retpoline,amd - AMD-specific minimal thunk
|
||||
retpoline,generic - Retpolines
|
||||
retpoline,lfence - LFENCE; indirect branch
|
||||
retpoline,amd - alias for retpoline,lfence
|
||||
eibrs - enhanced IBRS
|
||||
eibrs,retpoline - enhanced IBRS + Retpolines
|
||||
eibrs,lfence - enhanced IBRS + LFENCE
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2=auto.
|
||||
|
||||
@@ -38,7 +38,7 @@ FN lock.
|
||||
Battery care limit
|
||||
------------------
|
||||
|
||||
Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
|
||||
Writing 80/100 to /sys/class/power_supply/CMB0/charge_control_end_threshold
|
||||
sets the maximum capacity to charge the battery. Limiting the charge
|
||||
reduces battery capacity loss over time.
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ data from LCD controller (FIMD) through the SoC internal writeback data
|
||||
path. There are multiple FIMC instances in the SoCs (up to 4), having
|
||||
slightly different capabilities, like pixel alignment constraints, rotator
|
||||
availability, LCD writeback support, etc. The driver is located at
|
||||
drivers/media/platform/exynos4-is directory.
|
||||
drivers/media/platform/samsung/exynos4-is directory.
|
||||
|
||||
Supported SoCs
|
||||
--------------
|
||||
|
||||
@@ -284,7 +284,7 @@ tda9887 TDA 9885/6/7 analog IF demodulator
|
||||
tea5761 TEA 5761 radio tuner
|
||||
tea5767 TEA 5767 radio tuner
|
||||
tua9001 Infineon TUA9001 silicon tuner
|
||||
tuner-xc2028 XCeive xc2028/xc3028 tuners
|
||||
xc2028 XCeive xc2028/xc3028 tuners
|
||||
xc4000 Xceive XC4000 silicon tuner
|
||||
xc5000 Xceive XC5000 silicon tuner
|
||||
============ ==================================================
|
||||
|
||||
@@ -33,7 +33,7 @@ reference manual [#f1]_.
|
||||
Entities
|
||||
--------
|
||||
|
||||
imx7-mipi-csi2
|
||||
imx-mipi-csi2
|
||||
--------------
|
||||
|
||||
This is the MIPI CSI-2 receiver entity. It has one sink pad to receive the pixel
|
||||
|
||||
@@ -17,7 +17,7 @@ Introduction
|
||||
------------
|
||||
|
||||
This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
|
||||
driver located under drivers/media/platform/omap3isp. The original driver was
|
||||
driver located under drivers/media/platform/ti/omap3isp. The original driver was
|
||||
written by Texas Instruments but since that it has been rewritten (twice) at
|
||||
Nokia.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ As of Revision AB, the ISS is described in detail in section 8.
|
||||
This driver is supporting **only** the CSI2-A/B interfaces for now.
|
||||
|
||||
It makes use of the Media Controller framework [#f2]_, and inherited most of the
|
||||
code from OMAP3 ISP driver (found under drivers/media/platform/omap3isp/\*),
|
||||
code from OMAP3 ISP driver (found under drivers/media/platform/ti/omap3isp/\*),
|
||||
except that it doesn't need an IOMMU now for ISS buffers memory mapping.
|
||||
|
||||
Supports usage of MMAP buffers only (for now).
|
||||
|
||||
@@ -76,3 +76,16 @@ vimc-capture:
|
||||
|
||||
* 1 Pad sink
|
||||
* 1 Pad source
|
||||
|
||||
Module options
|
||||
--------------
|
||||
|
||||
Vimc has a module parameter to configure the driver.
|
||||
|
||||
* ``allocator=<unsigned int>``
|
||||
|
||||
memory allocator selection, default is 0. It specifies the way buffers
|
||||
will be allocated.
|
||||
|
||||
- 0: vmalloc
|
||||
- 1: dma-contig
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
Detailed Usages
|
||||
===============
|
||||
|
||||
DAMON provides below three interfaces for different users.
|
||||
DAMON provides below interfaces for different users.
|
||||
|
||||
- *DAMON user space tool.*
|
||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||
@@ -14,17 +14,21 @@ DAMON provides below three interfaces for different users.
|
||||
virtual and physical address spaces monitoring. For more detail, please
|
||||
refer to its `usage document
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||
- *debugfs interface.*
|
||||
:ref:`This <debugfs_interface>` is for privileged user space programmers who
|
||||
- *sysfs interface.*
|
||||
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||
features by reading from and writing to special debugfs files. Therefore,
|
||||
you can write and use your personalized DAMON debugfs wrapper programs that
|
||||
reads/writes the debugfs files instead of you. The `DAMON user space tool
|
||||
features by reading from and writing to special sysfs files. Therefore,
|
||||
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
||||
supports both virtual and physical address spaces monitoring. Note that this
|
||||
interface provides only simple :ref:`statistics <damos_stats>` for the
|
||||
monitoring results. For detailed monitoring results, DAMON provides a
|
||||
:ref:`tracepoint <tracepoint>`.
|
||||
- *debugfs interface.*
|
||||
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
|
||||
<sysfs_interface>`. This will be removed after next LTS kernel is released,
|
||||
so users should move to the :ref:`sysfs interface <sysfs_interface>`.
|
||||
- *Kernel Space Programming Interface.*
|
||||
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||
@@ -32,6 +36,340 @@ DAMON provides below three interfaces for different users.
|
||||
DAMON for various address spaces. For detail, please refer to the interface
|
||||
:doc:`document </vm/damon/api>`.
|
||||
|
||||
.. _sysfs_interface:
|
||||
|
||||
sysfs Interface
|
||||
===============
|
||||
|
||||
DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined. It
|
||||
creates multiple directories and files under its sysfs directory,
|
||||
``<sysfs>/kernel/mm/damon/``. You can control DAMON by writing to and reading
|
||||
from the files under the directory.
|
||||
|
||||
For a short example, users can monitor the virtual address space of a given
|
||||
workload as below. ::
|
||||
|
||||
# cd /sys/kernel/mm/damon/admin/
|
||||
# echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
|
||||
# echo vaddr > kdamonds/0/contexts/0/operations
|
||||
# echo 1 > kdamonds/0/contexts/0/targets/nr_targets
|
||||
# echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
|
||||
# echo on > kdamonds/0/state
|
||||
|
||||
Files Hierarchy
|
||||
---------------
|
||||
|
||||
The files hierarchy of DAMON sysfs interface is shown below. In the below
|
||||
figure, parents-children relations are represented with indentations, each
|
||||
directory is having ``/`` suffix, and files in each directory are separated by
|
||||
comma (","). ::
|
||||
|
||||
/sys/kernel/mm/damon/admin
|
||||
│ kdamonds/nr_kdamonds
|
||||
│ │ 0/state,pid
|
||||
│ │ │ contexts/nr_contexts
|
||||
│ │ │ │ 0/operations
|
||||
│ │ │ │ │ monitoring_attrs/
|
||||
│ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
|
||||
│ │ │ │ │ │ nr_regions/min,max
|
||||
│ │ │ │ │ targets/nr_targets
|
||||
│ │ │ │ │ │ 0/pid_target
|
||||
│ │ │ │ │ │ │ regions/nr_regions
|
||||
│ │ │ │ │ │ │ │ 0/start,end
|
||||
│ │ │ │ │ │ │ │ ...
|
||||
│ │ │ │ │ │ ...
|
||||
│ │ │ │ │ schemes/nr_schemes
|
||||
│ │ │ │ │ │ 0/action
|
||||
│ │ │ │ │ │ │ access_pattern/
|
||||
│ │ │ │ │ │ │ │ sz/min,max
|
||||
│ │ │ │ │ │ │ │ nr_accesses/min,max
|
||||
│ │ │ │ │ │ │ │ age/min,max
|
||||
│ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
|
||||
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
||||
│ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
|
||||
│ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
|
||||
│ │ │ │ │ │ ...
|
||||
│ │ │ │ ...
|
||||
│ │ ...
|
||||
|
||||
Root
|
||||
----
|
||||
|
||||
The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
|
||||
has one directory named ``admin``. The directory contains the files for
|
||||
privileged user space programs' control of DAMON. User space tools or daemons
|
||||
having the root permission could use this directory.
|
||||
|
||||
kdamonds/
|
||||
---------
|
||||
|
||||
The monitoring-related information including request specifications and results
|
||||
are called DAMON context. DAMON executes each context with a kernel thread
|
||||
called kdamond, and multiple kdamonds could run in parallel.
|
||||
|
||||
Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
|
||||
controlling the kdamonds exists. In the beginning, this directory has only one
|
||||
file, ``nr_kdamonds``. Writing a number (``N``) to the file creates the number
|
||||
of child directories named ``0`` to ``N-1``. Each directory represents each
|
||||
kdamond.
|
||||
|
||||
kdamonds/<N>/
|
||||
-------------
|
||||
|
||||
In each kdamond directory, two files (``state`` and ``pid``) and one directory
|
||||
(``contexts``) exist.
|
||||
|
||||
Reading ``state`` returns ``on`` if the kdamond is currently running, or
|
||||
``off`` if it is not running. Writing ``on`` or ``off`` makes the kdamond be
|
||||
in the state. Writing ``update_schemes_stats`` to ``state`` file updates the
|
||||
contents of stats files for each DAMON-based operation scheme of the kdamond.
|
||||
For details of the stats, please refer to :ref:`stats section
|
||||
<sysfs_schemes_stats>`.
|
||||
|
||||
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
|
||||
|
||||
``contexts`` directory contains files for controlling the monitoring contexts
|
||||
that this kdamond will execute.
|
||||
|
||||
kdamonds/<N>/contexts/
|
||||
----------------------
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_contexts``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named as
|
||||
``0`` to ``N-1``. Each directory represents each monitoring context. At the
|
||||
moment, only one context per kdamond is supported, so only ``0`` or ``1`` can
|
||||
be written to the file.
|
||||
|
||||
contexts/<N>/
|
||||
-------------
|
||||
|
||||
In each context directory, one file (``operations``) and three directories
|
||||
(``monitoring_attrs``, ``targets``, and ``schemes``) exist.
|
||||
|
||||
DAMON supports multiple types of monitoring operations, including those for
|
||||
virtual address space and the physical address space. You can set and get what
|
||||
type of monitoring operations DAMON will use for the context by writing one of
|
||||
below keywords to, and reading from the file.
|
||||
|
||||
- vaddr: Monitor virtual address spaces of specific processes
|
||||
- paddr: Monitor the physical address space of the system
|
||||
|
||||
contexts/<N>/monitoring_attrs/
|
||||
------------------------------
|
||||
|
||||
Files for specifying attributes of the monitoring including required quality
|
||||
and efficiency of the monitoring are in ``monitoring_attrs`` directory.
|
||||
Specifically, two directories, ``intervals`` and ``nr_regions`` exist in this
|
||||
directory.
|
||||
|
||||
Under ``intervals`` directory, three files for DAMON's sampling interval
|
||||
(``sample_us``), aggregation interval (``aggr_us``), and update interval
|
||||
(``update_us``) exist. You can set and get the values in micro-seconds by
|
||||
writing to and reading from the files.
|
||||
|
||||
Under ``nr_regions`` directory, two files for the lower-bound and upper-bound
|
||||
of DAMON's monitoring regions (``min`` and ``max``, respectively), which
|
||||
controls the monitoring overhead, exist. You can set and get the values by
|
||||
writing to and reading from the files.
|
||||
|
||||
For more details about the intervals and monitoring regions range, please refer
|
||||
to the Design document (:doc:`/vm/damon/design`).
|
||||
|
||||
contexts/<N>/targets/
|
||||
---------------------
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_targets``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each monitoring target.
|
||||
|
||||
targets/<N>/
|
||||
------------
|
||||
|
||||
In each target directory, one file (``pid_target``) and one directory
|
||||
(``regions``) exist.
|
||||
|
||||
If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
|
||||
be a process. You can specify the process to DAMON by writing the pid of the
|
||||
process to the ``pid_target`` file.
|
||||
|
||||
targets/<N>/regions
|
||||
-------------------
|
||||
|
||||
When ``vaddr`` monitoring operations set is being used (``vaddr`` is written to
|
||||
the ``contexts/<N>/operations`` file), DAMON automatically sets and updates the
|
||||
monitoring target regions so that entire memory mappings of target processes
|
||||
can be covered. However, users could want to set the initial monitoring region
|
||||
to specific address ranges.
|
||||
|
||||
In contrast, DAMON does not automatically set and update the monitoring target
|
||||
regions when ``paddr`` monitoring operations set is being used (``paddr`` is
|
||||
written to the ``contexts/<N>/operations``). Therefore, users should set the
|
||||
monitoring target regions by themselves in the case.
|
||||
|
||||
For such cases, users can explicitly set the initial monitoring target regions
|
||||
as they want, by writing proper values to the files under this directory.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_regions``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each initial monitoring target region.
|
||||
|
||||
regions/<N>/
|
||||
------------
|
||||
|
||||
In each region directory, you will find two files (``start`` and ``end``). You
|
||||
can set and get the start and end addresses of the initial monitoring target
|
||||
region by writing to and reading from the files, respectively.
|
||||
|
||||
contexts/<N>/schemes/
|
||||
---------------------
|
||||
|
||||
For usual DAMON-based data access aware memory management optimizations, users
|
||||
would normally want the system to apply a memory management action to a memory
|
||||
region of a specific access pattern. DAMON receives such formalized operation
|
||||
schemes from the user and applies those to the target memory regions. Users
|
||||
can get and set the schemes by reading from and writing to files under this
|
||||
directory.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_schemes``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each DAMON-based operation scheme.
|
||||
|
||||
schemes/<N>/
|
||||
------------
|
||||
|
||||
In each scheme directory, four directories (``access_pattern``, ``quotas``,
|
||||
``watermarks``, and ``stats``) and one file (``action``) exist.
|
||||
|
||||
The ``action`` file is for setting and getting what action you want to apply to
|
||||
memory regions having specific access pattern of the interest. The keywords
|
||||
that can be written to and read from the file and their meaning are as below.
|
||||
|
||||
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``
|
||||
- ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``
|
||||
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
|
||||
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
|
||||
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
||||
- ``stat``: Do nothing but count the statistics
|
||||
|
||||
schemes/<N>/access_pattern/
|
||||
---------------------------
|
||||
|
||||
The target access pattern of each DAMON-based operation scheme is constructed
|
||||
with three ranges including the size of the region in bytes, number of
|
||||
monitored accesses per aggregate interval, and number of aggregated intervals
|
||||
for the age of the region.
|
||||
|
||||
Under the ``access_pattern`` directory, three directories (``sz``,
|
||||
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
|
||||
exist. You can set and get the access pattern for the given scheme by writing
|
||||
to and reading from the ``min`` and ``max`` files under ``sz``,
|
||||
``nr_accesses``, and ``age`` directories, respectively.
|
||||
|
||||
schemes/<N>/quotas/
|
||||
-------------------
|
||||
|
||||
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||
size quota for each scheme. In detail, users can ask DAMON to try to use only
|
||||
up to specific time (``time quota``) for applying the action, and to apply the
|
||||
action to only up to specific amount (``size quota``) of memory regions having
|
||||
the target access pattern within a given time interval (``reset interval``).
|
||||
|
||||
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||
regions of the ``target access pattern`` based on their size, access frequency,
|
||||
and age. For personalized prioritization, users can set the weights for the
|
||||
three properties.
|
||||
|
||||
Under ``quotas`` directory, three files (``ms``, ``bytes``,
|
||||
``reset_interval_ms``) and one directory (``weights``) having three files
|
||||
(``sz_permil``, ``nr_accesses_permil``, and ``age_permil``) in it exist.
|
||||
|
||||
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
||||
``reset interval`` in milliseconds by writing the values to the three files,
|
||||
respectively. You can also set the prioritization weights for size, access
|
||||
frequency, and age in per-thousand unit by writing the values to the three
|
||||
files under the ``weights`` directory.
|
||||
|
||||
schemes/<N>/watermarks/
|
||||
-----------------------
|
||||
|
||||
To allow easy activation and deactivation of each scheme based on system
|
||||
status, DAMON provides a feature called watermarks. The feature receives five
|
||||
values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
|
||||
``metric`` is the system metric such as free memory ratio that can be measured.
|
||||
If the metric value of the system is higher than the value in ``high`` or lower
|
||||
than ``low`` at the moment, the scheme is deactivated. If the value is lower
|
||||
than ``mid``, the scheme is activated.
|
||||
|
||||
Under the watermarks directory, five files (``metric``, ``interval_us``,
|
||||
``high``, ``mid``, and ``low``) for setting each value exist. You can set and
|
||||
get the five values by writing to and reading from the files, respectively.
|
||||
|
||||
Keywords and meanings of those that can be written to the ``metric`` file are
|
||||
as below.
|
||||
|
||||
- none: Ignore the watermarks
|
||||
- free_mem_rate: System's free memory rate (per thousand)
|
||||
|
||||
The ``interval`` should be written in microseconds.
|
||||
|
||||
.. _sysfs_schemes_stats:
|
||||
|
||||
schemes/<N>/stats/
|
||||
------------------
|
||||
|
||||
DAMON counts the total number and bytes of regions that each scheme is tried to
|
||||
be applied, the two numbers for the regions that each scheme is successfully
|
||||
applied, and the total number of the quota limit exceeds. These statistics can
|
||||
be used for online analysis or tuning of the schemes.
|
||||
|
||||
The statistics can be retrieved by reading the files under ``stats`` directory
|
||||
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
|
||||
``qt_exceeds``), respectively. The files are not updated in real time, so you
|
||||
should ask DAMON sysfs interface to update the content of the files for the
|
||||
stats by writing a special keyword, ``update_schemes_stats`` to the relevant
|
||||
``kdamonds/<N>/state`` file.
|
||||
|
||||
Example
|
||||
~~~~~~~
|
||||
|
||||
The commands below apply a scheme saying "If a memory region of size in [4KiB,
|
||||
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
||||
interval in [10, 20], page out the region. For the paging out, use only up to
|
||||
10ms per second, and also don't page out more than 1GiB per second. Under the
|
||||
limitation, page out memory regions having longer age first. Also, check the
|
||||
free memory rate of the system every 5 seconds, start the monitoring and paging
|
||||
out when the free memory rate becomes lower than 50%, but stop it if the free
|
||||
memory rate becomes larger than 60%, or lower than 30%". ::
|
||||
|
||||
# cd <sysfs>/kernel/mm/damon/admin
|
||||
# # populate directories
|
||||
# echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
|
||||
# echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
|
||||
# cd kdamonds/0/contexts/0/schemes/0
|
||||
# # set the basic access pattern and the action
|
||||
# echo 4096 > access_pattern/sz/min
|
||||
# echo 8192 > access_pattern/sz/max
|
||||
# echo 0 > access_pattern/nr_accesses/min
|
||||
# echo 5 > access_pattern/nr_accesses/max
|
||||
# echo 10 > access_pattern/age/min
|
||||
# echo 20 > access_pattern/age/max
|
||||
# echo pageout > action
|
||||
# # set quotas
|
||||
# echo 10 > quotas/ms
|
||||
# echo $((1024*1024*1024)) > quotas/bytes
|
||||
# echo 1000 > quotas/reset_interval_ms
|
||||
# # set watermark
|
||||
# echo free_mem_rate > watermarks/metric
|
||||
# echo 5000000 > watermarks/interval_us
|
||||
# echo 600 > watermarks/high
|
||||
# echo 500 > watermarks/mid
|
||||
# echo 300 > watermarks/low
|
||||
|
||||
Please note that it's highly recommended to use user space tools like `damo
|
||||
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
|
||||
the files as above. Above is only for an example.
|
||||
|
||||
.. _debugfs_interface:
|
||||
|
||||
@@ -47,7 +385,7 @@ Attributes
|
||||
----------
|
||||
|
||||
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
||||
``regions update interval``, and min/max number of monitoring target regions by
|
||||
``update interval``, and min/max number of monitoring target regions by
|
||||
reading from and writing to the ``attrs`` file. To know about the monitoring
|
||||
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
|
||||
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
||||
@@ -108,24 +446,28 @@ In such cases, users can explicitly set the initial monitoring target regions
|
||||
as they want, by writing proper values to the ``init_regions`` file. Each line
|
||||
of the input should represent one region in below form.::
|
||||
|
||||
<target id> <start address> <end address>
|
||||
<target idx> <start address> <end address>
|
||||
|
||||
The ``target id`` should already in ``target_ids`` file, and the regions should
|
||||
be passed in address order. For example, below commands will set a couple of
|
||||
address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
|
||||
region of process 42, and another couple of address ranges, ``20-40`` and
|
||||
``50-100`` as that of process 4242.::
|
||||
The ``target idx`` should be the index of the target in ``target_ids`` file,
|
||||
starting from ``0``, and the regions should be passed in address order. For
|
||||
example, below commands will set a couple of address ranges, ``1-100`` and
|
||||
``100-200`` as the initial monitoring target region of pid 42, which is the
|
||||
first one (index ``0``) in ``target_ids``, and another couple of address
|
||||
ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
|
||||
(index ``1``) in ``target_ids``.::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo "42 1 100
|
||||
42 100 200
|
||||
4242 20 40
|
||||
4242 50 100" > init_regions
|
||||
# cat target_ids
|
||||
42 4242
|
||||
# echo "0 1 100
|
||||
0 100 200
|
||||
1 20 40
|
||||
1 50 100" > init_regions
|
||||
|
||||
Note that this sets the initial monitoring target regions only. In case of
|
||||
virtual memory monitoring, DAMON will automatically update the boundary of the
|
||||
regions after one ``regions update interval``. Therefore, users should set the
|
||||
``regions update interval`` large enough in this case, if they don't want the
|
||||
regions after one ``update interval``. Therefore, users should set the
|
||||
``update interval`` large enough in this case, if they don't want the
|
||||
update.
|
||||
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ There are four components to pagemap:
|
||||
* Bit 56 page exclusively mapped (since 4.2)
|
||||
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
|
||||
:ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
|
||||
* Bits 57-60 zero
|
||||
* Bits 58-60 zero
|
||||
* Bit 61 page is file-page or shared-anon (since 3.5)
|
||||
* Bit 62 page swapped
|
||||
* Bit 63 page present
|
||||
|
||||
@@ -130,9 +130,25 @@ attribute, e.g.::
|
||||
echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
|
||||
|
||||
When zswap same-filled page identification is disabled at runtime, it will stop
|
||||
checking for the same-value filled pages during store operation. However, the
|
||||
existing pages which are marked as same-value filled pages remain stored
|
||||
unchanged in zswap until they are either loaded or invalidated.
|
||||
checking for the same-value filled pages during store operation.
|
||||
In other words, every page will be then considered non-same-value filled.
|
||||
However, the existing pages which are marked as same-value filled pages remain
|
||||
stored unchanged in zswap until they are either loaded or invalidated.
|
||||
|
||||
In some circumstances it might be advantageous to make use of just the zswap
|
||||
ability to efficiently store same-filled pages without enabling the whole
|
||||
compressed page storage.
|
||||
In this case the handling of non-same-value pages by zswap (enabled by default)
|
||||
can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
|
||||
to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
|
||||
It can also be enabled and disabled at runtime using the sysfs
|
||||
``non_same_filled_pages_enabled`` attribute, e.g.::
|
||||
|
||||
echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
|
||||
|
||||
Disabling both ``zswap.same_filled_pages_enabled`` and
|
||||
``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
|
||||
pages by zswap.
|
||||
|
||||
To prevent zswap from shrinking pool when zswap is full and there's a high
|
||||
pressure on swap (this will result in flipping pages in and out zswap pool
|
||||
|
||||
@@ -8,6 +8,7 @@ Performance monitor support
|
||||
:maxdepth: 1
|
||||
|
||||
hisi-pmu
|
||||
hisi-pcie-pmu
|
||||
imx-ddr
|
||||
qcom_l2_pmu
|
||||
qcom_l3_pmu
|
||||
|
||||
@@ -19,7 +19,7 @@ Linux kernel. The new mechanism is based on Collaborative Processor
|
||||
Performance Control (CPPC) which provides finer grain frequency management
|
||||
than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
|
||||
the ACPI P-states driver to manage CPU frequency and clocks with switching
|
||||
only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a
|
||||
only in 3 P-states. CPPC replaces the ACPI P-states controls and allows a
|
||||
flexible, low-latency interface for the Linux kernel to directly
|
||||
communicate the performance hints to hardware.
|
||||
|
||||
@@ -27,7 +27,7 @@ communicate the performance hints to hardware.
|
||||
``ondemand``, etc. to manage the performance hints which are provided by
|
||||
CPPC hardware functionality that internally follows the hardware
|
||||
specification (for details refer to AMD64 Architecture Programmer's Manual
|
||||
Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic
|
||||
Volume 2: System Programming [1]_). Currently, ``amd-pstate`` supports basic
|
||||
frequency control function according to kernel governors on some of the
|
||||
Zen2 and Zen3 processors, and we will implement more AMD specific functions
|
||||
in future after we verify them on the hardware and SBIOS.
|
||||
@@ -41,9 +41,9 @@ continuous, abstract, and unit-less performance value in a scale that is
|
||||
not tied to a specific performance state / frequency. This is an ACPI
|
||||
standard [2]_ which software can specify application performance goals and
|
||||
hints as a relative target to the infrastructure limits. AMD processors
|
||||
provides the low latency register model (MSR) instead of AML code
|
||||
provide the low latency register model (MSR) instead of an AML code
|
||||
interpreter for performance adjustments. ``amd-pstate`` will initialize a
|
||||
``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks
|
||||
``struct cpufreq_driver`` instance, ``amd_pstate_driver``, with the callbacks
|
||||
to manage each performance update behavior. ::
|
||||
|
||||
Highest Perf ------>+-----------------------+ +-----------------------+
|
||||
@@ -91,26 +91,26 @@ AMD CPPC Performance Capability
|
||||
Highest Performance (RO)
|
||||
.........................
|
||||
|
||||
It is the absolute maximum performance an individual processor may reach,
|
||||
This is the absolute maximum performance an individual processor may reach,
|
||||
assuming ideal conditions. This performance level may not be sustainable
|
||||
for long durations and may only be achievable if other platform components
|
||||
are in a specific state; for example, it may require other processors be in
|
||||
are in a specific state; for example, it may require other processors to be in
|
||||
an idle state. This would be equivalent to the highest frequencies
|
||||
supported by the processor.
|
||||
|
||||
Nominal (Guaranteed) Performance (RO)
|
||||
......................................
|
||||
|
||||
It is the maximum sustained performance level of the processor, assuming
|
||||
ideal operating conditions. In absence of an external constraint (power,
|
||||
thermal, etc.) this is the performance level the processor is expected to
|
||||
This is the maximum sustained performance level of the processor, assuming
|
||||
ideal operating conditions. In the absence of an external constraint (power,
|
||||
thermal, etc.), this is the performance level the processor is expected to
|
||||
be able to maintain continuously. All cores/processors are expected to be
|
||||
able to sustain their nominal performance state simultaneously.
|
||||
|
||||
Lowest non-linear Performance (RO)
|
||||
...................................
|
||||
|
||||
It is the lowest performance level at which nonlinear power savings are
|
||||
This is the lowest performance level at which nonlinear power savings are
|
||||
achieved, for example, due to the combined effects of voltage and frequency
|
||||
scaling. Above this threshold, lower performance levels should be generally
|
||||
more energy efficient than higher performance levels. This register
|
||||
@@ -119,7 +119,7 @@ effectively conveys the most efficient performance level to ``amd-pstate``.
|
||||
Lowest Performance (RO)
|
||||
........................
|
||||
|
||||
It is the absolute lowest performance level of the processor. Selecting a
|
||||
This is the absolute lowest performance level of the processor. Selecting a
|
||||
performance level lower than the lowest nonlinear performance level may
|
||||
cause an efficiency penalty but should reduce the instantaneous power
|
||||
consumption of the processor.
|
||||
@@ -149,14 +149,14 @@ a relative number. This can be expressed as percentage of nominal
|
||||
performance (infrastructure max). Below the nominal sustained performance
|
||||
level, desired performance expresses the average performance level of the
|
||||
processor subject to hardware. Above the nominal performance level,
|
||||
processor must provide at least nominal performance requested and go higher
|
||||
the processor must provide at least nominal performance requested and go higher
|
||||
if current operating conditions allow.
|
||||
|
||||
Energy Performance Preference (EPP) (RW)
|
||||
.........................................
|
||||
|
||||
Provides a hint to the hardware if software wants to bias toward performance
|
||||
(0x0) or energy efficiency (0xff).
|
||||
This attribute provides a hint to the hardware if software wants to bias
|
||||
toward performance (0x0) or energy efficiency (0xff).
|
||||
|
||||
|
||||
Key Governors Support
|
||||
@@ -173,35 +173,34 @@ operating frequencies supported by the hardware. Users can check the
|
||||
``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
|
||||
frequency control. It is to fine tune the processor configuration on
|
||||
``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
|
||||
registers adjust_perf callback to implement the CPPC similar performance
|
||||
update behavior. It is initialized by ``sugov_start`` and then populate the
|
||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as
|
||||
the utilization update callback function in CPU scheduler. CPU scheduler
|
||||
will call ``cpufreq_update_util`` and assign the target performance
|
||||
according to the ``struct sugov_cpu`` that utilization update belongs to.
|
||||
Then ``amd-pstate`` updates the desired performance according to the CPU
|
||||
registers the adjust_perf callback to implement performance update behavior
|
||||
similar to CPPC. It is initialized by ``sugov_start`` and then populates the
|
||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as the
|
||||
utilization update callback function in the CPU scheduler. The CPU scheduler
|
||||
will call ``cpufreq_update_util`` and assign the target performance according
|
||||
to the ``struct sugov_cpu`` that the utilization update belongs to.
|
||||
Then, ``amd-pstate`` updates the desired performance according to what the CPU
|
||||
scheduler assigned.
|
||||
|
||||
|
||||
Processor Support
|
||||
=======================
|
||||
|
||||
The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS is
|
||||
not existed at the detected processor, and it uses ``acpi_cpc_valid`` to
|
||||
check the _CPC existence. All Zen based processors support legacy ACPI
|
||||
hardware P-States function, so while the ``amd-pstate`` fails to be
|
||||
initialized, the kernel will fall back to initialize ``acpi-cpufreq``
|
||||
driver.
|
||||
The ``amd-pstate`` initialization will fail if the ``_CPC`` entry in the ACPI
|
||||
SBIOS does not exist in the detected processor. It uses ``acpi_cpc_valid``
|
||||
to check the existence of ``_CPC``. All Zen based processors support the legacy
|
||||
ACPI hardware P-States function, so when ``amd-pstate`` fails initialization,
|
||||
the kernel will fall back to initialize the ``acpi-cpufreq`` driver.
|
||||
|
||||
There are two types of hardware implementations for ``amd-pstate``: one is
|
||||
`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
|
||||
<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_CPPC` feature flag (for
|
||||
details refer to Processor Programming Reference (PPR) for AMD Family
|
||||
19h Model 51h, Revision A1 Processors [3]_) to indicate the different
|
||||
types. ``amd-pstate`` is to register different ``static_call`` instances
|
||||
for different hardware implementations.
|
||||
<perf_cap_>`_. It can use the :c:macro:`X86_FEATURE_CPPC` feature flag to
|
||||
indicate the different types. (For details, refer to the Processor Programming
|
||||
Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors [3]_.)
|
||||
``amd-pstate`` registers different ``static_call`` instances for different
|
||||
hardware implementations.
|
||||
|
||||
Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||
Currently, some of the Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||
future, it will be supported on more and more AMD processors.
|
||||
|
||||
Full MSR Support
|
||||
@@ -210,18 +209,18 @@ Full MSR Support
|
||||
Some new Zen3 processors such as Cezanne provide the MSR registers directly
|
||||
while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
|
||||
``amd-pstate`` can handle the MSR register to implement the fast switch
|
||||
function in ``CPUFreq`` that can shrink latency of frequency control on the
|
||||
interrupt context. The functions with ``pstate_xxx`` prefix represent the
|
||||
operations of MSR registers.
|
||||
function in ``CPUFreq`` that can reduce the latency of frequency control in
|
||||
interrupt context. The functions with a ``pstate_xxx`` prefix represent the
|
||||
operations on MSR registers.
|
||||
|
||||
Shared Memory Support
|
||||
----------------------
|
||||
|
||||
If :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, that means the
|
||||
processor supports shared memory solution. In this case, ``amd-pstate``
|
||||
If the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, the
|
||||
processor supports the shared memory solution. In this case, ``amd-pstate``
|
||||
uses the ``cppc_acpi`` helper methods to implement the callback functions
|
||||
that defined on ``static_call``. The functions with ``cppc_xxx`` prefix
|
||||
represent the operations of acpi cppc helpers for shared memory solution.
|
||||
that are defined on ``static_call``. The functions with the ``cppc_xxx`` prefix
|
||||
represent the operations of ACPI CPPC helpers for the shared memory solution.
|
||||
|
||||
|
||||
AMD P-States and ACPI hardware P-States always can be supported in one
|
||||
@@ -234,7 +233,7 @@ User Space Interface in ``sysfs``
|
||||
==================================
|
||||
|
||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They located in the
|
||||
control its functionality at the system level. They are located in the
|
||||
``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
|
||||
|
||||
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
|
||||
@@ -246,38 +245,38 @@ control its functionality at the system level. They located in the
|
||||
``amd_pstate_highest_perf / amd_pstate_max_freq``
|
||||
|
||||
Maximum CPPC performance and CPU frequency that the driver is allowed to
|
||||
set in percent of the maximum supported CPPC performance level (the highest
|
||||
set, in percent of the maximum supported CPPC performance level (the highest
|
||||
performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some of ASICs, the highest CPPC performance is not the one in the _CPC
|
||||
table, so we need to expose it to sysfs. If boost is not active but
|
||||
supported, this maximum frequency will be larger than the one in
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
|
||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set
|
||||
in percent of the maximum supported CPPC performance level (Please see the
|
||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set,
|
||||
in percent of the maximum supported CPPC performance level. (Please see the
|
||||
lowest non-linear performance in `AMD CPPC Performance Capability
|
||||
<perf_cap_>`_).
|
||||
<perf_cap_>`_.)
|
||||
This attribute is read-only.
|
||||
|
||||
For other performance and frequency values, we can read them back from
|
||||
Other performance and frequency values can be read back from
|
||||
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
||||
|
||||
|
||||
``amd-pstate`` vs ``acpi-cpufreq``
|
||||
======================================
|
||||
|
||||
On majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||
provided by the platform firmware used for CPU performance scaling, but
|
||||
only provides 3 P-states on AMD processors.
|
||||
However, on modern AMD APU and CPU series, it provides the collaborative
|
||||
processor performance control according to ACPI protocol and customize this
|
||||
for AMD platforms. That is fine-grain and continuous frequency range
|
||||
On the majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||
provided by the platform firmware are used for CPU performance scaling, but
|
||||
only provide 3 P-states on AMD processors.
|
||||
However, on modern AMD APU and CPU series, hardware provides the Collaborative
|
||||
Processor Performance Control according to the ACPI protocol and customizes this
|
||||
for AMD platforms. That is, fine-grained and continuous frequency ranges
|
||||
instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
|
||||
module which supports the new AMD P-States mechanism on most of future AMD
|
||||
platforms. The AMD P-States mechanism will be the more performance and energy
|
||||
module which supports the new AMD P-States mechanism on most of the future AMD
|
||||
platforms. The AMD P-States mechanism is the more performance and energy
|
||||
efficient frequency management method on AMD processors.
|
||||
|
||||
Kernel Module Options for ``amd-pstate``
|
||||
@@ -287,25 +286,25 @@ Kernel Module Options for ``amd-pstate``
|
||||
Use a module param (shared_mem) to enable related processors manually with
|
||||
**amd_pstate.shared_mem=1**.
|
||||
Due to the performance issue on the processors with `Shared Memory Support
|
||||
<perf_cap_>`_, so we disable it for the moment and will enable this by default
|
||||
once we address performance issue on this solution.
|
||||
<perf_cap_>`_, we disable it presently and will re-enable this by default
|
||||
once we address the performance issue with this solution.
|
||||
|
||||
The way to check whether current processor is `Full MSR Support <perf_cap_>`_
|
||||
To check whether the current processor is using `Full MSR Support <perf_cap_>`_
|
||||
or `Shared Memory Support <perf_cap_>`_ : ::
|
||||
|
||||
ray@hr-test1:~$ lscpu | grep cppc
|
||||
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
|
||||
|
||||
If CPU Flags have cppc, then this processor supports `Full MSR Support
|
||||
<perf_cap_>`_. Otherwise it supports `Shared Memory Support <perf_cap_>`_.
|
||||
If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
|
||||
<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
|
||||
|
||||
|
||||
``cpupower`` tool support for ``amd-pstate``
|
||||
===============================================
|
||||
|
||||
``amd-pstate`` is supported on ``cpupower`` tool that can be used to dump the frequency
|
||||
information. And it is in progress to support more and more operations for new
|
||||
``amd-pstate`` module with this tool. ::
|
||||
``amd-pstate`` is supported by the ``cpupower`` tool, which can be used to dump
|
||||
frequency information. Development is in progress to support more and more
|
||||
operations for the new ``amd-pstate`` module with this tool. ::
|
||||
|
||||
root@hr-test1:/home/ray# cpupower frequency-info
|
||||
analyzing CPU 0:
|
||||
@@ -336,10 +335,10 @@ Trace Events
|
||||
--------------
|
||||
|
||||
There are two static trace events that can be used for ``amd-pstate``
|
||||
diagnostics. One of them is the cpu_frequency trace event generally used
|
||||
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
|
||||
by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
|
||||
specific to ``amd-pstate``. The following sequence of shell commands can
|
||||
be used to enable them and see their output (if the kernel is generally
|
||||
be used to enable them and see their output (if the kernel is
|
||||
configured to support event tracing). ::
|
||||
|
||||
root@hr-test1:/home/ray# cd /sys/kernel/tracing/
|
||||
@@ -364,11 +363,37 @@ configured to support event tracing). ::
|
||||
<idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
|
||||
<idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
|
||||
|
||||
The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling
|
||||
The ``cpu_frequency`` trace event will be triggered either by the ``schedutil`` scaling
|
||||
governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
|
||||
policies with other scaling governors).
|
||||
|
||||
|
||||
Tracer Tool
|
||||
-------------
|
||||
|
||||
``amd_pstate_trace.py`` can record and parse the ``amd-pstate`` trace log, then
|
||||
generate performance plots. This utility can be used to debug and tune the
|
||||
performance of the ``amd-pstate`` driver. The tracer tool needs to import the Intel
|
||||
pstate tracer.
|
||||
|
||||
The tracer tool is located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
|
||||
used in two ways. If a trace file is available, then directly parse the file
|
||||
with the command ::
|
||||
|
||||
./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
|
||||
|
||||
Or generate a trace file with root privileges, then parse and plot with the command ::
|
||||
|
||||
sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
|
||||
|
||||
The test result can be found in ``results/test_name``. The following is an example
|
||||
showing part of the output. ::
|
||||
|
||||
common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf apef tsc load duration_ms sample_num elapsed_time common_comm
|
||||
CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
|
||||
CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
|
||||
|
||||
|
||||
Reference
|
||||
===========
|
||||
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
==============================
|
||||
Intel Uncore Frequency Scaling
|
||||
==============================
|
||||
|
||||
:Copyright: |copy| 2022 Intel Corporation
|
||||
|
||||
:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The uncore can consume a significant amount of power in Intel's Xeon servers based
|
||||
on the workload characteristics. To optimize the total power and improve overall
|
||||
performance, SoCs have internal algorithms for scaling uncore frequency. These
|
||||
algorithms monitor workload usage of uncore and set a desirable frequency.
|
||||
|
||||
It is possible that users have different expectations of uncore performance and
|
||||
want to have control over it. The objective is similar to allowing users to set
|
||||
the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
|
||||
Users may have some latency sensitive workloads where they do not want any
|
||||
change to uncore frequency. Also, users may have workloads which require
|
||||
different core and uncore performance at distinct phases and they may want to
|
||||
use both cpufreq and the uncore scaling interface to distribute power and
|
||||
improve overall performance.
|
||||
|
||||
Sysfs Interface
|
||||
---------------
|
||||
|
||||
To control uncore frequency, a sysfs interface is provided in the directory:
|
||||
``/sys/devices/system/cpu/intel_uncore_frequency/``.
|
||||
|
||||
There is one directory for each package and die combination as the scope of
|
||||
uncore scaling control is per die in multiple die/package SoCs or per
|
||||
package for single die per package SoCs. The name represents the
|
||||
scope of control. For example: 'package_00_die_00' is for package id 0 and
|
||||
die 0.
|
||||
|
||||
Each ``package_*_die_*`` directory contains the following attributes:
|
||||
|
||||
``initial_max_freq_khz``
|
||||
Out of reset, this attribute represents the maximum possible frequency.
|
||||
This is a read-only attribute. If users adjust max_freq_khz,
|
||||
they can always go back to maximum using the value from this attribute.
|
||||
|
||||
``initial_min_freq_khz``
|
||||
Out of reset, this attribute represents the minimum possible frequency.
|
||||
This is a read-only attribute. If users adjust min_freq_khz,
|
||||
they can always go back to minimum using the value from this attribute.
|
||||
|
||||
``max_freq_khz``
|
||||
This attribute is used to set the maximum uncore frequency.
|
||||
|
||||
``min_freq_khz``
|
||||
This attribute is used to set the minimum uncore frequency.
|
||||
|
||||
``current_freq_khz``
|
||||
This attribute is used to get the current uncore frequency.
|
||||
@@ -15,3 +15,4 @@ Working-State Power Management
|
||||
cpufreq_drivers
|
||||
intel_epb
|
||||
intel-speed-select
|
||||
intel_uncore_frequency_scaling
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
..
|
||||
If you want to distribute this text under CC-BY-4.0 only, please use 'The
|
||||
Linux kernel developers' for author attribution and link this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
||||
.. See the bottom of this file for additional redistribution information.
|
||||
|
||||
Reporting issues
|
||||
++++++++++++++++
|
||||
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
|
||||
handled slightly differently in the reporting process. Three types of cases
|
||||
qualify: regressions, security issues, and really severe problems.
|
||||
|
||||
You deal with a 'regression' if something that worked with an older version of
|
||||
the Linux kernel does not work with a newer one or somehow works worse with it.
|
||||
It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
|
||||
somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
|
||||
an application shows erratic behavior with a newer kernel, which might happen
|
||||
due to incompatible changes in the interface between the kernel and the
|
||||
userland (like procfs and sysfs). Significantly reduced performance or
|
||||
increased power consumption also qualify as regression. But keep in mind: the
|
||||
new kernel needs to be built with a configuration that is similar to the one
|
||||
from the old kernel (see below how to achieve that). That's because the kernel
|
||||
developers sometimes can not avoid incompatibilities when implementing new
|
||||
features; but to avoid regressions such features have to be enabled explicitly
|
||||
during build time configuration.
|
||||
You deal with a regression if some application or practical use case running
|
||||
fine with one Linux kernel works worse or not at all with a newer version
|
||||
compiled using a similar configuration. The document
|
||||
Documentation/admin-guide/reporting-regressions.rst explains this in more
|
||||
detail. It also provides a good deal of other information about regressions you
|
||||
might want to be aware of; it for example explains how to add your issue to the
|
||||
list of tracked regressions, to ensure it won't fall through the cracks.
|
||||
|
||||
What qualifies as a security issue is left to your judgment. Consider reading
|
||||
'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
|
||||
Documentation/admin-guide/security-bugs.rst before proceeding, as it
|
||||
provides additional details on how to best handle security issues.
|
||||
|
||||
An issue is a 'really severe problem' when something totally unacceptably bad
|
||||
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
|
||||
not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
|
||||
followed by a few spaces and some letters.
|
||||
|
||||
If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
|
||||
If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
|
||||
to find out why. Try to eliminate the reason. Often it's caused by one of these
|
||||
three things:
|
||||
|
||||
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
|
||||
reproduce it themselves.
|
||||
|
||||
To find the change there is a process called 'bisection' which the document
|
||||
'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
|
||||
Documentation/admin-guide/bug-bisect.rst describes in detail. That process
|
||||
will often require you to build about ten to twenty kernel images, trying to
|
||||
reproduce the issue with each of them before building the next. Yes, that takes
|
||||
some time, but don't worry, it works a lot quicker than most people assume.
|
||||
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
|
||||
the kernel and not by something else, as outlined above already.
|
||||
|
||||
In the whole process keep in mind: an issue only qualifies as regression if the
|
||||
older and the newer kernel got built with a similar configuration. The best way
|
||||
to archive this: copy the configuration file (``.config``) from the old working
|
||||
kernel freshly to each newer kernel version you try. Afterwards run ``make
|
||||
olddefconfig`` to adjust it for the needs of the new version.
|
||||
older and the newer kernel got built with a similar configuration. This can be
|
||||
achieved by using ``make olddefconfig``, as explained in more detail by
|
||||
Documentation/admin-guide/reporting-regressions.rst; that document also
|
||||
provides a good deal of other information about regressions you might want to be
|
||||
aware of.
|
||||
|
||||
|
||||
Write and send the report
|
||||
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
|
||||
the report's text to these addresses; but on top of it put a small note where
|
||||
you mention that you filed it with a link to the ticket.
|
||||
|
||||
See 'Documentation/admin-guide/security-bugs.rst' for more information.
|
||||
See Documentation/admin-guide/security-bugs.rst for more information.
|
||||
|
||||
|
||||
Duties after the report went out
|
||||
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
|
||||
pinpoint the exact change that causes the issue (which then can easily get
|
||||
reverted to fix the issue quickly). Hence consider to do a proper bisection
|
||||
right away if time permits. See the section 'Special care for regressions' and
|
||||
the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
|
||||
the document Documentation/admin-guide/bug-bisect.rst for details how to
|
||||
perform one. In case of a successful bisection add the author of the culprit to
|
||||
the recipients; also CC everyone in the signed-off-by chain, which you find at
|
||||
the end of its commit message.
|
||||
@@ -1594,7 +1580,7 @@ Some fixes are too complex
|
||||
Even small and seemingly obvious code-changes sometimes introduce new and
|
||||
totally unexpected problems. The maintainers of the stable and longterm kernels
|
||||
are very aware of that and thus only apply changes to these kernels that are
|
||||
within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
|
||||
within rules outlined in Documentation/process/stable-kernel-rules.rst.
|
||||
|
||||
Complex or risky changes for example do not qualify and thus only get applied
|
||||
to mainline. Other fixes are easy to get backported to the newest stable and
|
||||
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
|
||||
|
||||
|
||||
..
|
||||
This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you
|
||||
spot a typo or small mistake, feel free to let him know directly and he'll
|
||||
fix it. You are free to do the same in a mostly informal way if you want
|
||||
to contribute changes to the text, but for copyright reasons please CC
|
||||
end-of-content
|
||||
..
|
||||
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
|
||||
you spot a typo or small mistake, feel free to let him know directly and
|
||||
he'll fix it. You are free to do the same in a mostly informal way if you
|
||||
want to contribute changes to the text, but for copyright reasons please CC
|
||||
linux-doc@vger.kernel.org and "sign-off" your contribution as
|
||||
Documentation/process/submitting-patches.rst outlines in the section "Sign
|
||||
your work - the Developer's Certificate of Origin".
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use "The Linux kernel developers" for author attribution and link
|
||||
this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
||||
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
@@ -0,0 +1,451 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
.. [see the bottom of this file for redistribution information]
|
||||
|
||||
Reporting regressions
|
||||
+++++++++++++++++++++
|
||||
|
||||
"*We don't cause regressions*" is the first rule of Linux kernel development;
|
||||
Linux founder and lead developer Linus Torvalds established it himself and
|
||||
ensures it's obeyed.
|
||||
|
||||
This document describes what the rule means for users and how the Linux kernel's
|
||||
development model ensures all reported regressions get addressed; aspects relevant
|
||||
for kernel developers are left to Documentation/process/handling-regressions.rst.
|
||||
|
||||
|
||||
The important bits (aka "TL;DR")
|
||||
================================
|
||||
|
||||
#. It's a regression if something running fine with one Linux kernel works worse
|
||||
or not at all with a newer version. Note, the newer kernel has to be compiled
|
||||
using a similar configuration; the detailed explanations below describe this
|
||||
and other fine print in more detail.
|
||||
|
||||
#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
|
||||
it already covers all aspects important for regressions, which are repeated
|
||||
below for convenience. Two of them are important: start your report's subject
|
||||
with "[REGRESSION]" and CC or forward it to `the regression mailing list
|
||||
<https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
|
||||
|
||||
#. Optional, but recommended: when sending or forwarding your report, make the
|
||||
Linux kernel regression tracking bot "regzbot" track the issue by specifying
|
||||
when the regression started like this::
|
||||
|
||||
#regzbot introduced: v5.13..v5.14-rc1
|
||||
|
||||
|
||||
All the details on Linux kernel regressions relevant for users
|
||||
==============================================================
|
||||
|
||||
|
||||
The important basics
|
||||
--------------------
|
||||
|
||||
|
||||
What is a "regression" and what is the "no regressions rule"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It's a regression if some application or practical use case running fine with
|
||||
one Linux kernel works worse or not at all with a newer version compiled using a
|
||||
similar configuration. The "no regressions rule" forbids this to take place; if
|
||||
it happens by accident, developers that caused it are expected to quickly fix
|
||||
the issue.
|
||||
|
||||
It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
|
||||
5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
|
||||
It's also a regression if a perfectly working application suddenly shows erratic
|
||||
behavior with a newer kernel version; such issues can be caused by changes in
|
||||
procfs, sysfs, or one of the many other interfaces Linux provides to userland
|
||||
software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
|
||||
be built from a configuration similar to the one from 5.13. This can be achieved
|
||||
using ``make olddefconfig``, as explained in more detail below.
|
||||
|
||||
Note the "practical use case" in the first sentence of this section: developers
|
||||
despite the "no regressions" rule are free to change any aspect of the kernel
|
||||
and even APIs or ABIs to userland, as long as no existing application or use
|
||||
case breaks.
|
||||
|
||||
Also be aware the "no regressions" rule covers only interfaces the kernel
|
||||
provides to the userland. It thus does not apply to kernel-internal interfaces
|
||||
like the module API, which some externally developed drivers use to hook into
|
||||
the kernel.
|
||||
|
||||
How do I report a regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Just report the issue as outlined in
|
||||
Documentation/admin-guide/reporting-issues.rst, it already describes the
|
||||
important points. The following aspects outlined there are especially relevant
|
||||
for regressions:
|
||||
|
||||
* When checking for existing reports to join, also search the `archives of the
|
||||
Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
|
||||
`regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||
|
||||
* Start your report's subject with "[REGRESSION]".
|
||||
|
||||
* In your report, clearly mention the last kernel version that worked fine and
|
||||
the first broken one. Ideally try to find the exact change causing the
|
||||
regression using a bisection, as explained below in more detail.
|
||||
|
||||
* Remember to let the Linux regressions mailing list
|
||||
(regressions@lists.linux.dev) know about your report:
|
||||
|
||||
* If you report the regression by mail, CC the regressions list.
|
||||
|
||||
* If you report your regression to some bug tracker, forward the submitted
|
||||
report by mail to the regressions list while CCing the maintainer and the
|
||||
mailing list for the subsystem in question.
|
||||
|
||||
If it's a regression within a stable or longterm series (e.g.
|
||||
v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
|
||||
<https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
|
||||
|
||||
In case you performed a successful bisection, add everyone to the CC the
|
||||
culprit's commit message mentions in lines starting with "Signed-off-by:".
|
||||
|
||||
When CCing or forwarding your report to the list, consider directly telling the
|
||||
aforementioned Linux kernel regression tracking bot about your report. To do
|
||||
that, include a paragraph like this in your mail::
|
||||
|
||||
#regzbot introduced: v5.13..v5.14-rc1
|
||||
|
||||
Regzbot will then consider your mail a report for a regression introduced in the
|
||||
specified version range. In above case Linux v5.13 still worked fine and Linux
|
||||
v5.14-rc1 was the first version where you encountered the issue. If you
|
||||
performed a bisection to find the commit that caused the regression, specify the
|
||||
culprit's commit-id instead::
|
||||
|
||||
#regzbot introduced: 1f2e3d4c5d
|
||||
|
||||
Placing such a "regzbot command" is in your interest, as it will ensure the
|
||||
report won't fall through the cracks unnoticed. If you omit this, the Linux
|
||||
kernel's regressions tracker will take care of telling regzbot about your
|
||||
regression, as long as you send a copy to the regressions mailing list. But the
|
||||
regression tracker is just one human who sometimes has to rest or occasionally
|
||||
might even enjoy some time away from computers (as crazy as that might sound).
|
||||
Relying on this person thus will result in an unnecessary delay before the
|
||||
regression becomes mentioned `on the list of tracked and unresolved Linux
|
||||
kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
|
||||
weekly regression reports sent by regzbot. Such delays can result in Linus
|
||||
Torvalds being unaware of important regressions when deciding between "continue
|
||||
development or call this finished and release the final?".
|
||||
|
||||
Are really all regressions fixed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Nearly all of them are, as long as the change causing the regression (the
|
||||
"culprit commit") is reliably identified. Some regressions can be fixed without
|
||||
this, but often it's required.
|
||||
|
||||
Who needs to find the root cause of a regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Developers of the affected code area should try to locate the culprit on their
|
||||
own. But for them that's often impossible to do with reasonable effort, as quite
|
||||
a lot of issues only occur in a particular environment outside the developer's
|
||||
reach -- for example, a specific hardware platform, firmware, Linux distro,
|
||||
system's configuration, or application. That's why in the end it's often up to
|
||||
the reporter to locate the culprit commit; sometimes users might even need to
|
||||
run additional tests afterwards to pinpoint the exact root cause. Developers
|
||||
should offer advice and reasonably help where they can, to make this process
|
||||
relatively easy and achievable for typical users.
|
||||
|
||||
How can I find the culprit?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Perform a bisection, as roughly outlined in
|
||||
Documentation/admin-guide/reporting-issues.rst and described in more detail by
|
||||
Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
|
||||
in many cases finds the culprit relatively quickly. If it's hard or
|
||||
time-consuming to reliably reproduce the issue, consider teaming up with other
|
||||
affected users to narrow down the search range together.
|
||||
|
||||
Who can I ask for advice when it comes to regressions?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
|
||||
CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
|
||||
issue might better be dealt with in private, feel free to omit the list.
|
||||
|
||||
|
||||
Additional details about regressions
|
||||
------------------------------------
|
||||
|
||||
|
||||
What is the goal of the "no regressions rule"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Users should feel safe when updating kernel versions and not have to worry
|
||||
something might break. This is in the interest of the kernel developers to make
|
||||
updating attractive: they don't want users to stay on stable or longterm Linux
|
||||
series that are either abandoned or more than one and a half years old. That's
|
||||
in everybody's interest, as `those series might have known bugs, security
|
||||
issues, or other problematic aspects already fixed in later versions
|
||||
<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
|
||||
Additionally, the kernel developers want to make it simple and appealing for
|
||||
users to test the latest pre-release or regular release. That's also in
|
||||
everybody's interest, as it's a lot easier to track down and fix problems, if
|
||||
they are reported shortly after being introduced.
|
||||
|
||||
Is the "no regressions" rule really adhered to in practice?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It's taken really seriously, as can be seen by many mailing list posts from
|
||||
Linux creator and lead developer Linus Torvalds, some of which are quoted in
|
||||
Documentation/process/handling-regressions.rst.
|
||||
|
||||
Exceptions to this rule are extremely rare; in the past developers almost always
|
||||
turned out to be wrong when they assumed a particular situation was warranting
|
||||
an exception.
|
||||
|
||||
Who ensures the "no regressions" rule is actually followed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The subsystem maintainers should take care of that; they are watched and
|
||||
supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
|
||||
Greg Kroah-Hartman et al. for various stable/longterm series.
|
||||
|
||||
All of them are helped by people trying to ensure no regression report falls
|
||||
through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
|
||||
the Linux kernel's "regressions tracker"; to facilitate this work he relies on
|
||||
regzbot, the Linux kernel regression tracking bot. That's why you want to bring
|
||||
your report on the radar of these people by CCing or forwarding each report to
|
||||
the regressions mailing list, ideally with a "regzbot command" in your mail to
|
||||
get it tracked immediately.
|
||||
|
||||
How quickly are regressions normally fixed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Developers should fix any reported regression as quickly as possible, to provide
|
||||
affected users with a solution in a timely manner and prevent more users from
|
||||
running into the issue; nevertheless developers need to take enough time and
|
||||
care to ensure regression fixes do not cause additional damage.
|
||||
|
||||
The answer thus depends on various factors like the impact of a regression, its
|
||||
age, or the Linux series in which it occurs. In the end though, most regressions
|
||||
should be fixed within two weeks.
|
||||
|
||||
Is it a regression, if the issue can be avoided by updating some software?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Almost always: yes. If a developer tells you otherwise, ask the regression
|
||||
tracker for advice as outlined above.
|
||||
|
||||
Is it a regression, if a newer kernel works slower or consumes more energy?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Yes, but the difference has to be significant. A five percent slow-down in a
|
||||
micro-benchmark thus is unlikely to qualify as regression, unless it also
|
||||
influences the results of a broad benchmark by more than one percent. If in
|
||||
doubt, ask for advice.
|
||||
|
||||
Is it a regression, if an external kernel module breaks when updating Linux?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
No, as the "no regression" rule is about interfaces and services the Linux
|
||||
kernel provides to the userland. It thus does not cover building or running
|
||||
externally developed kernel modules, as they run in kernel-space and hook into
|
||||
the kernel using internal interfaces occasionally changed.
|
||||
|
||||
How are regressions handled that are caused by security fixes?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In extremely rare situations security issues can't be fixed without causing
|
||||
regressions; those fixes are given way, as they are the lesser evil in the end.
|
||||
Luckily this middling almost always can be avoided, as key developers for the
|
||||
affected area and often Linus Torvalds himself try very hard to fix security
|
||||
issues without causing regressions.
|
||||
|
||||
If you nevertheless face such a case, check the mailing list archives if people
|
||||
tried their best to avoid the regression. If not, report it; if in doubt, ask
|
||||
for advice as outlined above.
|
||||
|
||||
What happens if fixing a regression is impossible without causing another?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Sadly these things happen, but luckily not very often; if they occur, expert
|
||||
developers of the affected code area should look into the issue to find a fix
|
||||
that avoids regressions or at least their impact. If you run into such a
|
||||
situation, do what was outlined already for regressions caused by security
|
||||
fixes: check earlier discussions if people already tried their best and ask for
|
||||
advice if in doubt.
|
||||
|
||||
A quick note while at it: these situations could be avoided, if people would
|
||||
regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
|
||||
development cycle a test run. This is best explained by imagining a change
|
||||
integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
|
||||
the same time is a hard requirement for some other improvement applied for
|
||||
5.15-rc1. All these changes often can simply be reverted and the regression thus
|
||||
solved, if someone finds and reports it before 5.15 is released. A few days or
|
||||
weeks later this solution can become impossible, as some software might have
|
||||
started to rely on aspects introduced by one of the follow-up changes: reverting
|
||||
all changes would then cause a regression for users of said software and thus is
|
||||
out of the question.
|
||||
|
||||
Is it a regression, if some feature I relied on was removed months ago?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is, but often it's hard to fix such regressions due to the aspects outlined
|
||||
in the previous section. It hence needs to be dealt with on a case-by-case
|
||||
basis. This is another reason why it's in everybody's interest to regularly test
|
||||
mainline pre-releases.
|
||||
|
||||
Does the "no regression" rule apply if I seem to be the only affected person?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It does, but only for practical usage: the Linux developers want to be free to
|
||||
remove support for hardware that nowadays can only be found in attics and museums.
|
||||
|
||||
Note, sometimes regressions can't be avoided to make progress -- and the latter
|
||||
is needed to prevent Linux from stagnation. Hence, if only very few users seem
|
||||
to be affected by a regression, it for the greater good might be in their and
|
||||
everyone else's interest to let things pass. Especially if there is an
|
||||
easy way to circumvent the regression somehow, for example by updating some
|
||||
software or using a kernel parameter created just for this purpose.
|
||||
|
||||
Does the regression rule apply for code in the staging tree as well?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Not according to the `help text for the configuration option covering all
|
||||
staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
|
||||
which since its early days states::
|
||||
|
||||
Please note that these drivers are under heavy development, may or
|
||||
may not work, and may contain userspace interfaces that most likely
|
||||
will be changed in the near future.
|
||||
|
||||
The staging developers nevertheless often adhere to the "no regressions" rule,
|
||||
but sometimes bend it to make progress. That's for example why some users had to
|
||||
deal with (often negligible) regressions when a WiFi driver from the staging
|
||||
tree was replaced by a totally different one written from scratch.
|
||||
|
||||
Why do later versions have to be "compiled with a similar configuration"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Because the Linux kernel developers sometimes integrate changes known to cause
|
||||
regressions, but make them optional and disable them in the kernel's default
|
||||
configuration. This trick allows progress, as the "no regressions" rule
|
||||
otherwise would lead to stagnation.
|
||||
|
||||
Consider for example a new security feature blocking access to some kernel
|
||||
interfaces often abused by malware, which at the same time are required to run a
|
||||
few rarely used applications. The outlined approach makes both camps happy:
|
||||
people using these applications can leave the new security feature off, while
|
||||
everyone else can enable it without running into trouble.
|
||||
|
||||
How to create a configuration similar to the one of an older kernel?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Start your machine with a known-good kernel and configure the newer Linux
|
||||
version with ``make olddefconfig``. This makes the kernel's build scripts pick
|
||||
up the configuration file (the ".config" file) from the running kernel as base
|
||||
for the new one you are about to compile; afterwards they set all new
|
||||
configuration options to their default value, which should disable new features
|
||||
that might cause regressions.
|
||||
|
||||
Can I report a regression I found with pre-compiled vanilla kernels?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You need to ensure the newer kernel was compiled with a similar configuration
|
||||
file as the older one (see above), as those that built them might have enabled
|
||||
some known-to-be incompatible feature for the newer kernel. If in doubt, report
|
||||
the matter to the kernel's provider and ask for advice.
|
||||
|
||||
|
||||
More about regression tracking with "regzbot"
|
||||
---------------------------------------------
|
||||
|
||||
What is regression tracking and why should I care about it?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Rules like "no regressions" need someone to ensure they are followed, otherwise
|
||||
they are broken either accidentally or on purpose. History has shown this to be
|
||||
true for Linux kernel development as well. That's why Thorsten Leemhuis, the
|
||||
Linux Kernel's regression tracker, and some people try to ensure all regressions
|
||||
are fixed by keeping an eye on them until they are resolved. None of them are
|
||||
paid for this, that's why the work is done on a best effort basis.
|
||||
|
||||
Why and how are Linux kernel regressions tracked using a bot?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Tracking regressions completely manually has proven to be quite hard due to the
|
||||
distributed and loosely structured nature of the Linux kernel development process.
|
||||
That's why the Linux kernel's regression tracker developed regzbot to facilitate
|
||||
the work, with the long term goal to automate regression tracking as much as
|
||||
possible for everyone involved.
|
||||
|
||||
Regzbot works by watching for replies to reports of tracked regressions.
|
||||
Additionally, it's looking out for posted or committed patches referencing such
|
||||
reports with "Link:" tags; replies to such patch postings are tracked as well.
|
||||
Combined this data provides good insights into the current state of the fixing
|
||||
process.
|
||||
|
||||
How to see which regressions regzbot tracks currently?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||
|
||||
What kind of issues are supposed to be tracked by regzbot?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The bot is meant to track regressions, hence please don't involve regzbot for
|
||||
regular issues. But it's okay for the Linux kernel's regression tracker if you
|
||||
involve regzbot to track severe issues, like reports about hangs, corrupted
|
||||
data, or internal errors (Panic, Oops, BUG(), warning, ...).
|
||||
|
||||
How to change aspects of a tracked regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
By using a 'regzbot command' in a direct or indirect reply to the mail with the
|
||||
report. The easiest way to do that: find the report in your "Sent" folder or the
|
||||
mailing list archive and reply to it using your mailer's "Reply-all" function.
|
||||
In that mail, use one of the following commands in a stand-alone paragraph (IOW:
|
||||
use blank lines to separate one or multiple of these commands from the rest of
|
||||
the mail's text).
|
||||
|
||||
* Update when the regression started to happen, for example after performing a
|
||||
bisection::
|
||||
|
||||
#regzbot introduced: 1f2e3d4c5d
|
||||
|
||||
* Set or update the title::
|
||||
|
||||
#regzbot title: foo
|
||||
|
||||
* Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of
|
||||
the issue or a fix are discussed::
|
||||
|
||||
#regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
|
||||
#regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||
|
||||
* Point to a place with further details of interest, like a mailing list post
|
||||
or a ticket in a bug tracker that are slightly related, but about a different
|
||||
topic::
|
||||
|
||||
#regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||
|
||||
* Mark a regression as invalid::
|
||||
|
||||
#regzbot invalid: wasn't a regression, problem has always existed
|
||||
|
||||
Regzbot supports a few other commands primarily used by developers or people
|
||||
tracking regressions. They and more details about the aforementioned regzbot
|
||||
commands can be found in the `getting started guide
|
||||
<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
|
||||
the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
|
||||
for regzbot.
|
||||
|
||||
..
|
||||
end-of-content
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use "The Linux kernel developers" for author attribution and link
|
||||
this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
@@ -595,65 +595,33 @@ Documentation/admin-guide/kernel-parameters.rst).
|
||||
numa_balancing
|
||||
==============
|
||||
|
||||
Enables/disables automatic page fault based NUMA memory
|
||||
balancing. Memory is moved automatically to nodes
|
||||
that access it often.
|
||||
Enables/disables and configures automatic page fault based NUMA memory
|
||||
balancing. Memory is moved automatically to nodes that access it often.
|
||||
The value to set can be the result of ORing the following:
|
||||
|
||||
Enables/disables automatic NUMA memory balancing. On NUMA machines, there
|
||||
is a performance penalty if remote memory is accessed by a CPU. When this
|
||||
feature is enabled the kernel samples what task thread is accessing memory
|
||||
by periodically unmapping pages and later trapping a page fault. At the
|
||||
time of the page fault, it is determined if the data being accessed should
|
||||
be migrated to a local memory node.
|
||||
= =================================
|
||||
0 NUMA_BALANCING_DISABLED
|
||||
1 NUMA_BALANCING_NORMAL
|
||||
2 NUMA_BALANCING_MEMORY_TIERING
|
||||
= =================================
|
||||
|
||||
Or NUMA_BALANCING_NORMAL to optimize page placement among different
|
||||
NUMA nodes to reduce remote accessing. On NUMA machines, there is a
|
||||
performance penalty if remote memory is accessed by a CPU. When this
|
||||
feature is enabled the kernel samples what task thread is accessing
|
||||
memory by periodically unmapping pages and later trapping a page
|
||||
fault. At the time of the page fault, it is determined if the data
|
||||
being accessed should be migrated to a local memory node.
|
||||
|
||||
The unmapping of pages and trapping faults incur additional overhead that
|
||||
ideally is offset by improved memory locality but there is no universal
|
||||
guarantee. If the target workload is already bound to NUMA nodes then this
|
||||
feature should be disabled. Otherwise, if the system overhead from the
|
||||
feature is too high then the rate the kernel samples for NUMA hinting
|
||||
faults may be controlled by the `numa_balancing_scan_period_min_ms,
|
||||
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
|
||||
numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
|
||||
|
||||
|
||||
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
|
||||
===============================================================================================================================
|
||||
|
||||
|
||||
Automatic NUMA balancing scans tasks address space and unmaps pages to
|
||||
detect if pages are properly placed or if the data should be migrated to a
|
||||
memory node local to where the task is running. Every "scan delay" the task
|
||||
scans the next "scan size" number of pages in its address space. When the
|
||||
end of the address space is reached the scanner restarts from the beginning.
|
||||
|
||||
In combination, the "scan delay" and "scan size" determine the scan rate.
|
||||
When "scan delay" decreases, the scan rate increases. The scan delay and
|
||||
hence the scan rate of every task is adaptive and depends on historical
|
||||
behaviour. If pages are properly placed then the scan delay increases,
|
||||
otherwise the scan delay decreases. The "scan size" is not adaptive but
|
||||
the higher the "scan size", the higher the scan rate.
|
||||
|
||||
Higher scan rates incur higher system overhead as page faults must be
|
||||
trapped and potentially data must be migrated. However, the higher the scan
|
||||
rate, the more quickly a tasks memory is migrated to a local node if the
|
||||
workload pattern changes and minimises performance impact due to remote
|
||||
memory accesses. These sysctls control the thresholds for scan delays and
|
||||
the number of pages scanned.
|
||||
|
||||
``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
|
||||
scan a tasks virtual memory. It effectively controls the maximum scanning
|
||||
rate for each task.
|
||||
|
||||
``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
|
||||
when it initially forks.
|
||||
|
||||
``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
|
||||
scan a tasks virtual memory. It effectively controls the minimum scanning
|
||||
rate for each task.
|
||||
|
||||
``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
|
||||
scanned for a given scan.
|
||||
feature should be disabled.
|
||||
|
||||
Or NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
|
||||
different types of memory (represented as different NUMA nodes) to
|
||||
place the hot pages in the fast memory. This is implemented based on
|
||||
unmapping and page fault too.
|
||||
|
||||
oops_all_cpu_backtrace
|
||||
======================
|
||||
@@ -795,6 +763,8 @@ bit 1 print system memory info
|
||||
bit 2 print timer info
|
||||
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
|
||||
bit 4 print ftrace buffer
|
||||
bit 5 print all printk messages in buffer
|
||||
bit 6 print all CPUs backtrace (if available in the arch)
|
||||
===== ============================================
|
||||
|
||||
So for example to print tasks and memory info on panic, user can::
|
||||
@@ -1029,23 +999,17 @@ This is a directory, with the following entries:
|
||||
* ``poolsize``: the entropy pool size, in bits;
|
||||
|
||||
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
|
||||
number of seconds between urandom pool reseeding).
|
||||
number of seconds between urandom pool reseeding). This file is
|
||||
writable for compatibility purposes, but writing to it has no effect
|
||||
on any RNG behavior.
|
||||
|
||||
* ``uuid``: a UUID generated every time this is retrieved (this can
|
||||
thus be used to generate UUIDs at will);
|
||||
|
||||
* ``write_wakeup_threshold``: when the entropy count drops below this
|
||||
(as a number of bits), processes waiting to write to ``/dev/random``
|
||||
are woken up.
|
||||
|
||||
If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
|
||||
defined, these additional entries are present:
|
||||
|
||||
* ``add_interrupt_avg_cycles``: the average number of cycles between
|
||||
interrupts used to feed the pool;
|
||||
|
||||
* ``add_interrupt_avg_deviation``: the standard deviation seen on the
|
||||
number of cycles between interrupts used to feed the pool.
|
||||
are woken up. This file is writable for compatibility purposes, but
|
||||
writing to it has no effect on any RNG behavior.
|
||||
|
||||
|
||||
randomize_va_space
|
||||
|
||||
@@ -365,6 +365,15 @@ new netns has been created.
|
||||
|
||||
Default : 0 (for compatibility reasons)
|
||||
|
||||
txrehash
|
||||
--------
|
||||
|
||||
Controls default hash rethink behaviour on listening socket when SO_TXREHASH
|
||||
option is set to SOCK_TXREHASH_DEFAULT (i.e. not overridden by setsockopt).
|
||||
|
||||
If set to 1 (default), hash rethink is performed on listening socket.
|
||||
If set to 0, hash rethink is not performed.
|
||||
|
||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||
----------------------------------------------------------
|
||||
|
||||
|
||||
@@ -266,10 +266,12 @@ Avanta family
|
||||
-------------
|
||||
|
||||
Flavors:
|
||||
- 88F6500
|
||||
- 88F6510
|
||||
- 88F6530P
|
||||
- 88F6550
|
||||
- 88F6560
|
||||
- 88F6601
|
||||
|
||||
Homepage:
|
||||
https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/
|
||||
|
||||
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
|
||||
is relevant to all public releases of the AArch64 Linux kernel.
|
||||
|
||||
The AArch64 exception model is made up of a number of exception levels
|
||||
(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
|
||||
counterpart. EL2 is the hypervisor level and exists only in non-secure
|
||||
mode. EL3 is the highest priority level and exists only in secure mode.
|
||||
(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
|
||||
counterpart. EL2 is the hypervisor level, EL3 is the highest priority
|
||||
level and exists only in secure mode. Both are architecturally optional.
|
||||
|
||||
For the purposes of this document, we will use the term `boot loader`
|
||||
simply to define all software that executes on the CPU(s) before control
|
||||
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
|
||||
IRQ and FIQ).
|
||||
The CPU must be in either EL2 (RECOMMENDED in order to have access to
|
||||
the virtualisation extensions) or non-secure EL1.
|
||||
The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
|
||||
to have access to the virtualisation extensions), or in EL1.
|
||||
|
||||
- Caches, MMUs
|
||||
|
||||
|
||||
@@ -259,6 +259,11 @@ HWCAP2_RPRES
|
||||
|
||||
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
||||
|
||||
HWCAP2_MTE3
|
||||
|
||||
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
||||
by Documentation/arm64/memory-tagging-extension.rst.
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
||||
@@ -76,6 +76,9 @@ configurable behaviours:
|
||||
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
|
||||
address is unknown).
|
||||
|
||||
- *Asymmetric* - Reads are handled as for synchronous mode while writes
|
||||
are handled as for asynchronous mode.
|
||||
|
||||
The user can select the above modes, per thread, using the
|
||||
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
|
||||
contains any number of the following values in the ``PR_MTE_TCF_MASK``
|
||||
@@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple
|
||||
modes are specified, the mode is selected as described in the "Per-CPU
|
||||
preferred tag checking modes" section below.
|
||||
|
||||
The current tag check fault mode can be read using the
|
||||
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
|
||||
The current tag check fault configuration can be read using the
|
||||
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
|
||||
multiple modes were requested then all will be reported.
|
||||
|
||||
Tag checking can also be disabled for a user thread by setting the
|
||||
``PSTATE.TCO`` bit with ``MSR TCO, #1``.
|
||||
@@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode.
|
||||
|
||||
The preferred tag checking mode for each CPU is controlled by
|
||||
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
|
||||
privileged user may write the value ``async`` or ``sync``. The default
|
||||
preferred mode for each CPU is ``async``.
|
||||
privileged user may write the value ``async``, ``sync`` or ``asymm``. The
|
||||
default preferred mode for each CPU is ``async``.
|
||||
|
||||
To allow a program to potentially run in the CPU's preferred tag
|
||||
checking mode, the user program may set multiple tag check fault mode
|
||||
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
|
||||
flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
|
||||
mode is in the task's set of provided tag checking modes (this will
|
||||
always be the case at present because the kernel only supports two
|
||||
tag checking modes, but future kernels may support more modes), that
|
||||
mode will be selected. Otherwise, one of the modes in the task's mode
|
||||
set will be selected in a currently unspecified manner.
|
||||
flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
|
||||
modes are requested then asymmetric mode may also be selected by the
|
||||
kernel. If the CPU's preferred tag checking mode is in the task's set
|
||||
of provided tag checking modes, that mode will be selected. Otherwise,
|
||||
one of the modes will be selected by the kernel
|
||||
from the task's mode set using the preference order:
|
||||
|
||||
1. Asynchronous
|
||||
2. Asymmetric
|
||||
3. Synchronous
|
||||
|
||||
Note that there is no way for userspace to request multiple modes and
|
||||
also disable asymmetric mode.
|
||||
|
||||
Initial process state
|
||||
---------------------
|
||||
@@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the
|
||||
Documentation/arm64/tagged-address-abi.rst and above. The corresponding
|
||||
``regset`` is 1 element of 8 bytes (``sizeof(long)``).
|
||||
|
||||
Core dump support
|
||||
-----------------
|
||||
|
||||
The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
|
||||
in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
|
||||
program header for such a segment is defined as:
|
||||
|
||||
:``p_type``: ``PT_ARM_MEMTAG_MTE``
|
||||
:``p_flags``: 0
|
||||
:``p_offset``: segment file offset
|
||||
:``p_vaddr``: segment virtual address, same as the corresponding
|
||||
``PT_LOAD`` segment
|
||||
:``p_paddr``: 0
|
||||
:``p_filesz``: segment size in file, calculated as ``p_memsz / 32``
|
||||
(two 4-bit tags cover 32 bytes of memory)
|
||||
:``p_memsz``: segment size in memory, same as the corresponding
|
||||
``PT_LOAD`` segment
|
||||
:``p_align``: 0
|
||||
|
||||
The tags are stored in the core file at ``p_offset`` as two 4-bit tags
|
||||
in a byte. With the tag granule of 16 bytes, a 4K page requires 128
|
||||
bytes in the core file.
|
||||
|
||||
Example of correct usage
|
||||
========================
|
||||
|
||||
|
||||
@@ -52,6 +52,12 @@ stable kernels.
|
||||
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
|
||||
@@ -92,12 +98,20 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N1 | #1349291 | N/A |
|
||||
@@ -122,7 +136,7 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
|
||||
| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX GICv3 | #38539 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
||||
@@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
|
||||
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
|
||||
and ``ENDPROC`` macros.
|
||||
|
||||
* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
|
||||
who decided to have two or more names for one function. The typical use is::
|
||||
* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
|
||||
be used to define multiple names for a function. The typical use is::
|
||||
|
||||
SYM_FUNC_START_ALIAS(__memset)
|
||||
SYM_FUNC_START(memset)
|
||||
SYM_FUNC_START(__memset)
|
||||
... asm insns ...
|
||||
SYM_FUNC_END(memset)
|
||||
SYM_FUNC_END_ALIAS(__memset)
|
||||
SYM_FUNC_END(__memset)
|
||||
SYM_FUNC_ALIAS(memset, __memset)
|
||||
|
||||
In this example, one can call ``__memset`` or ``memset`` with the same
|
||||
result, except the debug information for the instructions is generated to
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
|
||||
``capability`` is a bitfield, printed in hexadecimal, indicating which
|
||||
capabilities a specific block device supports:
|
||||
|
||||
.. kernel-doc:: include/linux/genhd.h
|
||||
.. kernel-doc:: include/linux/blkdev.h
|
||||
|
||||
@@ -8,7 +8,6 @@ Block
|
||||
:maxdepth: 1
|
||||
|
||||
bfq-iosched
|
||||
biodoc
|
||||
biovecs
|
||||
blk-mq
|
||||
capability
|
||||
|
||||
@@ -658,7 +658,7 @@ when:
|
||||
|
||||
.. Links
|
||||
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
|
||||
.. _netdev-FAQ: ../networking/netdev-FAQ.rst
|
||||
.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
|
||||
.. _selftests:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
|
||||
.. _Documentation/dev-tools/kselftest.rst:
|
||||
|
||||
117
Documentation/bpf/bpf_prog_run.rst
Normal file
117
Documentation/bpf/bpf_prog_run.rst
Normal file
@@ -0,0 +1,117 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Running BPF programs from userspace
|
||||
===================================
|
||||
|
||||
This document describes the ``BPF_PROG_RUN`` facility for running BPF programs
|
||||
from userspace.
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
:depth: 2
|
||||
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
The ``BPF_PROG_RUN`` command can be used through the ``bpf()`` syscall to
|
||||
execute a BPF program in the kernel and return the results to userspace. This
|
||||
can be used to unit test BPF programs against user-supplied context objects, and
|
||||
as a way to explicitly execute programs in the kernel for their side effects. The
|
||||
command was previously named ``BPF_PROG_TEST_RUN``, and both constants continue
|
||||
to be defined in the UAPI header, aliased to the same value.
|
||||
|
||||
The ``BPF_PROG_RUN`` command can be used to execute BPF programs of the
|
||||
following types:
|
||||
|
||||
- ``BPF_PROG_TYPE_SOCKET_FILTER``
|
||||
- ``BPF_PROG_TYPE_SCHED_CLS``
|
||||
- ``BPF_PROG_TYPE_SCHED_ACT``
|
||||
- ``BPF_PROG_TYPE_XDP``
|
||||
- ``BPF_PROG_TYPE_SK_LOOKUP``
|
||||
- ``BPF_PROG_TYPE_CGROUP_SKB``
|
||||
- ``BPF_PROG_TYPE_LWT_IN``
|
||||
- ``BPF_PROG_TYPE_LWT_OUT``
|
||||
- ``BPF_PROG_TYPE_LWT_XMIT``
|
||||
- ``BPF_PROG_TYPE_LWT_SEG6LOCAL``
|
||||
- ``BPF_PROG_TYPE_FLOW_DISSECTOR``
|
||||
- ``BPF_PROG_TYPE_STRUCT_OPS``
|
||||
- ``BPF_PROG_TYPE_RAW_TRACEPOINT``
|
||||
- ``BPF_PROG_TYPE_SYSCALL``
|
||||
|
||||
When using the ``BPF_PROG_RUN`` command, userspace supplies an input context
|
||||
object and (for program types operating on network packets) a buffer containing
|
||||
the packet data that the BPF program will operate on. The kernel will then
|
||||
execute the program and return the results to userspace. Note that programs will
|
||||
not have any side effects while being run in this mode; in particular, packets
|
||||
will not actually be redirected or dropped, the program return code will just be
|
||||
returned to userspace. A separate mode for live execution of XDP programs is
|
||||
provided, documented separately below.
|
||||
|
||||
Running XDP programs in "live frame mode"
|
||||
-----------------------------------------
|
||||
|
||||
The ``BPF_PROG_RUN`` command has a separate mode for running live XDP programs,
|
||||
which can be used to execute XDP programs in a way where packets will actually
|
||||
be processed by the kernel after the execution of the XDP program as if they
|
||||
arrived on a physical interface. This mode is activated by setting the
|
||||
``BPF_F_TEST_XDP_LIVE_FRAMES`` flag when supplying an XDP program to
|
||||
``BPF_PROG_RUN``.
|
||||
|
||||
The live packet mode is optimised for high performance execution of the supplied
|
||||
XDP program many times (suitable for, e.g., running as a traffic generator),
|
||||
which means the semantics are not quite as straight-forward as the regular test
|
||||
run mode. Specifically:
|
||||
|
||||
- When executing an XDP program in live frame mode, the result of the execution
|
||||
will not be returned to userspace; instead, the kernel will perform the
|
||||
operation indicated by the program's return code (drop the packet, redirect
|
||||
it, etc). For this reason, setting the ``data_out`` or ``ctx_out`` attributes
|
||||
in the syscall parameters when running in this mode will be rejected. In
|
||||
addition, not all failures will be reported back to userspace directly;
|
||||
specifically, only fatal errors in setup or during execution (like memory
|
||||
allocation errors) will halt execution and return an error. If an error occurs
|
||||
in packet processing, like a failure to redirect to a given interface,
|
||||
execution will continue with the next repetition; these errors can be detected
|
||||
via the same trace points as for regular XDP programs.
|
||||
|
||||
- Userspace can supply an ifindex as part of the context object, just like in
|
||||
the regular (non-live) mode. The XDP program will be executed as though the
|
||||
packet arrived on this interface; i.e., the ``ingress_ifindex`` of the context
|
||||
object will point to that interface. Furthermore, if the XDP program returns
|
||||
``XDP_PASS``, the packet will be injected into the kernel networking stack as
|
||||
though it arrived on that ifindex, and if it returns ``XDP_TX``, the packet
|
||||
will be transmitted *out* of that same interface. Do note, though, that
|
||||
because the program execution is not happening in driver context, an
|
||||
``XDP_TX`` is actually turned into the same action as an ``XDP_REDIRECT`` to
|
||||
that same interface (i.e., it will only work if the driver has support for the
|
||||
``ndo_xdp_xmit`` driver op).
|
||||
|
||||
- When running the program with multiple repetitions, the execution will happen
|
||||
in batches. The batch size defaults to 64 packets (which is the same as the
|
||||
maximum NAPI receive batch size), but can be specified by userspace through
|
||||
the ``batch_size`` parameter, up to a maximum of 256 packets. For each batch,
|
||||
the kernel executes the XDP program repeatedly, each invocation getting a
|
||||
separate copy of the packet data. For each repetition, if the program drops
|
||||
the packet, the data page is immediately recycled (see below). Otherwise, the
|
||||
packet is buffered until the end of the batch, at which point all packets
|
||||
buffered this way during the batch are transmitted at once.
|
||||
|
||||
- When setting up the test run, the kernel will initialise a pool of memory
|
||||
pages of the same size as the batch size. Each memory page will be initialised
|
||||
with the initial packet data supplied by userspace at ``BPF_PROG_RUN``
|
||||
invocation. When possible, the pages will be recycled on future program
|
||||
invocations, to improve performance. Pages will generally be recycled a full
|
||||
batch at a time, except when a packet is dropped (by return code or because
|
||||
of, say, a redirection error), in which case that page will be recycled
|
||||
immediately. If a packet ends up being passed to the regular networking stack
|
||||
(because the XDP program returns ``XDP_PASS``, or because it ends up being
|
||||
redirected to an interface that injects it into the stack), the page will be
|
||||
released and a new one will be allocated when the pool is empty.
|
||||
|
||||
When recycling, the page content is not rewritten; only the packet boundary
|
||||
pointers (``data``, ``data_end`` and ``data_meta``) in the context object will
|
||||
be reset to the original values. This means that if a program rewrites the
|
||||
packet contents, it has to be prepared to see either the original content or
|
||||
the modified version on subsequent invocations.
|
||||
@@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument.
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``btf_type_tag`` attribute
|
||||
|
||||
Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types.
|
||||
It has the following btf type chain:
|
||||
::
|
||||
|
||||
ptr -> [type_tag]*
|
||||
-> [const | volatile | restrict | typedef]*
|
||||
-> base_type
|
||||
|
||||
Basically, a pointer type points to zero or more
|
||||
type_tag, then zero or more const/volatile/restrict/typedef
|
||||
and finally the base type. The base type is one of
|
||||
int, ptr, array, struct, union, enum, func_proto and float types.
|
||||
|
||||
3. BTF Kernel API
|
||||
=================
|
||||
|
||||
@@ -565,18 +578,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
|
||||
In libbpf, the map can be defined with extra annotation like below:
|
||||
::
|
||||
|
||||
struct bpf_map_def SEC("maps") btf_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(struct ipv_counts),
|
||||
.max_entries = 4,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct ipv_counts);
|
||||
__uint(max_entries, 4);
|
||||
} btf_map SEC(".maps");
|
||||
|
||||
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
|
||||
value types for the map. During ELF parsing, libbpf is able to extract
|
||||
key/value type_id's and assign them to BPF_MAP_CREATE attributes
|
||||
automatically.
|
||||
During ELF parsing, libbpf is able to extract key/value type_id's and assign
|
||||
them to BPF_MAP_CREATE attributes automatically.
|
||||
|
||||
.. _BPF_Prog_Load:
|
||||
|
||||
@@ -824,13 +834,12 @@ structure has bitfields. For example, for the following map,::
|
||||
___A b1:4;
|
||||
enum A b2:4;
|
||||
};
|
||||
struct bpf_map_def SEC("maps") tmpmap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct tmp_t),
|
||||
.max_entries = 1,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct tmp_t);
|
||||
__uint(max_entries, 1);
|
||||
} tmpmap SEC(".maps");
|
||||
|
||||
bpftool is able to pretty print like below:
|
||||
::
|
||||
|
||||
@@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture.
|
||||
helpers
|
||||
programs
|
||||
maps
|
||||
bpf_prog_run
|
||||
classic_vs_extended.rst
|
||||
bpf_licensing
|
||||
test_debug
|
||||
|
||||
@@ -22,7 +22,13 @@ necessary across calls.
|
||||
Instruction encoding
|
||||
====================
|
||||
|
||||
eBPF uses 64-bit instructions with the following encoding:
|
||||
eBPF has two instruction encodings:
|
||||
|
||||
* the basic instruction encoding, which uses 64 bits to encode an instruction
|
||||
* the wide instruction encoding, which appends a second 64-bit immediate value
|
||||
(imm64) after the basic instruction for a total of 128 bits.
|
||||
|
||||
The basic instruction encoding looks as follows:
|
||||
|
||||
============= ======= =============== ==================== ============
|
||||
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
|
||||
@@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
|
||||
otherwise identical operations.
|
||||
The code field encodes the operation as below:
|
||||
|
||||
======== ===== ==========================
|
||||
======== ===== =================================================
|
||||
code value description
|
||||
======== ===== ==========================
|
||||
======== ===== =================================================
|
||||
BPF_ADD 0x00 dst += src
|
||||
BPF_SUB 0x10 dst -= src
|
||||
BPF_MUL 0x20 dst \*= src
|
||||
@@ -98,8 +104,8 @@ The code field encodes the operation as below:
|
||||
BPF_XOR 0xa0 dst ^= src
|
||||
BPF_MOV 0xb0 dst = src
|
||||
BPF_ARSH 0xc0 sign extending shift right
|
||||
BPF_END 0xd0 endianness conversion
|
||||
======== ===== ==========================
|
||||
BPF_END 0xd0 byte swap operations (see separate section below)
|
||||
======== ===== =================================================
|
||||
|
||||
BPF_ADD | BPF_X | BPF_ALU means::
|
||||
|
||||
@@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
|
||||
src_reg = src_reg ^ imm32
|
||||
|
||||
|
||||
Byte swap instructions
|
||||
----------------------
|
||||
|
||||
The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
|
||||
code field of ``BPF_END``.
|
||||
|
||||
The byte swap instructions operate on the destination register
|
||||
only and do not use a separate source register or immediate value.
|
||||
|
||||
The 1-bit source operand field in the opcode is used to select what byte
|
||||
order the operation converts from or to:
|
||||
|
||||
========= ===== =================================================
|
||||
source value description
|
||||
========= ===== =================================================
|
||||
BPF_TO_LE 0x00 convert between host byte order and little endian
|
||||
BPF_TO_BE 0x08 convert between host byte order and big endian
|
||||
========= ===== =================================================
|
||||
|
||||
The imm field encodes the width of the swap operations. The following widths
|
||||
are supported: 16, 32 and 64.
|
||||
|
||||
Examples:
|
||||
|
||||
``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means::
|
||||
|
||||
dst_reg = htole16(dst_reg)
|
||||
|
||||
``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means::
|
||||
|
||||
dst_reg = htobe64(dst_reg)
|
||||
|
||||
``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and
|
||||
``BPF_TO_BE`` respectively.
|
||||
|
||||
|
||||
Jump instructions
|
||||
-----------------
|
||||
|
||||
@@ -176,63 +218,96 @@ The mode modifier is one of:
|
||||
============= ===== ====================================
|
||||
mode modifier value description
|
||||
============= ===== ====================================
|
||||
BPF_IMM 0x00 used for 64-bit mov
|
||||
BPF_ABS 0x20 legacy BPF packet access
|
||||
BPF_IND 0x40 legacy BPF packet access
|
||||
BPF_MEM 0x60 all normal load and store operations
|
||||
BPF_IMM 0x00 64-bit immediate instructions
|
||||
BPF_ABS 0x20 legacy BPF packet access (absolute)
|
||||
BPF_IND 0x40 legacy BPF packet access (indirect)
|
||||
BPF_MEM 0x60 regular load and store operations
|
||||
BPF_ATOMIC 0xc0 atomic operations
|
||||
============= ===== ====================================
|
||||
|
||||
BPF_MEM | <size> | BPF_STX means::
|
||||
|
||||
Regular load and store operations
|
||||
---------------------------------
|
||||
|
||||
The ``BPF_MEM`` mode modifier is used to encode regular load and store
|
||||
instructions that transfer data between a register and memory.
|
||||
|
||||
``BPF_MEM | <size> | BPF_STX`` means::
|
||||
|
||||
*(size *) (dst_reg + off) = src_reg
|
||||
|
||||
BPF_MEM | <size> | BPF_ST means::
|
||||
``BPF_MEM | <size> | BPF_ST`` means::
|
||||
|
||||
*(size *) (dst_reg + off) = imm32
|
||||
|
||||
BPF_MEM | <size> | BPF_LDX means::
|
||||
``BPF_MEM | <size> | BPF_LDX`` means::
|
||||
|
||||
dst_reg = *(size *) (src_reg + off)
|
||||
|
||||
Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
|
||||
Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``.
|
||||
|
||||
Atomic operations
|
||||
-----------------
|
||||
|
||||
eBPF includes atomic operations, which use the immediate field for extra
|
||||
encoding::
|
||||
Atomic operations are operations that operate on memory and can not be
|
||||
interrupted or corrupted by other access to the same memory region
|
||||
by other eBPF programs or means outside of this specification.
|
||||
|
||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
|
||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
|
||||
All atomic operations supported by eBPF are encoded as store operations
|
||||
that use the ``BPF_ATOMIC`` mode modifier as follows:
|
||||
|
||||
The basic atomic operations supported are::
|
||||
* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
|
||||
* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
|
||||
* 8-bit and 16-bit wide atomic operations are not supported.
|
||||
|
||||
BPF_ADD
|
||||
BPF_AND
|
||||
BPF_OR
|
||||
BPF_XOR
|
||||
The imm field is used to encode the actual atomic operation.
|
||||
Simple atomic operations use a subset of the values defined to encode
|
||||
arithmetic operations in the imm field to encode the atomic operation:
|
||||
|
||||
Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
|
||||
memory location addresed by ``dst_reg + off`` is atomically modified, with
|
||||
``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
|
||||
immediate, then these operations also overwrite ``src_reg`` with the
|
||||
value that was in memory before it was modified.
|
||||
======== ===== ===========
|
||||
imm value description
|
||||
======== ===== ===========
|
||||
BPF_ADD 0x00 atomic add
|
||||
BPF_OR 0x40 atomic or
|
||||
BPF_AND 0x50 atomic and
|
||||
BPF_XOR 0xa0 atomic xor
|
||||
======== ===== ===========
|
||||
|
||||
The more special operations are::
|
||||
|
||||
BPF_XCHG
|
||||
``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means::
|
||||
|
||||
This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
|
||||
off``. ::
|
||||
*(u32 *)(dst_reg + off16) += src_reg
|
||||
|
||||
BPF_CMPXCHG
|
||||
``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF_ADD means::
|
||||
|
||||
This atomically compares the value addressed by ``dst_reg + off`` with
|
||||
``R0``. If they match it is replaced with ``src_reg``. In either case, the
|
||||
value that was there before is zero-extended and loaded back to ``R0``.
|
||||
*(u64 *)(dst_reg + off16) += src_reg
|
||||
|
||||
Note that 1 and 2 byte atomic operations are not supported.
|
||||
``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``.
|
||||
|
||||
In addition to the simple atomic operations, there also is a modifier and
|
||||
two complex atomic operations:
|
||||
|
||||
=========== ================ ===========================
|
||||
imm value description
|
||||
=========== ================ ===========================
|
||||
BPF_FETCH 0x01 modifier: return old value
|
||||
BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
|
||||
BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
|
||||
=========== ================ ===========================
|
||||
|
||||
The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
|
||||
always set for the complex atomic operations. If the ``BPF_FETCH`` flag
|
||||
is set, then the operation also overwrites ``src_reg`` with the value that
|
||||
was in memory before it was modified.
|
||||
|
||||
The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value
|
||||
addressed by ``dst_reg + off``.
|
||||
|
||||
The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
|
||||
``dst_reg + off`` with ``R0``. If they match, the value addressed by
|
||||
``dst_reg + off`` is replaced with ``src_reg``. In either case, the
|
||||
value that was at ``dst_reg + off`` before the operation is zero-extended
|
||||
and loaded back to ``R0``.
|
||||
|
||||
Clang can generate atomic instructions by default when ``-mcpu=v3`` is
|
||||
enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
|
||||
@@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable
|
||||
the atomics features, while keeping a lower ``-mcpu`` version, you can use
|
||||
``-Xclang -target-feature -Xclang +alu32``.
|
||||
|
||||
You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
|
||||
referring to the exclusive-add operation encoded when the immediate field is
|
||||
zero.
|
||||
64-bit immediate instructions
|
||||
-----------------------------
|
||||
|
||||
16-byte instructions
|
||||
--------------------
|
||||
Instructions with the ``BPF_IMM`` mode modifier use the wide instruction
|
||||
encoding for an extra imm64 value.
|
||||
|
||||
eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
|
||||
of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
|
||||
instruction that loads 64-bit immediate value into a dst_reg.
|
||||
There is currently only one such instruction.
|
||||
|
||||
Packet access instructions
|
||||
--------------------------
|
||||
``BPF_LD | BPF_DW | BPF_IMM`` means::
|
||||
|
||||
eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
|
||||
(BPF_IND | <size> | BPF_LD) which are used to access packet data.
|
||||
dst_reg = imm64
|
||||
|
||||
They had to be carried over from classic BPF to have strong performance of
|
||||
socket filters running in eBPF interpreter. These instructions can only
|
||||
be used when interpreter context is a pointer to ``struct sk_buff`` and
|
||||
have seven implicit operands. Register R6 is an implicit input that must
|
||||
contain pointer to sk_buff. Register R0 is an implicit output which contains
|
||||
the data fetched from the packet. Registers R1-R5 are scratch registers
|
||||
and must not be used to store the data across BPF_ABS | BPF_LD or
|
||||
BPF_IND | BPF_LD instructions.
|
||||
|
||||
These instructions have implicit program exit condition as well. When
|
||||
eBPF program is trying to access the data beyond the packet boundary,
|
||||
the interpreter will abort the execution of the program. JIT compilers
|
||||
therefore must preserve this property. src_reg and imm32 fields are
|
||||
explicit inputs to these instructions.
|
||||
Legacy BPF Packet access instructions
|
||||
-------------------------------------
|
||||
|
||||
For example, BPF_IND | BPF_W | BPF_LD means::
|
||||
eBPF has special instructions for access to packet data that have been
|
||||
carried over from classic BPF to retain the performance of legacy socket
|
||||
filters running in the eBPF interpreter.
|
||||
|
||||
The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and
|
||||
``BPF_IND | <size> | BPF_LD``.
|
||||
|
||||
These instructions are used to access packet data and can only be used when
|
||||
the program context is a pointer to a networking packet. ``BPF_ABS``
|
||||
accesses packet data at an absolute offset specified by the immediate data
|
||||
and ``BPF_IND`` accesses packet data at an offset that includes the value of
|
||||
a register in addition to the immediate data.
|
||||
|
||||
These instructions have seven implicit operands:
|
||||
|
||||
* Register R6 is an implicit input that must contain a pointer to a
|
||||
struct sk_buff.
|
||||
* Register R0 is an implicit output which contains the data fetched from
|
||||
the packet.
|
||||
* Registers R1-R5 are scratch registers that are clobbered after a call to
|
||||
``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
|
||||
|
||||
These instructions have an implicit program exit condition as well. When an
|
||||
eBPF program is trying to access the data beyond the packet boundary, the
|
||||
program execution will be aborted.
|
||||
|
||||
``BPF_ABS | BPF_W | BPF_LD`` means::
|
||||
|
||||
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32))
|
||||
|
||||
``BPF_IND | BPF_W | BPF_LD`` means::
|
||||
|
||||
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
|
||||
|
||||
and R1 - R5 are clobbered.
|
||||
|
||||
@@ -329,7 +329,7 @@ Program with unreachable instructions::
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
|
||||
Error:
|
||||
Error::
|
||||
|
||||
unreachable insn 1
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ Getting started quick
|
||||
- Compile and install kernel and modules, reboot.
|
||||
|
||||
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
|
||||
Download from http://sourceforge.net/projects/linux-udf/
|
||||
Download from https://github.com/pali/udftools
|
||||
|
||||
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
|
||||
as appropriate)::
|
||||
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
|
||||
|
||||
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
|
||||
and can be controlled by it. For example the "pktcdvd" tool uses
|
||||
this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
|
||||
this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
|
||||
|
||||
"pktcdvd" works similar to "pktsetup", e.g.::
|
||||
|
||||
|
||||
@@ -409,135 +409,25 @@ latex_elements = {
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
'preamble': '''
|
||||
% Prevent column squeezing of tabulary.
|
||||
\\setlength{\\tymin}{20em}
|
||||
% Use some font with UTF-8 support with XeLaTeX
|
||||
\\usepackage{fontspec}
|
||||
\\setsansfont{DejaVu Sans}
|
||||
\\setromanfont{DejaVu Serif}
|
||||
\\setmonofont{DejaVu Sans Mono}
|
||||
% Adjust \\headheight for fancyhdr
|
||||
\\addtolength{\\headheight}{1.6pt}
|
||||
\\addtolength{\\topmargin}{-1.6pt}
|
||||
''',
|
||||
''',
|
||||
}
|
||||
|
||||
# Translations have Asian (CJK) characters which are only displayed if
|
||||
# xeCJK is used
|
||||
|
||||
latex_elements['preamble'] += '''
|
||||
\\IfFontExistsTF{Noto Sans CJK SC}{
|
||||
% This is needed for translations
|
||||
\\usepackage{xeCJK}
|
||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
||||
\\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
}{
|
||||
\\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
}
|
||||
\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
||||
% CJK Language-specific font choices
|
||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK TC}{
|
||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK KR}{
|
||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK JP}{
|
||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
|
||||
% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
|
||||
\\providecommand{\\onehalfspacing}{}
|
||||
\\providecommand{\\singlespacing}{}
|
||||
% Define custom macros to on/off CJK
|
||||
\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
|
||||
\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
|
||||
\\newcommand{\\kerneldocBeginSC}{%
|
||||
\\begingroup%
|
||||
\\scmain%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndSC}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginTC}{%
|
||||
\\begingroup%
|
||||
\\tcmain%
|
||||
\\renewcommand{\\CJKrmdefault}{TCserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{TCsans}%
|
||||
\\renewcommand{\\CJKttdefault}{TCmono}%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndTC}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginKR}{%
|
||||
\\begingroup%
|
||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
||||
\\krmain%
|
||||
\\renewcommand{\\CJKrmdefault}{KRserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{KRsans}%
|
||||
\\renewcommand{\\CJKttdefault}{KRmono}%
|
||||
\\xeCJKsetup{CJKspace = true} % For inter-phrase space
|
||||
}
|
||||
\\newcommand{\\kerneldocEndKR}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginJP}{%
|
||||
\\begingroup%
|
||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
||||
\\jpmain%
|
||||
\\renewcommand{\\CJKrmdefault}{JPserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{JPsans}%
|
||||
\\renewcommand{\\CJKttdefault}{JPmono}%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndJP}{\\endgroup}
|
||||
% Single spacing in literal blocks
|
||||
\\fvset{baselinestretch=1}
|
||||
% To customize \\sphinxtableofcontents
|
||||
\\usepackage{etoolbox}
|
||||
% Inactivate CJK after tableofcontents
|
||||
\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
|
||||
}{ % No CJK font found
|
||||
% Custom macros to on/off CJK (Dummy)
|
||||
\\newcommand{\\kerneldocCJKon}{}
|
||||
\\newcommand{\\kerneldocCJKoff}{}
|
||||
\\newcommand{\\kerneldocBeginSC}{}
|
||||
\\newcommand{\\kerneldocEndSC}{}
|
||||
\\newcommand{\\kerneldocBeginTC}{}
|
||||
\\newcommand{\\kerneldocEndTC}{}
|
||||
\\newcommand{\\kerneldocBeginKR}{}
|
||||
\\newcommand{\\kerneldocEndKR}{}
|
||||
\\newcommand{\\kerneldocBeginJP}{}
|
||||
\\newcommand{\\kerneldocEndJP}{}
|
||||
}
|
||||
'''
|
||||
|
||||
# Fix reference escape troubles with Sphinx 1.4.x
|
||||
if major == 1:
|
||||
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
|
||||
|
||||
|
||||
# Load kerneldoc specific LaTeX settings
|
||||
latex_elements['preamble'] += '''
|
||||
% Load kerneldoc specific LaTeX settings
|
||||
\\input{kerneldoc-preamble.sty}
|
||||
'''
|
||||
|
||||
# With Sphinx 1.6, it is possible to change the Bg color directly
|
||||
# by using:
|
||||
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
|
||||
@@ -599,6 +489,11 @@ for fn in os.listdir('.'):
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
# Additional LaTeX stuff to be copied to build directory
|
||||
latex_additional_files = [
|
||||
'sphinx/kerneldoc-preamble.sty',
|
||||
]
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
|
||||
279
Documentation/core-api/entry.rst
Normal file
279
Documentation/core-api/entry.rst
Normal file
@@ -0,0 +1,279 @@
|
||||
Entry/exit handling for exceptions, interrupts, syscalls and KVM
|
||||
================================================================
|
||||
|
||||
All transitions between execution domains require state updates which are
|
||||
subject to strict ordering constraints. State updates are required for the
|
||||
following:
|
||||
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Preemption counter
|
||||
* Tracing
|
||||
* Time accounting
|
||||
|
||||
The update order depends on the transition type and is explained below in
|
||||
the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
|
||||
exceptions`_, `NMI and NMI-like exceptions`_.
|
||||
|
||||
Non-instrumentable code - noinstr
|
||||
---------------------------------
|
||||
|
||||
Most instrumentation facilities depend on RCU, so instrumentation is prohibited
|
||||
for entry code before RCU starts watching and exit code after RCU stops
|
||||
watching. In addition, many architectures must save and restore register state,
|
||||
which means that (for example) a breakpoint in the breakpoint entry code would
|
||||
overwrite the debug registers of the initial breakpoint.
|
||||
|
||||
Such code must be marked with the 'noinstr' attribute, placing that code into a
|
||||
special section inaccessible to instrumentation and debug facilities. Some
|
||||
functions are partially instrumentable, which is handled by marking them
|
||||
noinstr and using instrumentation_begin() and instrumentation_end() to flag the
|
||||
instrumentable ranges of code:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void entry(void)
|
||||
{
|
||||
handle_entry(); // <-- must be 'noinstr' or '__always_inline'
|
||||
...
|
||||
|
||||
instrumentation_begin();
|
||||
handle_context(); // <-- instrumentable code
|
||||
instrumentation_end();
|
||||
|
||||
...
|
||||
handle_exit(); // <-- must be 'noinstr' or '__always_inline'
|
||||
}
|
||||
|
||||
This allows verification of the 'noinstr' restrictions via objtool on
|
||||
supported architectures.
|
||||
|
||||
Invoking non-instrumentable functions from instrumentable context has no
|
||||
restrictions and is useful to protect e.g. state switching which would
|
||||
cause malfunction if instrumented.
|
||||
|
||||
All non-instrumentable entry/exit code sections before and after the RCU
|
||||
state transitions must run with interrupts disabled.
|
||||
|
||||
Syscalls
|
||||
--------
|
||||
|
||||
Syscall-entry code starts in assembly code and calls out into low-level C code
|
||||
after establishing low-level architecture-specific state and stack frames. This
|
||||
low-level C code must not be instrumented. A typical syscall handling function
|
||||
invoked from low-level assembly code looks like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void syscall(struct pt_regs *regs, int nr)
|
||||
{
|
||||
arch_syscall_enter(regs);
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
instrumentation_begin();
|
||||
if (!invoke_syscall(regs, nr) && nr != -1)
|
||||
result_reg(regs) = __sys_ni_syscall(regs);
|
||||
instrumentation_end();
|
||||
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
|
||||
establishes state in the following order:
|
||||
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Tracing
|
||||
|
||||
and then invokes the various entry work functions like ptrace, seccomp, audit,
|
||||
syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
|
||||
function can be invoked. The instrumentable code section then ends, after which
|
||||
syscall_exit_to_user_mode() is invoked.
|
||||
|
||||
syscall_exit_to_user_mode() handles all work which needs to be done before
|
||||
returning to user space like tracing, audit, signals, task work etc. After
|
||||
that it invokes exit_to_user_mode() which again handles the state
|
||||
transition in the reverse order:
|
||||
|
||||
* Tracing
|
||||
* RCU / Context tracking
|
||||
* Lockdep
|
||||
|
||||
syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
|
||||
available as fine grained subfunctions in cases where the architecture code
|
||||
has to do extra work between the various steps. In such cases it has to
|
||||
ensure that enter_from_user_mode() is called first on entry and
|
||||
exit_to_user_mode() is called last on exit.
|
||||
|
||||
Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
|
||||
to print a warning.
|
||||
|
||||
KVM
|
||||
---
|
||||
|
||||
Entering or exiting guest mode is very similar to syscalls. From the host
|
||||
kernel point of view the CPU goes off into user space when entering the
|
||||
guest and returns to the kernel on exit.
|
||||
|
||||
kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
|
||||
and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
|
||||
The state operations have the same ordering.
|
||||
|
||||
Task work handling is done separately for guest at the boundary of the
|
||||
vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
|
||||
the work handled on return to user space.
|
||||
|
||||
Do not nest KVM entry/exit transitions because doing so is nonsensical.
|
||||
|
||||
Interrupts and regular exceptions
|
||||
---------------------------------
|
||||
|
||||
Interrupt entry and exit handling is slightly more complex than syscalls
|
||||
and KVM transitions.
|
||||
|
||||
If an interrupt is raised while the CPU executes in user space, the entry
|
||||
and exit handling is exactly the same as for syscalls.
|
||||
|
||||
If the interrupt is raised while the CPU executes in kernel space the entry and
|
||||
exit handling is slightly different. RCU state is only updated when the
|
||||
interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
|
||||
already be watching. Lockdep and tracing have to be updated unconditionally.
|
||||
|
||||
irqentry_enter() and irqentry_exit() provide the implementation for this.
|
||||
|
||||
The architecture-specific part looks similar to syscall handling:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void interrupt(struct pt_regs *regs, int nr)
|
||||
{
|
||||
arch_interrupt_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
|
||||
irq_enter_rcu();
|
||||
invoke_irq_handler(regs, nr);
|
||||
irq_exit_rcu();
|
||||
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_exit(regs, state);
|
||||
}
|
||||
|
||||
Note that the invocation of the actual interrupt handler is within an
|
||||
irq_enter_rcu() and irq_exit_rcu() pair.
|
||||
|
||||
irq_enter_rcu() updates the preemption count which makes in_hardirq()
|
||||
return true, handles NOHZ tick state and interrupt time accounting. This
|
||||
means that up to the point where irq_enter_rcu() is invoked in_hardirq()
|
||||
returns false.
|
||||
|
||||
irq_exit_rcu() handles interrupt time accounting, undoes the preemption
|
||||
count update and eventually handles soft interrupts and NOHZ tick state.
|
||||
|
||||
In theory, the preemption count could be updated in irqentry_enter(). In
|
||||
practice, deferring this update to irq_enter_rcu() allows the preemption-count
|
||||
code to be traced, while also maintaining symmetry with irq_exit_rcu() and
|
||||
irqentry_exit(), which are described in the next paragraph. The only downside
|
||||
is that the early entry code up to irq_enter_rcu() must be aware that the
|
||||
preemption count has not yet been updated with the HARDIRQ_OFFSET state.
|
||||
|
||||
Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
|
||||
before it handles soft interrupts, whose handlers must run in BH context rather
|
||||
than irq-disabled context. In addition, irqentry_exit() might schedule, which
|
||||
also requires that HARDIRQ_OFFSET has been removed from the preemption count.
|
||||
|
||||
Even though interrupt handlers are expected to run with local interrupts
|
||||
disabled, interrupt nesting is common from an entry/exit perspective. For
|
||||
example, softirq handling happens within an irqentry_{enter,exit}() block with
|
||||
local interrupts enabled. Also, although uncommon, nothing prevents an
|
||||
interrupt handler from re-enabling interrupts.
|
||||
|
||||
Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
|
||||
runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
|
||||
the entry code is shared between the two.
|
||||
|
||||
NMI and NMI-like exceptions
|
||||
---------------------------
|
||||
|
||||
NMIs and NMI-like exceptions (machine checks, double faults, debug
|
||||
interrupts, etc.) can hit any context and must be extra careful with
|
||||
the state.
|
||||
|
||||
State changes for debug exceptions and machine-check exceptions depend on
|
||||
whether these exceptions happened in user-space (breakpoints or watchpoints) or
|
||||
in kernel mode (code patching). From user-space, they are treated like
|
||||
interrupts, while from kernel mode they are treated like NMIs.
|
||||
|
||||
NMIs and other NMI-like exceptions handle state transitions without
|
||||
distinguishing between user-mode and kernel-mode origin.
|
||||
|
||||
The state update on entry is handled in irqentry_nmi_enter() which updates
|
||||
state in the following order:
|
||||
|
||||
* Preemption counter
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Tracing
|
||||
|
||||
The exit counterpart irqentry_nmi_exit() does the reverse operation in the
|
||||
reverse order.
|
||||
|
||||
Note that the update of the preemption counter has to be the first
|
||||
operation on enter and the last operation on exit. The reason is that both
|
||||
lockdep and RCU rely on in_nmi() returning true in this case. The
|
||||
preemption count modification in the NMI entry/exit case must not be
|
||||
traced.
|
||||
|
||||
Architecture-specific code looks like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void nmi(struct pt_regs *regs)
|
||||
{
|
||||
arch_nmi_enter(regs);
|
||||
state = irqentry_nmi_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
nmi_handler(regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_nmi_exit(regs, state);
|
||||
}
|
||||
|
||||
and for e.g. a debug exception it can look like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void debug(struct pt_regs *regs)
|
||||
{
|
||||
arch_nmi_enter(regs);
|
||||
|
||||
debug_regs = save_debug_regs();
|
||||
|
||||
if (user_mode(regs)) {
|
||||
state = irqentry_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
user_mode_debug_handler(regs, debug_regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_exit(regs, state);
|
||||
} else {
|
||||
state = irqentry_nmi_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
kernel_mode_debug_handler(regs, debug_regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_nmi_exit(regs, state);
|
||||
}
|
||||
}
|
||||
|
||||
There is no combined irqentry_nmi_if_kernel() function available as the
|
||||
above cannot be handled in an exception-agnostic way.
|
||||
|
||||
NMIs can happen in any context. For example, an NMI-like exception triggered
|
||||
while handling an NMI. So NMI entry code has to be reentrant and state updates
|
||||
need to handle nesting.
|
||||
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
|
||||
timekeeping
|
||||
errseq
|
||||
|
||||
Low level entry and exit
|
||||
========================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
entry
|
||||
|
||||
Concurrency primitives
|
||||
======================
|
||||
|
||||
|
||||
@@ -58,15 +58,30 @@ Virtually Contiguous Mappings
|
||||
File Mapping and Page Cache
|
||||
===========================
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:export:
|
||||
Filemap
|
||||
-------
|
||||
|
||||
.. kernel-doc:: mm/filemap.c
|
||||
:export:
|
||||
|
||||
Readahead
|
||||
---------
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:doc: Readahead Overview
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:export:
|
||||
|
||||
Writeback
|
||||
---------
|
||||
|
||||
.. kernel-doc:: mm/page-writeback.c
|
||||
:export:
|
||||
|
||||
Truncate
|
||||
--------
|
||||
|
||||
.. kernel-doc:: mm/truncate.c
|
||||
:export:
|
||||
|
||||
|
||||
@@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct
|
||||
pages* array, and the function then pins pages by incrementing each by a special
|
||||
value: GUP_PIN_COUNTING_BIAS.
|
||||
|
||||
For huge pages (and in fact, any compound page of more than 2 pages), the
|
||||
GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
|
||||
is achieved, by using the 3rd struct page in the compound page. A new struct
|
||||
page field, hpage_pinned_refcount, has been added in order to support this.
|
||||
For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
|
||||
an exact form of pin counting is achieved, by using the 2nd struct page
|
||||
in the compound page. A new struct page field, compound_pincount, has
|
||||
been added in order to support this.
|
||||
|
||||
This approach for compound pages avoids the counting upper limit problems that
|
||||
are discussed below. Those limitations would have been aggravated severely by
|
||||
huge pages, because each tail page adds a refcount to the head page. And in
|
||||
fact, testing revealed that, without a separate hpage_pinned_refcount field,
|
||||
fact, testing revealed that, without a separate compound_pincount field,
|
||||
page overflows were seen in some huge page stress tests.
|
||||
|
||||
This also means that huge pages and compound pages (of order > 1) do not suffer
|
||||
This also means that huge pages and compound pages do not suffer
|
||||
from the false positives problem that is mentioned below.::
|
||||
|
||||
Function
|
||||
@@ -264,9 +264,9 @@ place.)
|
||||
Other diagnostics
|
||||
=================
|
||||
|
||||
dump_page() has been enhanced slightly, to handle these new counting fields, and
|
||||
to better report on compound pages in general. Specifically, for compound pages
|
||||
with order > 1, the exact (hpage_pinned_refcount) pincount is reported.
|
||||
dump_page() has been enhanced slightly, to handle these new counting
|
||||
fields, and to better report on compound pages in general. Specifically,
|
||||
for compound pages, the exact (compound_pincount) pincount is reported.
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
@@ -315,11 +315,15 @@ indeed the normal API is implemented in terms of the advanced API. The
|
||||
advanced API is only available to modules with a GPL-compatible license.
|
||||
|
||||
The advanced API is based around the xa_state. This is an opaque data
|
||||
structure which you declare on the stack using the XA_STATE()
|
||||
macro. This macro initialises the xa_state ready to start walking
|
||||
around the XArray. It is used as a cursor to maintain the position
|
||||
in the XArray and let you compose various operations together without
|
||||
having to restart from the top every time.
|
||||
structure which you declare on the stack using the XA_STATE() macro.
|
||||
This macro initialises the xa_state ready to start walking around the
|
||||
XArray. It is used as a cursor to maintain the position in the XArray
|
||||
and let you compose various operations together without having to restart
|
||||
from the top every time. The contents of the xa_state are protected by
|
||||
the rcu_read_lock() or the xas_lock(). If you need to drop whichever of
|
||||
those locks is protecting your state and tree, you must call xas_pause()
|
||||
so that future calls do not rely on the parts of the state which were
|
||||
left unprotected.
|
||||
|
||||
The xa_state is also used to store errors. You can call
|
||||
xas_error() to retrieve the error. All operations check whether
|
||||
|
||||
@@ -75,6 +75,9 @@ And optionally
|
||||
.resume - A pointer to a per-policy resume function which is called
|
||||
with interrupts disabled and _before_ the governor is started again.
|
||||
|
||||
.ready - A pointer to a per-policy ready function which is called after
|
||||
the policy is fully initialized.
|
||||
|
||||
.attr - A pointer to a NULL-terminated list of "struct freq_attr" which
|
||||
allow to export values to sysfs.
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ Software tag-based KASAN mode is only supported in Clang.
|
||||
|
||||
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
||||
still requires a compiler version that supports memory tagging instructions.
|
||||
This mode is supported in GCC 10+ and Clang 11+.
|
||||
This mode is supported in GCC 10+ and Clang 12+.
|
||||
|
||||
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
||||
while the hardware tag-based KASAN currently only supports SLUB.
|
||||
@@ -206,6 +206,9 @@ additional boot parameters that allow disabling KASAN or controlling features:
|
||||
Asymmetric mode: a bad access is detected synchronously on reads and
|
||||
asynchronously on writes.
|
||||
|
||||
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
|
||||
allocations (default: ``on``).
|
||||
|
||||
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
|
||||
traces collection (default: ``on``).
|
||||
|
||||
@@ -279,8 +282,8 @@ Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Software tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Software tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and vmalloc memory.
|
||||
|
||||
Hardware tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -303,8 +306,8 @@ Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Hardware tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and VM_ALLOC-based vmalloc memory.
|
||||
|
||||
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
||||
@@ -319,6 +322,8 @@ checking gets disabled.
|
||||
Shadow memory
|
||||
-------------
|
||||
|
||||
The contents of this section are only applicable to software KASAN modes.
|
||||
|
||||
The kernel maps memory in several different parts of the address space.
|
||||
The range of kernel virtual addresses is large: there is not enough real
|
||||
memory to support a real shadow region for every address that could be
|
||||
@@ -349,7 +354,7 @@ CONFIG_KASAN_VMALLOC
|
||||
|
||||
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
||||
cost of greater memory usage. Currently, this is supported on x86,
|
||||
riscv, s390, and powerpc.
|
||||
arm64, riscv, s390, and powerpc.
|
||||
|
||||
This works by hooking into vmalloc and vmap and dynamically
|
||||
allocating real shadow memory to back the mappings.
|
||||
|
||||
@@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option
|
||||
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
|
||||
disables KFENCE.
|
||||
|
||||
The sample interval controls a timer that sets up KFENCE allocations. By
|
||||
default, to keep the real sample interval predictable, the normal timer also
|
||||
causes CPU wake-ups when the system is completely idle. This may be undesirable
|
||||
on power-constrained systems. The boot parameter ``kfence.deferrable=1``
|
||||
instead switches to a "deferrable" timer which does not force CPU wake-ups on
|
||||
idle systems, at the risk of unpredictable sample intervals. The default is
|
||||
configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
|
||||
|
||||
.. warning::
|
||||
The KUnit test suite is very likely to fail when using a deferrable timer
|
||||
since it currently causes very unpredictable sample intervals.
|
||||
|
||||
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
|
||||
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
|
||||
255), the number of available guarded objects can be controlled. Each object
|
||||
|
||||
@@ -7,6 +7,14 @@ directory. These are intended to be small tests to exercise individual code
|
||||
paths in the kernel. Tests are intended to be run after building, installing
|
||||
and booting a kernel.
|
||||
|
||||
Kselftest from mainline can be run on older stable kernels. Running tests
|
||||
from mainline offers the best coverage. Several test rings run mainline
|
||||
kselftest suite on stable releases. The reason is that when a new test
|
||||
gets added to test existing code to regression test a bug, we should be
|
||||
able to run that test on an older kernel. Hence, it is important to keep
|
||||
code that can still test an older kernel and make sure it skips the test
|
||||
gracefully on newer releases.
|
||||
|
||||
You can find additional information on Kselftest framework, how to
|
||||
write new tests using the framework on Kselftest wiki:
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========================================
|
||||
The Kernel Test Anything Protocol (KTAP)
|
||||
========================================
|
||||
===================================================
|
||||
The Kernel Test Anything Protocol (KTAP), version 1
|
||||
===================================================
|
||||
|
||||
TAP, or the Test Anything Protocol is a format for specifying test results used
|
||||
by a number of projects. Its website and specification are found at this `link
|
||||
@@ -68,7 +68,7 @@ Test case result lines
|
||||
Test case result lines indicate the final status of a test.
|
||||
They are required and must have the format:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
|
||||
|
||||
@@ -117,32 +117,32 @@ separator.
|
||||
|
||||
Example result lines include:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 1 test_case_name
|
||||
|
||||
The test "test_case_name" passed.
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
not ok 1 test_case_name
|
||||
|
||||
The test "test_case_name" failed.
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 1 test # SKIP necessary dependency unavailable
|
||||
|
||||
The test "test" was SKIPPED with the diagnostic message "necessary dependency
|
||||
unavailable".
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
not ok 1 test # TIMEOUT 30 seconds
|
||||
|
||||
The test "test" timed out, with diagnostic data "30 seconds".
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 5 check return code # rcode=0
|
||||
|
||||
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
|
||||
the four formats for lines described above. This is allowed, however, they will
|
||||
not influence the status of the tests.
|
||||
|
||||
This is an important difference from TAP. Kernel tests may print messages
|
||||
to the system console or a log file. Both of these destinations may contain
|
||||
messages either from unrelated kernel or userspace activity, or kernel
|
||||
messages from non-test code that is invoked by the test. The kernel code
|
||||
invoked by the test likely is not aware that a test is in progress and
|
||||
thus cannot print the message as a diagnostic message.
|
||||
|
||||
Nested tests
|
||||
------------
|
||||
|
||||
@@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall
|
||||
result. If one of the subtests fails, for example, the parent test should also
|
||||
fail.
|
||||
|
||||
Additionally, all result lines in a subtest should be indented. One level of
|
||||
Additionally, all lines in a subtest should be indented. One level of
|
||||
indentation is two spaces: " ". The indentation should begin at the version
|
||||
line and should end before the parent test's result line.
|
||||
|
||||
"Unknown lines" are not considered to be lines in a subtest and thus are
|
||||
allowed to be either indented or not indented.
|
||||
|
||||
An example of a test with two nested subtests:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..1
|
||||
@@ -205,7 +215,7 @@ An example of a test with two nested subtests:
|
||||
|
||||
An example format with multiple levels of nested testing:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..2
|
||||
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
|
||||
Major differences between TAP and KTAP
|
||||
--------------------------------------
|
||||
|
||||
Note the major differences between the TAP and KTAP specification:
|
||||
- yaml and json are not recommended in diagnostic messages
|
||||
- TODO directive not recognized
|
||||
- KTAP allows for an arbitrary number of tests to be nested
|
||||
================================================== ========= ===============
|
||||
Feature TAP KTAP
|
||||
================================================== ========= ===============
|
||||
yaml and json in diagnostic message ok not recommended
|
||||
TODO directive ok not recognized
|
||||
allows an arbitrary number of tests to be nested no yes
|
||||
"Unknown lines" are in category of "Anything else" yes no
|
||||
"Unknown lines" are incorrect allowed
|
||||
================================================== ========= ===============
|
||||
|
||||
The TAP14 specification does permit nested tests, but instead of using another
|
||||
nested version line, uses a line of the form
|
||||
@@ -235,7 +250,7 @@ nested version line, uses a line of the form
|
||||
|
||||
Example KTAP output
|
||||
--------------------
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..1
|
||||
|
||||
@@ -26,10 +26,7 @@ The fundamental unit in KUnit is the test case. The KUnit test cases are
|
||||
grouped into KUnit suites. A KUnit test case is a function with type
|
||||
signature ``void (*)(struct kunit *test)``.
|
||||
These test case functions are wrapped in a struct called
|
||||
``struct kunit_case``. For code, see:
|
||||
|
||||
.. kernel-doc:: include/kunit/test.h
|
||||
:identifiers: kunit_case
|
||||
struct kunit_case.
|
||||
|
||||
.. note::
|
||||
``generate_params`` is optional for non-parameterized tests.
|
||||
@@ -152,18 +149,12 @@ Parameterized Tests
|
||||
Each KUnit parameterized test is associated with a collection of
|
||||
parameters. The test is invoked multiple times, once for each parameter
|
||||
value and the parameter is stored in the ``param_value`` field.
|
||||
The test case includes a ``KUNIT_CASE_PARAM()`` macro that accepts a
|
||||
The test case includes a KUNIT_CASE_PARAM() macro that accepts a
|
||||
generator function.
|
||||
The generator function is passed the previous parameter and returns the next
|
||||
parameter. It also provides a macro to generate common-case generators based on
|
||||
arrays.
|
||||
|
||||
For code, see:
|
||||
|
||||
.. kernel-doc:: include/kunit/test.h
|
||||
:identifiers: KUNIT_ARRAY_PARAM
|
||||
|
||||
|
||||
kunit_tool (Command Line Test Harness)
|
||||
======================================
|
||||
|
||||
|
||||
@@ -242,7 +242,7 @@ example:
|
||||
|
||||
int rectangle_area(struct shape *this)
|
||||
{
|
||||
struct rectangle *self = container_of(this, struct shape, parent);
|
||||
struct rectangle *self = container_of(this, struct rectangle, parent);
|
||||
|
||||
return self->length * self->width;
|
||||
};
|
||||
|
||||
@@ -100,3 +100,5 @@ have already built it.
|
||||
|
||||
The optional make variable CF can be used to pass arguments to sparse. The
|
||||
build system passes -Wbitwise to sparse automatically.
|
||||
|
||||
Note that sparse defines the __CHECKER__ preprocessor symbol.
|
||||
|
||||
@@ -3,9 +3,10 @@ DT_DOC_CHECKER ?= dt-doc-validate
|
||||
DT_EXTRACT_EX ?= dt-extract-example
|
||||
DT_MK_SCHEMA ?= dt-mk-schema
|
||||
|
||||
DT_SCHEMA_LINT = $(shell which yamllint)
|
||||
DT_SCHEMA_LINT = $(shell which yamllint || \
|
||||
echo "warning: python package 'yamllint' not installed, skipping" >&2)
|
||||
|
||||
DT_SCHEMA_MIN_VERSION = 2021.2.1
|
||||
DT_SCHEMA_MIN_VERSION = 2022.3
|
||||
|
||||
PHONY += check_dtschema_version
|
||||
check_dtschema_version:
|
||||
@@ -24,18 +25,11 @@ quiet_cmd_extract_ex = DTEX $@
|
||||
$(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
|
||||
$(call if_changed,extract_ex)
|
||||
|
||||
# Use full schemas when checking %.example.dts
|
||||
DT_TMP_SCHEMA := $(obj)/processed-schema-examples.json
|
||||
|
||||
find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
|
||||
-name 'processed-schema*' ! \
|
||||
-name '*.example.dt.yaml' \)
|
||||
-name 'processed-schema*' \)
|
||||
|
||||
ifeq ($(DT_SCHEMA_FILES),)
|
||||
find_cmd = $(find_all_cmd)
|
||||
else
|
||||
find_cmd = echo $(addprefix $(srctree)/, $(DT_SCHEMA_FILES))
|
||||
endif
|
||||
find_cmd = $(find_all_cmd) | grep -F "$(DT_SCHEMA_FILES)"
|
||||
CHK_DT_DOCS := $(shell $(find_cmd))
|
||||
|
||||
quiet_cmd_yamllint = LINT $(src)
|
||||
cmd_yamllint = ($(find_cmd) | \
|
||||
@@ -72,35 +66,14 @@ override DTC_FLAGS := \
|
||||
# Disable undocumented compatible checks until warning free
|
||||
override DT_CHECKER_FLAGS ?=
|
||||
|
||||
$(obj)/processed-schema-examples.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
||||
$(obj)/processed-schema.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
||||
$(call if_changed_rule,chkdt)
|
||||
|
||||
ifeq ($(DT_SCHEMA_FILES),)
|
||||
|
||||
# Unless DT_SCHEMA_FILES is specified, use the full schema for dtbs_check too.
|
||||
# Just copy processed-schema-examples.json
|
||||
|
||||
$(obj)/processed-schema.json: $(obj)/processed-schema-examples.json FORCE
|
||||
$(call if_changed,copy)
|
||||
|
||||
DT_SCHEMA_FILES = $(DT_DOCS)
|
||||
|
||||
else
|
||||
|
||||
# If DT_SCHEMA_FILES is specified, use it for processed-schema.json
|
||||
|
||||
$(obj)/processed-schema.json: DT_MK_SCHEMA_FLAGS := -u
|
||||
$(obj)/processed-schema.json: $(DT_SCHEMA_FILES) check_dtschema_version FORCE
|
||||
$(call if_changed,mk_schema)
|
||||
|
||||
endif
|
||||
|
||||
always-$(CHECK_DT_BINDING) += processed-schema-examples.json
|
||||
always-$(CHECK_DTBS) += processed-schema.json
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
|
||||
always-y += processed-schema.json
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dts, $(CHK_DT_DOCS))
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dtb, $(CHK_DT_DOCS))
|
||||
|
||||
# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
|
||||
# build artifacts here before they are processed by scripts/Makefile.clean
|
||||
clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
|
||||
-name '*.example.dt.yaml' \) -delete 2>/dev/null)
|
||||
-name '*.example.dtb' \) -delete 2>/dev/null)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user