Merge remote-tracking branch 'aosp/upstream-f2fs-stable-linux-4.19.y' into android-4.19

* aosp/upstream-f2fs-stable-linux-4.19.y:
  f2fs: add a condition to detect overflow in f2fs_ioc_gc_range()
  f2fs: fix to add missing F2FS_IO_ALIGNED() condition
  f2fs: fix to fallback to buffered IO in IO aligned mode
  f2fs: fix to handle error path correctly in f2fs_map_blocks
  f2fs: fix extent corrupotion during directIO in LFS mode
  f2fs: check all the data segments against all node ones
  f2fs: Add a small clarification to CONFIG_FS_F2FS_FS_SECURITY
  f2fs: fix inode rwsem regression
  f2fs: fix to avoid accessing uninitialized field of inode page in is_alive()
  f2fs: avoid infinite GC loop due to stale atomic files
  f2fs: Fix indefinite loop in f2fs_gc()
  f2fs: convert inline_data in prior to i_size_write
  f2fs: fix error path of f2fs_convert_inline_page()
  f2fs: add missing documents of reserve_root/resuid/resgid
  f2fs: fix flushing node pages when checkpoint is disabled
  f2fs: enhance f2fs_is_checkpoint_ready()'s readability
  f2fs: clean up __bio_alloc()'s parameter
  f2fs: fix wrong error injection path in inc_valid_block_count()
  f2fs: fix to writeout dirty inode during node flush
  f2fs: optimize case-insensitive lookups
  f2fs: introduce f2fs_match_name() for cleanup
  f2fs: Fix indefinite loop in f2fs_gc()
  f2fs: allocate memory in batch in build_sit_info()
  f2fs: support FS_IOC_{GET,SET}FSLABEL
  f2fs: fix to avoid data corruption by forbidding SSR overwrite
  f2fs: Fix build error while CONFIG_NLS=m
  Revert "f2fs: avoid out-of-range memory access"
  f2fs: cleanup the code in build_sit_entries.
  f2fs: fix wrong available node count calculation
  f2fs: remove duplicate code in f2fs_file_write_iter
  f2fs: fix to migrate blocks correctly during defragment
  f2fs: use wrapped f2fs_cp_error()
  f2fs: fix to use more generic EOPNOTSUPP
  f2fs: use wrapped IS_SWAPFILE()
  f2fs: Support case-insensitive file name lookups
  f2fs: include charset encoding information in the superblock
  fs: Reserve flag for casefolding
  f2fs: fix to avoid call kvfree under spinlock
  fs: f2fs: Remove unnecessary checks of SM_I(sbi) in update_general_status()
  f2fs: disallow direct IO in atomic write
  f2fs: fix to handle quota_{on,off} correctly
  f2fs: fix to detect cp error in f2fs_setxattr()
  f2fs: fix to spread f2fs_is_checkpoint_ready()
  f2fs: support fiemap() for directory inode
  f2fs: fix to avoid discard command leak
  f2fs: fix to avoid tagging SBI_QUOTA_NEED_REPAIR incorrectly
  f2fs: fix to drop meta/node pages during umount
  f2fs: disallow switching io_bits option during remount
  f2fs: fix panic of IO alignment feature
  f2fs: introduce {page,io}_is_mergeable() for readability
  f2fs: fix livelock in swapfile writes
  f2fs: add fs-verity support
  ext4: update on-disk format documentation for fs-verity
  ext4: add fs-verity read support
  ext4: add basic fs-verity support
  fs-verity: support builtin file signatures
  fs-verity: add SHA-512 support
  fs-verity: implement FS_IOC_MEASURE_VERITY ioctl
  fs-verity: implement FS_IOC_ENABLE_VERITY ioctl
  fs-verity: add data verification hooks for ->readpages()
  fs-verity: add the hook for file ->setattr()
  fs-verity: add the hook for file ->open()
  fs-verity: add inode and superblock fields
  fs-verity: add Kconfig and the helper functions for hashing
  fs: uapi: define verity bit for FS_IOC_GETFLAGS
  fs-verity: add UAPI header
  fs-verity: add MAINTAINERS file entry
  fs-verity: add a documentation file
  ext4: fix kernel oops caused by spurious casefold flag
  ext4: fix coverity warning on error path of filename setup
  ext4: optimize case-insensitive lookups
  ext4: fix dcache lookup of !casefolded directories
  unicode: update to Unicode 12.1.0 final
  unicode: add missing check for an error return from utf8lookup()
  ext4: export /sys/fs/ext4/feature/casefold if Unicode support is present
  unicode: refactor the rule for regenerating utf8data.h
  ext4: Support case-insensitive file name lookups
  ext4: include charset encoding information in the superblock
  unicode: update unicode database unicode version 12.1.0
  unicode: introduce test module for normalized utf8 implementation
  unicode: implement higher level API for string handling
  unicode: reduce the size of utf8data[]
  unicode: introduce code for UTF-8 normalization
  unicode: introduce UTF-8 character database
  ext4 crypto: fix to check feature status before get policy
  fscrypt: document the new ioctls and policy version
  ubifs: wire up new fscrypt ioctls
  f2fs: wire up new fscrypt ioctls
  ext4: wire up new fscrypt ioctls
  fscrypt: require that key be added when setting a v2 encryption policy
  fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl
  fscrypt: allow unprivileged users to add/remove keys for v2 policies
  fscrypt: v2 encryption policy support
  fscrypt: add an HKDF-SHA512 implementation
  fscrypt: add FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl
  fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl
  fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
  fscrypt: rename keyinfo.c to keysetup.c
  fscrypt: move v1 policy key setup to keysetup_v1.c
  fscrypt: refactor key setup code in preparation for v2 policies
  fscrypt: rename fscrypt_master_key to fscrypt_direct_key
  fscrypt: add ->ci_inode to fscrypt_info
  fscrypt: use FSCRYPT_* definitions, not FS_*
  fscrypt: use FSCRYPT_ prefix for uapi constants
  fs, fscrypt: move uapi definitions to new header <linux/fscrypt.h>
  fscrypt: use ENOPKG when crypto API support missing
  fscrypt: improve warnings for missing crypto API support
  fscrypt: improve warning messages for unsupported encryption contexts
  fscrypt: make fscrypt_msg() take inode instead of super_block
  fscrypt: clean up base64 encoding/decoding
  fscrypt: remove loadable module related code

 Conflicts:
	fs/ext4/ioctl.c
	fs/ext4/readpage.c

Bug: 141329812
Change-Id: I2e10c22a7c52982d073ac6897cc8aa4d5a811a38
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
This commit is contained in:
Jaegeuk Kim
2019-10-07 13:27:18 -07:00
90 changed files with 18138 additions and 1293 deletions

View File

@@ -251,3 +251,10 @@ Description:
If checkpoint=disable, it displays the number of blocks that are unusable.
If checkpoint=enable, it displays the number of blocks that would be unusable
if checkpoint=disable were to be set.
What: /sys/fs/f2fs/<disk>/encoding
Date: July 2019
Contact: "Daniel Rosenberg" <drosen@google.com>
Description:
Displays name and version of the encoding set for the filesystem.
If no encoding is set, displays (none).

View File

@@ -177,6 +177,7 @@ mkprep
mkregtable
mktables
mktree
mkutf8data
modpost
modules.builtin
modules.order
@@ -255,6 +256,7 @@ vsyscall_32.lds
wanxlfw.inc
uImage
unifdef
utf8data.h
wakeup.bin
wakeup.elf
wakeup.lds

View File

@@ -24,3 +24,4 @@ order.
.. include:: bigalloc.rst
.. include:: inlinedata.rst
.. include:: eainode.rst
.. include:: verity.rst

View File

@@ -0,0 +1,41 @@
.. SPDX-License-Identifier: GPL-2.0
Verity files
------------
ext4 supports fs-verity, which is a filesystem feature that provides
Merkle tree based hashing for individual readonly files. Most of
fs-verity is common to all filesystems that support it; see
:ref:`Documentation/filesystems/fsverity.rst <fsverity>` for the
fs-verity documentation. However, the on-disk layout of the verity
metadata is filesystem-specific. On ext4, the verity metadata is
stored after the end of the file data itself, in the following format:
- Zero-padding to the next 65536-byte boundary. This padding need not
actually be allocated on-disk, i.e. it may be a hole.
- The Merkle tree, as documented in
:ref:`Documentation/filesystems/fsverity.rst
<fsverity_merkle_tree>`, with the tree levels stored in order from
root to leaf, and the tree blocks within each level stored in their
natural order.
- Zero-padding to the next filesystem block boundary.
- The verity descriptor, as documented in
:ref:`Documentation/filesystems/fsverity.rst <fsverity_descriptor>`,
with optionally appended signature blob.
- Zero-padding to the next offset that is 4 bytes before a filesystem
block boundary.
- The size of the verity descriptor in bytes, as a 4-byte little
endian integer.
Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e.
EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear.
They can have EXT4_ENCRYPT_FL set, in which case the verity metadata
is encrypted as well as the data itself.
Verity files cannot have blocks allocated past the end of the verity
metadata.

View File

@@ -157,6 +157,11 @@ noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
reserve_root=%d Configure the reserved space that is available for
allocation by a privileged user with the specified uid or
gid. Unit: 4KB; the default limit is 0.2% of user blocks.
resuid=%d The user ID which may use the reserved blocks.
resgid=%d The group ID which may use the reserved blocks.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
@@ -413,6 +418,9 @@ Files in /sys/fs/f2fs/<devname>
that would be unusable if checkpoint=disable were
to be set.
encoding This shows the encoding used for casefolding.
If casefolding is not enabled, returns (none)
================================================================================
USAGE
================================================================================

View File

@@ -72,6 +72,9 @@ Online attacks
fscrypt (and storage encryption in general) can only provide limited
protection, if any at all, against online attacks. In detail:
Side-channel attacks
~~~~~~~~~~~~~~~~~~~~
fscrypt is only resistant to side-channel attacks, such as timing or
electromagnetic attacks, to the extent that the underlying Linux
Cryptographic API algorithms are. If a vulnerable algorithm is used,
@@ -80,29 +83,90 @@ attacker to mount a side channel attack against the online system.
Side channel attacks may also be mounted against applications
consuming decrypted data.
After an encryption key has been provided, fscrypt is not designed to
hide the plaintext file contents or filenames from other users on the
same system, regardless of the visibility of the keyring key.
Instead, existing access control mechanisms such as file mode bits,
POSIX ACLs, LSMs, or mount namespaces should be used for this purpose.
Also note that as long as the encryption keys are *anywhere* in
memory, an online attacker can necessarily compromise them by mounting
a physical attack or by exploiting any kernel security vulnerability
which provides an arbitrary memory read primitive.
Unauthorized file access
~~~~~~~~~~~~~~~~~~~~~~~~
While it is ostensibly possible to "evict" keys from the system,
recently accessed encrypted files will remain accessible at least
until the filesystem is unmounted or the VFS caches are dropped, e.g.
using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the
RAM is compromised before being powered off, it will likely still be
possible to recover portions of the plaintext file contents, if not
some of the encryption keys as well. (Since Linux v4.12, all
in-kernel keys related to fscrypt are sanitized before being freed.
However, userspace would need to do its part as well.)
After an encryption key has been added, fscrypt does not hide the
plaintext file contents or filenames from other users on the same
system. Instead, existing access control mechanisms such as file mode
bits, POSIX ACLs, LSMs, or namespaces should be used for this purpose.
Currently, fscrypt does not prevent a user from maliciously providing
an incorrect key for another user's existing encrypted files. A
protection against this is planned.
(For the reasoning behind this, understand that while the key is
added, the confidentiality of the data, from the perspective of the
system itself, is *not* protected by the mathematical properties of
encryption but rather only by the correctness of the kernel.
Therefore, any encryption-specific access control checks would merely
be enforced by kernel *code* and therefore would be largely redundant
with the wide variety of access control mechanisms already available.)
Kernel memory compromise
~~~~~~~~~~~~~~~~~~~~~~~~
An attacker who compromises the system enough to read from arbitrary
memory, e.g. by mounting a physical attack or by exploiting a kernel
security vulnerability, can compromise all encryption keys that are
currently in use.
However, fscrypt allows encryption keys to be removed from the kernel,
which may protect them from later compromise.
In more detail, the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (or the
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl) can wipe a master
encryption key from kernel memory. If it does so, it will also try to
evict all cached inodes which had been "unlocked" using the key,
thereby wiping their per-file keys and making them once again appear
"locked", i.e. in ciphertext or encrypted form.
However, these ioctls have some limitations:
- Per-file keys for in-use files will *not* be removed or wiped.
Therefore, for maximum effect, userspace should close the relevant
encrypted files and directories before removing a master key, as
well as kill any processes whose working directory is in an affected
encrypted directory.
- The kernel cannot magically wipe copies of the master key(s) that
userspace might have as well. Therefore, userspace must wipe all
copies of the master key(s) it makes as well; normally this should
be done immediately after FS_IOC_ADD_ENCRYPTION_KEY, without waiting
for FS_IOC_REMOVE_ENCRYPTION_KEY. Naturally, the same also applies
to all higher levels in the key hierarchy. Userspace should also
follow other security precautions such as mlock()ing memory
containing keys to prevent it from being swapped out.
- In general, decrypted contents and filenames in the kernel VFS
caches are freed but not wiped. Therefore, portions thereof may be
recoverable from freed memory, even after the corresponding key(s)
were wiped. To partially solve this, you can set
CONFIG_PAGE_POISONING=y in your kernel config and add page_poison=1
to your kernel command line. However, this has a performance cost.
- Secret keys might still exist in CPU registers, in crypto
accelerator hardware (if used by the crypto API to implement any of
the algorithms), or in other places not explicitly considered here.
Limitations of v1 policies
~~~~~~~~~~~~~~~~~~~~~~~~~~
v1 encryption policies have some weaknesses with respect to online
attacks:
- There is no verification that the provided master key is correct.
Therefore, a malicious user can temporarily associate the wrong key
with another user's encrypted files to which they have read-only
access. Because of filesystem caching, the wrong key will then be
used by the other user's accesses to those files, even if the other
user has the correct key in their own keyring. This violates the
meaning of "read-only access".
- A compromise of a per-file key also compromises the master key from
which it was derived.
- Non-root users cannot securely remove encryption keys.
All the above problems are fixed with v2 encryption policies. For
this reason among others, it is recommended to use v2 encryption
policies on all new encrypted directories.
Key hierarchy
=============
@@ -123,11 +187,52 @@ appropriate master key. There can be any number of master keys, each
of which protects any number of directory trees on any number of
filesystems.
Userspace should generate master keys either using a cryptographically
secure random number generator, or by using a KDF (Key Derivation
Function). Note that whenever a KDF is used to "stretch" a
lower-entropy secret such as a passphrase, it is critical that a KDF
designed for this purpose be used, such as scrypt, PBKDF2, or Argon2.
Master keys must be real cryptographic keys, i.e. indistinguishable
from random bytestrings of the same length. This implies that users
**must not** directly use a password as a master key, zero-pad a
shorter key, or repeat a shorter key. Security cannot be guaranteed
if userspace makes any such error, as the cryptographic proofs and
analysis would no longer apply.
Instead, users should generate master keys either using a
cryptographically secure random number generator, or by using a KDF
(Key Derivation Function). The kernel does not do any key stretching;
therefore, if userspace derives the key from a low-entropy secret such
as a passphrase, it is critical that a KDF designed for this purpose
be used, such as scrypt, PBKDF2, or Argon2.
Key derivation function
-----------------------
With one exception, fscrypt never uses the master key(s) for
encryption directly. Instead, they are only used as input to a KDF
(Key Derivation Function) to derive the actual keys.
The KDF used for a particular master key differs depending on whether
the key is used for v1 encryption policies or for v2 encryption
policies. Users **must not** use the same key for both v1 and v2
encryption policies. (No real-world attack is currently known on this
specific case of key reuse, but its security cannot be guaranteed
since the cryptographic proofs and analysis would no longer apply.)
For v1 encryption policies, the KDF only supports deriving per-file
encryption keys. It works by encrypting the master key with
AES-128-ECB, using the file's 16-byte nonce as the AES key. The
resulting ciphertext is used as the derived key. If the ciphertext is
longer than needed, then it is truncated to the needed length.
For v2 encryption policies, the KDF is HKDF-SHA512. The master key is
passed as the "input keying material", no salt is used, and a distinct
"application-specific information string" is used for each distinct
key to be derived. For example, when a per-file encryption key is
derived, the application-specific information string is the file's
nonce prefixed with "fscrypt\\0" and a context byte. Different
context bytes are used for other types of derived keys.
HKDF-SHA512 is preferred to the original AES-128-ECB based KDF because
HKDF is more flexible, is nonreversible, and evenly distributes
entropy from the master key. HKDF is also standardized and widely
used by other software, whereas the AES-128-ECB based KDF is ad-hoc.
Per-file keys
-------------
@@ -138,29 +243,9 @@ files doesn't map to the same ciphertext, or vice versa. In most
cases, fscrypt does this by deriving per-file keys. When a new
encrypted inode (regular file, directory, or symlink) is created,
fscrypt randomly generates a 16-byte nonce and stores it in the
inode's encryption xattr. Then, it uses a KDF (Key Derivation
Function) to derive the file's key from the master key and nonce.
The Adiantum encryption mode (see `Encryption modes and usage`_) is
special, since it accepts longer IVs and is suitable for both contents
and filenames encryption. For it, a "direct key" option is offered
where the file's nonce is included in the IVs and the master key is
used for encryption directly. This improves performance; however,
users must not use the same master key for any other encryption mode.
Below, the KDF and design considerations are described in more detail.
The current KDF works by encrypting the master key with AES-128-ECB,
using the file's nonce as the AES key. The output is used as the
derived key. If the output is longer than needed, then it is
truncated to the needed length.
Note: this KDF meets the primary security requirement, which is to
produce unique derived keys that preserve the entropy of the master
key, assuming that the master key is already a good pseudorandom key.
However, it is nonstandard and has some problems such as being
reversible, so it is generally considered to be a mistake! It may be
replaced with HKDF or another more standard KDF in the future.
inode's encryption xattr. Then, it uses a KDF (as described in `Key
derivation function`_) to derive the file's key from the master key
and nonce.
Key derivation was chosen over key wrapping because wrapped keys would
require larger xattrs which would be less likely to fit in-line in the
@@ -176,6 +261,37 @@ rejected as it would have prevented ext4 filesystems from being
resized, and by itself still wouldn't have been sufficient to prevent
the same key from being directly reused for both XTS and CTS-CBC.
DIRECT_KEY and per-mode keys
----------------------------
The Adiantum encryption mode (see `Encryption modes and usage`_) is
suitable for both contents and filenames encryption, and it accepts
long IVs --- long enough to hold both an 8-byte logical block number
and a 16-byte per-file nonce. Also, the overhead of each Adiantum key
is greater than that of an AES-256-XTS key.
Therefore, to improve performance and save memory, for Adiantum a
"direct key" configuration is supported. When the user has enabled
this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy,
per-file keys are not used. Instead, whenever any data (contents or
filenames) is encrypted, the file's 16-byte nonce is included in the
IV. Moreover:
- For v1 encryption policies, the encryption is done directly with the
master key. Because of this, users **must not** use the same master
key for any other purpose, even for other v1 policies.
- For v2 encryption policies, the encryption is done with a per-mode
key derived using the KDF. Users may use the same master key for
other v2 encryption policies.
Key identifiers
---------------
For master keys used for v2 encryption policies, a unique 16-byte "key
identifier" is also derived using the KDF. This value is stored in
the clear, since it is needed to reliably identify the key itself.
Encryption modes and usage
==========================
@@ -225,9 +341,10 @@ a little endian number, except that:
is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
of the file's data encryption key.
- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
the fscrypt_policy), the file's nonce is also appended to the IV.
Currently this is only allowed with the Adiantum encryption mode.
- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
set in the fscrypt_policy), the file's nonce is also appended to the
IV. Currently this is only allowed with the Adiantum encryption
mode.
Filenames encryption
--------------------
@@ -269,49 +386,77 @@ User API
Setting an encryption policy
----------------------------
FS_IOC_SET_ENCRYPTION_POLICY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
empty directory or verifies that a directory or regular file already
has the specified encryption policy. It takes in a pointer to a
:c:type:`struct fscrypt_policy`, defined as follows::
:c:type:`struct fscrypt_policy_v1` or a :c:type:`struct
fscrypt_policy_v2`, defined as follows::
#define FS_KEY_DESCRIPTOR_SIZE 8
struct fscrypt_policy {
#define FSCRYPT_POLICY_V1 0
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
struct fscrypt_policy_v1 {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
};
#define fscrypt_policy fscrypt_policy_v1
#define FSCRYPT_POLICY_V2 2
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
struct fscrypt_policy_v2 {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 __reserved[4];
__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
};
This structure must be initialized as follows:
- ``version`` must be 0.
- ``version`` must be FSCRYPT_POLICY_V1 (0) if the struct is
:c:type:`fscrypt_policy_v1` or FSCRYPT_POLICY_V2 (2) if the struct
is :c:type:`fscrypt_policy_v2`. (Note: we refer to the original
policy version as "v1", though its version code is really 0.) For
new encrypted directories, use v2 policies.
- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
be set to constants from ``<linux/fs.h>`` which identify the
encryption modes to use. If unsure, use
FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
``filenames_encryption_mode``.
be set to constants from ``<linux/fscrypt.h>`` which identify the
encryption modes to use. If unsure, use FSCRYPT_MODE_AES_256_XTS
(1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
(4) for ``filenames_encryption_mode``.
- ``flags`` must contain a value from ``<linux/fs.h>`` which
- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
identifies the amount of NUL-padding to use when encrypting
filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
In addition, if the chosen encryption modes are both
FS_ENCRYPTION_MODE_ADIANTUM, this can contain
FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
used directly, without key derivation.
filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).
Additionally, if the encryption modes are both
FSCRYPT_MODE_ADIANTUM, this can contain
FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_.
- ``master_key_descriptor`` specifies how to find the master key in
the keyring; see `Adding keys`_. It is up to userspace to choose a
unique ``master_key_descriptor`` for each master key. The e4crypt
and fscrypt tools use the first 8 bytes of
- For v2 encryption policies, ``__reserved`` must be zeroed.
- For v1 encryption policies, ``master_key_descriptor`` specifies how
to find the master key in a keyring; see `Adding keys`_. It is up
to userspace to choose a unique ``master_key_descriptor`` for each
master key. The e4crypt and fscrypt tools use the first 8 bytes of
``SHA-512(SHA-512(master_key))``, but this particular scheme is not
required. Also, the master key need not be in the keyring yet when
FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added
before any files can be created in the encrypted directory.
For v2 encryption policies, ``master_key_descriptor`` has been
replaced with ``master_key_identifier``, which is longer and cannot
be arbitrarily chosen. Instead, the key must first be added using
`FS_IOC_ADD_ENCRYPTION_KEY`_. Then, the ``key_spec.u.identifier``
the kernel returned in the :c:type:`struct fscrypt_add_key_arg` must
be used as the ``master_key_identifier`` in the :c:type:`struct
fscrypt_policy_v2`.
If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
verifies that the file is an empty directory. If so, the specified
encryption policy is assigned to the directory, turning it into an
@@ -327,6 +472,15 @@ policy exactly matches the actual one. If they match, then the ioctl
returns 0. Otherwise, it fails with EEXIST. This works on both
regular files and directories, including nonempty directories.
When a v2 encryption policy is assigned to a directory, it is also
required that either the specified key has been added by the current
user or that the caller has CAP_FOWNER in the initial user namespace.
(This is needed to prevent a user from encrypting their data with
another user's key.) The key must remain added while
FS_IOC_SET_ENCRYPTION_POLICY is executing. However, if the new
encrypted directory does not need to be accessed immediately, then the
key can be removed right away afterwards.
Note that the ext4 filesystem does not allow the root directory to be
encrypted, even if it is empty. Users who want to encrypt an entire
filesystem with one key should consider using dm-crypt instead.
@@ -339,7 +493,11 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
- ``EEXIST``: the file is already encrypted with an encryption policy
different from the one specified
- ``EINVAL``: an invalid encryption policy was specified (invalid
version, mode(s), or flags)
version, mode(s), or flags; or reserved bits were set)
- ``ENOKEY``: a v2 encryption policy was specified, but the key with
the specified ``master_key_identifier`` has not been added, nor does
the process have the CAP_FOWNER capability in the initial user
namespace
- ``ENOTDIR``: the file is unencrypted and is a regular file, not a
directory
- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
@@ -358,25 +516,79 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
Getting an encryption policy
----------------------------
The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct
fscrypt_policy`, if any, for a directory or regular file. See above
for the struct definition. No additional permissions are required
beyond the ability to open the file.
Two ioctls are available to get a file's encryption policy:
FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors:
- `FS_IOC_GET_ENCRYPTION_POLICY_EX`_
- `FS_IOC_GET_ENCRYPTION_POLICY`_
The extended (_EX) version of the ioctl is more general and should be
used whenever it is available. However, on older kernels only the
original ioctl is available. Applications should try the extended
version, and if it fails with ENOTTY fall back to the original
version.
FS_IOC_GET_ENCRYPTION_POLICY_EX
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_GET_ENCRYPTION_POLICY_EX ioctl retrieves the encryption
policy, if any, for a directory or regular file. No additional
permissions are required beyond the ability to open the file. It
takes in a pointer to a :c:type:`struct fscrypt_get_policy_ex_arg`,
defined as follows::
struct fscrypt_get_policy_ex_arg {
__u64 policy_size; /* input/output */
union {
__u8 version;
struct fscrypt_policy_v1 v1;
struct fscrypt_policy_v2 v2;
} policy; /* output */
};
The caller must initialize ``policy_size`` to the size available for
the policy struct, i.e. ``sizeof(arg.policy)``.
On success, the policy struct is returned in ``policy``, and its
actual size is returned in ``policy_size``. ``policy.version`` should
be checked to determine the version of policy returned. Note that the
version code for the "v1" policy is actually 0 (FSCRYPT_POLICY_V1).
FS_IOC_GET_ENCRYPTION_POLICY_EX can fail with the following errors:
- ``EINVAL``: the file is encrypted, but it uses an unrecognized
encryption context format
encryption policy version
- ``ENODATA``: the file is not encrypted
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``ENOTTY``: this type of filesystem does not implement encryption,
or this kernel is too old to support FS_IOC_GET_ENCRYPTION_POLICY_EX
(try FS_IOC_GET_ENCRYPTION_POLICY instead)
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem
support for this filesystem, or the filesystem superblock has not
had encryption enabled on it
- ``EOVERFLOW``: the file is encrypted and uses a recognized
encryption policy version, but the policy struct does not fit into
the provided buffer
Note: if you only need to know whether a file is encrypted or not, on
most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl
and check for FS_ENCRYPT_FL, or to use the statx() system call and
check for STATX_ATTR_ENCRYPTED in stx_attributes.
FS_IOC_GET_ENCRYPTION_POLICY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_GET_ENCRYPTION_POLICY ioctl can also retrieve the
encryption policy, if any, for a directory or regular file. However,
unlike `FS_IOC_GET_ENCRYPTION_POLICY_EX`_,
FS_IOC_GET_ENCRYPTION_POLICY only supports the original policy
version. It takes in a pointer directly to a :c:type:`struct
fscrypt_policy_v1` rather than a :c:type:`struct
fscrypt_get_policy_ex_arg`.
The error codes for FS_IOC_GET_ENCRYPTION_POLICY are the same as those
for FS_IOC_GET_ENCRYPTION_POLICY_EX, except that
FS_IOC_GET_ENCRYPTION_POLICY also returns ``EINVAL`` if the file is
encrypted using a newer encryption policy version.
Getting the per-filesystem salt
-------------------------------
@@ -392,8 +604,115 @@ generate and manage any needed salt(s) in userspace.
Adding keys
-----------
To provide a master key, userspace must add it to an appropriate
keyring using the add_key() system call (see:
FS_IOC_ADD_ENCRYPTION_KEY
~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_ADD_ENCRYPTION_KEY ioctl adds a master encryption key to
the filesystem, making all files on the filesystem which were
encrypted using that key appear "unlocked", i.e. in plaintext form.
It can be executed on any file or directory on the target filesystem,
but using the filesystem's root directory is recommended. It takes in
a pointer to a :c:type:`struct fscrypt_add_key_arg`, defined as
follows::
struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 __reserved[9];
__u8 raw[];
};
#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1
#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2
struct fscrypt_key_specifier {
__u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */
__u32 __reserved;
union {
__u8 __reserved[32]; /* reserve some extra space */
__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
__u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
} u;
};
:c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized
as follows:
- If the key is being added for use by v1 encryption policies, then
``key_spec.type`` must contain FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR, and
``key_spec.u.descriptor`` must contain the descriptor of the key
being added, corresponding to the value in the
``master_key_descriptor`` field of :c:type:`struct
fscrypt_policy_v1`. To add this type of key, the calling process
must have the CAP_SYS_ADMIN capability in the initial user
namespace.
Alternatively, if the key is being added for use by v2 encryption
policies, then ``key_spec.type`` must contain
FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, and ``key_spec.u.identifier`` is
an *output* field which the kernel fills in with a cryptographic
hash of the key. To add this type of key, the calling process does
not need any privileges. However, the number of keys that can be
added is limited by the user's quota for the keyrings service (see
``Documentation/security/keys/core.rst``).
- ``raw_size`` must be the size of the ``raw`` key provided, in bytes.
- ``raw`` is a variable-length field which must contain the actual
key, ``raw_size`` bytes long.
For v2 policy keys, the kernel keeps track of which user (identified
by effective user ID) added the key, and only allows the key to be
removed by that user --- or by "root", if they use
`FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_.
However, if another user has added the key, it may be desirable to
prevent that other user from unexpectedly removing it. Therefore,
FS_IOC_ADD_ENCRYPTION_KEY may also be used to add a v2 policy key
*again*, even if it's already added by other user(s). In this case,
FS_IOC_ADD_ENCRYPTION_KEY will just install a claim to the key for the
current user, rather than actually add the key again (but the raw key
must still be provided, as a proof of knowledge).
FS_IOC_ADD_ENCRYPTION_KEY returns 0 if the key or a claim to the key
was added, or if it already existed.
FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors:
- ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the
caller does not have the CAP_SYS_ADMIN capability in the initial
user namespace
- ``EDQUOT``: the key quota for this user would be exceeded by adding
the key
- ``EINVAL``: invalid key size or key specifier type, or reserved bits
were set
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem, or the filesystem superblock has not
had encryption enabled on it
Legacy method
~~~~~~~~~~~~~
For v1 encryption policies, a master encryption key can also be
provided by adding it to a process-subscribed keyring, e.g. to a
session keyring, or to a user keyring if the user keyring is linked
into the session keyring.
This method is deprecated (and not supported for v2 encryption
policies) for several reasons. First, it cannot be used in
combination with FS_IOC_REMOVE_ENCRYPTION_KEY (see `Removing keys`_),
so for removing a key a workaround such as keyctl_unlink() in
combination with ``sync; echo 2 > /proc/sys/vm/drop_caches`` would
have to be used. Second, it doesn't match the fact that the
locked/unlocked status of encrypted files (i.e. whether they appear to
be in plaintext form or in ciphertext form) is global. This mismatch
has caused much confusion as well as real problems when processes
running under different UIDs, such as a ``sudo`` command, need to
access encrypted files.
Nevertheless, to add a key to one of the process-subscribed keyrings,
the add_key() system call can be used (see:
``Documentation/security/keys/core.rst``). The key type must be
"logon"; keys of this type are kept in kernel memory and cannot be
read back by userspace. The key description must be "fscrypt:"
@@ -401,12 +720,12 @@ followed by the 16-character lower case hex representation of the
``master_key_descriptor`` that was set in the encryption policy. The
key payload must conform to the following structure::
#define FS_MAX_KEY_SIZE 64
#define FSCRYPT_MAX_KEY_SIZE 64
struct fscrypt_key {
u32 mode;
u8 raw[FS_MAX_KEY_SIZE];
u32 size;
__u32 mode;
__u8 raw[FSCRYPT_MAX_KEY_SIZE];
__u32 size;
};
``mode`` is ignored; just set it to 0. The actual key is provided in
@@ -418,26 +737,194 @@ with a filesystem-specific prefix such as "ext4:". However, the
filesystem-specific prefixes are deprecated and should not be used in
new programs.
There are several different types of keyrings in which encryption keys
may be placed, such as a session keyring, a user session keyring, or a
user keyring. Each key must be placed in a keyring that is "attached"
to all processes that might need to access files encrypted with it, in
the sense that request_key() will find the key. Generally, if only
processes belonging to a specific user need to access a given
encrypted directory and no session keyring has been installed, then
that directory's key should be placed in that user's user session
keyring or user keyring. Otherwise, a session keyring should be
installed if needed, and the key should be linked into that session
keyring, or in a keyring linked into that session keyring.
Removing keys
-------------
Note: introducing the complex visibility semantics of keyrings here
was arguably a mistake --- especially given that by design, after any
process successfully opens an encrypted file (thereby setting up the
per-file key), possessing the keyring key is not actually required for
any process to read/write the file until its in-memory inode is
evicted. In the future there probably should be a way to provide keys
directly to the filesystem instead, which would make the intended
semantics clearer.
Two ioctls are available for removing a key that was added by
`FS_IOC_ADD_ENCRYPTION_KEY`_:
- `FS_IOC_REMOVE_ENCRYPTION_KEY`_
- `FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_
These two ioctls differ only in cases where v2 policy keys are added
or removed by non-root users.
These ioctls don't work on keys that were added via the legacy
process-subscribed keyrings mechanism.
Before using these ioctls, read the `Kernel memory compromise`_
section for a discussion of the security goals and limitations of
these ioctls.
FS_IOC_REMOVE_ENCRYPTION_KEY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_REMOVE_ENCRYPTION_KEY ioctl removes a claim to a master
encryption key from the filesystem, and possibly removes the key
itself. It can be executed on any file or directory on the target
filesystem, but using the filesystem's root directory is recommended.
It takes in a pointer to a :c:type:`struct fscrypt_remove_key_arg`,
defined as follows::
struct fscrypt_remove_key_arg {
struct fscrypt_key_specifier key_spec;
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002
__u32 removal_status_flags; /* output */
__u32 __reserved[5];
};
This structure must be zeroed, then initialized as follows:
- The key to remove is specified by ``key_spec``:
- To remove a key used by v1 encryption policies, set
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
in ``key_spec.u.descriptor``. To remove this type of key, the
calling process must have the CAP_SYS_ADMIN capability in the
initial user namespace.
- To remove a key used by v2 encryption policies, set
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
in ``key_spec.u.identifier``.
For v2 policy keys, this ioctl is usable by non-root users. However,
to make this possible, it actually just removes the current user's
claim to the key, undoing a single call to FS_IOC_ADD_ENCRYPTION_KEY.
Only after all claims are removed is the key really removed.
For example, if FS_IOC_ADD_ENCRYPTION_KEY was called with uid 1000,
then the key will be "claimed" by uid 1000, and
FS_IOC_REMOVE_ENCRYPTION_KEY will only succeed as uid 1000. Or, if
both uids 1000 and 2000 added the key, then for each uid
FS_IOC_REMOVE_ENCRYPTION_KEY will only remove their own claim. Only
once *both* are removed is the key really removed. (Think of it like
unlinking a file that may have hard links.)
If FS_IOC_REMOVE_ENCRYPTION_KEY really removes the key, it will also
try to "lock" all files that had been unlocked with the key. It won't
lock files that are still in-use, so this ioctl is expected to be used
in cooperation with userspace ensuring that none of the files are
still open. However, if necessary, this ioctl can be executed again
later to retry locking any remaining files.
FS_IOC_REMOVE_ENCRYPTION_KEY returns 0 if either the key was removed
(but may still have files remaining to be locked), the user's claim to
the key was removed, or the key was already removed but had files
remaining to be locked so the ioctl retried locking them. In any
of these cases, ``removal_status_flags`` is filled in with the
following informational status flags:
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY``: set if some file(s)
are still in-use. Not guaranteed to be set in the case where only
the user's claim to the key was removed.
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS``: set if only the
user's claim to the key was removed, not the key itself
FS_IOC_REMOVE_ENCRYPTION_KEY can fail with the following errors:
- ``EACCES``: The FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR key specifier type
was specified, but the caller does not have the CAP_SYS_ADMIN
capability in the initial user namespace
- ``EINVAL``: invalid key specifier type, or reserved bits were set
- ``ENOKEY``: the key object was not found at all, i.e. it was never
added in the first place or was already fully removed including all
files locked; or, the user does not have a claim to the key (but
someone else does).
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem, or the filesystem superblock has not
had encryption enabled on it
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS is exactly the same as
`FS_IOC_REMOVE_ENCRYPTION_KEY`_, except that for v2 policy keys, the
ALL_USERS version of the ioctl will remove all users' claims to the
key, not just the current user's. I.e., the key itself will always be
removed, no matter how many users have added it. This difference is
only meaningful if non-root users are adding and removing keys.
Because of this, FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS also requires
"root", namely the CAP_SYS_ADMIN capability in the initial user
namespace. Otherwise it will fail with EACCES.
Getting key status
------------------
FS_IOC_GET_ENCRYPTION_KEY_STATUS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl retrieves the status of a
master encryption key. It can be executed on any file or directory on
the target filesystem, but using the filesystem's root directory is
recommended. It takes in a pointer to a :c:type:`struct
fscrypt_get_key_status_arg`, defined as follows::
struct fscrypt_get_key_status_arg {
/* input */
struct fscrypt_key_specifier key_spec;
__u32 __reserved[6];
/* output */
#define FSCRYPT_KEY_STATUS_ABSENT 1
#define FSCRYPT_KEY_STATUS_PRESENT 2
#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3
__u32 status;
#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001
__u32 status_flags;
__u32 user_count;
__u32 __out_reserved[13];
};
The caller must zero all input fields, then fill in ``key_spec``:
- To get the status of a key for v1 encryption policies, set
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
in ``key_spec.u.descriptor``.
- To get the status of a key for v2 encryption policies, set
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
in ``key_spec.u.identifier``.
On success, 0 is returned and the kernel fills in the output fields:
- ``status`` indicates whether the key is absent, present, or
incompletely removed. Incompletely removed means that the master
secret has been removed, but some files are still in use; i.e.,
`FS_IOC_REMOVE_ENCRYPTION_KEY`_ returned 0 but set the informational
status flag FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY.
- ``status_flags`` can contain the following flags:
- ``FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF`` indicates that the key
has been added by the current user. This is only set for keys
identified by ``identifier`` rather than by ``descriptor``.
- ``user_count`` specifies the number of users who have added the key.
This is only set for keys identified by ``identifier`` rather than
by ``descriptor``.
FS_IOC_GET_ENCRYPTION_KEY_STATUS can fail with the following errors:
- ``EINVAL``: invalid key specifier type, or reserved bits were set
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem, or the filesystem superblock has not
had encryption enabled on it
Among other use cases, FS_IOC_GET_ENCRYPTION_KEY_STATUS can be useful
for determining whether the key for a given encrypted directory needs
to be added before prompting the user for the passphrase needed to
derive the key.
FS_IOC_GET_ENCRYPTION_KEY_STATUS can only get the status of keys in
the filesystem-level keyring, i.e. the keyring managed by
`FS_IOC_ADD_ENCRYPTION_KEY`_ and `FS_IOC_REMOVE_ENCRYPTION_KEY`_. It
cannot get the status of a key that has only been added for use by v1
encryption policies using the legacy mechanism involving
process-subscribed keyrings.
Access semantics
================
@@ -500,7 +987,7 @@ Without the key
Some filesystem operations may be performed on encrypted regular
files, directories, and symlinks even before their encryption key has
been provided:
been added, or after their encryption key has been removed:
- File metadata may be read, e.g. using stat().
@@ -565,33 +1052,44 @@ Encryption context
------------------
An encryption policy is represented on-disk by a :c:type:`struct
fscrypt_context`. It is up to individual filesystems to decide where
to store it, but normally it would be stored in a hidden extended
attribute. It should *not* be exposed by the xattr-related system
calls such as getxattr() and setxattr() because of the special
semantics of the encryption xattr. (In particular, there would be
much confusion if an encryption policy were to be added to or removed
from anything other than an empty directory.) The struct is defined
as follows::
fscrypt_context_v1` or a :c:type:`struct fscrypt_context_v2`. It is
up to individual filesystems to decide where to store it, but normally
it would be stored in a hidden extended attribute. It should *not* be
exposed by the xattr-related system calls such as getxattr() and
setxattr() because of the special semantics of the encryption xattr.
(In particular, there would be much confusion if an encryption policy
were to be added to or removed from anything other than an empty
directory.) These structs are defined as follows::
#define FS_KEY_DESCRIPTOR_SIZE 8
#define FS_KEY_DERIVATION_NONCE_SIZE 16
struct fscrypt_context {
u8 format;
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
struct fscrypt_context_v1 {
u8 version;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
Note that :c:type:`struct fscrypt_context` contains the same
information as :c:type:`struct fscrypt_policy` (see `Setting an
encryption policy`_), except that :c:type:`struct fscrypt_context`
also contains a nonce. The nonce is randomly generated by the kernel
and is used to derive the inode's encryption key as described in
`Per-file keys`_.
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
struct fscrypt_context_v2 {
u8 version;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 __reserved[4];
u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
The context structs contain the same information as the corresponding
policy structs (see `Setting an encryption policy`_), except that the
context structs also contain a nonce. The nonce is randomly generated
by the kernel and is used as KDF input or as a tweak to cause
different files to be encrypted differently; see `Per-file keys`_ and
`DIRECT_KEY and per-mode keys`_.
Data path changes
-----------------

View File

@@ -0,0 +1,726 @@
.. SPDX-License-Identifier: GPL-2.0
.. _fsverity:
=======================================================
fs-verity: read-only file-based authenticity protection
=======================================================
Introduction
============
fs-verity (``fs/verity/``) is a support layer that filesystems can
hook into to support transparent integrity and authenticity protection
of read-only files. Currently, it is supported by the ext4 and f2fs
filesystems. Like fscrypt, not too much filesystem-specific code is
needed to support fs-verity.
fs-verity is similar to `dm-verity
<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
but works on files rather than block devices. On regular files on
filesystems supporting fs-verity, userspace can execute an ioctl that
causes the filesystem to build a Merkle tree for the file and persist
it to a filesystem-specific location associated with the file.
After this, the file is made readonly, and all reads from the file are
automatically verified against the file's Merkle tree. Reads of any
corrupted data, including mmap reads, will fail.
Userspace can use another ioctl to retrieve the root hash (actually
the "file measurement", which is a hash that includes the root hash)
that fs-verity is enforcing for the file. This ioctl executes in
constant time, regardless of the file size.
fs-verity is essentially a way to hash a file in constant time,
subject to the caveat that reads which would violate the hash will
fail at runtime.
Use cases
=========
By itself, the base fs-verity feature only provides integrity
protection, i.e. detection of accidental (non-malicious) corruption.
However, because fs-verity makes retrieving the file hash extremely
efficient, it's primarily meant to be used as a tool to support
authentication (detection of malicious modifications) or auditing
(logging file hashes before use).
Trusted userspace code (e.g. operating system code running on a
read-only partition that is itself authenticated by dm-verity) can
authenticate the contents of an fs-verity file by using the
`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
digital signature of it.
A standard file hash could be used instead of fs-verity. However,
this is inefficient if the file is large and only a small portion may
be accessed. This is often the case for Android application package
(APK) files, for example. These typically contain many translations,
classes, and other resources that are infrequently or even never
accessed on a particular device. It would be slow and wasteful to
read and hash the entire file before starting the application.
Unlike an ahead-of-time hash, fs-verity also re-verifies data each
time it's paged in. This ensures that malicious disk firmware can't
undetectably change the contents of the file at runtime.
fs-verity does not replace or obsolete dm-verity. dm-verity should
still be used on read-only filesystems. fs-verity is for files that
must live on a read-write filesystem because they are independently
updated and potentially user-installed, so dm-verity cannot be used.
The base fs-verity feature is a hashing mechanism only; actually
authenticating the files is up to userspace. However, to meet some
users' needs, fs-verity optionally supports a simple signature
verification mechanism where users can configure the kernel to require
that all fs-verity files be signed by a key loaded into a keyring; see
`Built-in signature verification`_. Support for fs-verity file hashes
in IMA (Integrity Measurement Architecture) policies is also planned.
User API
========
FS_IOC_ENABLE_VERITY
--------------------
The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
follows::
struct fsverity_enable_arg {
__u32 version;
__u32 hash_algorithm;
__u32 block_size;
__u32 salt_size;
__u64 salt_ptr;
__u32 sig_size;
__u32 __reserved1;
__u64 sig_ptr;
__u64 __reserved2[11];
};
This structure contains the parameters of the Merkle tree to build for
the file, and optionally contains a signature. It must be initialized
as follows:
- ``version`` must be 1.
- ``hash_algorithm`` must be the identifier for the hash algorithm to
use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
``include/uapi/linux/fsverity.h`` for the list of possible values.
- ``block_size`` must be the Merkle tree block size. Currently, this
must be equal to the system page size, which is usually 4096 bytes.
Other sizes may be supported in the future. This value is not
necessarily the same as the filesystem block size.
- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
provided. The salt is a value that is prepended to every hashed
block; it can be used to personalize the hashing for a particular
file or device. Currently the maximum salt size is 32 bytes.
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
provided.
- ``sig_size`` is the size of the signature in bytes, or 0 if no
signature is provided. Currently the signature is (somewhat
arbitrarily) limited to 16128 bytes. See `Built-in signature
verification`_ for more information.
- ``sig_ptr`` is the pointer to the signature, or NULL if no
signature is provided.
- All reserved fields must be zeroed.
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
the file and persist it to a filesystem-specific location associated
with the file, then mark the file as a verity file. This ioctl may
take a long time to execute on large files, and it is interruptible by
fatal signals.
FS_IOC_ENABLE_VERITY checks for write access to the inode. However,
it must be executed on an O_RDONLY file descriptor and no processes
can have the file open for writing. Attempts to open the file for
writing while this ioctl is executing will fail with ETXTBSY. (This
is necessary to guarantee that no writable file descriptors will exist
after verity is enabled, and to guarantee that the file's contents are
stable while the Merkle tree is being built over it.)
On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
verity file. On failure (including the case of interruption by a
fatal signal), no changes are made to the file.
FS_IOC_ENABLE_VERITY can fail with the following errors:
- ``EACCES``: the process does not have write access to the file
- ``EBADMSG``: the signature is malformed
- ``EBUSY``: this ioctl is already running on the file
- ``EEXIST``: the file already has verity enabled
- ``EFAULT``: the caller provided inaccessible memory
- ``EINTR``: the operation was interrupted by a fatal signal
- ``EINVAL``: unsupported version, hash algorithm, or block size; or
reserved bits are set; or the file descriptor refers to neither a
regular file nor a directory.
- ``EISDIR``: the file descriptor refers to a directory
- ``EKEYREJECTED``: the signature doesn't match the file
- ``EMSGSIZE``: the salt or signature is too long
- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
needed to verify the signature
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
available in the kernel's crypto API as currently configured (e.g.
for SHA-512, missing CONFIG_CRYPTO_SHA512).
- ``ENOTTY``: this type of filesystem does not implement fs-verity
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
support; or the filesystem superblock has not had the 'verity'
feature enabled on it; or the filesystem does not support fs-verity
on this file. (See `Filesystem support`_.)
- ``EPERM``: the file is append-only; or, a signature is required and
one was not provided.
- ``EROFS``: the filesystem is read-only
- ``ETXTBSY``: someone has the file open for writing. This can be the
caller's file descriptor, another open file descriptor, or the file
reference held by a writable memory map.
FS_IOC_MEASURE_VERITY
---------------------
The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
file. The file measurement is a digest that cryptographically
identifies the file contents that are being enforced on reads.
This ioctl takes in a pointer to a variable-length structure::
struct fsverity_digest {
__u16 digest_algorithm;
__u16 digest_size; /* input/output */
__u8 digest[];
};
``digest_size`` is an input/output field. On input, it must be
initialized to the number of bytes allocated for the variable-length
``digest`` field.
On success, 0 is returned and the kernel fills in the structure as
follows:
- ``digest_algorithm`` will be the hash algorithm used for the file
measurement. It will match ``fsverity_enable_arg::hash_algorithm``.
- ``digest_size`` will be the size of the digest in bytes, e.g. 32
for SHA-256. (This can be redundant with ``digest_algorithm``.)
- ``digest`` will be the actual bytes of the digest.
FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
regardless of the size of the file.
FS_IOC_MEASURE_VERITY can fail with the following errors:
- ``EFAULT``: the caller provided inaccessible memory
- ``ENODATA``: the file is not a verity file
- ``ENOTTY``: this type of filesystem does not implement fs-verity
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
support, or the filesystem superblock has not had the 'verity'
feature enabled on it. (See `Filesystem support`_.)
- ``EOVERFLOW``: the digest is longer than the specified
``digest_size`` bytes. Try providing a larger buffer.
FS_IOC_GETFLAGS
---------------
The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
can also be used to check whether a file has fs-verity enabled or not.
To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
The verity flag is not settable via FS_IOC_SETFLAGS. You must use
FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
Accessing verity files
======================
Applications can transparently access a verity file just like a
non-verity one, with the following exceptions:
- Verity files are readonly. They cannot be opened for writing or
truncate()d, even if the file mode bits allow it. Attempts to do
one of these things will fail with EPERM. However, changes to
metadata such as owner, mode, timestamps, and xattrs are still
allowed, since these are not measured by fs-verity. Verity files
can also still be renamed, deleted, and linked to.
- Direct I/O is not supported on verity files. Attempts to use direct
I/O on such files will fall back to buffered I/O.
- DAX (Direct Access) is not supported on verity files, because this
would circumvent the data verification.
- Reads of data that doesn't match the verity Merkle tree will fail
with EIO (for read()) or SIGBUS (for mmap() reads).
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
file's verity measurement is not signed by a key in the fs-verity
keyring, then opening the file will fail. See `Built-in signature
verification`_.
Direct access to the Merkle tree is not supported. Therefore, if a
verity file is copied, or is backed up and restored, then it will lose
its "verity"-ness. fs-verity is primarily meant for files like
executables that are managed by a package manager.
File measurement computation
============================
This section describes how fs-verity hashes the file contents using a
Merkle tree to produce the "file measurement" which cryptographically
identifies the file contents. This algorithm is the same for all
filesystems that support fs-verity.
Userspace only needs to be aware of this algorithm if it needs to
compute the file measurement itself, e.g. in order to sign the file.
.. _fsverity_merkle_tree:
Merkle tree
-----------
The file contents are divided into blocks, where the block size is
configurable but is usually 4096 bytes. The end of the last block is
zero-padded if needed. Each block is then hashed, producing the first
level of hashes. Then, the hashes in this first level are grouped
into 'blocksize'-byte blocks (zero-padding the ends as needed) and
these blocks are hashed, producing the second level of hashes. This
proceeds up the tree until only a single block remains. The hash of
this block is the "Merkle tree root hash".
If the file fits in one block and is nonempty, then the "Merkle tree
root hash" is simply the hash of the single data block. If the file
is empty, then the "Merkle tree root hash" is all zeroes.
The "blocks" here are not necessarily the same as "filesystem blocks".
If a salt was specified, then it's zero-padded to the closest multiple
of the input size of the hash algorithm's compression function, e.g.
64 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is
prepended to every data or Merkle tree block that is hashed.
The purpose of the block padding is to cause every hash to be taken
over the same amount of data, which simplifies the implementation and
keeps open more possibilities for hardware acceleration. The purpose
of the salt padding is to make the salting "free" when the salted hash
state is precomputed, then imported for each hash.
Example: in the recommended configuration of SHA-256 and 4K blocks,
128 hash values fit in each block. Thus, each level of the Merkle
tree is approximately 128 times smaller than the previous, and for
large files the Merkle tree's size converges to approximately 1/127 of
the original file size. However, for small files, the padding is
significant, making the space overhead proportionally more.
.. _fsverity_descriptor:
fs-verity descriptor
--------------------
By itself, the Merkle tree root hash is ambiguous. For example, it
can't distinguish a large file from a small second file whose data
is exactly the top-level hash block of the first file. Ambiguities
also arise from the convention of padding to the next block boundary.
To solve this problem, the verity file measurement is actually
computed as a hash of the following structure, which contains the
Merkle tree root hash as well as other fields such as the file size::
struct fsverity_descriptor {
__u8 version; /* must be 1 */
__u8 hash_algorithm; /* Merkle tree hash algorithm */
__u8 log_blocksize; /* log2 of size of data and tree blocks */
__u8 salt_size; /* size of salt in bytes; 0 if none */
__le32 sig_size; /* must be 0 */
__le64 data_size; /* size of file the Merkle tree is built over */
__u8 root_hash[64]; /* Merkle tree root hash */
__u8 salt[32]; /* salt prepended to each hashed block */
__u8 __reserved[144]; /* must be 0's */
};
Note that the ``sig_size`` field must be set to 0 for the purpose of
computing the file measurement, even if a signature was provided (or
will be provided) to `FS_IOC_ENABLE_VERITY`_.
Built-in signature verification
===============================
With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
a portion of an authentication policy (see `Use cases`_) in the
kernel. Specifically, it adds support for:
1. At fs-verity module initialization time, a keyring ".fs-verity" is
created. The root user can add trusted X.509 certificates to this
keyring using the add_key() system call, then (when done)
optionally use keyctl_restrict_keyring() to prevent additional
certificates from being added.
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
detached signature in DER format of the file measurement. On
success, this signature is persisted alongside the Merkle tree.
Then, any time the file is opened, the kernel will verify the
file's actual measurement against this signature, using the
certificates in the ".fs-verity" keyring.
3. A new sysctl "fs.verity.require_signatures" is made available.
When set to 1, the kernel requires that all verity files have a
correctly signed file measurement as described in (2).
File measurements must be signed in the following format, which is
similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
struct fsverity_signed_digest {
char magic[8]; /* must be "FSVerity" */
__le16 digest_algorithm;
__le16 digest_size;
__u8 digest[];
};
fs-verity's built-in signature verification support is meant as a
relatively simple mechanism that can be used to provide some level of
authenticity protection for verity files, as an alternative to doing
the signature verification in userspace or using IMA-appraisal.
However, with this mechanism, userspace programs still need to check
that the verity bit is set, and there is no protection against verity
files being swapped around.
Filesystem support
==================
fs-verity is currently supported by the ext4 and f2fs filesystems.
The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
on either filesystem.
``include/linux/fsverity.h`` declares the interface between the
``fs/verity/`` support layer and filesystems. Briefly, filesystems
must provide an ``fsverity_operations`` structure that provides
methods to read and write the verity metadata to a filesystem-specific
location, including the Merkle tree blocks and
``fsverity_descriptor``. Filesystems must also call functions in
``fs/verity/`` at certain times, such as when a file is opened or when
pages have been read into the pagecache. (See `Verifying data`_.)
ext4
----
ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
To create verity files on an ext4 filesystem, the filesystem must have
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
it. "verity" is an RO_COMPAT filesystem feature, so once set, old
kernels will only be able to mount the filesystem readonly, and old
versions of e2fsck will be unable to check the filesystem. Moreover,
currently ext4 only supports mounting a filesystem with the "verity"
feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
ext4 also supports encryption, which can be used simultaneously with
fs-verity. In this case, the plaintext data is verified rather than
the ciphertext. This is necessary in order to make the file
measurement meaningful, since every file is encrypted differently.
ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
past the end of the file, starting at the first 64K boundary beyond
i_size. This approach works because (a) verity files are readonly,
and (b) pages fully beyond i_size aren't visible to userspace but can
be read/written internally by ext4 with only some relatively small
changes to ext4. This approach avoids having to depend on the
EA_INODE feature and on rearchitecturing ext4's xattr support to
support paging multi-gigabyte xattrs into memory, and to support
encrypting xattrs. Note that the verity metadata *must* be encrypted
when the file is, since it contains hashes of the plaintext data.
Currently, ext4 verity only supports the case where the Merkle tree
block size, filesystem block size, and page size are all the same. It
also only supports extent-based files.
f2fs
----
f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
To create verity files on an f2fs filesystem, the filesystem must have
been formatted with ``-O verity``.
f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
cleared.
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first
64K boundary beyond i_size. See explanation for ext4 above.
Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
which wouldn't be enough for even a single Merkle tree block.
Currently, f2fs verity only supports a Merkle tree block size of 4096.
Also, f2fs doesn't support enabling verity on files that currently
have atomic or volatile writes pending.
Implementation details
======================
Verifying data
--------------
fs-verity ensures that all reads of a verity file's data are verified,
regardless of which syscall is used to do the read (e.g. mmap(),
read(), pread()) and regardless of whether it's the first read or a
later read (unless the later read can return cached data that was
already verified). Below, we describe how filesystems implement this.
Pagecache
~~~~~~~~~
For filesystems using Linux's pagecache, the ``->readpage()`` and
``->readpages()`` methods must be modified to verify pages before they
are marked Uptodate. Merely hooking ``->read_iter()`` would be
insufficient, since ``->read_iter()`` is not used for memory maps.
Therefore, fs/verity/ provides a function fsverity_verify_page() which
verifies a page that has been read into the pagecache of a verity
inode, but is still locked and not Uptodate, so it's not yet readable
by userspace. As needed to do the verification,
fsverity_verify_page() will call back into the filesystem to read
Merkle tree pages via fsverity_operations::read_merkle_tree_page().
fsverity_verify_page() returns false if verification failed; in this
case, the filesystem must not set the page Uptodate. Following this,
as per the usual Linux pagecache behavior, attempts by userspace to
read() from the part of the file containing the page will fail with
EIO, and accesses to the page within a memory map will raise SIGBUS.
fsverity_verify_page() currently only supports the case where the
Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
In principle, fsverity_verify_page() verifies the entire path in the
Merkle tree from the data page to the root hash. However, for
efficiency the filesystem may cache the hash pages. Therefore,
fsverity_verify_page() only ascends the tree reading hash pages until
an already-verified hash page is seen, as indicated by the PageChecked
bit being set. It then verifies the path to that page.
This optimization, which is also used by dm-verity, results in
excellent sequential read performance. This is because usually (e.g.
127 in 128 times for 4K blocks and SHA-256) the hash page from the
bottom level of the tree will already be cached and checked from
reading a previous data page. However, random reads perform worse.
Block device based filesystems
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
the pagecache, so the above subsection applies too. However, they
also usually read many pages from a file at once, grouped into a
structure called a "bio". To make it easier for these types of
filesystems to support fs-verity, fs/verity/ also provides a function
fsverity_verify_bio() which verifies all pages in a bio.
ext4 and f2fs also support encryption. If a verity file is also
encrypted, the pages must be decrypted before being verified. To
support this, these filesystems allocate a "post-read context" for
each bio and store it in ``->bi_private``::
struct bio_post_read_ctx {
struct bio *bio;
struct work_struct work;
unsigned int cur_step;
unsigned int enabled_steps;
};
``enabled_steps`` is a bitmask that specifies whether decryption,
verity, or both is enabled. After the bio completes, for each needed
postprocessing step the filesystem enqueues the bio_post_read_ctx on a
workqueue, and then the workqueue work does the decryption or
verification. Finally, pages where no decryption or verity error
occurred are marked Uptodate, and the pages are unlocked.
Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
simply zeroes holes and sets the corresponding pages Uptodate; no bios
are issued. To prevent this case from bypassing fs-verity, these
filesystems use fsverity_verify_page() to verify hole pages.
ext4 and f2fs disable direct I/O on verity files, since otherwise
direct I/O would bypass fs-verity. (They also do the same for
encrypted files.)
Userspace utility
=================
This document focuses on the kernel, but a userspace utility for
fs-verity can be found at:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
See the README.md file in the fsverity-utils source tree for details,
including examples of setting up fs-verity protected files.
Tests
=====
To test fs-verity, use xfstests. For example, using `kvm-xfstests
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
kvm-xfstests -c ext4,f2fs -g verity
FAQ
===
This section answers frequently asked questions about fs-verity that
weren't already directly answered in other parts of this document.
:Q: Why isn't fs-verity part of IMA?
:A: fs-verity and IMA (Integrity Measurement Architecture) have
different focuses. fs-verity is a filesystem-level mechanism for
hashing individual files using a Merkle tree. In contrast, IMA
specifies a system-wide policy that specifies which files are
hashed and what to do with those hashes, such as log them,
authenticate them, or add them to a measurement list.
IMA is planned to support the fs-verity hashing mechanism as an
alternative to doing full file hashes, for people who want the
performance and security benefits of the Merkle tree based hash.
But it doesn't make sense to force all uses of fs-verity to be
through IMA. As a standalone filesystem feature, fs-verity
already meets many users' needs, and it's testable like other
filesystem features e.g. with xfstests.
:Q: Isn't fs-verity useless because the attacker can just modify the
hashes in the Merkle tree, which is stored on-disk?
:A: To verify the authenticity of an fs-verity file you must verify
the authenticity of the "file measurement", which is basically the
root hash of the Merkle tree. See `Use cases`_.
:Q: Isn't fs-verity useless because the attacker can just replace a
verity file with a non-verity one?
:A: See `Use cases`_. In the initial use case, it's really trusted
userspace code that authenticates the files; fs-verity is just a
tool to do this job efficiently and securely. The trusted
userspace code will consider non-verity files to be inauthentic.
:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you
store just the root hash?
:A: If the Merkle tree wasn't stored on-disk, then you'd have to
compute the entire tree when the file is first accessed, even if
just one byte is being read. This is a fundamental consequence of
how Merkle tree hashing works. To verify a leaf node, you need to
verify the whole path to the root hash, including the root node
(the thing which the root hash is a hash of). But if the root
node isn't stored on-disk, you have to compute it by hashing its
children, and so on until you've actually hashed the entire file.
That defeats most of the point of doing a Merkle tree-based hash,
since if you have to hash the whole file ahead of time anyway,
then you could simply do sha256(file) instead. That would be much
simpler, and a bit faster too.
It's true that an in-memory Merkle tree could still provide the
advantage of verification on every read rather than just on the
first read. However, it would be inefficient because every time a
hash page gets evicted (you can't pin the entire Merkle tree into
memory, since it may be very large), in order to restore it you
again need to hash everything below it in the tree. This again
defeats most of the point of doing a Merkle tree-based hash, since
a single block read could trigger re-hashing gigabytes of data.
:Q: But couldn't you store just the leaf nodes and compute the rest?
:A: See previous answer; this really just moves up one level, since
one could alternatively interpret the data blocks as being the
leaf nodes of the Merkle tree. It's true that the tree can be
computed much faster if the leaf level is stored rather than just
the data, but that's only because each level is less than 1% the
size of the level below (assuming the recommended settings of
SHA-256 and 4K blocks). For the exact same reason, by storing
"just the leaf nodes" you'd already be storing over 99% of the
tree, so you might as well simply store the whole tree.
:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
part of a package that is installed to many computers?
:A: This isn't currently supported. It was part of the original
design, but was removed to simplify the kernel UAPI and because it
wasn't a critical use case. Files are usually installed once and
used many times, and cryptographic hashing is somewhat fast on
most modern processors.
:Q: Why doesn't fs-verity support writes?
:A: Write support would be very difficult and would require a
completely different design, so it's well outside the scope of
fs-verity. Write support would require:
- A way to maintain consistency between the data and hashes,
including all levels of hashes, since corruption after a crash
(especially of potentially the entire file!) is unacceptable.
The main options for solving this are data journalling,
copy-on-write, and log-structured volume. But it's very hard to
retrofit existing filesystems with new consistency mechanisms.
Data journalling is available on ext4, but is very slow.
- Rebuilding the Merkle tree after every write, which would be
extremely inefficient. Alternatively, a different authenticated
dictionary structure such as an "authenticated skiplist" could
be used. However, this would be far more complex.
Compare it to dm-verity vs. dm-integrity. dm-verity is very
simple: the kernel just verifies read-only data against a
read-only Merkle tree. In contrast, dm-integrity supports writes
but is slow, is much more complex, and doesn't actually support
full-device authentication since it authenticates each sector
independently, i.e. there is no "root hash". It doesn't really
make sense for the same device-mapper target to support these two
very different cases; the same applies to fs-verity.
:Q: Since verity files are immutable, why isn't the immutable bit set?
:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
specific set of semantics which not only make the file contents
read-only, but also prevent the file from being deleted, renamed,
linked to, or having its owner or mode changed. These extra
properties are unwanted for fs-verity, so reusing the immutable
bit isn't appropriate.
:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
:A: Abusing the xattr interface for basically arbitrary syscalls is
heavily frowned upon by most of the Linux filesystem developers.
An xattr should really just be an xattr on-disk, not an API to
e.g. magically trigger construction of a Merkle tree.
:Q: Does fs-verity support remote filesystems?
:A: Only ext4 and f2fs support is implemented currently, but in
principle any filesystem that can store per-file verity metadata
can support fs-verity, regardless of whether it's local or remote.
Some filesystems may have fewer options of where to store the
verity metadata; one possibility is to store it past the end of
the file and "hide" it from userspace by manipulating i_size. The
data verification functions provided by ``fs/verity/`` also assume
that the filesystem uses the Linux pagecache, but both local and
remote filesystems normally do so.
:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity
be implemented entirely at the VFS level?
:A: There are many reasons why this is not possible or would be very
difficult, including the following:
- To prevent bypassing verification, pages must not be marked
Uptodate until they've been verified. Currently, each
filesystem is responsible for marking pages Uptodate via
``->readpages()``. Therefore, currently it's not possible for
the VFS to do the verification on its own. Changing this would
require significant changes to the VFS and all filesystems.
- It would require defining a filesystem-independent way to store
the verity metadata. Extended attributes don't work for this
because (a) the Merkle tree may be gigabytes, but many
filesystems assume that all xattrs fit into a single 4K
filesystem block, and (b) ext4 and f2fs encryption doesn't
encrypt xattrs, yet the Merkle tree *must* be encrypted when the
file contents are, because it stores hashes of the plaintext
file contents.
So the verity metadata would have to be stored in an actual
file. Using a separate file would be very ugly, since the
metadata is fundamentally part of the file to be protected, and
it could cause problems where users could delete the real file
but not the metadata file or vice versa. On the other hand,
having it be in the same file would break applications unless
filesystems' notion of i_size were divorced from the VFS's,
which would be complex and require changes to all filesystems.
- It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
transaction mechanism so that either the file ends up with
verity enabled, or no changes were made. Allowing intermediate
states to occur after a crash may cause problems.

View File

@@ -359,3 +359,4 @@ encryption of files and directories.
:maxdepth: 2
fscrypt
fsverity

View File

@@ -5999,6 +5999,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt.git
S: Supported
F: fs/crypto/
F: include/linux/fscrypt*.h
F: include/uapi/linux/fscrypt.h
F: Documentation/filesystems/fscrypt.rst
FSI-ATTACHED I2C DRIVER
@@ -6017,6 +6018,18 @@ S: Maintained
F: fs/notify/
F: include/linux/fsnotify*.h
FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
M: Eric Biggers <ebiggers@kernel.org>
M: Theodore Y. Ts'o <tytso@mit.edu>
L: linux-fscrypt@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
S: Supported
F: fs/verity/
F: include/linux/fsverity.h
F: include/uapi/linux/fsverity.h
F: Documentation/filesystems/fsverity.rst
FUJITSU LAPTOP EXTRAS
M: Jonathan Woithe <jwoithe@just42.net>
L: platform-driver-x86@vger.kernel.org

View File

@@ -396,7 +396,6 @@ CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CRYPTO_ADIANTUM=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_LZ4=y
CONFIG_CRYPTO_ZSTD=y
CONFIG_CRYPTO_ANSI_CPRNG=y

View File

@@ -333,7 +333,6 @@ CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CRYPTO_ADIANTUM=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_LZ4=y
CONFIG_CRYPTO_ZSTD=y
CONFIG_CRYPTO_ANSI_CPRNG=y

View File

@@ -105,6 +105,8 @@ config MANDATORY_FILE_LOCKING
source "fs/crypto/Kconfig"
source "fs/verity/Kconfig"
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
@@ -314,5 +316,6 @@ endif # NETWORK_FILESYSTEMS
source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
source "fs/unicode/Kconfig"
endmenu

View File

@@ -32,6 +32,7 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
@@ -91,6 +92,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/
obj-$(CONFIG_NFSD) += nfsd/
obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_UNICODE) += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_HPFS_FS) += hpfs/

View File

@@ -6,6 +6,8 @@ config FS_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_SHA512
select CRYPTO_HMAC
select KEYS
help
Enable encryption of files and directories. This

View File

@@ -1,4 +1,12 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o
fscrypto-y := crypto.o \
fname.o \
hkdf.o \
hooks.o \
keyring.o \
keysetup.o \
keysetup_v1.o \
policy.o
fscrypto-$(CONFIG_BLOCK) += bio.o

View File

@@ -140,7 +140,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
memset(iv, 0, ci->ci_mode->ivsize);
iv->lblk_num = cpu_to_le64(lblk_num);
if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY)
if (fscrypt_is_direct_key_policy(&ci->ci_policy))
memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
if (ci->ci_essiv_tfm != NULL)
@@ -187,10 +187,8 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res) {
fscrypt_err(inode->i_sb,
"%scryption failed for inode %lu, block %llu: %d",
(rw == FS_DECRYPT ? "de" : "en"),
inode->i_ino, lblk_num, res);
fscrypt_err(inode, "%scryption failed for block %llu: %d",
(rw == FS_DECRYPT ? "De" : "En"), lblk_num, res);
return res;
}
return 0;
@@ -452,7 +450,7 @@ fail:
return res;
}
void fscrypt_msg(struct super_block *sb, const char *level,
void fscrypt_msg(const struct inode *inode, const char *level,
const char *fmt, ...)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -466,8 +464,9 @@ void fscrypt_msg(struct super_block *sb, const char *level,
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (sb)
printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf);
if (inode)
printk("%sfscrypt (%s, inode %lu): %pV\n",
level, inode->i_sb->s_id, inode->i_ino, &vaf);
else
printk("%sfscrypt: %pV\n", level, &vaf);
va_end(args);
@@ -478,6 +477,8 @@ void fscrypt_msg(struct super_block *sb, const char *level,
*/
static int __init fscrypt_init(void)
{
int err = -ENOMEM;
/*
* Use an unbound workqueue to allow bios to be decrypted in parallel
* even when they happen to complete on the same CPU. This sacrifices
@@ -500,31 +501,19 @@ static int __init fscrypt_init(void)
if (!fscrypt_info_cachep)
goto fail_free_ctx;
err = fscrypt_init_keyring();
if (err)
goto fail_free_info;
return 0;
fail_free_info:
kmem_cache_destroy(fscrypt_info_cachep);
fail_free_ctx:
kmem_cache_destroy(fscrypt_ctx_cachep);
fail_free_queue:
destroy_workqueue(fscrypt_read_workqueue);
fail:
return -ENOMEM;
return err;
}
module_init(fscrypt_init)
/**
* fscrypt_exit() - Shutdown the fs encryption system
*/
static void __exit fscrypt_exit(void)
{
fscrypt_destroy();
if (fscrypt_read_workqueue)
destroy_workqueue(fscrypt_read_workqueue);
kmem_cache_destroy(fscrypt_ctx_cachep);
kmem_cache_destroy(fscrypt_info_cachep);
fscrypt_essiv_cleanup();
}
module_exit(fscrypt_exit);
MODULE_LICENSE("GPL");
late_initcall(fscrypt_init)

View File

@@ -71,9 +71,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
fscrypt_err(inode->i_sb,
"Filename encryption failed for inode %lu: %d",
inode->i_ino, res);
fscrypt_err(inode, "Filename encryption failed: %d", res);
return res;
}
@@ -117,9 +115,7 @@ static int fname_decrypt(struct inode *inode,
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
fscrypt_err(inode->i_sb,
"Filename decryption failed for inode %lu: %d",
inode->i_ino, res);
fscrypt_err(inode, "Filename decryption failed: %d", res);
return res;
}
@@ -127,44 +123,45 @@ static int fname_decrypt(struct inode *inode,
return 0;
}
static const char *lookup_table =
static const char lookup_table[65] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
* digest_encode() -
* base64_encode() -
*
* Encodes the input digest using characters from the set [a-zA-Z0-9_+].
* Encodes the input string using characters from the set [A-Za-z0-9+,].
* The encoded string is roughly 4/3 times the size of the input string.
*
* Return: length of the encoded string
*/
static int digest_encode(const char *src, int len, char *dst)
static int base64_encode(const u8 *src, int len, char *dst)
{
int i = 0, bits = 0, ac = 0;
int i, bits = 0, ac = 0;
char *cp = dst;
while (i < len) {
ac += (((unsigned char) src[i]) << bits);
for (i = 0; i < len; i++) {
ac += src[i] << bits;
bits += 8;
do {
*cp++ = lookup_table[ac & 0x3f];
ac >>= 6;
bits -= 6;
} while (bits >= 6);
i++;
}
if (bits)
*cp++ = lookup_table[ac & 0x3f];
return cp - dst;
}
static int digest_decode(const char *src, int len, char *dst)
static int base64_decode(const char *src, int len, u8 *dst)
{
int i = 0, bits = 0, ac = 0;
int i, bits = 0, ac = 0;
const char *p;
char *cp = dst;
u8 *cp = dst;
while (i < len) {
for (i = 0; i < len; i++) {
p = strchr(lookup_table, src[i]);
if (p == NULL || src[i] == 0)
return -2;
@@ -175,7 +172,6 @@ static int digest_decode(const char *src, int len, char *dst)
ac >>= 8;
bits -= 8;
}
i++;
}
if (ac)
return -1;
@@ -185,8 +181,9 @@ static int digest_decode(const char *src, int len, char *dst)
bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
u32 max_len, u32 *encrypted_len_ret)
{
int padding = 4 << (inode->i_crypt_info->ci_flags &
FS_POLICY_FLAGS_PAD_MASK);
const struct fscrypt_info *ci = inode->i_crypt_info;
int padding = 4 << (fscrypt_policy_flags(&ci->ci_policy) &
FSCRYPT_POLICY_FLAGS_PAD_MASK);
u32 encrypted_len;
if (orig_len > max_len)
@@ -272,7 +269,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
return fname_decrypt(inode, iname, oname);
if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) {
oname->len = digest_encode(iname->name, iname->len,
oname->len = base64_encode(iname->name, iname->len,
oname->name);
return 0;
}
@@ -287,7 +284,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
FSCRYPT_FNAME_DIGEST(iname->name, iname->len),
FSCRYPT_FNAME_DIGEST_SIZE);
oname->name[0] = '_';
oname->len = 1 + digest_encode((const char *)&digested_name,
oname->len = 1 + base64_encode((const u8 *)&digested_name,
sizeof(digested_name), oname->name + 1);
return 0;
}
@@ -380,8 +377,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
ret = digest_decode(iname->name + digested, iname->len - digested,
fname->crypto_buf.name);
ret = base64_decode(iname->name + digested, iname->len - digested,
fname->crypto_buf.name);
if (ret < 0) {
ret = -ENOENT;
goto errout;

View File

@@ -4,9 +4,8 @@
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions.
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
* Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar.
* Heavily modified since then.
*/
#ifndef _FSCRYPT_PRIVATE_H
@@ -15,30 +14,133 @@
#include <linux/fscrypt.h>
#include <crypto/hash.h>
/* Encryption parameters */
#define CONST_STRLEN(str) (sizeof(str) - 1)
#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct fscrypt_context {
u8 format;
#define FSCRYPT_MIN_KEY_SIZE 16
#define FSCRYPT_CONTEXT_V1 1
#define FSCRYPT_CONTEXT_V2 2
struct fscrypt_context_v1 {
u8 version; /* FSCRYPT_CONTEXT_V1 */
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
} __packed;
};
#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
/*
 * Version 2 layout of the encryption context.  fscrypt_context_size()
 * asserts at build time that this struct is exactly 40 bytes.
 */
struct fscrypt_context_v2 {
u8 version; /* FSCRYPT_CONTEXT_V2 */
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 __reserved[4];
u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; /* see union fscrypt_context */
};
/**
* fscrypt_context - the encryption context of an inode
*
* This is the on-disk equivalent of an fscrypt_policy, stored alongside each
* encrypted file usually in a hidden extended attribute. It contains the
* fields from the fscrypt_policy, in order to identify the encryption algorithm
* and key with which the file is encrypted. It also contains a nonce that was
* randomly generated by fscrypt itself; this is used as KDF input or as a tweak
* to cause different files to be encrypted differently.
*
* fscrypt_context_size() reads ->version to decide which of ->v1 or ->v2
* describes the context and therefore how large it is.
*/
union fscrypt_context {
u8 version; /* FSCRYPT_CONTEXT_V1 or FSCRYPT_CONTEXT_V2 */
struct fscrypt_context_v1 v1;
struct fscrypt_context_v2 v2;
};
/*
 * Translate the version byte of an fscrypt_context into the size of that
 * context version.  A version that is not recognized yields 0.
 */
static inline int fscrypt_context_size(const union fscrypt_context *ctx)
{
	/* The expected struct sizes are verified when the kernel is built. */
	BUILD_BUG_ON(sizeof(ctx->v1) != 28);
	BUILD_BUG_ON(sizeof(ctx->v2) != 40);

	if (ctx->version == FSCRYPT_CONTEXT_V1)
		return sizeof(ctx->v1);
	if (ctx->version == FSCRYPT_CONTEXT_V2)
		return sizeof(ctx->v2);
	return 0;
}
#undef fscrypt_policy
/*
 * An encryption policy of either supported version.  The accessor helpers in
 * this header switch on ->version (FSCRYPT_POLICY_V1 or FSCRYPT_POLICY_V2) to
 * pick the matching member.
 */
union fscrypt_policy {
u8 version;
struct fscrypt_policy_v1 v1;
struct fscrypt_policy_v2 v2;
};
/*
 * Translate the version byte of an fscrypt_policy into the size of that
 * policy version.  A version that is not recognized yields 0.
 */
static inline int fscrypt_policy_size(const union fscrypt_policy *policy)
{
	int size = 0;

	if (policy->version == FSCRYPT_POLICY_V1)
		size = sizeof(policy->v1);
	else if (policy->version == FSCRYPT_POLICY_V2)
		size = sizeof(policy->v2);

	return size;
}
/* Fetch the contents encryption mode from a valid encryption policy */
static inline u8
fscrypt_policy_contents_mode(const union fscrypt_policy *policy)
{
	if (policy->version == FSCRYPT_POLICY_V1)
		return policy->v1.contents_encryption_mode;
	if (policy->version == FSCRYPT_POLICY_V2)
		return policy->v2.contents_encryption_mode;

	/* Only valid (recognized-version) policies may be passed in. */
	BUG();
}
/* Fetch the filenames encryption mode from a valid encryption policy */
static inline u8
fscrypt_policy_fnames_mode(const union fscrypt_policy *policy)
{
	if (policy->version == FSCRYPT_POLICY_V1)
		return policy->v1.filenames_encryption_mode;
	if (policy->version == FSCRYPT_POLICY_V2)
		return policy->v2.filenames_encryption_mode;

	/* Only valid (recognized-version) policies may be passed in. */
	BUG();
}
/* Fetch the flags (FSCRYPT_POLICY_FLAG*) from a valid encryption policy */
static inline u8
fscrypt_policy_flags(const union fscrypt_policy *policy)
{
	if (policy->version == FSCRYPT_POLICY_V1)
		return policy->v1.flags;
	if (policy->version == FSCRYPT_POLICY_V2)
		return policy->v2.flags;

	/* Only valid (recognized-version) policies may be passed in. */
	BUG();
}
static inline bool
fscrypt_is_direct_key_policy(const union fscrypt_policy *policy)
{
return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY;
}
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
@@ -68,23 +170,37 @@ struct fscrypt_info {
struct crypto_cipher *ci_essiv_tfm;
/*
* Encryption mode used for this inode. It corresponds to either
* ci_data_mode or ci_filename_mode, depending on the inode type.
* Encryption mode used for this inode. It corresponds to either the
* contents or filenames encryption mode, depending on the inode type.
*/
struct fscrypt_mode *ci_mode;
/*
* If non-NULL, then this inode uses a master key directly rather than a
* derived key, and ci_ctfm will equal ci_master_key->mk_ctfm.
* Otherwise, this inode uses a derived key.
*/
struct fscrypt_master_key *ci_master_key;
/* Back-pointer to the inode */
struct inode *ci_inode;
/* fields from the fscrypt_context */
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
/*
* The master key with which this inode was unlocked (decrypted). This
* will be NULL if the master key was found in a process-subscribed
* keyring rather than in the filesystem-level keyring.
*/
struct key *ci_master_key;
/*
* Link in list of inodes that were unlocked with the master key.
* Only used when ->ci_master_key is set.
*/
struct list_head ci_master_key_link;
/*
* If non-NULL, then encryption is done using the master key directly
* and ci_ctfm will equal ci_direct_key->dk_ctfm.
*/
struct fscrypt_direct_key *ci_direct_key;
/* The encryption policy used by this inode */
union fscrypt_policy ci_policy;
/* This inode's nonce, copied from the fscrypt_context */
u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
@@ -98,16 +214,16 @@ typedef enum {
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
{
if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
if (contents_mode == FSCRYPT_MODE_AES_128_CBC &&
filenames_mode == FSCRYPT_MODE_AES_128_CTS)
return true;
if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
filenames_mode == FSCRYPT_MODE_AES_256_CTS)
return true;
if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM &&
filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM)
if (contents_mode == FSCRYPT_MODE_ADIANTUM &&
filenames_mode == FSCRYPT_MODE_ADIANTUM)
return true;
return false;
@@ -125,12 +241,12 @@ extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
extern const struct dentry_operations fscrypt_d_ops;
extern void __printf(3, 4) __cold
fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...);
#define fscrypt_warn(sb, fmt, ...) \
fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
#define fscrypt_err(sb, fmt, ...) \
fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
#define fscrypt_warn(inode, fmt, ...) \
fscrypt_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__)
#define fscrypt_err(inode, fmt, ...) \
fscrypt_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
#define FSCRYPT_MAX_IV_SIZE 32
@@ -155,7 +271,172 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
u32 orig_len, u32 max_len,
u32 *encrypted_len_ret);
/* keyinfo.c */
/* hkdf.c */
/*
 * State for HKDF key derivation.  fscrypt_init_hkdf() allocates 'hmac_tfm'
 * and keys it with the pseudorandom key produced by HKDF-Extract;
 * fscrypt_hkdf_expand() then reuses it, and fscrypt_destroy_hkdf() frees it.
 */
struct fscrypt_hkdf {
struct crypto_shash *hmac_tfm;
};
extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
unsigned int master_key_size);
/*
* The list of contexts in which fscrypt uses HKDF. These values are used as
* the first byte of the HKDF application-specific info string to guarantee that
* info strings are never repeated between contexts. This ensures that all HKDF
* outputs are unique and cryptographically isolated, i.e. knowledge of one
* output doesn't reveal another.
*/
#define HKDF_CONTEXT_KEY_IDENTIFIER 1
#define HKDF_CONTEXT_PER_FILE_KEY 2
#define HKDF_CONTEXT_PER_MODE_KEY 3
extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context,
const u8 *info, unsigned int infolen,
u8 *okm, unsigned int okmlen);
extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
/* keyring.c */
/*
 * fscrypt_master_key_secret - secret key material of an in-use master key
 *
 * A 'size' of 0 means no secret is present; that is the test performed by
 * is_master_key_secret_present().
 */
struct fscrypt_master_key_secret {
/*
 * For v2 policy keys: HKDF context keyed by this master key.
 * For v1 policy keys: not set (hkdf.hmac_tfm == NULL).
 */
struct fscrypt_hkdf hkdf;
/* Size of the raw key in bytes. Set even if ->raw isn't set. */
u32 size;
/* For v1 policy keys: the raw key. Wiped for v2 policy keys. */
u8 raw[FSCRYPT_MAX_KEY_SIZE];
} __randomize_layout;
/*
 * fscrypt_master_key - an in-use master key
 *
 * This represents a master encryption key which has been added to the
 * filesystem and can be used to "unlock" the encrypted files which were
 * encrypted with it.
 */
struct fscrypt_master_key {
/*
 * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is
 * executed, this is wiped and no new inodes can be unlocked with this
 * key; however, there may still be inodes in ->mk_decrypted_inodes
 * which could not be evicted. As long as some inodes still remain,
 * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
 * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
 *
 * Locking: protected by key->sem (outer) and mk_secret_sem (inner).
 * The reason for two locks is that key->sem also protects modifying
 * mk_users, which ranks it above the semaphore for the keyring key
 * type, which is in turn above page faults (via keyring_read). But
 * sometimes filesystems call fscrypt_get_encryption_info() from within
 * a transaction, which ranks it below page faults. So we need a
 * separate lock which protects mk_secret but not also mk_users.
 */
struct fscrypt_master_key_secret mk_secret;
struct rw_semaphore mk_secret_sem;
/*
 * For v1 policy keys: an arbitrary key descriptor which was assigned by
 * userspace (->descriptor).
 *
 * For v2 policy keys: a cryptographic hash of this key (->identifier).
 */
struct fscrypt_key_specifier mk_spec;
/*
 * Keyring which contains a key of type 'key_type_fscrypt_user' for each
 * user who has added this key. Normally each key will be added by just
 * one user, but it's possible that multiple users share a key, and in
 * that case we need to keep track of those users so that one user can't
 * remove the key before the others want it removed too.
 *
 * This is NULL for v1 policy keys; those can only be added by root.
 *
 * Locking: in addition to this keyring's own semaphore, this is
 * protected by the master key's key->sem, so we can do atomic
 * search+insert. It can also be searched without taking any locks, but
 * in that case the returned key may have already been removed.
 */
struct key *mk_users;
/*
 * Length of ->mk_decrypted_inodes, plus one if mk_secret is present.
 * Once this goes to 0, the master key is removed from ->s_master_keys.
 * The 'struct fscrypt_master_key' will continue to live as long as the
 * 'struct key' whose payload it is, but we won't let this reference
 * count rise again.
 */
refcount_t mk_refcount;
/*
 * List of inodes that were unlocked using this key. This allows the
 * inodes to be evicted efficiently if the key is removed.
 */
struct list_head mk_decrypted_inodes;
/* NOTE(review): presumably guards ->mk_decrypted_inodes -- confirm */
spinlock_t mk_decrypted_inodes_lock;
/*
 * Per-mode tfms for DIRECT_KEY policies, allocated on-demand.  One slot
 * per encryption mode number up to and including __FSCRYPT_MODE_MAX.
 */
struct crypto_skcipher *mk_mode_keys[__FSCRYPT_MODE_MAX + 1];
} __randomize_layout;
static inline bool
is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
{
/*
* The READ_ONCE() is only necessary for fscrypt_drop_inode() and
* fscrypt_key_describe(). These run in atomic context, so they can't
* take ->mk_secret_sem and thus 'secret' can change concurrently which
* would be a data race. But they only need to know whether the secret
* *was* present at the time of check, so READ_ONCE() suffices.
*/
return READ_ONCE(secret->size) != 0;
}
/*
 * Human-readable name of a key specifier's type, or "[unknown]" when the type
 * is neither a descriptor nor an identifier.
 */
static inline const char *master_key_spec_type(
				const struct fscrypt_key_specifier *spec)
{
	if (spec->type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR)
		return "descriptor";
	if (spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER)
		return "identifier";
	return "[unknown]";
}
/*
 * Length in bytes of the descriptor or identifier selected by the key
 * specifier's type, or 0 when the type is not recognized.
 */
static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
{
	if (spec->type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR)
		return FSCRYPT_KEY_DESCRIPTOR_SIZE;
	if (spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER)
		return FSCRYPT_KEY_IDENTIFIER_SIZE;
	return 0;
}
extern struct key *
fscrypt_find_master_key(struct super_block *sb,
const struct fscrypt_key_specifier *mk_spec);
extern int fscrypt_verify_key_added(struct super_block *sb,
const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
extern int __init fscrypt_init_keyring(void);
/* keysetup.c */
struct fscrypt_mode {
const char *friendly_name;
@@ -166,6 +447,36 @@ struct fscrypt_mode {
bool needs_essiv;
};
extern void __exit fscrypt_essiv_cleanup(void);
static inline bool
fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode)
{
return mode->ivsize >= offsetofend(union fscrypt_iv, nonce);
}
extern struct crypto_skcipher *
fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key,
const struct inode *inode);
extern int fscrypt_set_derived_key(struct fscrypt_info *ci,
const u8 *derived_key);
/* keysetup_v1.c */
extern void fscrypt_put_direct_key(struct fscrypt_direct_key *dk);
extern int fscrypt_setup_v1_file_key(struct fscrypt_info *ci,
const u8 *raw_master_key);
extern int fscrypt_setup_v1_file_key_via_subscribed_keyrings(
struct fscrypt_info *ci);
/* policy.c */
extern bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
const union fscrypt_policy *policy2);
extern bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
const struct inode *inode);
extern int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
const union fscrypt_context *ctx_u,
int ctx_size);
#endif /* _FSCRYPT_PRIVATE_H */

183
fs/crypto/hkdf.c Normal file
View File

@@ -0,0 +1,183 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation
* Function"), aka RFC 5869. See also the original paper (Krawczyk 2010):
* "Cryptographic Extraction and Key Derivation: The HKDF Scheme".
*
* This is used to derive keys from the fscrypt master keys.
*
* Copyright 2019 Google LLC
*/
#include <crypto/hash.h>
#include <crypto/sha.h>
#include "fscrypt_private.h"
/*
* HKDF supports any unkeyed cryptographic hash algorithm, but fscrypt uses
* SHA-512 because it is reasonably secure and efficient; and since it produces
* a 64-byte digest, deriving an AES-256-XTS key preserves all 64 bytes of
* entropy from the master key and requires only one iteration of HKDF-Expand.
*/
#define HKDF_HMAC_ALG "hmac(sha512)"
#define HKDF_HASHLEN SHA512_DIGEST_SIZE
/*
* HKDF consists of two steps:
*
* 1. HKDF-Extract: extract a pseudorandom key of length HKDF_HASHLEN bytes from
* the input keying material and optional salt.
* 2. HKDF-Expand: expand the pseudorandom key into output keying material of
* any length, parameterized by an application-specific info string.
*
* HKDF-Extract can be skipped if the input is already a pseudorandom key of
* length HKDF_HASHLEN bytes. However, cipher modes other than AES-256-XTS take
* shorter keys, and we don't want to force users of those modes to provide
* unnecessarily long master keys. Thus fscrypt still does HKDF-Extract. No
* salt is used, since fscrypt master keys should already be pseudorandom and
* there's no way to persist a random salt per master key from kernel mode.
*/
/*
 * HKDF-Extract (RFC 5869 section 2.2) with an all-zero salt: key the HMAC
 * transform with the salt, then digest the input keying material into 'prk'.
 * Returns 0 or the error from the failing crypto_shash_*() call.
 */
static int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm,
			unsigned int ikmlen, u8 prk[HKDF_HASHLEN])
{
	static const u8 zero_salt[HKDF_HASHLEN];
	SHASH_DESC_ON_STACK(desc, hmac_tfm);
	int ret;

	ret = crypto_shash_setkey(hmac_tfm, zero_salt, sizeof(zero_salt));
	if (ret == 0) {
		desc->tfm = hmac_tfm;
		desc->flags = 0;
		ret = crypto_shash_digest(desc, ikm, ikmlen, prk);
		/* the descriptor saw the master key; wipe it */
		shash_desc_zero(desc);
	}
	return ret;
}
/*
 * Run HKDF-Extract over the given master key and leave behind an HMAC
 * transform keyed with the resulting pseudorandom key in 'hkdf->hmac_tfm'.
 *
 * That keyed transform can then serve any number of HKDF-Expand calls without
 * HKDF-Extract being recomputed.
 *
 * Returns 0 on success.  On failure a negative errno is returned, the
 * transform is freed and 'hkdf' is left untouched.
 */
int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
		      unsigned int master_key_size)
{
	struct crypto_shash *tfm;
	u8 prk[HKDF_HASHLEN];
	int err;

	tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, 0);
	if (IS_ERR(tfm)) {
		fscrypt_err(NULL, "Error allocating " HKDF_HMAC_ALG ": %ld",
			    PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}

	if (WARN_ON(crypto_shash_digestsize(tfm) != sizeof(prk))) {
		err = -EINVAL;
	} else {
		err = hkdf_extract(tfm, master_key, master_key_size, prk);
		if (!err)
			err = crypto_shash_setkey(tfm, prk, sizeof(prk));
	}

	if (err)
		crypto_free_shash(tfm);
	else
		hkdf->hmac_tfm = tfm;

	/* the pseudorandom key is secret; never leave it on the stack */
	memzero_explicit(prk, sizeof(prk));
	return err;
}
/*
 * HKDF-Expand (RFC 5869 section 2.3). This expands the pseudorandom key, which
 * was already keyed into 'hkdf->hmac_tfm' by fscrypt_init_hkdf(), into 'okmlen'
 * bytes of output keying material parameterized by the application-specific
 * 'info' of length 'infolen' bytes, prefixed by "fscrypt\0" and the 'context'
 * byte. This is thread-safe and may be called by multiple threads in parallel.
 *
 * ('context' isn't part of the HKDF specification; it's just a prefix fscrypt
 * adds to its application-specific info strings to guarantee that it doesn't
 * accidentally repeat an info string when using HKDF for different purposes.)
 *
 * Returns 0 on success, -EINVAL if 'okmlen' exceeds 255 * HKDF_HASHLEN, or the
 * error from the crypto_shash_*() call that failed.  If a hash call fails,
 * all 'okmlen' bytes of 'okm' are zeroed before returning.
 */
int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context,
const u8 *info, unsigned int infolen,
u8 *okm, unsigned int okmlen)
{
SHASH_DESC_ON_STACK(desc, hkdf->hmac_tfm);
u8 prefix[9];
unsigned int i;
int err;
const u8 *prev = NULL;
u8 counter = 1;
u8 tmp[HKDF_HASHLEN];
/* 'counter' is a single byte starting at 1, so at most 255 blocks fit */
if (WARN_ON(okmlen > 255 * HKDF_HASHLEN))
return -EINVAL;
desc->tfm = hkdf->hmac_tfm;
desc->flags = 0;
/* 8 bytes of "fscrypt\0" followed by the one-byte context */
memcpy(prefix, "fscrypt\0", 8);
prefix[8] = context;
/*
 * Each iteration produces one output block: the HMAC over the previous
 * output block (absent for the first block), the prefix, 'info', and the
 * current counter byte.
 */
for (i = 0; i < okmlen; i += HKDF_HASHLEN) {
err = crypto_shash_init(desc);
if (err)
goto out;
if (prev) {
err = crypto_shash_update(desc, prev, HKDF_HASHLEN);
if (err)
goto out;
}
err = crypto_shash_update(desc, prefix, sizeof(prefix));
if (err)
goto out;
err = crypto_shash_update(desc, info, infolen);
if (err)
goto out;
BUILD_BUG_ON(sizeof(counter) != 1);
if (okmlen - i < HKDF_HASHLEN) {
/*
 * Final partial block: digest into 'tmp' so the full-size digest
 * doesn't overrun 'okm', copy only the bytes that fit, then wipe
 * 'tmp'.
 */
err = crypto_shash_finup(desc, &counter, 1, tmp);
if (err)
goto out;
memcpy(&okm[i], tmp, okmlen - i);
memzero_explicit(tmp, sizeof(tmp));
} else {
/* Full block: digest straight into the output buffer */
err = crypto_shash_finup(desc, &counter, 1, &okm[i]);
if (err)
goto out;
}
counter++;
/* The block just written feeds the next iteration's HMAC */
prev = &okm[i];
}
err = 0;
out:
if (unlikely(err))
memzero_explicit(okm, okmlen); /* so caller doesn't need to */
shash_desc_zero(desc);
return err;
}
/*
 * Free the HMAC transform held by 'hkdf'.  The pointer stored in 'hkdf' is not
 * cleared here.
 */
void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf)
{
crypto_free_shash(hkdf->hmac_tfm);
}

View File

@@ -38,9 +38,9 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
dir = dget_parent(file_dentry(filp));
if (IS_ENCRYPTED(d_inode(dir)) &&
!fscrypt_has_permitted_context(d_inode(dir), inode)) {
fscrypt_warn(inode->i_sb,
"inconsistent encryption contexts: %lu/%lu",
d_inode(dir)->i_ino, inode->i_ino);
fscrypt_warn(inode,
"Inconsistent encryption context (parent directory: %lu)",
d_inode(dir)->i_ino);
err = -EPERM;
}
dput(dir);

View File

@@ -1,612 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* key management facility for FS encryption support.
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions.
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
#include <keys/user-type.h>
#include <linux/hashtable.h>
#include <linux/scatterlist.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
static struct crypto_shash *essiv_hash_tfm;
/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */
static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */
static DEFINE_SPINLOCK(fscrypt_master_keys_lock);
/*
 * Key derivation function. This generates the derived key by encrypting the
 * master key with AES-128-ECB using the inode's nonce as the AES key.
 *
 * The master key must be at least as long as the derived key. If the master
 * key is longer, then only the first 'derived_keysize' bytes are used.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int derive_key_aes(const u8 *master_key,
const struct fscrypt_context *ctx,
u8 *derived_key, unsigned int derived_keysize)
{
int res = 0;
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
/* Replace the error pointer so the shared cleanup sees NULL */
tfm = NULL;
goto out;
}
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
/* The inode's nonce is the cipher key; the master key is the input */
res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce));
if (res < 0)
goto out;
sg_init_one(&src_sg, master_key, derived_keysize);
sg_init_one(&dst_sg, derived_key, derived_keysize);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize,
NULL);
/* Run the encryption; 'res' takes the status from crypto_wait_req() */
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
out:
/* On the early-failure paths 'req' and/or 'tfm' are still NULL here */
skcipher_request_free(req);
crypto_free_skcipher(tfm);
return res;
}
/*
 * Search the current task's subscribed keyrings for a "logon" key with
 * description prefix:descriptor, and if found acquire a read lock on it and
 * return a pointer to its validated payload in *payload_ret.
 *
 * On success the key is returned with key->sem held for reading; the caller
 * must up_read() the semaphore and key_put() the key when finished.
 *
 * On failure an ERR_PTR is returned: -ENOMEM if the description could not be
 * allocated, the error from request_key(), or -ENOKEY if the key was revoked,
 * has a malformed payload, or is shorter than 'min_keysize'.
 */
static struct key *
find_and_lock_process_key(const char *prefix,
const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
unsigned int min_keysize,
const struct fscrypt_key **payload_ret)
{
char *description;
struct key *key;
const struct user_key_payload *ukp;
const struct fscrypt_key *payload;
/* "%*phN" prints the descriptor bytes as hex with no separators */
description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
FS_KEY_DESCRIPTOR_SIZE, descriptor);
if (!description)
return ERR_PTR(-ENOMEM);
key = request_key(&key_type_logon, description, NULL);
kfree(description);
if (IS_ERR(key))
return key;
down_read(&key->sem);
ukp = user_key_payload_locked(key);
if (!ukp) /* was the key revoked before we acquired its semaphore? */
goto invalid;
payload = (const struct fscrypt_key *)ukp->data;
/* The payload must be exactly one struct fscrypt_key with a sane size */
if (ukp->datalen != sizeof(struct fscrypt_key) ||
payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
fscrypt_warn(NULL,
"key with description '%s' has invalid payload",
key->description);
goto invalid;
}
if (payload->size < min_keysize) {
fscrypt_warn(NULL,
"key with description '%s' is too short (got %u bytes, need %u+ bytes)",
key->description, payload->size, min_keysize);
goto invalid;
}
*payload_ret = payload;
return key;
invalid:
/* Undo the read lock and the reference taken by request_key() */
up_read(&key->sem);
key_put(key);
return ERR_PTR(-ENOKEY);
}
/*
 * Table of supported encryption modes, indexed by FS_ENCRYPTION_MODE_* number.
 * 'keysize' and 'ivsize' are in bytes.  The table is not const because
 * allocate_skcipher_for_mode() sets ->logged_impl_name the first time a mode
 * is used.
 */
static struct fscrypt_mode available_modes[] = {
[FS_ENCRYPTION_MODE_AES_256_XTS] = {
.friendly_name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
.ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_256_CTS] = {
.friendly_name = "AES-256-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 32,
.ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_128_CBC] = {
.friendly_name = "AES-128-CBC",
.cipher_str = "cbc(aes)",
.keysize = 16,
.ivsize = 16,
/* the only mode here that needs the ESSIV IV generator */
.needs_essiv = true,
},
[FS_ENCRYPTION_MODE_AES_128_CTS] = {
.friendly_name = "AES-128-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 16,
.ivsize = 16,
},
[FS_ENCRYPTION_MODE_ADIANTUM] = {
.friendly_name = "Adiantum",
.cipher_str = "adiantum(xchacha12,aes)",
.keysize = 32,
.ivsize = 32,
},
};
static struct fscrypt_mode *
select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
{
if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
fscrypt_warn(inode->i_sb,
"inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)",
inode->i_ino, ci->ci_data_mode,
ci->ci_filename_mode);
return ERR_PTR(-EINVAL);
}
if (S_ISREG(inode->i_mode))
return &available_modes[ci->ci_data_mode];
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
return &available_modes[ci->ci_filename_mode];
WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
inode->i_ino, (inode->i_mode & S_IFMT));
return ERR_PTR(-EINVAL);
}
/*
 * Find the master key, then derive the inode's actual encryption key.
 *
 * The key is looked up under FS_KEY_DESC_PREFIX first and, if that yields
 * -ENOKEY and the filesystem supplies its own key prefix, under that prefix.
 * On success 'derived_key' holds mode->keysize bytes and 0 is returned;
 * otherwise a negative errno is returned.
 */
static int find_and_derive_key(const struct inode *inode,
const struct fscrypt_context *ctx,
u8 *derived_key, const struct fscrypt_mode *mode)
{
struct key *key;
const struct fscrypt_key *payload;
int err;
key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
ctx->master_key_descriptor,
mode->keysize, &payload);
/* Retry with the filesystem-specific prefix only on a plain "not found" */
if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
ctx->master_key_descriptor,
mode->keysize, &payload);
}
if (IS_ERR(key))
return PTR_ERR(key);
if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) {
/* The IV must be long enough to reach the end of the nonce field */
if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) {
fscrypt_warn(inode->i_sb,
"direct key mode not allowed with %s",
mode->friendly_name);
err = -EINVAL;
} else if (ctx->contents_encryption_mode !=
ctx->filenames_encryption_mode) {
fscrypt_warn(inode->i_sb,
"direct key mode not allowed with different contents and filenames modes");
err = -EINVAL;
} else {
/* Direct key: the master key itself is used, with no derivation */
memcpy(derived_key, payload->raw, mode->keysize);
err = 0;
}
} else {
err = derive_key_aes(payload->raw, ctx, derived_key,
mode->keysize);
}
/* Release the read lock and reference from find_and_lock_process_key() */
up_read(&key->sem);
key_put(key);
return err;
}
/*
 * Allocate a symmetric cipher transform for 'mode' and key it with the first
 * mode->keysize bytes of 'raw_key'.  Returns the transform, or an ERR_PTR if
 * allocation or keying fails.
 */
static struct crypto_skcipher *
allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key,
			   const struct inode *inode)
{
	struct crypto_skcipher *cipher;
	int ret;

	cipher = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
	if (IS_ERR(cipher)) {
		fscrypt_warn(inode->i_sb,
			     "error allocating '%s' transform for inode %lu: %ld",
			     mode->cipher_str, inode->i_ino, PTR_ERR(cipher));
		return cipher;
	}

	if (unlikely(!mode->logged_impl_name)) {
		/*
		 * Which crypto algorithm implementation gets picked can make a
		 * big difference to fscrypt performance.  To help with
		 * debugging that, log the ->cra_driver_name the first time a
		 * mode is used.  Several threads may race to do so, which is
		 * harmless.
		 */
		mode->logged_impl_name = true;
		pr_info("fscrypt: %s using implementation \"%s\"\n",
			mode->friendly_name,
			crypto_skcipher_alg(cipher)->base.cra_driver_name);
	}

	crypto_skcipher_set_flags(cipher, CRYPTO_TFM_REQ_WEAK_KEY);
	ret = crypto_skcipher_setkey(cipher, raw_key, mode->keysize);
	if (!ret)
		return cipher;

	crypto_free_skcipher(cipher);
	return ERR_PTR(ret);
}
/*
 * Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy.  Entries live in
 * the fscrypt_master_keys hash table and are matched on (descriptor, mode,
 * raw key) so that inodes sharing all three can share one transform.
 */
struct fscrypt_master_key {
/* Link in the fscrypt_master_keys hash table */
struct hlist_node mk_node;
/* Number of users; the entry is unhashed and freed when it drops to 0 */
refcount_t mk_refcount;
/* Encryption mode the transform was allocated for */
const struct fscrypt_mode *mk_mode;
/* Transform keyed with the raw master key */
struct crypto_skcipher *mk_ctfm;
u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE];
/* Copy of the raw key, kept for comparison on lookup */
u8 mk_raw[FS_MAX_KEY_SIZE];
};
/* Free a master key entry and its transform; a NULL 'mk' is a no-op */
static void free_master_key(struct fscrypt_master_key *mk)
{
	if (!mk)
		return;

	crypto_free_skcipher(mk->mk_ctfm);
	/* kzfree() so the raw key copy is wiped before the memory is released */
	kzfree(mk);
}
/*
 * Drop one reference to 'mk'.  If refcount_dec_and_lock() reports success, the
 * entry is unhashed with fscrypt_master_keys_lock held and then freed after the
 * lock has been released.
 */
static void put_master_key(struct fscrypt_master_key *mk)
{
	if (refcount_dec_and_lock(&mk->mk_refcount,
				  &fscrypt_master_keys_lock)) {
		hash_del(&mk->mk_node);
		spin_unlock(&fscrypt_master_keys_lock);

		free_master_key(mk);
	}
}
/*
 * Find/insert the given master key into the fscrypt_master_keys table. If
 * found, it is returned with elevated refcount, and 'to_insert' is freed if
 * non-NULL. If not found, 'to_insert' is inserted and returned if it's
 * non-NULL; otherwise NULL is returned.
 *
 * The lookup and the optional insertion happen under a single hold of
 * fscrypt_master_keys_lock.
 */
static struct fscrypt_master_key *
find_or_insert_master_key(struct fscrypt_master_key *to_insert,
const u8 *raw_key, const struct fscrypt_mode *mode,
const struct fscrypt_info *ci)
{
unsigned long hash_key;
struct fscrypt_master_key *mk;
/*
 * Careful: to avoid potentially leaking secret key bytes via timing
 * information, we must key the hash table by descriptor rather than by
 * raw key, and use crypto_memneq() when comparing raw keys.
 */
BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE);
/* The hash key is the leading bytes of the (non-secret) descriptor */
memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
spin_lock(&fscrypt_master_keys_lock);
hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) {
if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor,
FS_KEY_DESCRIPTOR_SIZE) != 0)
continue;
if (mode != mk->mk_mode)
continue;
if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize))
continue;
/* using existing tfm with same (descriptor, mode, raw_key) */
refcount_inc(&mk->mk_refcount);
spin_unlock(&fscrypt_master_keys_lock);
/* The caller's candidate is redundant; free it outside the lock */
free_master_key(to_insert);
return mk;
}
if (to_insert)
hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key);
spin_unlock(&fscrypt_master_keys_lock);
return to_insert;
}
/*
 * Get a shared master key entry for encrypting directly with the master key in
 * the given mode.  An existing entry is reused when one matches; otherwise a
 * new one is allocated, keyed and inserted.  Returns the entry or an ERR_PTR.
 */
static struct fscrypt_master_key *
fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode,
		       const u8 *raw_key, const struct inode *inode)
{
	struct fscrypt_master_key *existing;
	struct fscrypt_master_key *fresh;
	struct crypto_skcipher *tfm;
	int err;

	/* Reuse a transform already set up for this key, if any */
	existing = find_or_insert_master_key(NULL, raw_key, mode, ci);
	if (existing)
		return existing;

	/* None yet, so build a new entry */
	fresh = kzalloc(sizeof(*fresh), GFP_NOFS);
	if (!fresh)
		return ERR_PTR(-ENOMEM);
	refcount_set(&fresh->mk_refcount, 1);
	fresh->mk_mode = mode;

	tfm = allocate_skcipher_for_mode(mode, raw_key, inode);
	if (IS_ERR(tfm)) {
		err = PTR_ERR(tfm);
		/* ->mk_ctfm is still NULL from kzalloc() */
		free_master_key(fresh);
		return ERR_PTR(err);
	}
	fresh->mk_ctfm = tfm;

	memcpy(fresh->mk_descriptor, ci->ci_master_key_descriptor,
	       FS_KEY_DESCRIPTOR_SIZE);
	memcpy(fresh->mk_raw, raw_key, mode->keysize);

	/* May return a matching entry inserted meanwhile; 'fresh' is then freed */
	return find_or_insert_master_key(fresh, raw_key, mode, ci);
}
/*
 * Compute the ESSIV salt as the SHA-256 digest of 'key' ('keysize' bytes) and
 * store it in 'salt'.  The shared hash transform is allocated on first use.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
{
	struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);

	/* init hash transform on demand */
	if (unlikely(!tfm)) {
		struct crypto_shash *prev_tfm;

		tfm = crypto_alloc_shash("sha256", 0, 0);
		if (IS_ERR(tfm)) {
			fscrypt_warn(NULL,
				     "error allocating SHA-256 transform: %ld",
				     PTR_ERR(tfm));
			return PTR_ERR(tfm);
		}
		/* If essiv_hash_tfm was set meanwhile, use that one and drop ours */
		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
		if (prev_tfm) {
			crypto_free_shash(tfm);
			tfm = prev_tfm;
		}
	}

	{
		SHASH_DESC_ON_STACK(desc, tfm);
		int err;

		desc->tfm = tfm;
		desc->flags = 0;

		err = crypto_shash_digest(desc, key, keysize, salt);
		/*
		 * The descriptor was used to hash raw key material; wipe it
		 * rather than leaving that state on the stack.
		 */
		shash_desc_zero(desc);
		return err;
	}
}
/*
 * Allocate the AES cipher used for ESSIV IV generation, store it in
 * ci->ci_essiv_tfm, and key it with a salt derived from 'raw_key'.
 *
 * Returns 0 or a negative errno.  If a step after allocation fails, the
 * transform stays attached to 'ci'; this function does not free it.
 */
static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
				int keysize)
{
	struct crypto_cipher *cipher = crypto_alloc_cipher("aes", 0, 0);
	u8 salt[SHA256_DIGEST_SIZE];
	int ret;

	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	ci->ci_essiv_tfm = cipher;

	ret = derive_essiv_salt(raw_key, keysize, salt);
	/*
	 * Because the salt/key comes from SHA256, IV generation ends up using
	 * AES-256.  File contents encryption nevertheless keeps using the
	 * configured keysize (AES-128).
	 */
	if (!ret)
		ret = crypto_cipher_setkey(cipher, salt, sizeof(salt));

	memzero_explicit(salt, sizeof(salt));
	return ret;
}
/*
 * Free the SHA-256 transform that derive_essiv_salt() allocates on first use.
 * essiv_hash_tfm may still be NULL if ESSIV was never used.
 */
void __exit fscrypt_essiv_cleanup(void)
{
crypto_free_shash(essiv_hash_tfm);
}
/*
 * Given the encryption mode and key (normally the derived key, but for
 * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's
 * symmetric cipher transform object(s).
 *
 * Returns 0 or a negative errno.  If ESSIV setup fails, the transform (or
 * master key reference) is already stored in 'ci' and is not released here.
 */
static int setup_crypto_transform(struct fscrypt_info *ci,
struct fscrypt_mode *mode,
const u8 *raw_key, const struct inode *inode)
{
struct fscrypt_master_key *mk;
struct crypto_skcipher *ctfm;
int err;
if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) {
/* Share the transform of a matching master key entry */
mk = fscrypt_get_master_key(ci, mode, raw_key, inode);
if (IS_ERR(mk))
return PTR_ERR(mk);
ctfm = mk->mk_ctfm;
} else {
/* Per-inode transform, owned by this fscrypt_info alone */
mk = NULL;
ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
if (IS_ERR(ctfm))
return PTR_ERR(ctfm);
}
/* put_crypt_info() uses ci_master_key to tell the two cases apart */
ci->ci_master_key = mk;
ci->ci_ctfm = ctfm;
if (mode->needs_essiv) {
/* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
WARN_ON(mode->ivsize != AES_BLOCK_SIZE);
WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY);
err = init_essiv_generator(ci, raw_key, mode->keysize);
if (err) {
fscrypt_warn(inode->i_sb,
"error initializing ESSIV generator for inode %lu: %d",
inode->i_ino, err);
return err;
}
}
return 0;
}
/*
 * Release an fscrypt_info; NULL is accepted and ignored.  If it references a
 * shared master key, that reference is dropped.  Otherwise the transforms
 * belong to this fscrypt_info and are freed.
 */
static void put_crypt_info(struct fscrypt_info *ci)
{
	struct fscrypt_master_key *shared;

	if (!ci)
		return;

	shared = ci->ci_master_key;
	if (!shared) {
		crypto_free_skcipher(ci->ci_ctfm);
		crypto_free_cipher(ci->ci_essiv_tfm);
	} else {
		put_master_key(shared);
	}

	kmem_cache_free(fscrypt_info_cachep, ci);
}
/*
 * Set up the fscrypt_info for 'inode' if it doesn't have one yet: read and
 * validate the encryption context, pick the mode, find/derive the key, set up
 * the cipher transform(s), and install the result in inode->i_crypt_info.
 *
 * Returns 0 on success, and also when the key is unavailable (-ENOKEY is
 * mapped to 0, leaving i_crypt_info unset).  Other failures return a negative
 * errno.
 */
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
struct fscrypt_mode *mode;
u8 *raw_key = NULL;
int res;
/* Nothing to do if the inode's key is already set up */
if (fscrypt_has_encryption_key(inode))
return 0;
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
if (res)
return res;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode) ||
IS_ENCRYPTED(inode))
return res;
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
} else if (res != sizeof(ctx)) {
/* A context of any other size is rejected */
return -EINVAL;
}
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
/* Copy the fields needed later out of the on-stack context */
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
mode = select_encryption_mode(crypt_info, inode);
if (IS_ERR(mode)) {
res = PTR_ERR(mode);
goto out;
}
WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
crypt_info->ci_mode = mode;
/*
 * This cannot be a stack buffer because it may be passed to the
 * scatterlist crypto API as part of key derivation.
 */
res = -ENOMEM;
raw_key = kmalloc(mode->keysize, GFP_NOFS);
if (!raw_key)
goto out;
res = find_and_derive_key(inode, &ctx, raw_key, mode);
if (res)
goto out;
res = setup_crypto_transform(crypt_info, mode, raw_key, inode);
if (res)
goto out;
/*
 * Install the info only if i_crypt_info is still NULL.  On success the
 * inode owns it, so clear the local pointer to keep the cleanup below from
 * freeing it; otherwise our copy is freed below.
 */
if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
/* A missing key is not reported as an error to the caller */
if (res == -ENOKEY)
res = 0;
put_crypt_info(crypt_info);
/* kzfree() wipes the key bytes before freeing; NULL is fine here */
kzfree(raw_key);
return res;
}
EXPORT_SYMBOL(fscrypt_get_encryption_info);
/**
 * fscrypt_put_encryption_info - free most of an inode's fscrypt data
 *
 * Free the inode's fscrypt_info. Filesystems must call this when the inode is
 * being evicted. An RCU grace period need not have elapsed yet.
 *
 * It is safe to call this on an inode whose ->i_crypt_info is NULL.
 */
void fscrypt_put_encryption_info(struct inode *inode)
{
put_crypt_info(inode->i_crypt_info);
/* Don't leave a dangling pointer to the freed info */
inode->i_crypt_info = NULL;
}
EXPORT_SYMBOL(fscrypt_put_encryption_info);
/**
 * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay
 *
 * Release the cached decrypted symlink target of an encrypted symlink, if one
 * exists.  Filesystems must call this after an RCU grace period, just before
 * they free the inode.
 */
void fscrypt_free_inode(struct inode *inode)
{
	/* Only encrypted symlinks have a target cached in ->i_link to free */
	if (!IS_ENCRYPTED(inode) || !S_ISLNK(inode->i_mode))
		return;

	kfree(inode->i_link);
	inode->i_link = NULL;
}
EXPORT_SYMBOL(fscrypt_free_inode);

984
fs/crypto/keyring.c Normal file
View File

@@ -0,0 +1,984 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Filesystem-level keyring for fscrypt
*
* Copyright 2019 Google LLC
*/
/*
* This file implements management of fscrypt master keys in the
* filesystem-level keyring, including the ioctls:
*
* - FS_IOC_ADD_ENCRYPTION_KEY
* - FS_IOC_REMOVE_ENCRYPTION_KEY
* - FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
* - FS_IOC_GET_ENCRYPTION_KEY_STATUS
*
* See the "User API" section of Documentation/filesystems/fscrypt.rst for more
* information about these ioctls.
*/
#include <crypto/skcipher.h>
#include <linux/key-type.h>
#include <linux/seq_file.h>
#include "fscrypt_private.h"
/*
 * Destroy a master key secret in place: free its HKDF transform, then zero the
 * whole struct.  The order matters because the zeroing also clears the
 * hkdf.hmac_tfm pointer.
 */
static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret)
{
fscrypt_destroy_hkdf(&secret->hkdf);
memzero_explicit(secret, sizeof(*secret));
}
/*
 * Transfer a secret from 'src' to 'dst' by copying the struct bytewise and
 * then zeroing 'src', so that only 'dst' holds the key material and the
 * hkdf.hmac_tfm pointer afterwards.  Whatever 'dst' held before is overwritten
 * without being wiped or freed.
 */
static void move_master_key_secret(struct fscrypt_master_key_secret *dst,
struct fscrypt_master_key_secret *src)
{
memcpy(dst, src, sizeof(*dst));
memzero_explicit(src, sizeof(*src));
}
/*
 * Release everything owned by a fscrypt_master_key: wipe the secret, free
 * every cached per-mode transform, drop the ->mk_users keyring reference, and
 * finally zero-and-free the structure itself.
 */
static void free_master_key(struct fscrypt_master_key *mk)
{
	size_t slot = 0;

	wipe_master_key_secret(&mk->mk_secret);

	while (slot < ARRAY_SIZE(mk->mk_mode_keys)) {
		crypto_free_skcipher(mk->mk_mode_keys[slot]);
		slot++;
	}

	key_put(mk->mk_users);
	kzfree(mk);
}
/*
 * A key specifier is valid when its reserved field is zero and
 * master_key_spec_len() reports a nonzero length for it.
 */
static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
{
	return !spec->__reserved && master_key_spec_len(spec) != 0;
}
/*
 * ->instantiate for key_type_fscrypt: the preparsed payload data is the
 * 'struct fscrypt_master_key' itself, so just store that pointer as the key's
 * payload.  Always returns 0.
 */
static int fscrypt_key_instantiate(struct key *key,
				   struct key_preparsed_payload *prep)
{
	struct fscrypt_master_key *mk = (struct fscrypt_master_key *)prep->data;

	key->payload.data[0] = mk;
	return 0;
}
/* ->destroy for key_type_fscrypt: free the master key stored as the payload. */
static void fscrypt_key_destroy(struct key *key)
{
	struct fscrypt_master_key *mk = key->payload.data[0];

	free_master_key(mk);
}
/*
 * ->describe for key_type_fscrypt: print the key's description, and for a
 * positively instantiated key whose secret is no longer present, append a
 * note saying so.
 */
static void fscrypt_key_describe(const struct key *key, struct seq_file *m)
{
	const struct fscrypt_master_key *mk;

	seq_puts(m, key->description);

	/* The payload is only inspected for positively instantiated keys. */
	if (!key_is_positive(key))
		return;

	mk = key->payload.data[0];
	if (is_master_key_secret_present(&mk->mk_secret))
		return;

	seq_puts(m, ": secret removed");
}
/*
* Type of key in ->s_master_keys. Each key of this type represents a master
* key which has been added to the filesystem. Its payload is a
* 'struct fscrypt_master_key'. The "." prefix in the key type name prevents
* users from adding keys of this type via the keyrings syscalls rather than via
* the intended method of FS_IOC_ADD_ENCRYPTION_KEY.
*/
static struct key_type key_type_fscrypt = {
.name = "._fscrypt",
/* Stores the preparsed fscrypt_master_key pointer as the payload. */
.instantiate = fscrypt_key_instantiate,
/* Frees the fscrypt_master_key payload. */
.destroy = fscrypt_key_destroy,
/* Prints the description, plus a note if the secret was removed. */
.describe = fscrypt_key_describe,
};
/*
 * ->instantiate for key_type_fscrypt_user.  These keys carry no payload of
 * their own; instantiation only charges the owning user's key quota.
 *
 * Returns the result of key_payload_reserve().
 */
static int fscrypt_user_key_instantiate(struct key *key,
					struct key_preparsed_payload *prep)
{
	int err;

	/*
	 * A flat FSCRYPT_MAX_KEY_SIZE bytes is charged per key no matter how
	 * long the raw key really is; the memory actually consumed exceeds the
	 * raw key size in any case.
	 */
	err = key_payload_reserve(key, FSCRYPT_MAX_KEY_SIZE);
	return err;
}
/* ->describe for key_type_fscrypt_user: print only the key's description. */
static void fscrypt_user_key_describe(const struct key *key, struct seq_file *m)
{
seq_puts(m, key->description);
}
/*
* Type of key in ->mk_users. Each key of this type represents a particular
* user who has added a particular master key.
*
* Note that the name of this key type really should be something like
* ".fscrypt-user" instead of simply ".fscrypt". But the shorter name is chosen
* mainly for simplicity of presentation in /proc/keys when read by a non-root
* user. And it is expected to be rare that a key is actually added by multiple
* users, since users should keep their encryption keys confidential.
*
* No ->destroy method is set here; the instantiate method only reserves quota
* and does not store a payload.
*/
static struct key_type key_type_fscrypt_user = {
.name = ".fscrypt",
/* Charges FSCRYPT_MAX_KEY_SIZE bytes to the user's key quota. */
.instantiate = fscrypt_user_key_instantiate,
/* Prints the key's description. */
.describe = fscrypt_user_key_describe,
};
/*
 * Look up a key of the given type and description in ->s_master_keys or
 * ->mk_users.
 *
 * Returns the key on success.  On failure returns an ERR_PTR(); the "not
 * found" (-EAGAIN) and "recently invalidated" (-EKEYREVOKED) results are both
 * reported as -ENOKEY, other errors are passed through unchanged.
 */
static struct key *search_fscrypt_keyring(struct key *keyring,
					  struct key_type *type,
					  const char *description)
{
	key_ref_t found;
	long rc;

	/*
	 * The keyring reference is flagged as "possessed" so that the
	 * KEY_POS_SEARCH permission applies and the search is allowed.
	 */
	found = keyring_search(make_key_ref(keyring, true /* possessed */),
			       type, description);
	if (!IS_ERR(found))
		return key_ref_to_ptr(found);

	rc = PTR_ERR(found);
	if (rc == -EAGAIN ||		/* not found */
	    rc == -EKEYREVOKED)		/* recently invalidated */
		rc = -ENOKEY;
	return ERR_PTR(rc);
}
#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \
(CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id))
#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
#define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \
(CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \
CONST_STRLEN("-users") + 1)
#define FSCRYPT_MK_USER_DESCRIPTION_SIZE \
(2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1)
/* Build the description of a filesystem's keyring: "fscrypt-<sb->s_id>". */
static void format_fs_keyring_description(
char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
const struct super_block *sb)
{
sprintf(description, "fscrypt-%s", sb->s_id);
}
/*
* Build the description of a master key: the key specifier's value
* (master_key_spec_len() bytes of ->u) printed as hex with no separators.
*/
static void format_mk_description(
char description[FSCRYPT_MK_DESCRIPTION_SIZE],
const struct fscrypt_key_specifier *mk_spec)
{
sprintf(description, "%*phN",
master_key_spec_len(mk_spec), (u8 *)&mk_spec->u);
}
/*
* Build the description of a master key's ->mk_users keyring:
* "fscrypt-<hex key identifier>-users".
*/
static void format_mk_users_keyring_description(
char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE],
const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
sprintf(description, "fscrypt-%*phN-users",
FSCRYPT_KEY_IDENTIFIER_SIZE, mk_identifier);
}
/*
* Build the description of the current user's entry in ->mk_users:
* "<hex key identifier>.uid.<fsuid of the current task>".
*/
static void format_mk_user_description(
char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE],
const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
sprintf(description, "%*phN.uid.%u", FSCRYPT_KEY_IDENTIFIER_SIZE,
mk_identifier, __kuid_val(current_fsuid()));
}
/*
 * Make sure the superblock has a ->s_master_keys keyring, creating one if it
 * does not exist yet.  Callers are serialized by fscrypt_add_key_mutex.
 *
 * Returns 0 if the keyring exists on return, or the error from
 * keyring_alloc().
 */
static int allocate_filesystem_keyring(struct super_block *sb)
{
	char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE];
	struct key *new_keyring;

	if (!sb->s_master_keys) {
		format_fs_keyring_description(description, sb);
		new_keyring = keyring_alloc(description,
					    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
					    current_cred(),
					    KEY_POS_SEARCH | KEY_USR_SEARCH |
					    KEY_USR_READ | KEY_USR_VIEW,
					    KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
		if (IS_ERR(new_keyring))
			return PTR_ERR(new_keyring);

		/* Pairs with READ_ONCE() in fscrypt_find_master_key() */
		smp_store_release(&sb->s_master_keys, new_keyring);
	}
	return 0;
}
/* Drop the superblock's reference to ->s_master_keys and clear the pointer. */
void fscrypt_sb_free(struct super_block *sb)
{
	struct key *keyring = sb->s_master_keys;

	key_put(keyring);
	sb->s_master_keys = NULL;
}
/*
 * Look up the master key identified by @mk_spec in the filesystem's
 * ->s_master_keys keyring.
 *
 * Returns the key, ERR_PTR(-ENOKEY) when it is not found (including when the
 * filesystem has no keyring yet), or another ERR_PTR() from the search.
 */
struct key *fscrypt_find_master_key(struct super_block *sb,
				    const struct fscrypt_key_specifier *mk_spec)
{
	char description[FSCRYPT_MK_DESCRIPTION_SIZE];
	/* pairs with smp_store_release() in allocate_filesystem_keyring() */
	struct key *keyring = READ_ONCE(sb->s_master_keys);

	/* No keyring yet, so no keys yet. */
	if (!keyring)
		return ERR_PTR(-ENOKEY);

	format_mk_description(description, mk_spec);
	return search_fscrypt_keyring(keyring, &key_type_fscrypt, description);
}
/*
 * Create the ->mk_users keyring for a master key, named after the key's
 * identifier, and store it in @mk.
 *
 * Returns 0 on success or the error from keyring_alloc(), in which case
 * ->mk_users is left untouched.
 */
static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
{
	char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE];
	struct key *users;

	format_mk_users_keyring_description(description,
					    mk->mk_spec.u.identifier);
	users = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
			      current_cred(),
			      KEY_POS_SEARCH | KEY_USR_SEARCH |
			      KEY_USR_READ | KEY_USR_VIEW,
			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
	if (!IS_ERR(users)) {
		mk->mk_users = users;
		return 0;
	}
	return PTR_ERR(users);
}
/*
 * Search the master key's ->mk_users keyring for the entry belonging to the
 * current user.
 *
 * Returns the entry, or ERR_PTR(-ENOKEY) if the current user has none.
 */
static struct key *find_master_key_user(struct fscrypt_master_key *mk)
{
	char user_desc[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
	struct key *users_keyring = mk->mk_users;

	format_mk_user_description(user_desc, mk->mk_spec.u.identifier);
	return search_fscrypt_keyring(users_keyring, &key_type_fscrypt_user,
				      user_desc);
}
/*
 * Record the current user in ->mk_users by linking in a per-user "key".  Doing
 * so charges the user's key quota and marks the master key as having been
 * added by this user, which stops another user who has the key from removing
 * it.
 *
 * The caller must either hold the master key's key->sem for write, or be
 * operating on a master key that is still being initialized.
 *
 * Returns 0 on success or a -errno code from key allocation or linking.
 */
static int add_master_key_user(struct fscrypt_master_key *mk)
{
	char user_desc[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
	struct key *user_key;
	int ret;

	format_mk_user_description(user_desc, mk->mk_spec.u.identifier);

	user_key = key_alloc(&key_type_fscrypt_user, user_desc,
			     current_fsuid(), current_gid(), current_cred(),
			     KEY_POS_SEARCH | KEY_USR_VIEW, 0, NULL);
	if (IS_ERR(user_key))
		return PTR_ERR(user_key);

	ret = key_instantiate_and_link(user_key, NULL, 0, mk->mk_users, NULL);
	/* Drop our own reference whether or not the link succeeded. */
	key_put(user_key);
	return ret;
}
/*
 * Unlink the current user's entry from ->mk_users.
 * The caller must hold the master key's key->sem for write.
 *
 * Returns 0 if the entry was removed, -ENOKEY if the current user had no
 * entry, or another -errno code.
 */
static int remove_master_key_user(struct fscrypt_master_key *mk)
{
	struct key *user_key = find_master_key_user(mk);
	int ret;

	if (IS_ERR(user_key))
		return PTR_ERR(user_key);

	ret = key_unlink(mk->mk_users, user_key);
	/* Release the reference obtained by the lookup. */
	key_put(user_key);
	return ret;
}
/*
* Allocate a new fscrypt_master_key which contains the given secret, set it as
* the payload of a new 'struct key' of type fscrypt, and link the 'struct key'
* into the given keyring. Synchronized by fscrypt_add_key_mutex.
*
* Once the fscrypt_master_key has been allocated, @secret is moved into it and
* the caller's copy is zeroed, whether or not the rest of the function
* succeeds.
*
* Returns 0 on success, -ENOMEM if the fscrypt_master_key can't be allocated,
* or the error returned by one of the keyring helpers called below.
*/
static int add_new_master_key(struct fscrypt_master_key_secret *secret,
const struct fscrypt_key_specifier *mk_spec,
struct key *keyring)
{
struct fscrypt_master_key *mk;
char description[FSCRYPT_MK_DESCRIPTION_SIZE];
struct key *key;
int err;
mk = kzalloc(sizeof(*mk), GFP_KERNEL);
if (!mk)
return -ENOMEM;
mk->mk_spec = *mk_spec;
/* From here on the secret lives in @mk; the caller's copy is zeroed. */
move_master_key_secret(&mk->mk_secret, secret);
init_rwsem(&mk->mk_secret_sem);
refcount_set(&mk->mk_refcount, 1); /* secret is present */
INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
spin_lock_init(&mk->mk_decrypted_inodes_lock);
/* Only identifier-type keys track which users have added them. */
if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
err = allocate_master_key_users_keyring(mk);
if (err)
goto out_free_mk;
err = add_master_key_user(mk);
if (err)
goto out_free_mk;
}
/*
* Note that we don't charge this key to anyone's quota, since when
* ->mk_users is in use those keys are charged instead, and otherwise
* (when ->mk_users isn't in use) only root can add these keys.
*/
format_mk_description(description, mk_spec);
key = key_alloc(&key_type_fscrypt, description,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW,
KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto out_free_mk;
}
/* @mk is passed as the payload data; see fscrypt_key_instantiate(). */
err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL);
/* Drop the key_alloc() reference in both the success and error cases. */
key_put(key);
if (err)
goto out_free_mk;
return 0;
out_free_mk:
/* Wipes the secret and also drops ->mk_users if it was allocated. */
free_master_key(mk);
return err;
}
/*
* Positive sentinel returned by add_existing_master_key() (distinct from 0 and
* from negative errno values). add_master_key() reacts to it by invalidating
* the key and retrying the lookup.
*/
#define KEY_DEAD 1
/*
* Add the current user and/or re-add the secret to a master key that is
* already present in ->s_master_keys. Called by add_master_key() with the
* key's key->sem held for write.
*
* Returns 0 on success (including when there was nothing to do), KEY_DEAD if
* the master key's refcount had already dropped to zero or dropped to zero
* here, or a -errno code.
*/
static int add_existing_master_key(struct fscrypt_master_key *mk,
struct fscrypt_master_key_secret *secret)
{
struct key *mk_user;
bool rekey;
int err;
/*
* If the current user is already in ->mk_users, then there's nothing to
* do. (Not applicable for v1 policy keys, which have NULL ->mk_users.)
*/
if (mk->mk_users) {
mk_user = find_master_key_user(mk);
if (mk_user != ERR_PTR(-ENOKEY)) {
if (IS_ERR(mk_user))
return PTR_ERR(mk_user);
key_put(mk_user);
return 0;
}
}
/* If we'll be re-adding ->mk_secret, try to take the reference. */
rekey = !is_master_key_secret_present(&mk->mk_secret);
if (rekey && !refcount_inc_not_zero(&mk->mk_refcount))
return KEY_DEAD;
/* Add the current user to ->mk_users, if applicable. */
if (mk->mk_users) {
err = add_master_key_user(mk);
if (err) {
/* Undo the reference taken above for the secret. */
if (rekey && refcount_dec_and_test(&mk->mk_refcount))
return KEY_DEAD;
return err;
}
}
/* Re-add the secret if needed. */
if (rekey) {
down_write(&mk->mk_secret_sem);
move_master_key_secret(&mk->mk_secret, secret);
up_write(&mk->mk_secret_sem);
}
return 0;
}
/*
* Add the given secret as a master key of the filesystem @sb, identified by
* @mk_spec. If no such key exists yet, a new one is created (creating the
* filesystem keyring first if needed); otherwise the existing key is updated.
*
* The whole find-then-add sequence runs under a function-local static mutex.
*
* Returns 0 on success or a -errno code.
*/
static int add_master_key(struct super_block *sb,
struct fscrypt_master_key_secret *secret,
const struct fscrypt_key_specifier *mk_spec)
{
static DEFINE_MUTEX(fscrypt_add_key_mutex);
struct key *key;
int err;
mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */
retry:
key = fscrypt_find_master_key(sb, mk_spec);
if (IS_ERR(key)) {
err = PTR_ERR(key);
if (err != -ENOKEY)
goto out_unlock;
/* Didn't find the key in ->s_master_keys. Add it. */
err = allocate_filesystem_keyring(sb);
if (err)
goto out_unlock;
err = add_new_master_key(secret, mk_spec, sb->s_master_keys);
} else {
/*
* Found the key in ->s_master_keys. Re-add the secret if
* needed, and add the user to ->mk_users if needed.
*/
down_write(&key->sem);
err = add_existing_master_key(key->payload.data[0], secret);
up_write(&key->sem);
if (err == KEY_DEAD) {
/* Key being removed or needs to be removed */
key_invalidate(key);
key_put(key);
/* Look the key up again now that this one is invalidated. */
goto retry;
}
key_put(key);
}
out_unlock:
mutex_unlock(&fscrypt_add_key_mutex);
return err;
}
/*
* Add a master encryption key to the filesystem, causing all files which were
* encrypted with it to appear "unlocked" (decrypted) when accessed.
*
* When adding a key for use by v1 encryption policies, this ioctl is
* privileged, and userspace must provide the 'key_descriptor'.
*
* When adding a key for use by v2+ encryption policies, this ioctl is
* unprivileged. This is needed, in general, to allow non-root users to use
* encryption without encountering the visibility problems of process-subscribed
* keyrings and the inability to properly remove keys. This works by having
* each key identified by its cryptographically secure hash --- the
* 'key_identifier'. The cryptographic hash ensures that a malicious user
* cannot add the wrong key for a given identifier. Furthermore, each added key
* is charged to the appropriate user's quota for the keyrings service, which
* prevents a malicious user from adding too many keys. Finally, we forbid a
* user from removing a key while other users have added it too, which prevents
* a user who knows another user's key from causing a denial-of-service by
* removing it at an inopportune time. (We tolerate that a user who knows a key
* can prevent other users from removing it.)
*
* For more details, see the "FS_IOC_ADD_ENCRYPTION_KEY" section of
* Documentation/filesystems/fscrypt.rst.
*
* Return: 0 on success; -EFAULT if the argument or raw key can't be copied
* from, or the identifier can't be copied to, userspace; -EINVAL for an invalid
* key specifier, an out-of-range raw key size, or nonzero reserved fields;
* -EACCES if a descriptor-type key is added without CAP_SYS_ADMIN; or another
* -errno code from HKDF setup or from adding the key to the keyring.
*/
int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
{
struct super_block *sb = file_inode(filp)->i_sb;
struct fscrypt_add_key_arg __user *uarg = _uarg;
struct fscrypt_add_key_arg arg;
struct fscrypt_master_key_secret secret;
int err;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
if (!valid_key_spec(&arg.key_spec))
return -EINVAL;
if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE ||
arg.raw_size > FSCRYPT_MAX_KEY_SIZE)
return -EINVAL;
if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
return -EINVAL;
/*
* From here on every exit goes through out_wipe_secret, so the on-stack
* secret is always wiped before returning.
*/
memset(&secret, 0, sizeof(secret));
secret.size = arg.raw_size;
err = -EFAULT;
if (copy_from_user(secret.raw, uarg->raw, secret.size))
goto out_wipe_secret;
switch (arg.key_spec.type) {
case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
/*
* Only root can add keys that are identified by an arbitrary
* descriptor rather than by a cryptographic hash --- since
* otherwise a malicious user could add the wrong key.
*/
err = -EACCES;
if (!capable(CAP_SYS_ADMIN))
goto out_wipe_secret;
break;
case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
if (err)
goto out_wipe_secret;
/*
* Now that the HKDF context is initialized, the raw key is no
* longer needed.
*/
memzero_explicit(secret.raw, secret.size);
/* Calculate the key identifier and return it to userspace. */
err = fscrypt_hkdf_expand(&secret.hkdf,
HKDF_CONTEXT_KEY_IDENTIFIER,
NULL, 0, arg.key_spec.u.identifier,
FSCRYPT_KEY_IDENTIFIER_SIZE);
if (err)
goto out_wipe_secret;
err = -EFAULT;
if (copy_to_user(uarg->key_spec.u.identifier,
arg.key_spec.u.identifier,
FSCRYPT_KEY_IDENTIFIER_SIZE))
goto out_wipe_secret;
break;
default:
/* valid_key_spec() above should have rejected any other type. */
WARN_ON(1);
err = -EINVAL;
goto out_wipe_secret;
}
err = add_master_key(sb, &secret, &arg.key_spec);
out_wipe_secret:
wipe_master_key_secret(&secret);
return err;
}
EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
/*
 * Check that the current user has added a master key with the given
 * identifier.  This prevents a user from encrypting their files with some
 * other user's key which they don't actually know.  Cryptographically that
 * isn't much of a problem, but the semantics would be a bit weird, so it is
 * simply forbidden.
 *
 * The system administrator (CAP_FOWNER) can override this, which should be
 * enough for any use cases where encryption policies are being set using keys
 * that were chosen ahead of time but aren't available at the moment.
 *
 * Note that the key may have already been removed by the time this returns,
 * but that's okay; we just care whether the key was there at some point.
 *
 * Return: 0 if the key is added, -ENOKEY if it isn't, or another -errno code
 */
int fscrypt_verify_key_added(struct super_block *sb,
			     const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
	struct fscrypt_key_specifier mk_spec;
	struct fscrypt_master_key *mk;
	struct key *master;
	struct key *user_entry;
	int err;

	mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
	memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE);

	master = fscrypt_find_master_key(sb, &mk_spec);
	if (IS_ERR(master)) {
		err = PTR_ERR(master);
	} else {
		mk = master->payload.data[0];
		user_entry = find_master_key_user(mk);
		if (IS_ERR(user_entry)) {
			err = PTR_ERR(user_entry);
		} else {
			err = 0;
			key_put(user_entry);
		}
		key_put(master);
	}

	/* A missing key or user entry is forgiven for CAP_FOWNER. */
	if (err == -ENOKEY && capable(CAP_FOWNER))
		err = 0;
	return err;
}
/*
 * Attempt to drop the inode's dentries from the dentry cache.  A directory has
 * at most one dentry, but that dentry may be pinned by its children, so for
 * directories the children are shrunk first.
 */
static void shrink_dcache_inode(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias = d_find_any_alias(inode);

		if (alias) {
			shrink_dcache_parent(alias);
			dput(alias);
		}
	}

	d_prune_aliases(inode);
}
/*
* Walk the master key's ->mk_decrypted_inodes list and try to evict the
* dentries of each inode on it via shrink_dcache_inode().
*
* The list lock is dropped around shrink_dcache_inode() and iput(); each inode
* is referenced with __iget() first, and that reference is only released after
* the next list entry has been referenced (or the walk has ended).
*/
static void evict_dentries_for_decrypted_inodes(struct fscrypt_master_key *mk)
{
struct fscrypt_info *ci;
struct inode *inode;
/* Inode from the previous iteration whose reference is still held. */
struct inode *toput_inode = NULL;
spin_lock(&mk->mk_decrypted_inodes_lock);
list_for_each_entry(ci, &mk->mk_decrypted_inodes, ci_master_key_link) {
inode = ci->ci_inode;
spin_lock(&inode->i_lock);
/* Skip inodes that are being freed or are still being set up. */
if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
}
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&mk->mk_decrypted_inodes_lock);
shrink_dcache_inode(inode);
/* Release the previous inode only now, with no spinlock held. */
iput(toput_inode);
toput_inode = inode;
spin_lock(&mk->mk_decrypted_inodes_lock);
}
spin_unlock(&mk->mk_decrypted_inodes_lock);
iput(toput_inode);
}
/*
* Check whether any inodes remain on the master key's ->mk_decrypted_inodes
* list. If so, log a warning that includes the count and one example inode
* (its number and, when a dentry alias can be found, its path).
*
* Returns 0 if the list is empty, otherwise -EBUSY.
*/
static int check_for_busy_inodes(struct super_block *sb,
struct fscrypt_master_key *mk)
{
struct list_head *pos;
size_t busy_count = 0;
unsigned long ino;
struct dentry *dentry;
char _path[256];
char *path = NULL;
spin_lock(&mk->mk_decrypted_inodes_lock);
list_for_each(pos, &mk->mk_decrypted_inodes)
busy_count++;
if (busy_count == 0) {
spin_unlock(&mk->mk_decrypted_inodes_lock);
return 0;
}
{
/* select an example file to show for debugging purposes */
struct inode *inode =
list_first_entry(&mk->mk_decrypted_inodes,
struct fscrypt_info,
ci_master_key_link)->ci_inode;
ino = inode->i_ino;
dentry = d_find_alias(inode);
}
spin_unlock(&mk->mk_decrypted_inodes_lock);
/* The path is resolved after the list lock has been released. */
if (dentry) {
path = dentry_path(dentry, _path, sizeof(_path));
dput(dentry);
}
/* No alias was found, or dentry_path() failed. */
if (IS_ERR_OR_NULL(path))
path = "(unknown)";
fscrypt_warn(NULL,
"%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)",
sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec),
master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u,
ino, path);
return -EBUSY;
}
/*
 * Try to evict every inode that was unlocked with the given master key:
 * sync the filesystem, evict the inodes' dentries, then check whether any
 * inodes are still on the key's list.
 *
 * Returns the sync error if syncing failed; otherwise the result of
 * check_for_busy_inodes() (0, or -EBUSY if inodes remain).
 */
static int try_to_lock_encrypted_files(struct super_block *sb,
				       struct fscrypt_master_key *mk)
{
	int sync_err;
	int busy_err;

	/*
	 * A dirty inode, or one with dirty pages, can't be evicted, so the
	 * inodes in ->mk_decrypted_inodes must be cleaned first.
	 *
	 * The easy way is taken: sync_filesystem().  That is overkill, but it
	 * works, and minimizing the amount of cache dropped matters more than
	 * minimizing the amount of data synced.  Unprivileged users can
	 * already trigger sync_filesystem() through sys_syncfs() or sys_sync().
	 */
	down_read(&sb->s_umount);
	sync_err = sync_filesystem(sb);
	up_read(&sb->s_umount);

	/*
	 * Even when the sync failed, keep going and evict as much as possible.
	 *
	 * Dentries pin their inodes, so the dentries have to go first.
	 * shrink_dcache_sb() would do the job but is overkill and not
	 * appropriate for unprivileged callers, so instead each inode's alias
	 * list is walked and each dentry evicted individually.
	 */
	evict_dentries_for_decrypted_inodes(mk);

	/*
	 * The eviction pass above already did an iput() on every inode in the
	 * list.  Wherever that dropped the last reference, the inode was
	 * evicted because fscrypt_drop_inode() noticed the key removal and
	 * told the VFS to evict it.  All that is left is to see whether any
	 * inodes could not be evicted.
	 */
	busy_err = check_for_busy_inodes(sb, mk);

	if (sync_err)
		return sync_err;
	return busy_err;
}
/*
* Try to remove an fscrypt master encryption key.
*
* FS_IOC_REMOVE_ENCRYPTION_KEY (all_users=false) removes the current user's
* claim to the key, then removes the key itself if no other users have claims.
* FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS (all_users=true) always removes the
* key itself.
*
* To "remove the key itself", first we wipe the actual master key secret, so
* that no more inodes can be unlocked with it. Then we try to evict all cached
* inodes that had been unlocked with the key.
*
* If all inodes were evicted, then we unlink the fscrypt_master_key from the
* keyring. Otherwise it remains in the keyring in the "incompletely removed"
* state (without the actual secret key) where it tracks the list of remaining
* inodes. Userspace can execute the ioctl again later to retry eviction, or
* alternatively can re-add the secret key again.
*
* For more details, see the "Removing keys" section of
* Documentation/filesystems/fscrypt.rst.
*
* Return: 0 if something was done (see the comment above out_put_key), with
* the informational flags written to ->removal_status_flags; -EFAULT if the
* argument can't be copied from or the flags can't be written to userspace;
* -EINVAL for an invalid key specifier or nonzero reserved fields; -EACCES if
* a descriptor-type key is removed without CAP_SYS_ADMIN; -ENOKEY if the key
* (or the current user's claim to it) isn't found; or another -errno code.
*/
static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
{
struct super_block *sb = file_inode(filp)->i_sb;
struct fscrypt_remove_key_arg __user *uarg = _uarg;
struct fscrypt_remove_key_arg arg;
struct key *key;
struct fscrypt_master_key *mk;
u32 status_flags = 0;
int err;
bool dead;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
if (!valid_key_spec(&arg.key_spec))
return -EINVAL;
if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
return -EINVAL;
/*
* Only root can add and remove keys that are identified by an arbitrary
* descriptor rather than by a cryptographic hash.
*/
if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
!capable(CAP_SYS_ADMIN))
return -EACCES;
/* Find the key being removed. */
key = fscrypt_find_master_key(sb, &arg.key_spec);
if (IS_ERR(key))
return PTR_ERR(key);
mk = key->payload.data[0];
down_write(&key->sem);
/* If relevant, remove current user's (or all users) claim to the key */
if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) {
if (all_users)
err = keyring_clear(mk->mk_users);
else
err = remove_master_key_user(mk);
if (err) {
up_write(&key->sem);
goto out_put_key;
}
if (mk->mk_users->keys.nr_leaves_on_tree != 0) {
/*
* Other users have still added the key too. We removed
* the current user's claim to the key, but we still
* can't remove the key itself.
*/
status_flags |=
FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS;
err = 0;
up_write(&key->sem);
goto out_put_key;
}
}
/* No user claims remaining. Go ahead and wipe the secret. */
dead = false;
if (is_master_key_secret_present(&mk->mk_secret)) {
down_write(&mk->mk_secret_sem);
wipe_master_key_secret(&mk->mk_secret);
/* Drop the reference that was held for the secret being present. */
dead = refcount_dec_and_test(&mk->mk_refcount);
up_write(&mk->mk_secret_sem);
}
up_write(&key->sem);
if (dead) {
/*
* No inodes reference the key, and we wiped the secret, so the
* key object is free to be removed from the keyring.
*/
key_invalidate(key);
err = 0;
} else {
/* Some inodes still reference this key; try to evict them. */
err = try_to_lock_encrypted_files(sb, mk);
if (err == -EBUSY) {
status_flags |=
FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY;
err = 0;
}
}
/*
* We return 0 if we successfully did something: removed a claim to the
* key, wiped the secret, or tried locking the files again. Users need
* to check the informational status flags if they care whether the key
* has been fully removed including all files locked.
*/
out_put_key:
key_put(key);
if (err == 0)
err = put_user(status_flags, &uarg->removal_status_flags);
return err;
}
/*
* FS_IOC_REMOVE_ENCRYPTION_KEY: remove the current user's claim to a master
* key, and the key itself if no other users have claims. See do_remove_key().
*/
int fscrypt_ioctl_remove_key(struct file *filp, void __user *uarg)
{
return do_remove_key(filp, uarg, false);
}
EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key);
/*
 * FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: remove a master key regardless of
 * which users have added it.  Requires CAP_SYS_ADMIN; returns -EACCES
 * otherwise.  See do_remove_key() for the remaining return values.
 */
int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *uarg)
{
	if (capable(CAP_SYS_ADMIN))
		return do_remove_key(filp, uarg, true);

	return -EACCES;
}
EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key_all_users);
/*
* Retrieve the status of an fscrypt master encryption key.
*
* We set ->status to indicate whether the key is absent, present, or
* incompletely removed. "Incompletely removed" means that the master key
* secret has been removed, but some files which had been unlocked with it are
* still in use. This field allows applications to easily determine the state
* of an encrypted directory without using a hack such as trying to open a
* regular file in it (which can confuse the "incompletely removed" state with
* absent or present).
*
* In addition, for v2 policy keys we allow applications to determine, via
* ->status_flags and ->user_count, whether the key has been added by the
* current user, by other users, or by both. Most applications should not need
* this, since ordinarily only one user should know a given key. However, if a
* secret key is shared by multiple users, applications may wish to add an
* already-present key to prevent other users from removing it. This ioctl can
* be used to check whether that really is the case before the work is done to
* add the key --- which might e.g. require prompting the user for a passphrase.
*
* For more details, see the "FS_IOC_GET_ENCRYPTION_KEY_STATUS" section of
* Documentation/filesystems/fscrypt.rst.
*
* Return: 0 on success, with the filled-in argument copied back to userspace;
* -EFAULT if the argument can't be copied from or to userspace; -EINVAL for an
* invalid key specifier or nonzero reserved fields; or another -errno code
* from the keyring lookups.
*/
int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
{
struct super_block *sb = file_inode(filp)->i_sb;
struct fscrypt_get_key_status_arg arg;
struct key *key;
struct fscrypt_master_key *mk;
int err;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
if (!valid_key_spec(&arg.key_spec))
return -EINVAL;
if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
return -EINVAL;
/* Start all output fields from a known state before filling them in. */
arg.status_flags = 0;
arg.user_count = 0;
memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved));
key = fscrypt_find_master_key(sb, &arg.key_spec);
if (IS_ERR(key)) {
if (key != ERR_PTR(-ENOKEY))
return PTR_ERR(key);
/* "Not found" is a successful result, reported as ABSENT. */
arg.status = FSCRYPT_KEY_STATUS_ABSENT;
err = 0;
goto out;
}
mk = key->payload.data[0];
down_read(&key->sem);
if (!is_master_key_secret_present(&mk->mk_secret)) {
arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED;
err = 0;
goto out_release_key;
}
arg.status = FSCRYPT_KEY_STATUS_PRESENT;
/* Per-user information only exists for keys that have ->mk_users. */
if (mk->mk_users) {
struct key *mk_user;
arg.user_count = mk->mk_users->keys.nr_leaves_on_tree;
mk_user = find_master_key_user(mk);
if (!IS_ERR(mk_user)) {
arg.status_flags |=
FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF;
key_put(mk_user);
} else if (mk_user != ERR_PTR(-ENOKEY)) {
err = PTR_ERR(mk_user);
goto out_release_key;
}
}
err = 0;
out_release_key:
up_read(&key->sem);
key_put(key);
out:
if (!err && copy_to_user(uarg, &arg, sizeof(arg)))
err = -EFAULT;
return err;
}
EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_key_status);
/*
 * Register the two key types used by the filesystem-level keyring.  If the
 * second registration fails, the first one is undone.
 *
 * Returns 0 on success or the error from register_key_type().
 */
int __init fscrypt_init_keyring(void)
{
	int err = register_key_type(&key_type_fscrypt);

	if (err)
		return err;

	err = register_key_type(&key_type_fscrypt_user);
	if (err)
		unregister_key_type(&key_type_fscrypt);

	return err;
}

592
fs/crypto/keysetup.c Normal file
View File

@@ -0,0 +1,592 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Key setup facility for FS encryption support.
*
* Copyright (C) 2015, Google, Inc.
*
* Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar.
* Heavily modified since then.
*/
#include <crypto/aes.h>
#include <crypto/sha.h>
#include <crypto/skcipher.h>
#include <linux/key.h>
#include "fscrypt_private.h"
/*
* Shared hash transform used to derive ESSIV salts. It starts out NULL and is
* allocated on first use by derive_essiv_salt().
*/
static struct crypto_shash *essiv_hash_tfm;
/*
* Table of encryption modes, indexed by FSCRYPT_MODE_* number. For each mode
* it gives a human-readable name, the crypto API algorithm name, and the key
* and IV sizes in bytes. setup_per_mode_key() recovers a mode's number from
* its position in this array.
*/
static struct fscrypt_mode available_modes[] = {
[FSCRYPT_MODE_AES_256_XTS] = {
.friendly_name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
.ivsize = 16,
},
[FSCRYPT_MODE_AES_256_CTS] = {
.friendly_name = "AES-256-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 32,
.ivsize = 16,
},
[FSCRYPT_MODE_AES_128_CBC] = {
.friendly_name = "AES-128-CBC",
.cipher_str = "cbc(aes)",
.keysize = 16,
.ivsize = 16,
/* The only mode for which an ESSIV generator is set up. */
.needs_essiv = true,
},
[FSCRYPT_MODE_AES_128_CTS] = {
.friendly_name = "AES-128-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 16,
.ivsize = 16,
},
[FSCRYPT_MODE_ADIANTUM] = {
.friendly_name = "Adiantum",
.cipher_str = "adiantum(xchacha12,aes)",
.keysize = 32,
.ivsize = 32,
},
};
static struct fscrypt_mode *
select_encryption_mode(const union fscrypt_policy *policy,
const struct inode *inode)
{
if (S_ISREG(inode->i_mode))
return &available_modes[fscrypt_policy_contents_mode(policy)];
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
return &available_modes[fscrypt_policy_fnames_mode(policy)];
WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
inode->i_ino, (inode->i_mode & S_IFMT));
return ERR_PTR(-EINVAL);
}
/*
 * Allocate a symmetric cipher transform for the given encryption mode and
 * program it with @raw_key (mode->keysize bytes).
 *
 * Returns the transform on success.  On failure returns an ERR_PTR():
 * -ENOPKG when the crypto API lacks the algorithm, the allocation error
 * otherwise, or the error from setting the key.
 */
struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
						  const u8 *raw_key,
						  const struct inode *inode)
{
	struct crypto_skcipher *cipher;
	int rc;

	cipher = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
	if (IS_ERR(cipher)) {
		if (PTR_ERR(cipher) != -ENOENT) {
			fscrypt_err(inode, "Error allocating '%s' transform: %ld",
				    mode->cipher_str, PTR_ERR(cipher));
			return cipher;
		}
		/* -ENOENT is reported to callers as -ENOPKG. */
		fscrypt_warn(inode,
			     "Missing crypto API support for %s (API name: \"%s\")",
			     mode->friendly_name, mode->cipher_str);
		return ERR_PTR(-ENOPKG);
	}

	if (unlikely(!mode->logged_impl_name)) {
		/*
		 * Which implementation of the algorithm gets picked can make
		 * a large difference to fscrypt performance.  To help with
		 * debugging performance problems, the ->cra_driver_name is
		 * logged the first time each mode is used.  Several threads
		 * may race through here, but that is harmless.
		 */
		mode->logged_impl_name = true;
		pr_info("fscrypt: %s using implementation \"%s\"\n",
			mode->friendly_name,
			crypto_skcipher_alg(cipher)->base.cra_driver_name);
	}

	crypto_skcipher_set_flags(cipher, CRYPTO_TFM_REQ_WEAK_KEY);
	rc = crypto_skcipher_setkey(cipher, raw_key, mode->keysize);
	if (!rc)
		return cipher;

	crypto_free_skcipher(cipher);
	return ERR_PTR(rc);
}
/*
* Compute the ESSIV salt as the SHA-256 digest of @key (@keysize bytes) and
* write it to @salt.
*
* The hash transform is shared through essiv_hash_tfm and allocated on first
* use. If two callers allocate concurrently, the one that loses the cmpxchg()
* frees its transform and uses the winner's.
*
* Returns 0 on success, -ENOPKG if the crypto API has no SHA-256 support, or
* another -errno code from the allocation or the digest.
*/
static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
{
struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
/* init hash transform on demand */
if (unlikely(!tfm)) {
struct crypto_shash *prev_tfm;
tfm = crypto_alloc_shash("sha256", 0, 0);
if (IS_ERR(tfm)) {
if (PTR_ERR(tfm) == -ENOENT) {
fscrypt_warn(NULL,
"Missing crypto API support for SHA-256");
return -ENOPKG;
}
fscrypt_err(NULL,
"Error allocating SHA-256 transform: %ld",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
/* Publish our transform unless another caller already did. */
prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
if (prev_tfm) {
crypto_free_shash(tfm);
tfm = prev_tfm;
}
}
{
SHASH_DESC_ON_STACK(desc, tfm);
desc->tfm = tfm;
desc->flags = 0;
return crypto_shash_digest(desc, key, keysize, salt);
}
}
/*
 * Set up the ESSIV cipher for @ci: allocate an AES cipher, derive a salt from
 * the raw key, and use that salt as the cipher's key.
 *
 * The cipher is stored in ci->ci_essiv_tfm as soon as it is allocated, so it
 * stays attached to @ci even when a later step fails; this function does not
 * free it.
 *
 * Returns 0 on success, -EINVAL if the mode's IV size isn't AES_BLOCK_SIZE, or
 * another -errno code.
 */
static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
				int keysize)
{
	u8 salt[SHA256_DIGEST_SIZE];
	struct crypto_cipher *cipher;
	int rc;

	if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE))
		return -EINVAL;

	cipher = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);
	ci->ci_essiv_tfm = cipher;

	rc = derive_essiv_salt(raw_key, keysize, salt);
	/*
	 * Because the salt/key comes from SHA256, IV generation ends up using
	 * AES-256.  File contents encryption still uses the configured
	 * keysize (AES-128) regardless.
	 */
	if (!rc)
		rc = crypto_cipher_setkey(cipher, salt, sizeof(salt));

	/* The salt is key material; wipe it on every path. */
	memzero_explicit(salt, sizeof(salt));
	return rc;
}
/*
 * Given the per-file key, create the file's crypto transform object(s): the
 * skcipher for the mode and, when the mode needs it, the ESSIV generator.
 *
 * Returns 0 on success or a -errno code.  The skcipher is stored in @ci
 * before ESSIV setup, so it stays attached to @ci if that step fails.
 */
int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
{
	struct fscrypt_mode *mode = ci->ci_mode;
	struct crypto_skcipher *cipher;
	int rc;

	cipher = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);
	ci->ci_ctfm = cipher;

	if (!mode->needs_essiv)
		return 0;

	rc = init_essiv_generator(ci, derived_key, mode->keysize);
	if (rc)
		fscrypt_warn(ci->ci_inode,
			     "Error initializing ESSIV generator: %d",
			     rc);
	return rc;
}
/*
* Point ci->ci_ctfm at the master key's transform for ci's encryption mode,
* creating it first if it doesn't exist yet. The transform is cached in
* mk->mk_mode_keys[], and its key is derived from the master key's HKDF with
* the mode number as the info.
*
* If two callers create the transform concurrently, the one that loses the
* cmpxchg() frees its own and uses the winner's.
*
* Returns 0 on success, -EINVAL if the mode number is out of range for
* ->mk_mode_keys, or another -errno code from key derivation or allocation.
*/
static int setup_per_mode_key(struct fscrypt_info *ci,
struct fscrypt_master_key *mk)
{
struct fscrypt_mode *mode = ci->ci_mode;
/* The mode's index in available_modes[] is its mode number. */
u8 mode_num = mode - available_modes;
struct crypto_skcipher *tfm, *prev_tfm;
u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
int err;
if (WARN_ON(mode_num >= ARRAY_SIZE(mk->mk_mode_keys)))
return -EINVAL;
/* pairs with cmpxchg() below */
tfm = READ_ONCE(mk->mk_mode_keys[mode_num]);
if (likely(tfm != NULL))
goto done;
/* The mode number is passed to HKDF as a single byte. */
BUILD_BUG_ON(sizeof(mode_num) != 1);
err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
HKDF_CONTEXT_PER_MODE_KEY,
&mode_num, sizeof(mode_num),
mode_key, mode->keysize);
if (err)
return err;
tfm = fscrypt_allocate_skcipher(mode, mode_key, ci->ci_inode);
/* Wipe the derived key whether or not the allocation succeeded. */
memzero_explicit(mode_key, mode->keysize);
if (IS_ERR(tfm))
return PTR_ERR(tfm);
/* pairs with READ_ONCE() above */
prev_tfm = cmpxchg(&mk->mk_mode_keys[mode_num], NULL, tfm);
if (prev_tfm != NULL) {
crypto_free_skcipher(tfm);
tfm = prev_tfm;
}
done:
ci->ci_ctfm = tfm;
return 0;
}
/*
 * Set up the encryption key of a file that uses a v2 policy, either from a
 * per-file key derived with HKDF or, for DIRECT_KEY policies, from the master
 * key's per-mode key.
 *
 * Return: 0 on success, otherwise a negative errno value.
 */
static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
				     struct fscrypt_master_key *mk)
{
	u8 derived_key[FSCRYPT_MAX_KEY_SIZE];
	int err;

	if (!(ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)) {
		/* Common case: derive a key specific to this file's nonce. */
		err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
					  HKDF_CONTEXT_PER_FILE_KEY,
					  ci->ci_nonce,
					  FS_KEY_DERIVATION_NONCE_SIZE,
					  derived_key, ci->ci_mode->keysize);
		if (err)
			return err;

		err = fscrypt_set_derived_key(ci, derived_key);
		memzero_explicit(derived_key, ci->ci_mode->keysize);
		return err;
	}

	/*
	 * DIRECT_KEY: no per-file keys are derived; the per-file nonce is
	 * included in all the IVs instead.  Unlike v1 policies, v2 policies
	 * do not encrypt with the master key itself here but with a derived
	 * per-mode key, so that the master key is consistently used only for
	 * HKDF and key reuse issues are avoided.
	 */
	if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) {
		fscrypt_warn(ci->ci_inode,
			     "Direct key flag not allowed with %s",
			     ci->ci_mode->friendly_name);
		return -EINVAL;
	}
	return setup_per_mode_key(ci, mk);
}
/*
 * Find the master key, then set up the inode's actual encryption key.
 *
 * If the master key is found in the filesystem-level keyring, then the
 * corresponding 'struct key' is returned in *master_key_ret with
 * ->mk_secret_sem read-locked. This is needed to ensure that only one task
 * links the fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race
 * to create an fscrypt_info for the same inode), and to synchronize the master
 * key being removed with a new inode starting to use it.
 *
 * For v1 policies only, when the filesystem-level lookup fails with -ENOKEY the
 * key is instead searched for in the current task's subscribed keyrings; on
 * that path this function does not write *master_key_ret.
 *
 * Return: 0 on success, otherwise a negative errno value. On failure after the
 * filesystem-level key was found, ->mk_secret_sem and the key reference have
 * already been released.
 */
static int setup_file_encryption_key(struct fscrypt_info *ci,
struct key **master_key_ret)
{
struct key *key;
struct fscrypt_master_key *mk = NULL;
struct fscrypt_key_specifier mk_spec;
int err;
/* Build the key specifier matching the policy version. */
switch (ci->ci_policy.version) {
case FSCRYPT_POLICY_V1:
mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
memcpy(mk_spec.u.descriptor,
ci->ci_policy.v1.master_key_descriptor,
FSCRYPT_KEY_DESCRIPTOR_SIZE);
break;
case FSCRYPT_POLICY_V2:
mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
memcpy(mk_spec.u.identifier,
ci->ci_policy.v2.master_key_identifier,
FSCRYPT_KEY_IDENTIFIER_SIZE);
break;
default:
WARN_ON(1);
return -EINVAL;
}
key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
if (IS_ERR(key)) {
/* Only a missing key under a v1 policy gets the legacy fallback. */
if (key != ERR_PTR(-ENOKEY) ||
ci->ci_policy.version != FSCRYPT_POLICY_V1)
return PTR_ERR(key);
/*
 * As a legacy fallback for v1 policies, search for the key in
 * the current task's subscribed keyrings too. Don't move this
 * to before the search of ->s_master_keys, since users
 * shouldn't be able to override filesystem-level keys.
 */
return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
}
mk = key->payload.data[0];
down_read(&mk->mk_secret_sem);
/* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
if (!is_master_key_secret_present(&mk->mk_secret)) {
err = -ENOKEY;
goto out_release_key;
}
/*
 * Require that the master key be at least as long as the derived key.
 * Otherwise, the derived key cannot possibly contain as much entropy as
 * that required by the encryption mode it will be used for. For v1
 * policies it's also required for the KDF to work at all.
 */
if (mk->mk_secret.size < ci->ci_mode->keysize) {
fscrypt_warn(NULL,
"key with %s %*phN is too short (got %u bytes, need %u+ bytes)",
master_key_spec_type(&mk_spec),
master_key_spec_len(&mk_spec), (u8 *)&mk_spec.u,
mk->mk_secret.size, ci->ci_mode->keysize);
err = -ENOKEY;
goto out_release_key;
}
switch (ci->ci_policy.version) {
case FSCRYPT_POLICY_V1:
err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
break;
case FSCRYPT_POLICY_V2:
err = fscrypt_setup_v2_file_key(ci, mk);
break;
default:
WARN_ON(1);
err = -EINVAL;
break;
}
if (err)
goto out_release_key;
/* Success: hand the still-locked, still-referenced key to the caller. */
*master_key_ret = key;
return 0;
out_release_key:
up_read(&mk->mk_secret_sem);
key_put(key);
return err;
}
/*
 * Release everything an fscrypt_info holds and free it.  A NULL @ci is
 * accepted and ignored.
 */
static void put_crypt_info(struct fscrypt_info *ci)
{
	struct fscrypt_master_key *mk;
	struct key *master;

	if (!ci)
		return;

	if (ci->ci_direct_key) {
		fscrypt_put_direct_key(ci->ci_direct_key);
	} else if ((ci->ci_ctfm != NULL || ci->ci_essiv_tfm != NULL) &&
		   !fscrypt_is_direct_key_policy(&ci->ci_policy)) {
		/* The transforms are freed here only for non-DIRECT_KEY policies. */
		crypto_free_skcipher(ci->ci_ctfm);
		crypto_free_cipher(ci->ci_essiv_tfm);
	}

	master = ci->ci_master_key;
	if (master) {
		mk = master->payload.data[0];

		/*
		 * Take this inode off the list of inodes that were unlocked
		 * with the master key.
		 *
		 * Additionally, when this was the last inode of a key whose
		 * secret has already been removed, invalidate the key so that
		 * it disappears from ->s_master_keys.
		 */
		spin_lock(&mk->mk_decrypted_inodes_lock);
		list_del(&ci->ci_master_key_link);
		spin_unlock(&mk->mk_decrypted_inodes_lock);

		if (refcount_dec_and_test(&mk->mk_refcount))
			key_invalidate(master);
		key_put(master);
	}

	kmem_cache_free(fscrypt_info_cachep, ci);
}
/*
 * Set up the fscrypt_info for an inode if it doesn't already have its
 * encryption key set up.
 *
 * Reads the inode's encryption context (or fakes up a v1 context when reading
 * it fails, the dummy context is enabled and the inode is not encrypted),
 * converts it to a policy, selects the encryption mode, sets up the key, and
 * then tries to install the result in inode->i_crypt_info.
 *
 * Return: 0 on success, and also when setup ended with -ENOKEY (in which case
 * this call does not install an fscrypt_info); otherwise a negative errno
 * value.
 */
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
union fscrypt_context ctx;
struct fscrypt_mode *mode;
struct key *master_key = NULL;
int res;
if (fscrypt_has_encryption_key(inode))
return 0;
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
if (res)
return res;
/* On success, res is passed on below as the size of the context. */
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode) ||
IS_ENCRYPTED(inode)) {
fscrypt_warn(inode,
"Error %d getting encryption context",
res);
return res;
}
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
ctx.version = FSCRYPT_CONTEXT_V1;
ctx.v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
ctx.v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
memset(ctx.v1.master_key_descriptor, 0x42,
FSCRYPT_KEY_DESCRIPTOR_SIZE);
res = sizeof(ctx.v1);
}
crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_inode = inode;
res = fscrypt_policy_from_context(&crypt_info->ci_policy, &ctx, res);
if (res) {
fscrypt_warn(inode,
"Unrecognized or corrupt encryption context");
goto out;
}
/* The nonce lives at a version-specific place in the context. */
switch (ctx.version) {
case FSCRYPT_CONTEXT_V1:
memcpy(crypt_info->ci_nonce, ctx.v1.nonce,
FS_KEY_DERIVATION_NONCE_SIZE);
break;
case FSCRYPT_CONTEXT_V2:
memcpy(crypt_info->ci_nonce, ctx.v2.nonce,
FS_KEY_DERIVATION_NONCE_SIZE);
break;
default:
WARN_ON(1);
res = -EINVAL;
goto out;
}
if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) {
res = -EINVAL;
goto out;
}
mode = select_encryption_mode(&crypt_info->ci_policy, inode);
if (IS_ERR(mode)) {
res = PTR_ERR(mode);
goto out;
}
WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
crypt_info->ci_mode = mode;
res = setup_file_encryption_key(crypt_info, &master_key);
if (res)
goto out;
/*
 * Install crypt_info unless ->i_crypt_info was already set; in that case
 * crypt_info stays non-NULL and is freed by put_crypt_info() below.
 */
if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) {
if (master_key) {
struct fscrypt_master_key *mk =
master_key->payload.data[0];
/*
 * The installed fscrypt_info takes its own references on the
 * master key and is linked into ->mk_decrypted_inodes.
 */
refcount_inc(&mk->mk_refcount);
crypt_info->ci_master_key = key_get(master_key);
spin_lock(&mk->mk_decrypted_inodes_lock);
list_add(&crypt_info->ci_master_key_link,
&mk->mk_decrypted_inodes);
spin_unlock(&mk->mk_decrypted_inodes_lock);
}
/* Now owned by the inode; make the put_crypt_info() below a no-op. */
crypt_info = NULL;
}
res = 0;
out:
/* Drop the read lock and key reference from setup_file_encryption_key(). */
if (master_key) {
struct fscrypt_master_key *mk = master_key->payload.data[0];
up_read(&mk->mk_secret_sem);
key_put(master_key);
}
/* -ENOKEY is reported to the caller as success. */
if (res == -ENOKEY)
res = 0;
put_crypt_info(crypt_info);
return res;
}
EXPORT_SYMBOL(fscrypt_get_encryption_info);
/**
 * fscrypt_put_encryption_info - free most of an inode's fscrypt data
 *
 * Releases the inode's fscrypt_info and clears ->i_crypt_info.  Filesystems
 * must call this when the inode is being evicted.  An RCU grace period need
 * not have elapsed yet.
 */
void fscrypt_put_encryption_info(struct inode *inode)
{
	struct fscrypt_info *ci = inode->i_crypt_info;

	put_crypt_info(ci);
	inode->i_crypt_info = NULL;
}
EXPORT_SYMBOL(fscrypt_put_encryption_info);
/**
 * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay
 *
 * Frees the cached decrypted symlink target of an encrypted symlink, if any.
 * Filesystems must call this after an RCU grace period, just before they free
 * the inode.
 */
void fscrypt_free_inode(struct inode *inode)
{
	/* Only encrypted symlinks have ->i_link freed here. */
	if (!IS_ENCRYPTED(inode) || !S_ISLNK(inode->i_mode))
		return;

	kfree(inode->i_link);
	inode->i_link = NULL;
}
EXPORT_SYMBOL(fscrypt_free_inode);
/**
 * fscrypt_drop_inode - check whether the inode's master key has been removed
 *
 * Filesystems supporting fscrypt must call this from their ->drop_inode()
 * method so that encrypted inodes are evicted as soon as they're no longer in
 * use and their master key has been removed.
 *
 * Return: 1 if fscrypt wants the inode to be evicted now, otherwise 0
 */
int fscrypt_drop_inode(struct inode *inode)
{
	const struct fscrypt_info *ci = READ_ONCE(inode->i_crypt_info);
	const struct fscrypt_master_key *mk;

	/* No encryption key set up for this inode, so it is irrelevant. */
	if (!ci)
		return 0;

	/*
	 * The master key came from the legacy process-subscribed keyrings,
	 * so there is no way to know whether it has been removed.
	 */
	if (!ci->ci_master_key)
		return 0;

	mk = ci->ci_master_key->payload.data[0];

	/*
	 * ->mk_secret_sem is not held, so the answer may be stale as soon as
	 * it is computed.  That is harmless: evicting unnecessarily is not a
	 * correctness problem, and neither is failing to evict while iput()
	 * races with key removal, because the thread removing the key will
	 * then either evict the inode itself or correctly notice that it was
	 * not evicted due to the race.
	 */
	return is_master_key_secret_present(&mk->mk_secret) ? 0 : 1;
}
EXPORT_SYMBOL_GPL(fscrypt_drop_inode);

340
fs/crypto/keysetup_v1.c Normal file
View File

@@ -0,0 +1,340 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Key setup for v1 encryption policies
*
* Copyright 2015, 2019 Google LLC
*/
/*
* This file implements compatibility functions for the original encryption
* policy version ("v1"), including:
*
* - Deriving per-file keys using the AES-128-ECB based KDF
* (rather than the new method of using HKDF-SHA512)
*
* - Retrieving fscrypt master keys from process-subscribed keyrings
* (rather than the new method of using a filesystem-level keyring)
*
* - Handling policies with the DIRECT_KEY flag set using a master key table
* (rather than the new method of implementing DIRECT_KEY with per-mode keys
* managed alongside the master keys in the filesystem-level keyring)
*/
#include <crypto/algapi.h>
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <linux/hashtable.h>
#include <linux/scatterlist.h>
#include "fscrypt_private.h"
/* Table of keys referenced by DIRECT_KEY policies */
static DEFINE_HASHTABLE(fscrypt_direct_keys, 6); /* 6 bits = 64 buckets */
static DEFINE_SPINLOCK(fscrypt_direct_keys_lock);
/*
 * v1 key derivation function: the derived key is the master key encrypted
 * with AES-128-ECB, using the nonce as the AES key.  Each inode thereby gets
 * a unique derived key with sufficient entropy.  The construction is,
 * however, nonstandard and non-extensible, doesn't evenly distribute the
 * entropy from the master key, and is trivially reversible: whoever
 * compromises a derived key can "decrypt" it to recover the master key and
 * then derive any other key.  All new code should use HKDF instead.
 *
 * The master key must be at least as long as the derived key.  When it is
 * longer, only its first 'derived_keysize' bytes are used.
 *
 * Return: 0 on success, otherwise a negative errno value.
 */
static int derive_key_aes(const u8 *master_key,
			  const u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE],
			  u8 *derived_key, unsigned int derived_keysize)
{
	struct crypto_skcipher *ecb;
	struct skcipher_request *request;
	struct scatterlist src, dst;
	DECLARE_CRYPTO_WAIT(done);
	int err;

	ecb = crypto_alloc_skcipher("ecb(aes)", 0, 0);
	if (IS_ERR(ecb))
		return PTR_ERR(ecb);
	crypto_skcipher_set_flags(ecb, CRYPTO_TFM_REQ_WEAK_KEY);

	request = skcipher_request_alloc(ecb, GFP_NOFS);
	if (!request) {
		err = -ENOMEM;
		goto free_tfm;
	}
	skcipher_request_set_callback(request,
				      CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &done);

	/* The nonce, not the master key, is what keys the cipher. */
	err = crypto_skcipher_setkey(ecb, nonce, FS_KEY_DERIVATION_NONCE_SIZE);
	if (err < 0)
		goto free_request;

	sg_init_one(&src, master_key, derived_keysize);
	sg_init_one(&dst, derived_key, derived_keysize);
	skcipher_request_set_crypt(request, &src, &dst, derived_keysize, NULL);
	err = crypto_wait_req(crypto_skcipher_encrypt(request), &done);

free_request:
	skcipher_request_free(request);
free_tfm:
	crypto_free_skcipher(ecb);
	return err;
}
/*
 * Look in the current task's subscribed keyrings for a "logon" key whose
 * description is prefix:descriptor.  When one is found and its payload is
 * valid and at least @min_keysize bytes long, the key is returned with its
 * semaphore read-locked and *payload_ret pointing at the payload.
 *
 * Return: the key on success; ERR_PTR(-ENOMEM) if the description cannot be
 * allocated; the error from request_key() if the lookup fails; or
 * ERR_PTR(-ENOKEY) if the key was found but is unusable.
 */
static struct key *
find_and_lock_process_key(const char *prefix,
			  const u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE],
			  unsigned int min_keysize,
			  const struct fscrypt_key **payload_ret)
{
	const struct user_key_payload *ukp;
	const struct fscrypt_key *fskey;
	struct key *key;
	char *desc;

	desc = kasprintf(GFP_NOFS, "%s%*phN", prefix,
			 FSCRYPT_KEY_DESCRIPTOR_SIZE, descriptor);
	if (!desc)
		return ERR_PTR(-ENOMEM);

	key = request_key(&key_type_logon, desc, NULL);
	kfree(desc);
	if (IS_ERR(key))
		return key;

	down_read(&key->sem);

	ukp = user_key_payload_locked(key);
	if (!ukp) {
		/* The key was revoked before its semaphore was acquired. */
		goto reject;
	}

	fskey = (const struct fscrypt_key *)ukp->data;

	if (ukp->datalen != sizeof(struct fscrypt_key) ||
	    fskey->size < 1 || fskey->size > FSCRYPT_MAX_KEY_SIZE) {
		fscrypt_warn(NULL,
			     "key with description '%s' has invalid payload",
			     key->description);
		goto reject;
	}

	if (fskey->size < min_keysize) {
		fscrypt_warn(NULL,
			     "key with description '%s' is too short (got %u bytes, need %u+ bytes)",
			     key->description, fskey->size, min_keysize);
		goto reject;
	}

	*payload_ret = fskey;
	return key;

reject:
	up_read(&key->sem);
	key_put(key);
	return ERR_PTR(-ENOKEY);
}
/*
 * Master key referenced by DIRECT_KEY policy.
 *
 * Entries live in the fscrypt_direct_keys hash table and are matched on
 * (descriptor, mode, raw key).
 */
struct fscrypt_direct_key {
/* link in the fscrypt_direct_keys hash table */
struct hlist_node dk_node;
/* reference count; the entry is unhashed and freed when it drops to zero */
refcount_t dk_refcount;
/* encryption mode this entry's transform was allocated for */
const struct fscrypt_mode *dk_mode;
/* transform keyed with the raw key */
struct crypto_skcipher *dk_ctfm;
/* master key descriptor from the v1 policy */
u8 dk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
/* copy of the raw key; only the mode's keysize bytes are filled in */
u8 dk_raw[FSCRYPT_MAX_KEY_SIZE];
};
/*
 * Free a direct key together with its transform.  A NULL @dk is accepted and
 * ignored.  kzfree() is used so the raw key bytes are cleared.
 */
static void free_direct_key(struct fscrypt_direct_key *dk)
{
	if (!dk)
		return;

	crypto_free_skcipher(dk->dk_ctfm);
	kzfree(dk);
}
/*
 * Drop a reference to a direct key.  When the last reference goes away, the
 * key is removed from the fscrypt_direct_keys table and freed.
 */
void fscrypt_put_direct_key(struct fscrypt_direct_key *dk)
{
	/* The table lock is taken only when the refcount reaches zero. */
	if (refcount_dec_and_lock(&dk->dk_refcount,
				  &fscrypt_direct_keys_lock)) {
		hash_del(&dk->dk_node);
		spin_unlock(&fscrypt_direct_keys_lock);
		free_direct_key(dk);
	}
}
/*
* Find/insert the given key into the fscrypt_direct_keys table. If found, it
* is returned with elevated refcount, and 'to_insert' is freed if non-NULL. If
* not found, 'to_insert' is inserted and returned if it's non-NULL; otherwise
* NULL is returned.
*/
static struct fscrypt_direct_key *
find_or_insert_direct_key(struct fscrypt_direct_key *to_insert,
const u8 *raw_key, const struct fscrypt_info *ci)
{
unsigned long hash_key;
struct fscrypt_direct_key *dk;
/*
* Careful: to avoid potentially leaking secret key bytes via timing
* information, we must key the hash table by descriptor rather than by
* raw key, and use crypto_memneq() when comparing raw keys.
*/
BUILD_BUG_ON(sizeof(hash_key) > FSCRYPT_KEY_DESCRIPTOR_SIZE);
memcpy(&hash_key, ci->ci_policy.v1.master_key_descriptor,
sizeof(hash_key));
spin_lock(&fscrypt_direct_keys_lock);
hash_for_each_possible(fscrypt_direct_keys, dk, dk_node, hash_key) {
if (memcmp(ci->ci_policy.v1.master_key_descriptor,
dk->dk_descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE) != 0)
continue;
if (ci->ci_mode != dk->dk_mode)
continue;
if (crypto_memneq(raw_key, dk->dk_raw, ci->ci_mode->keysize))
continue;
/* using existing tfm with same (descriptor, mode, raw_key) */
refcount_inc(&dk->dk_refcount);
spin_unlock(&fscrypt_direct_keys_lock);
free_direct_key(to_insert);
return dk;
}
if (to_insert)
hash_add(fscrypt_direct_keys, &to_insert->dk_node, hash_key);
spin_unlock(&fscrypt_direct_keys_lock);
return to_insert;
}
/*
 * Prepare to encrypt directly using the master key in the given mode.
 *
 * Returns a referenced fscrypt_direct_key for (descriptor, mode, raw key),
 * reusing a table entry when one exists and otherwise allocating and
 * inserting a new one, or an ERR_PTR() on failure.
 */
static struct fscrypt_direct_key *
fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key)
{
	struct fscrypt_direct_key *dk;
	struct crypto_skcipher *tfm;

	/* Reuse the tfm if this key is already in the table. */
	dk = find_or_insert_direct_key(NULL, raw_key, ci);
	if (dk)
		return dk;

	/* It is not, so build a new entry. */
	dk = kzalloc(sizeof(*dk), GFP_NOFS);
	if (!dk)
		return ERR_PTR(-ENOMEM);

	refcount_set(&dk->dk_refcount, 1);
	dk->dk_mode = ci->ci_mode;

	tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode);
	if (IS_ERR(tfm)) {
		int err = PTR_ERR(tfm);

		/* dk->dk_ctfm is still NULL from kzalloc(). */
		free_direct_key(dk);
		return ERR_PTR(err);
	}
	dk->dk_ctfm = tfm;

	memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor,
	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
	memcpy(dk->dk_raw, raw_key, ci->ci_mode->keysize);

	/* Another task may have inserted an equal entry in the meantime. */
	return find_or_insert_direct_key(dk, raw_key, ci);
}
/* v1 policy, DIRECT_KEY: use the master key directly */
static int setup_v1_file_key_direct(struct fscrypt_info *ci,
const u8 *raw_master_key)
{
const struct fscrypt_mode *mode = ci->ci_mode;
struct fscrypt_direct_key *dk;
if (!fscrypt_mode_supports_direct_key(mode)) {
fscrypt_warn(ci->ci_inode,
"Direct key mode not allowed with %s",
mode->friendly_name);
return -EINVAL;
}
if (ci->ci_policy.v1.contents_encryption_mode !=
ci->ci_policy.v1.filenames_encryption_mode) {
fscrypt_warn(ci->ci_inode,
"Direct key mode not allowed with different contents and filenames modes");
return -EINVAL;
}
/* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
if (WARN_ON(mode->needs_essiv))
return -EINVAL;
dk = fscrypt_get_direct_key(ci, raw_master_key);
if (IS_ERR(dk))
return PTR_ERR(dk);
ci->ci_direct_key = dk;
ci->ci_ctfm = dk->dk_ctfm;
return 0;
}
/*
 * v1 policy, !DIRECT_KEY: derive the file's encryption key from the master
 * key and the file's nonce, then set up the file's transform(s) with it.
 *
 * Return: 0 on success, otherwise a negative errno value.
 */
static int setup_v1_file_key_derived(struct fscrypt_info *ci,
				     const u8 *raw_master_key)
{
	const unsigned int keysize = ci->ci_mode->keysize;
	u8 *derived;
	int ret;

	/*
	 * A heap buffer is required: derive_key_aes() hands it to the
	 * scatterlist crypto API, which rules out a stack buffer.
	 */
	derived = kmalloc(keysize, GFP_NOFS);
	if (!derived)
		return -ENOMEM;

	ret = derive_key_aes(raw_master_key, ci->ci_nonce, derived, keysize);
	if (!ret)
		ret = fscrypt_set_derived_key(ci, derived);

	/* kzfree() clears the key bytes before freeing them. */
	kzfree(derived);
	return ret;
}
/*
 * Set up the key of a file that uses a v1 policy, given the raw master key.
 * Dispatches on whether the policy has the DIRECT_KEY flag set.
 *
 * Return: 0 on success, otherwise a negative errno value.
 */
int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, const u8 *raw_master_key)
{
	if (!(ci->ci_policy.v1.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY))
		return setup_v1_file_key_derived(ci, raw_master_key);

	return setup_v1_file_key_direct(ci, raw_master_key);
}
/*
 * Set up the key of a v1-policy file from a key found in the current task's
 * subscribed keyrings.  The generic fscrypt description prefix is tried first;
 * if that finds no usable key and the filesystem defines its own key prefix,
 * that prefix is tried too.
 *
 * Return: 0 on success, otherwise a negative errno value.
 */
int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci)
{
	const struct fscrypt_key *payload;
	struct key *key;
	int ret;

	key = find_and_lock_process_key(FSCRYPT_KEY_DESC_PREFIX,
					ci->ci_policy.v1.master_key_descriptor,
					ci->ci_mode->keysize, &payload);
	if (key == ERR_PTR(-ENOKEY)) {
		const char *fs_prefix = ci->ci_inode->i_sb->s_cop->key_prefix;

		if (fs_prefix)
			key = find_and_lock_process_key(fs_prefix,
					ci->ci_policy.v1.master_key_descriptor,
					ci->ci_mode->keysize, &payload);
	}
	if (IS_ERR(key))
		return PTR_ERR(key);

	ret = fscrypt_setup_v1_file_key(ci, payload->raw);

	/* The payload is no longer needed; unlock and drop the key. */
	up_read(&key->sem);
	key_put(key);
	return ret;
}

View File

@@ -5,8 +5,9 @@
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility.
*
* Written by Michael Halcrow, 2015.
* Originally written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
* Modified by Eric Biggers, 2019 for v2 policy support.
*/
#include <linux/random.h>
@@ -14,70 +15,303 @@
#include <linux/mount.h>
#include "fscrypt_private.h"
/*
* check whether an encryption policy is consistent with an encryption context
/**
* fscrypt_policies_equal - check whether two encryption policies are the same
*
* Return: %true if equal, else %false
*/
static bool is_encryption_context_consistent_with_policy(
const struct fscrypt_context *ctx,
const struct fscrypt_policy *policy)
bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
const union fscrypt_policy *policy2)
{
return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx->flags == policy->flags) &&
(ctx->contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx->filenames_encryption_mode ==
policy->filenames_encryption_mode);
if (policy1->version != policy2->version)
return false;
return !memcmp(policy1, policy2, fscrypt_policy_size(policy1));
}
static int create_encryption_context_from_policy(struct inode *inode,
const struct fscrypt_policy *policy)
/**
* fscrypt_supported_policy - check whether an encryption policy is supported
*
* Given an encryption policy, check whether all its encryption modes and other
* settings are supported by this kernel. (But we currently don't check
* for crypto API support here, so attempting to use an algorithm not configured
* into the crypto API will still fail later.)
*
* Return: %true if supported, else %false
*/
bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
const struct inode *inode)
{
struct fscrypt_context ctx;
switch (policy_u->version) {
case FSCRYPT_POLICY_V1: {
const struct fscrypt_policy_v1 *policy = &policy_u->v1;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
policy->filenames_encryption_mode)) {
fscrypt_warn(inode,
"Unsupported encryption modes (contents %d, filenames %d)",
policy->contents_encryption_mode,
policy->filenames_encryption_mode);
return false;
}
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
policy->filenames_encryption_mode))
if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
fscrypt_warn(inode,
"Unsupported encryption flags (0x%02x)",
policy->flags);
return false;
}
return true;
}
case FSCRYPT_POLICY_V2: {
const struct fscrypt_policy_v2 *policy = &policy_u->v2;
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
policy->filenames_encryption_mode)) {
fscrypt_warn(inode,
"Unsupported encryption modes (contents %d, filenames %d)",
policy->contents_encryption_mode,
policy->filenames_encryption_mode);
return false;
}
if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
fscrypt_warn(inode,
"Unsupported encryption flags (0x%02x)",
policy->flags);
return false;
}
if (memchr_inv(policy->__reserved, 0,
sizeof(policy->__reserved))) {
fscrypt_warn(inode,
"Reserved bits set in encryption policy");
return false;
}
return true;
}
}
return false;
}
/**
 * fscrypt_new_context_from_policy - create a new fscrypt_context from a policy
 *
 * Fill in *ctx_u for an inode that is being assigned the given encryption
 * policy.  The context is zeroed first, the policy's settings are copied into
 * the context layout matching the policy version, and a fresh random nonce is
 * generated.  A policy with any other version hits BUG().
 *
 * Return: the size of the new context in bytes.
 */
static int fscrypt_new_context_from_policy(union fscrypt_context *ctx_u,
					    const union fscrypt_policy *policy_u)
{
	memset(ctx_u, 0, sizeof(*ctx_u));

	switch (policy_u->version) {
	case FSCRYPT_POLICY_V1: {
		const struct fscrypt_policy_v1 *src = &policy_u->v1;
		struct fscrypt_context_v1 *dst = &ctx_u->v1;

		dst->version = FSCRYPT_CONTEXT_V1;
		dst->contents_encryption_mode = src->contents_encryption_mode;
		dst->filenames_encryption_mode = src->filenames_encryption_mode;
		dst->flags = src->flags;
		memcpy(dst->master_key_descriptor, src->master_key_descriptor,
		       sizeof(dst->master_key_descriptor));
		get_random_bytes(dst->nonce, sizeof(dst->nonce));
		return sizeof(*dst);
	}
	case FSCRYPT_POLICY_V2: {
		const struct fscrypt_policy_v2 *src = &policy_u->v2;
		struct fscrypt_context_v2 *dst = &ctx_u->v2;

		dst->version = FSCRYPT_CONTEXT_V2;
		dst->contents_encryption_mode = src->contents_encryption_mode;
		dst->filenames_encryption_mode = src->filenames_encryption_mode;
		dst->flags = src->flags;
		memcpy(dst->master_key_identifier, src->master_key_identifier,
		       sizeof(dst->master_key_identifier));
		get_random_bytes(dst->nonce, sizeof(dst->nonce));
		return sizeof(*dst);
	}
	}
	BUG();
}
/**
* fscrypt_policy_from_context - convert an fscrypt_context to an fscrypt_policy
*
* Given an fscrypt_context, build the corresponding fscrypt_policy.
*
* Return: 0 on success, or -EINVAL if the fscrypt_context has an unrecognized
* version number or size.
*
* This does *not* validate the settings within the policy itself, e.g. the
* modes, flags, and reserved bits. Use fscrypt_supported_policy() for that.
*/
int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
const union fscrypt_context *ctx_u,
int ctx_size)
{
memset(policy_u, 0, sizeof(*policy_u));
if (ctx_size <= 0 || ctx_size != fscrypt_context_size(ctx_u))
return -EINVAL;
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
switch (ctx_u->version) {
case FSCRYPT_CONTEXT_V1: {
const struct fscrypt_context_v1 *ctx = &ctx_u->v1;
struct fscrypt_policy_v1 *policy = &policy_u->v1;
policy->version = FSCRYPT_POLICY_V1;
policy->contents_encryption_mode =
ctx->contents_encryption_mode;
policy->filenames_encryption_mode =
ctx->filenames_encryption_mode;
policy->flags = ctx->flags;
memcpy(policy->master_key_descriptor,
ctx->master_key_descriptor,
sizeof(policy->master_key_descriptor));
return 0;
}
case FSCRYPT_CONTEXT_V2: {
const struct fscrypt_context_v2 *ctx = &ctx_u->v2;
struct fscrypt_policy_v2 *policy = &policy_u->v2;
policy->version = FSCRYPT_POLICY_V2;
policy->contents_encryption_mode =
ctx->contents_encryption_mode;
policy->filenames_encryption_mode =
ctx->filenames_encryption_mode;
policy->flags = ctx->flags;
memcpy(policy->__reserved, ctx->__reserved,
sizeof(policy->__reserved));
memcpy(policy->master_key_identifier,
ctx->master_key_identifier,
sizeof(policy->master_key_identifier));
return 0;
}
}
/* unreachable */
return -EINVAL;
}
/*
 * Retrieve an inode's encryption policy.
 *
 * The policy cached in the inode's fscrypt_info is used when there is one;
 * otherwise it is rebuilt from the on-disk encryption context.
 *
 * Return: 0 on success, -ENODATA if the inode is not encrypted, -EINVAL if
 * reading the context fails with -ERANGE, or another negative errno value.
 */
static int fscrypt_get_policy(struct inode *inode, union fscrypt_policy *policy)
{
	const struct fscrypt_info *ci = READ_ONCE(inode->i_crypt_info);
	union fscrypt_context ctx;
	int size;

	if (ci) {
		/* key available, use the cached policy */
		*policy = ci->ci_policy;
		return 0;
	}

	if (!IS_ENCRYPTED(inode))
		return -ENODATA;

	size = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
	if (size == -ERANGE)
		return -EINVAL;
	if (size < 0)
		return size;

	return fscrypt_policy_from_context(policy, &ctx, size);
}
static int set_encryption_policy(struct inode *inode,
const union fscrypt_policy *policy)
{
union fscrypt_context ctx;
int ctxsize;
int err;
if (!fscrypt_supported_policy(policy, inode))
return -EINVAL;
ctx.contents_encryption_mode = policy->contents_encryption_mode;
ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
ctx.flags = policy->flags;
BUILD_BUG_ON(sizeof(ctx.nonce) != FS_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
switch (policy->version) {
case FSCRYPT_POLICY_V1:
/*
* The original encryption policy version provided no way of
* verifying that the correct master key was supplied, which was
* insecure in scenarios where multiple users have access to the
* same encrypted files (even just read-only access). The new
* encryption policy version fixes this and also implies use of
* an improved key derivation function and allows non-root users
* to securely remove keys. So as long as compatibility with
* old kernels isn't required, it is recommended to use the new
* policy version for all new encrypted directories.
*/
pr_warn_once("%s (pid %d) is setting deprecated v1 encryption policy; recommend upgrading to v2.\n",
current->comm, current->pid);
break;
case FSCRYPT_POLICY_V2:
err = fscrypt_verify_key_added(inode->i_sb,
policy->v2.master_key_identifier);
if (err)
return err;
break;
default:
WARN_ON(1);
return -EINVAL;
}
return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
ctxsize = fscrypt_new_context_from_policy(&ctx, policy);
return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, NULL);
}
int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
{
struct fscrypt_policy policy;
union fscrypt_policy policy;
union fscrypt_policy existing_policy;
struct inode *inode = file_inode(filp);
u8 version;
int size;
int ret;
struct fscrypt_context ctx;
if (copy_from_user(&policy, arg, sizeof(policy)))
if (get_user(policy.version, (const u8 __user *)arg))
return -EFAULT;
size = fscrypt_policy_size(&policy);
if (size <= 0)
return -EINVAL;
/*
* We should just copy the remaining 'size - 1' bytes here, but a
* bizarre bug in gcc 7 and earlier (fixed by gcc r255731) causes gcc to
* think that size can be 0 here (despite the check above!) *and* that
* it's a compile-time constant. Thus it would think copy_from_user()
* is passed compile-time constant ULONG_MAX, causing the compile-time
* buffer overflow check to fail, breaking the build. This only occurred
* when building an i386 kernel with -Os and branch profiling enabled.
*
* Work around it by just copying the first byte again...
*/
version = policy.version;
if (copy_from_user(&policy, arg, size))
return -EFAULT;
policy.version = version;
if (!inode_owner_or_capable(inode))
return -EACCES;
if (policy.version != 0)
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
inode_lock(inode);
ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
ret = fscrypt_get_policy(inode, &existing_policy);
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -ENOTDIR;
@@ -86,14 +320,10 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
ret = create_encryption_context_from_policy(inode,
&policy);
} else if (ret == sizeof(ctx) &&
is_encryption_context_consistent_with_policy(&ctx,
&policy)) {
/* The file already uses the same encryption policy. */
ret = 0;
} else if (ret >= 0 || ret == -ERANGE) {
ret = set_encryption_policy(inode, &policy);
} else if (ret == -EINVAL ||
(ret == 0 && !fscrypt_policies_equal(&policy,
&existing_policy))) {
/* The file already uses a different encryption policy. */
ret = -EEXIST;
}
@@ -105,37 +335,57 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
}
EXPORT_SYMBOL(fscrypt_ioctl_set_policy);
/* Original ioctl version; can only get the original policy version */
int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
{
struct inode *inode = file_inode(filp);
struct fscrypt_context ctx;
struct fscrypt_policy policy;
int res;
union fscrypt_policy policy;
int err;
if (!IS_ENCRYPTED(inode))
return -ENODATA;
err = fscrypt_get_policy(file_inode(filp), &policy);
if (err)
return err;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0 && res != -ERANGE)
return res;
if (res != sizeof(ctx))
return -EINVAL;
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
if (policy.version != FSCRYPT_POLICY_V1)
return -EINVAL;
policy.version = 0;
policy.contents_encryption_mode = ctx.contents_encryption_mode;
policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
policy.flags = ctx.flags;
memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (copy_to_user(arg, &policy, sizeof(policy)))
if (copy_to_user(arg, &policy, sizeof(policy.v1)))
return -EFAULT;
return 0;
}
EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
/*
 * Extended ioctl version; can get policies of any version.
 *
 * The user buffer starts with a policy_size field giving the space available
 * for the policy, followed by the policy itself. On success policy_size is
 * updated to the actual size of the policy and the policy is copied out.
 *
 * Return: 0 on success, the error from fscrypt_get_policy() if the policy
 * cannot be retrieved, -EFAULT if the user buffer cannot be accessed, or
 * -EOVERFLOW if the policy does not fit in the size given by the caller.
 */
int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg)
{
struct fscrypt_get_policy_ex_arg arg;
union fscrypt_policy *policy = (union fscrypt_policy *)&arg.policy;
size_t policy_size;
int err;
/* arg is policy_size, then policy */
BUILD_BUG_ON(offsetof(typeof(arg), policy_size) != 0);
BUILD_BUG_ON(offsetofend(typeof(arg), policy_size) !=
offsetof(typeof(arg), policy));
BUILD_BUG_ON(sizeof(arg.policy) != sizeof(*policy));
/* The policy is written straight into arg.policy. */
err = fscrypt_get_policy(file_inode(filp), policy);
if (err)
return err;
policy_size = fscrypt_policy_size(policy);
/*
 * Only the leading policy_size field is read from userspace, so the
 * policy already stored in arg.policy is left intact.
 */
if (copy_from_user(&arg, uarg, sizeof(arg.policy_size)))
return -EFAULT;
if (policy_size > arg.policy_size)
return -EOVERFLOW;
arg.policy_size = policy_size;
/* Copy out the size field plus exactly policy_size bytes of policy. */
if (copy_to_user(uarg, &arg, sizeof(arg.policy_size) + policy_size))
return -EFAULT;
return 0;
}
EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex);
/**
* fscrypt_has_permitted_context() - is a file's encryption policy permitted
* within its directory?
@@ -157,10 +407,8 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
*/
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
{
const struct fscrypt_operations *cops = parent->i_sb->s_cop;
const struct fscrypt_info *parent_ci, *child_ci;
struct fscrypt_context parent_ctx, child_ctx;
int res;
union fscrypt_policy parent_policy, child_policy;
int err;
/* No restrictions on file types which are never encrypted */
if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
@@ -190,41 +438,22 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
* In any case, if an unexpected error occurs, fall back to "forbidden".
*/
res = fscrypt_get_encryption_info(parent);
if (res)
err = fscrypt_get_encryption_info(parent);
if (err)
return 0;
res = fscrypt_get_encryption_info(child);
if (res)
return 0;
parent_ci = READ_ONCE(parent->i_crypt_info);
child_ci = READ_ONCE(child->i_crypt_info);
if (parent_ci && child_ci) {
return memcmp(parent_ci->ci_master_key_descriptor,
child_ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode ==
child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags);
}
res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx));
if (res != sizeof(parent_ctx))
err = fscrypt_get_encryption_info(child);
if (err)
return 0;
res = cops->get_context(child, &child_ctx, sizeof(child_ctx));
if (res != sizeof(child_ctx))
err = fscrypt_get_policy(parent, &parent_policy);
if (err)
return 0;
return memcmp(parent_ctx.master_key_descriptor,
child_ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ctx.contents_encryption_mode ==
child_ctx.contents_encryption_mode) &&
(parent_ctx.filenames_encryption_mode ==
child_ctx.filenames_encryption_mode) &&
(parent_ctx.flags == child_ctx.flags);
err = fscrypt_get_policy(child, &child_policy);
if (err)
return 0;
return fscrypt_policies_equal(&parent_policy, &child_policy);
}
EXPORT_SYMBOL(fscrypt_has_permitted_context);
@@ -240,7 +469,8 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context);
int fscrypt_inherit_context(struct inode *parent, struct inode *child,
void *fs_data, bool preload)
{
struct fscrypt_context ctx;
union fscrypt_context ctx;
int ctxsize;
struct fscrypt_info *ci;
int res;
@@ -252,16 +482,10 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
if (ci == NULL)
return -ENOKEY;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
ctxsize = fscrypt_new_context_from_policy(&ctx, &ci->ci_policy);
BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);
res = parent->i_sb->s_cop->set_context(child, &ctx, ctxsize, fs_data);
if (res)
return res;
return preload ? fscrypt_get_encryption_info(child): 0;

View File

@@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
ext4-$(CONFIG_FS_VERITY) += verity.o

View File

@@ -26,6 +26,7 @@
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include "ext4.h"
#include "xattr.h"
@@ -659,3 +660,51 @@ const struct file_operations ext4_dir_operations = {
.open = ext4_dir_open,
.release = ext4_release_dir,
};
#ifdef CONFIG_UNICODE
static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
struct qstr qstr = {.name = str, .len = len };
struct inode *inode = dentry->d_parent->d_inode;
if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) {
if (len != name->len)
return -1;
return memcmp(str, name->name, len);
}
return ext4_ci_compare(inode, name, &qstr, false);
}
/*
 * ->d_hash callback for ext4 dentries.
 *
 * @dentry: dentry whose inode decides whether the name is casefolded
 * @str:    name to hash; str->hash is overwritten with the hash of the
 *          casefolded name when folding succeeds
 *
 * Return: 0 on success or when no casefolded hashing applies, -ENOMEM if
 * the scratch buffer cannot be allocated, or -EINVAL if the name cannot be
 * casefolded and the filesystem is in strict mode.  When folding fails
 * outside strict mode, str->hash is left untouched and 0 is returned.
 *
 * This callback can run in rcu-walk mode, so d_inode is read once with
 * READ_ONCE() and checked for NULL before use.
 */
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
{
	const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
	const struct unicode_map *um = sbi->s_encoding;
	const struct inode *inode = READ_ONCE(dentry->d_inode);
	unsigned char *norm;
	int len, ret = 0;

	if (!inode || !IS_CASEFOLDED(inode) || !um)
		return 0;

	/*
	 * NOTE(review): GFP_ATOMIC presumably because this may be called
	 * where sleeping is not allowed (rcu-walk) -- confirm.
	 */
	norm = kmalloc(PATH_MAX, GFP_ATOMIC);
	if (!norm)
		return -ENOMEM;

	len = utf8_casefold(um, str, norm, PATH_MAX);
	if (len < 0) {
		/* Invalid sequence: an error only in strict mode. */
		if (ext4_has_strict_mode(sbi))
			ret = -EINVAL;
		goto out;
	}
	str->hash = full_name_hash(dentry, norm, len);
out:
	kfree(norm);
	return ret;
}
/* Dentry operations wiring in the ext4 name-compare and name-hash callbacks. */
const struct dentry_operations ext4_dentry_ops = {
	.d_compare	= ext4_d_compare,
	.d_hash		= ext4_d_hash,
};
#endif

View File

@@ -41,6 +41,7 @@
#endif
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
#include <linux/compiler.h>
@@ -412,14 +413,16 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
@@ -434,10 +437,10 @@ struct flex_groups {
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL)
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
@@ -482,6 +485,7 @@ enum {
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
@@ -527,6 +531,7 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(TOPDIR);
CHECK_FLAG_VALUE(HUGE_FILE);
CHECK_FLAG_VALUE(EXTENTS);
CHECK_FLAG_VALUE(VERITY);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(INLINE_DATA);
@@ -1326,7 +1331,9 @@ struct ext4_super_block {
__u8 s_first_error_time_hi;
__u8 s_last_error_time_hi;
__u8 s_pad[2];
__le32 s_reserved[96]; /* Padding to the end of the block */
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_reserved[95]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
@@ -1351,6 +1358,16 @@ struct ext4_super_block {
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 3
#define EXT4_ENC_UTF8_12_1 1
/*
 * Flags for ext4_sb_info.s_encoding_flags.
 */
#define EXT4_ENC_STRICT_MODE_FL			(1 << 0)

/*
 * Nonzero when EXT4_ENC_STRICT_MODE_FL is set in @sbi->s_encoding_flags.
 * @sbi is parenthesized so that any pointer expression (e.g. "p + 1" or
 * "&obj") can be passed safely.
 */
#define ext4_has_strict_mode(sbi) \
	((sbi)->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
/*
* fourth extended-fs super-block data in memory
*/
@@ -1400,6 +1417,10 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
#ifdef CONFIG_UNICODE
struct unicode_map *s_encoding;
__u16 s_encoding_flags;
#endif
/* Journaling */
struct journal_s *s_journal;
@@ -1555,6 +1576,7 @@ enum {
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1605,9 +1627,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb) (sb)
#endif
/*
* Returns true if the inode is inode is encrypted
*/
static inline bool ext4_verity_in_progress(struct inode *inode)
{
return IS_ENABLED(CONFIG_FS_VERITY) &&
ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
}
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
@@ -1660,6 +1685,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000
#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1676,6 +1702,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
#define EXT4_FEATURE_INCOMPAT_CASEFOLD 0x20000
extern void ext4_update_dynamic_rev(struct super_block *sb);
@@ -1753,6 +1780,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC)
EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM)
EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY)
EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT)
EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY)
EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION)
EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE)
@@ -1769,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED)
EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR)
EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA)
EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1796,6 +1825,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CASEFOLD | \
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
@@ -1808,7 +1838,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
EXT4_FEATURE_RO_COMPAT_QUOTA |\
EXT4_FEATURE_RO_COMPAT_PROJECT)
EXT4_FEATURE_RO_COMPAT_PROJECT |\
EXT4_FEATURE_RO_COMPAT_VERITY)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -2074,6 +2105,9 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
#ifdef CONFIG_UNICODE
struct fscrypt_str cf_name;
#endif
};
#define fname_name(p) ((p)->disk_name.name)
@@ -2299,6 +2333,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
#ifdef CONFIG_UNICODE
extern void ext4_fname_setup_ci_filename(struct inode *dir,
const struct qstr *iname,
struct fscrypt_str *fname);
#endif
#ifdef CONFIG_FS_ENCRYPTION
static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
const struct fscrypt_name *src)
@@ -2325,6 +2365,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
#endif
return 0;
}
@@ -2340,6 +2384,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
#endif
return 0;
}
@@ -2353,6 +2401,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
#ifdef CONFIG_UNICODE
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
}
#else /* !CONFIG_FS_ENCRYPTION */
static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2363,6 +2416,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
#endif
return 0;
}
@@ -2373,7 +2431,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
}
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
static inline void ext4_fname_free_filename(struct ext4_filename *fname)
{
#ifdef CONFIG_UNICODE
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
}
#endif /* !CONFIG_FS_ENCRYPTION */
/* dir.c */
@@ -2422,8 +2486,8 @@ extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo);
/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
@@ -3018,6 +3082,10 @@ static inline void ext4_unlock_group(struct super_block *sb,
/* dir.c */
extern const struct file_operations ext4_dir_operations;
#ifdef CONFIG_UNICODE
extern const struct dentry_operations ext4_dentry_ops;
#endif
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
@@ -3110,6 +3178,10 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
struct inode *inode,
struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *fname,
const struct qstr *entry, bool quick);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
@@ -3132,6 +3204,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
extern int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages, bool is_readahead);
extern int __init ext4_init_post_read_processing(void);
extern void ext4_exit_post_read_processing(void);
/* symlink.c */
extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
@@ -3241,6 +3315,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
/* verity.c */
extern const struct fsverity_operations ext4_verityops;
/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough

View File

@@ -455,6 +455,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret)
return ret;
ret = fsverity_file_open(inode, filp);
if (ret)
return ret;
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present

View File

@@ -6,6 +6,7 @@
*/
#include <linux/fs.h>
#include <linux/unicode.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
#include "ext4.h"
@@ -196,7 +197,8 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
* represented, and whether or not the returned hash is 32 bits or 64
* bits. 32 bit hashes will return 0 for the minor hash.
*/
int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
static int __ext4fs_dirhash(const char *name, int len,
struct dx_hash_info *hinfo)
{
__u32 hash;
__u32 minor_hash = 0;
@@ -266,3 +268,33 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
hinfo->minor_hash = minor_hash;
return 0;
}
/*
 * Hash a directory entry name for directory @dir.
 *
 * With CONFIG_UNICODE, a non-empty name in a casefolded directory on a
 * filesystem with an encoding loaded is casefolded first and the folded
 * form is hashed.  If casefolding fails, or in every other case, the raw
 * bytes of @name are hashed.
 *
 * Return: the result of __ext4fs_dirhash(), or -ENOMEM if the temporary
 * casefold buffer cannot be allocated.
 */
int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
		   struct dx_hash_info *hinfo)
{
#ifdef CONFIG_UNICODE
	const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;

	if (len && IS_CASEFOLDED(dir) && um) {
		struct qstr raw = { .name = name, .len = len };
		unsigned char *folded;
		int folded_len, err;

		folded = kzalloc(PATH_MAX, GFP_KERNEL);
		if (!folded)
			return -ENOMEM;

		folded_len = utf8_casefold(um, &raw, folded, PATH_MAX);
		if (folded_len >= 0) {
			err = __ext4fs_dirhash(folded, folded_len, hinfo);
			kfree(folded);
			return err;
		}

		/* Not foldable: fall through and hash the opaque bytes. */
		kfree(folded);
	}
#endif
	return __ext4fs_dirhash(name, len, hinfo);
}

View File

@@ -455,7 +455,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
if (qstr) {
hinfo.hash_version = DX_HASH_HALF_MD4;
hinfo.seed = sbi->s_hash_seed;
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
grp = prandom_u32();

View File

@@ -1421,7 +1421,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
}
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))

View File

@@ -1343,6 +1343,9 @@ retry_journal:
}
if (ret) {
bool extended = (pos + len > inode->i_size) &&
!ext4_verity_in_progress(inode);
unlock_page(page);
/*
* __block_write_begin may have instantiated a few blocks
@@ -1352,11 +1355,11 @@ retry_journal:
* Add inode to orphan list in case we crash before
* truncate finishes
*/
if (pos + len > inode->i_size && ext4_can_truncate(inode))
if (extended && ext4_can_truncate(inode))
ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
if (pos + len > inode->i_size) {
if (extended) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
@@ -1409,6 +1412,7 @@ static int ext4_write_end(struct file *file,
int ret = 0, ret2;
int i_size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
bool verity = ext4_verity_in_progress(inode);
trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_write_end(inode, pos, len, copied);
@@ -1427,12 +1431,16 @@ static int ext4_write_end(struct file *file,
/*
* it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*
* If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
* blocks are being written past EOF, so skip the i_size update.
*/
i_size_changed = ext4_update_inode_size(inode, pos + copied);
if (!verity)
i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
put_page(page);
if (old_size < pos)
if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1443,7 +1451,7 @@ static int ext4_write_end(struct file *file,
if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
if (pos + len > inode->i_size && ext4_can_truncate(inode))
if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1454,7 +1462,7 @@ errout:
if (!ret)
ret = ret2;
if (pos + len > inode->i_size) {
if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -1515,6 +1523,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
int size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
bool verity = ext4_verity_in_progress(inode);
trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_journalled_write_end(inode, pos, len, copied);
@@ -1545,13 +1554,14 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial)
SetPageUptodate(page);
}
size_changed = ext4_update_inode_size(inode, pos + copied);
if (!verity)
size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
unlock_page(page);
put_page(page);
if (old_size < pos)
if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
if (size_changed || inline_data) {
@@ -1560,7 +1570,7 @@ static int ext4_journalled_write_end(struct file *file,
ret = ret2;
}
if (pos + len > inode->i_size && ext4_can_truncate(inode))
if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1571,7 +1581,7 @@ errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
if (pos + len > inode->i_size) {
if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -2137,7 +2147,8 @@ static int ext4_writepage(struct page *page,
trace_ext4_writepage(page);
size = i_size_read(inode);
if (page->index == size >> PAGE_SHIFT)
if (page->index == size >> PAGE_SHIFT &&
!ext4_verity_in_progress(inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2221,7 +2232,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
* after page tables are updated.
*/
size = i_size_read(mpd->inode);
if (page->index == size >> PAGE_SHIFT)
if (page->index == size >> PAGE_SHIFT &&
!ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2320,6 +2332,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
if (ext4_verity_in_progress(inode))
blocks = EXT_MAX_BLOCKS;
do {
BUG_ON(buffer_locked(bh));
@@ -3039,8 +3054,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb) ||
S_ISLNK(inode->i_mode)) {
if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3886,6 +3901,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
if (fsverity_active(inode))
return 0;
/*
* If we are doing data journalling we don't support O_DIRECT
@@ -4765,6 +4782,8 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
return true;
}
@@ -4787,9 +4806,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
new_fl |= S_CASEFOLD;
if (flags & EXT4_VERITY_FL)
new_fl |= S_VERITY;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
S_ENCRYPTED);
S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5143,6 +5166,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
EXT4_ERROR_INODE(inode,
"casefold flag without casefold feature");
brelse(iloc.bh);
unlock_new_inode(inode);
@@ -5579,6 +5605,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
error = fsverity_prepare_setattr(dentry, attr);
if (error)
return error;
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)

View File

@@ -300,6 +300,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
unsigned int jflag;
struct super_block *sb = inode->i_sb;
/* Is it quota file? Do not allow user to mess with it */
if (ext4_is_quota_file(inode))
@@ -344,6 +345,23 @@ static int ext4_ioctl_setflags(struct inode *inode,
goto flags_out;
}
if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) {
if (!ext4_has_feature_casefold(sb)) {
err = -EOPNOTSUPP;
goto flags_out;
}
if (!S_ISDIR(inode->i_mode)) {
err = -ENOTDIR;
goto flags_out;
}
if (!ext4_empty_dir(inode)) {
err = -ENOTEMPTY;
goto flags_out;
}
}
/*
* Wait for all pending directio and then flush all the dirty pages
* for this file. The flush marks all the pages readonly, so any
@@ -1112,8 +1130,35 @@ resizefs_out:
#endif
}
case EXT4_IOC_GET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
case FS_IOC_ADD_ENCRYPTION_KEY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_add_key(filp, (void __user *)arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_remove_key_all_users(filp,
(void __user *)arg);
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
case EXT4_IOC_FSGETXATTR:
{
struct fsxattr fa;
@@ -1175,6 +1220,17 @@ out:
}
case EXT4_IOC_SHUTDOWN:
return ext4_shutdown(sb, arg);
case FS_IOC_ENABLE_VERITY:
if (!ext4_has_feature_verity(sb))
return -EOPNOTSUPP;
return fsverity_ioctl_enable(filp, (const void __user *)arg);
case FS_IOC_MEASURE_VERITY:
if (!ext4_has_feature_verity(sb))
return -EOPNOTSUPP;
return fsverity_ioctl_measure(filp, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -1235,8 +1291,15 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
case EXT4_IOC_SHUTDOWN:
case FS_IOC_GETFSMAP:
case FS_IOC_ENABLE_VERITY:
case FS_IOC_MEASURE_VERITY:
break;
default:
return -ENOIOCTLCMD;

View File

@@ -35,6 +35,7 @@
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include "ext4.h"
#include "ext4_jbd2.h"
@@ -642,7 +643,7 @@ static struct stats dx_show_leaf(struct inode *dir,
}
if (!fscrypt_has_encryption_key(dir)) {
/* Directory is not encrypted */
ext4fs_dirhash(de->name,
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
name, h.hash,
@@ -675,8 +676,8 @@ static struct stats dx_show_leaf(struct inode *dir,
name = fname_crypto_str.name;
len = fname_crypto_str.len;
}
ext4fs_dirhash(de->name, de->name_len,
&h);
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
@@ -686,7 +687,7 @@ static struct stats dx_show_leaf(struct inode *dir,
#else
int len = de->name_len;
char *name = de->name;
ext4fs_dirhash(de->name, de->name_len, &h);
ext4fs_dirhash(dir, de->name, de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
@@ -775,7 +776,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (fname && fname_name(fname))
ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -1024,7 +1025,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */
break;
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
@@ -1213,7 +1214,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
while ((char *) de < base + blocksize) {
if (de->name_len && de->inode) {
ext4fs_dirhash(de->name, de->name_len, &h);
ext4fs_dirhash(dir, de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1268,15 +1269,83 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
dx_set_count(entries, count + 1);
}
#ifdef CONFIG_UNICODE
/*
 * Compare a looked-up name against a directory entry name,
 * case-insensitively, using the encoding of @parent's superblock.
 *
 * @parent: directory containing the entry
 * @name:   name being searched for
 * @entry:  name stored in the directory entry
 * @quick:  when set, @name is assumed to already be in casefolded form
 *
 * Returns: 0 if the directory entry matches, more than 0 if it
 * doesn't match or less than zero on error.
 */
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
		    const struct qstr *entry, bool quick)
{
	const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
	const struct unicode_map *um = sbi->s_encoding;
	int cmp = quick ? utf8_strncasecmp_folded(um, name, entry) :
			  utf8_strncasecmp(um, name, entry);

	if (cmp >= 0)
		return cmp;

	/*
	 * Invalid character sequence: an error in strict mode, otherwise
	 * the names are compared as opaque byte sequences.
	 */
	if (ext4_has_strict_mode(sbi))
		return -EINVAL;

	if (name->len != entry->len)
		return 1;

	return memcmp(name->name, entry->name, name->len) != 0;
}
/*
 * Precompute the casefolded form of @iname for lookups in @dir.
 *
 * On success cf_name->name points to a kmalloc()ed buffer holding the
 * folded name and cf_name->len is its length.  cf_name->name is left NULL
 * when @dir is not casefolded, the superblock has no encoding, the
 * allocation fails, or casefolding yields a length <= 0.
 */
void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
				  struct fscrypt_str *cf_name)
{
	int folded_len;

	if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) {
		cf_name->name = NULL;
		return;
	}

	cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
	if (!cf_name->name)
		return;

	folded_len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, iname,
				   cf_name->name, EXT4_NAME_LEN);
	if (folded_len > 0) {
		cf_name->len = (unsigned) folded_len;
		return;
	}

	/* Folding failed or produced nothing: drop the buffer. */
	kfree(cf_name->name);
	cf_name->name = NULL;
}
#endif
/*
* Test whether a directory entry matches the filename being searched for.
*
* Return: %true if the directory entry matches, otherwise %false.
*/
static inline bool ext4_match(const struct ext4_filename *fname,
static inline bool ext4_match(const struct inode *parent,
const struct ext4_filename *fname,
const struct ext4_dir_entry_2 *de)
{
struct fscrypt_name f;
#ifdef CONFIG_UNICODE
const struct qstr entry = {.name = de->name, .len = de->name_len};
#endif
if (!de->inode)
return false;
@@ -1286,6 +1355,19 @@ static inline bool ext4_match(const struct ext4_filename *fname,
#ifdef CONFIG_FS_ENCRYPTION
f.crypto_buf = fname->crypto_buf;
#endif
#ifdef CONFIG_UNICODE
if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
.len = fname->cf_name.len};
return !ext4_ci_compare(parent, &cf, &entry, true);
}
return !ext4_ci_compare(parent, fname->usr_fname, &entry,
false);
}
#endif
return fscrypt_match_name(&f, de->name, de->name_len);
}
@@ -1306,7 +1388,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
if ((char *) de + de->name_len <= dlimit &&
ext4_match(fname, de)) {
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
@@ -1632,6 +1714,17 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-EPERM);
}
}
#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
* from being cached.
*/
return NULL;
}
#endif
return d_splice_alias(inode, dentry);
}
@@ -1842,7 +1935,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset))
return -EFSCORRUPTED;
if (ext4_match(fname, de))
if (ext4_match(dir, fname, de))
return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
@@ -2027,7 +2120,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
if (fname->hinfo.hash_version <= DX_HASH_TEA)
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo);
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo);
memset(frames, 0, sizeof(frames));
frame = frames;
@@ -2080,6 +2173,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct ext4_dir_entry_2 *de;
struct ext4_dir_entry_tail *t;
struct super_block *sb;
struct ext4_sb_info *sbi;
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2091,10 +2185,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
csum_size = sizeof(struct ext4_dir_entry_tail);
sb = dir->i_sb;
sbi = EXT4_SB(sb);
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
#ifdef CONFIG_UNICODE
if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
if (retval)
return retval;
@@ -3032,6 +3133,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
end_rmdir:
brelse(bh);
if (handle)
@@ -3101,6 +3213,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside returning the
* negative dentries at ext4_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
end_unlink:
brelse(bh);
if (handle)

View File

@@ -48,13 +48,103 @@
#include "ext4.h"
#include <trace/events/android_fs.h>
static inline bool ext4_bio_encrypted(struct bio *bio)
/* Minimum number of contexts handed to mempool_create_slab_pool(). */
#define NUM_PREALLOC_POST_READ_CTXS 128
/* Slab cache and mempool that back struct bio_post_read_ctx allocations. */
static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;
/*
 * Postprocessing steps for read bios.
 *
 * Each value is also used as a bit position in
 * bio_post_read_ctx::enabled_steps (1 << STEP_*).  The steps are visited in
 * increasing order by bio_post_read_processing(), starting from
 * STEP_INITIAL.
 */
enum bio_post_read_step {
STEP_INITIAL = 0,
STEP_DECRYPT,
STEP_VERITY,
};
/* Per-bio state for post-read processing; stored in bio->bi_private. */
struct bio_post_read_ctx {
/* the read bio this context belongs to */
struct bio *bio;
/* work item queued for the decrypt or verity step */
struct work_struct work;
/* step most recently entered (an enum bio_post_read_step value) */
unsigned int cur_step;
/* bitmask of (1 << STEP_*) for the steps this bio needs */
unsigned int enabled_steps;
};
static void __read_end_io(struct bio *bio)
{
#ifdef CONFIG_FS_ENCRYPTION
return unlikely(bio->bi_private != NULL);
#else
return false;
#endif
struct page *page;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
page = bv->bv_page;
/* PG_error was set if any post_read step failed */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
/* will re-read again later */
ClearPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
bio_put(bio);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
/*
 * Work handler for STEP_DECRYPT: run fscrypt_decrypt_bio() on the bio owned
 * by the enclosing context, then continue with the next post-read step.
 */
static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *pr_ctx;

	pr_ctx = container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(pr_ctx->bio);
	bio_post_read_processing(pr_ctx);
}
/*
 * Work handler for STEP_VERITY: run fsverity_verify_bio() on the bio owned
 * by the enclosing context, then continue with the next post-read step.
 */
static void verity_work(struct work_struct *work)
{
	struct bio_post_read_ctx *pr_ctx;

	pr_ctx = container_of(work, struct bio_post_read_ctx, work);

	fsverity_verify_bio(pr_ctx->bio);
	bio_post_read_processing(pr_ctx);
}
/*
 * Advance @ctx to its next post-read step.
 *
 * Increments ctx->cur_step and dispatches on the new value.  If the step is
 * enabled in ctx->enabled_steps, the matching work item is queued and the
 * function returns; the work handler calls back in here when it is done.
 * If the step is not enabled, cur_step is bumped again and control falls
 * through to the next case.  Once no step is left, the bio is finished via
 * __read_end_io().
 */
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
/*
* We use different work queues for decryption and for verity because
* verity may require reading metadata pages that need decryption, and
* we shouldn't recurse to the same workqueue.
*/
switch (++ctx->cur_step) {
case STEP_DECRYPT:
if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
INIT_WORK(&ctx->work, decrypt_work);
fscrypt_enqueue_decrypt_work(&ctx->work);
return;
}
/* decryption not needed: skip ahead to the verity step */
ctx->cur_step++;
/* fall-through */
case STEP_VERITY:
if (ctx->enabled_steps & (1 << STEP_VERITY)) {
INIT_WORK(&ctx->work, verity_work);
fsverity_enqueue_verify_work(&ctx->work);
return;
}
/* verity not needed: move past the last step */
ctx->cur_step++;
/* fall-through */
default:
/* all steps done (or none enabled): complete the read */
__read_end_io(ctx->bio);
}
}
static bool bio_post_read_required(struct bio *bio)
{
return bio->bi_private && !bio->bi_status;
}
static void
@@ -82,33 +172,56 @@ ext4_trace_read_completion(struct bio *bio)
*/
static void mpage_end_io(struct bio *bio)
{
struct bio_vec *bv;
int i;
if (trace_android_fs_dataread_start_enabled())
ext4_trace_read_completion(bio);
if (ext4_bio_encrypted(bio)) {
if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_enqueue_decrypt_bio(bio->bi_private, bio);
return;
}
}
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
if (bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
if (!bio->bi_status) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
unlock_page(page);
ctx->cur_step = STEP_INITIAL;
bio_post_read_processing(ctx);
return;
}
__read_end_io(bio);
}
bio_put(bio);
/*
 * Return true if the page at index @idx of @inode must go through fs-verity
 * verification: verity has to be active on the inode and the page index has
 * to lie below DIV_ROUND_UP(i_size, PAGE_SIZE).
 */
static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
{
	if (!fsverity_active(inode))
		return false;

	return idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
}
/*
 * Work out which post-read steps a read bio needs and, if there are any,
 * attach a freshly allocated context to it.
 *
 * @inode:     inode being read
 * @bio:       bio that will carry the read
 * @first_idx: index of the first page that will be added to @bio
 *
 * Returns NULL when no step is required (bio->bi_private is left untouched),
 * the new context (also stored in bio->bi_private) when one is, or
 * ERR_PTR(-ENOMEM) if the mempool allocation returns NULL.
 */
static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
							struct bio *bio,
							pgoff_t first_idx)
{
	struct bio_post_read_ctx *pr_ctx;
	unsigned int steps = 0;

	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
		steps |= 1 << STEP_DECRYPT;

	if (ext4_need_verity(inode, first_idx))
		steps |= 1 << STEP_VERITY;

	if (!steps)
		return NULL;

	pr_ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
	if (!pr_ctx)
		return ERR_PTR(-ENOMEM);

	pr_ctx->bio = bio;
	pr_ctx->enabled_steps = steps;
	bio->bi_private = pr_ctx;

	return pr_ctx;
}
/*
 * Upper byte limit used when mapping blocks for a read.
 *
 * With CONFIG_FS_VERITY enabled, a verity inode (or one with verity being
 * enabled) is readable up to s_maxbytes; every other inode is limited to its
 * i_size.
 */
static inline loff_t ext4_readpage_limit(struct inode *inode)
{
	if (!IS_ENABLED(CONFIG_FS_VERITY))
		return i_size_read(inode);

	if (IS_VERITY(inode) || ext4_verity_in_progress(inode))
		return inode->i_sb->s_maxbytes;

	return i_size_read(inode);
}
static void
@@ -179,7 +292,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
last_block_in_file = (ext4_readpage_limit(inode) +
blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
page_block = 0;
@@ -256,6 +370,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
zero_user_segment(page, first_hole << blkbits,
PAGE_SIZE);
if (first_hole == 0) {
if (ext4_need_verity(inode, page->index) &&
!fsverity_verify_page(page))
goto set_error_page;
SetPageUptodate(page);
unlock_page(page);
goto next_page;
@@ -279,18 +396,16 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (bio == NULL) {
struct fscrypt_ctx *ctx = NULL;
struct bio_post_read_ctx *ctx;
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
ctx = fscrypt_get_ctx(GFP_NOFS);
if (IS_ERR(ctx))
goto set_error_page;
}
bio = bio_alloc(GFP_KERNEL,
min_t(int, nr_pages, BIO_MAX_PAGES));
if (!bio) {
if (ctx)
fscrypt_release_ctx(ctx);
if (!bio)
goto set_error_page;
ctx = get_bio_post_read_ctx(inode, bio, page->index);
if (IS_ERR(ctx)) {
bio_put(bio);
bio = NULL;
goto set_error_page;
}
bio_set_dev(bio, bdev);
@@ -331,3 +446,29 @@ int ext4_mpage_readpages(struct address_space *mapping,
ext4_submit_bio_read(bio);
return 0;
}
/*
 * Create the slab cache and the mempool used for struct bio_post_read_ctx.
 *
 * Returns 0 on success or -ENOMEM if either object cannot be created; on
 * failure the slab cache is destroyed again if it had been created.
 */
int __init ext4_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("ext4_bio_post_read_ctx",
				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		return -ENOMEM;

	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool) {
		kmem_cache_destroy(bio_post_read_ctx_cache);
		return -ENOMEM;
	}

	return 0;
}
/*
 * Tear down what ext4_init_post_read_processing() set up: the mempool
 * first, then the slab cache it draws from.
 */
void ext4_exit_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

View File

@@ -42,6 +42,7 @@
#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -1055,6 +1056,9 @@ static void ext4_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
kfree(sbi);
}
@@ -1103,6 +1107,9 @@ static int ext4_drop_inode(struct inode *inode)
{
int drop = generic_drop_inode(inode);
if (!drop)
drop = fscrypt_drop_inode(inode);
trace_ext4_drop_inode(inode, drop);
return drop;
}
@@ -1179,6 +1186,7 @@ void ext4_clear_inode(struct inode *inode)
EXT4_I(inode)->jinode = NULL;
}
fscrypt_put_encryption_info(inode);
fsverity_cleanup_inode(inode);
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -1753,6 +1761,36 @@ static const struct mount_opts {
{Opt_err, 0, 0}
};
#ifdef CONFIG_UNICODE
/*
 * Table of on-disk encoding identifiers that this kernel understands.
 * ext4_sb_read_encoding() matches the superblock's s_encoding field against
 * the magic column.
 */
static const struct ext4_sb_encodings {
/* value compared against le16_to_cpu(es->s_encoding) */
__u16 magic;
/* encoding name, used in mount-time messages */
char *name;
/* version string passed to utf8_load() and printed in mount-time messages */
char *version;
} ext4_sb_encoding_map[] = {
{EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
};
/*
 * Look up the encoding recorded in the on-disk superblock.
 *
 * @es:       on-disk superblock
 * @encoding: set to the matching ext4_sb_encoding_map entry on success
 * @flags:    set to the CPU-endian value of es->s_encoding_flags on success
 *
 * Returns 0 on success, or -EINVAL (leaving both outputs untouched) if the
 * superblock's encoding magic is not in ext4_sb_encoding_map.
 */
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
				 const struct ext4_sb_encodings **encoding,
				 __u16 *flags)
{
	const __u16 wanted = le16_to_cpu(es->s_encoding);
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(ext4_sb_encoding_map); idx++) {
		if (ext4_sb_encoding_map[idx].magic != wanted)
			continue;

		*encoding = &ext4_sb_encoding_map[idx];
		*flags = le16_to_cpu(es->s_encoding_flags);
		return 0;
	}

	return -EINVAL;
}
#endif
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum,
unsigned int *journal_ioprio, int is_remount)
@@ -2883,6 +2921,15 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
#ifndef CONFIG_UNICODE
if (ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR,
"Filesystem with casefold feature cannot be "
"mounted without CONFIG_UNICODE");
return 0;
}
#endif
if (readonly)
return 1;
@@ -3773,6 +3820,43 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
&journal_ioprio, 0))
goto failed_mount;
#ifdef CONFIG_UNICODE
if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
if (ext4_has_feature_encrypt(sb)) {
ext4_msg(sb, KERN_ERR,
"Can't mount with encoding and encryption");
goto failed_mount;
}
if (ext4_sb_read_encoding(es, &encoding_info,
&encoding_flags)) {
ext4_msg(sb, KERN_ERR,
"Encoding requested by superblock is unknown");
goto failed_mount;
}
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
ext4_msg(sb, KERN_ERR,
"can't mount with superblock charset: %s-%s "
"not supported by the kernel. flags: 0x%x.",
encoding_info->name, encoding_info->version,
encoding_flags);
goto failed_mount;
}
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
sbi->s_encoding = encoding;
sbi->s_encoding_flags = encoding_flags;
}
#endif
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
"with data=journal disables delayed "
@@ -4213,6 +4297,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
#endif
#ifdef CONFIG_FS_VERITY
sb->s_vop = &ext4_verityops;
#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
@@ -4360,6 +4447,11 @@ no_journal:
goto failed_mount_wq;
}
if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
goto failed_mount_wq;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
!ext4_has_feature_encrypt(sb)) {
ext4_set_feature_encrypt(sb);
@@ -4407,6 +4499,12 @@ no_journal:
iput(root);
goto failed_mount4;
}
#ifdef CONFIG_UNICODE
if (sbi->s_encoding)
sb->s_d_op = &ext4_dentry_ops;
#endif
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
@@ -4591,6 +4689,11 @@ failed_mount2:
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
#ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
@@ -6025,6 +6128,10 @@ static int __init ext4_init_fs(void)
if (err)
return err;
err = ext4_init_post_read_processing();
if (err)
goto out6;
err = ext4_init_pageio();
if (err)
goto out5;
@@ -6063,6 +6170,8 @@ out3:
out4:
ext4_exit_pageio();
out5:
ext4_exit_post_read_processing();
out6:
ext4_exit_es();
return err;
@@ -6079,6 +6188,7 @@ static void __exit ext4_exit_fs(void)
ext4_exit_sysfs();
ext4_exit_system_zone();
ext4_exit_pageio();
ext4_exit_post_read_processing();
ext4_exit_es();
}

View File

@@ -227,6 +227,12 @@ EXT4_ATTR_FEATURE(meta_bg_resize);
#ifdef CONFIG_FS_ENCRYPTION
EXT4_ATTR_FEATURE(encryption);
#endif
#ifdef CONFIG_UNICODE
EXT4_ATTR_FEATURE(casefold);
#endif
#ifdef CONFIG_FS_VERITY
EXT4_ATTR_FEATURE(verity);
#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
@@ -235,6 +241,12 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(meta_bg_resize),
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
#endif
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
#endif
#ifdef CONFIG_FS_VERITY
ATTR_LIST(verity),
#endif
ATTR_LIST(metadata_csum_seed),
NULL,

367
fs/ext4/verity.c Normal file
View File

@@ -0,0 +1,367 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/ext4/verity.c: fs-verity support for ext4
*
* Copyright 2019 Google LLC
*/
/*
* Implementation of fsverity_operations for ext4.
*
* ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
* the end of the file, starting at the first 64K boundary beyond i_size. This
* approach works because (a) verity files are readonly, and (b) pages fully
* beyond i_size aren't visible to userspace but can be read/written internally
* by ext4 with only some relatively small changes to ext4. This approach
* avoids having to depend on the EA_INODE feature and on rearchitecturing
* ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
* to support encrypting xattrs. Note that the verity metadata *must* be
* encrypted when the file is, since it contains hashes of the plaintext data.
*
* Using a 64K boundary rather than a 4K one keeps things ready for
* architectures with 64K pages, and it doesn't necessarily waste space on-disk
* since there can be a hole between i_size and the start of the Merkle tree.
*/
#include <linux/quotaops.h>
#include "ext4.h"
#include "ext4_extents.h"
#include "ext4_jbd2.h"
/*
 * Byte offset at which the verity metadata of @inode starts: i_size rounded
 * up to a 64K boundary.
 */
static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
{
	const loff_t boundary = 65536;

	return round_up(inode->i_size, boundary);
}
/*
 * Copy @count bytes of verity metadata starting at file offset @pos into
 * @buf, going through the page cache one page at a time.  __vfs_read() can't
 * be used because the data lies beyond i_size.
 *
 * Returns 0 on success or the error from read_mapping_page().
 */
static int pagecache_read(struct inode *inode, void *buf, size_t count,
			  loff_t pos)
{
	struct address_space *mapping = inode->i_mapping;

	while (count != 0) {
		const unsigned long in_page = offset_in_page(pos);
		const size_t chunk = min_t(size_t, count, PAGE_SIZE - in_page);
		struct page *page;
		void *kaddr;

		page = read_mapping_page(mapping, pos >> PAGE_SHIFT, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);

		kaddr = kmap_atomic(page);
		memcpy(buf, kaddr + in_page, chunk);
		kunmap_atomic(kaddr);

		put_page(page);

		buf += chunk;
		pos += chunk;
		count -= chunk;
	}

	return 0;
}
/*
 * Write @count bytes of verity metadata from @buf at file offset @pos, one
 * page at a time via pagecache_write_begin()/pagecache_write_end().  Used
 * for FS_IOC_ENABLE_VERITY; kernel_write() can't be used because the file
 * descriptor is readonly.
 *
 * Returns 0 on success, -EFBIG if the range would end past s_maxbytes, a
 * negative error from the write_begin/write_end calls, or -EIO if
 * write_end reports a short copy.
 */
static int pagecache_write(struct inode *inode, const void *buf, size_t count,
			   loff_t pos)
{
	struct address_space *mapping = inode->i_mapping;

	if (pos + count > inode->i_sb->s_maxbytes)
		return -EFBIG;

	while (count != 0) {
		const unsigned long in_page = offset_in_page(pos);
		const size_t chunk = min_t(size_t, count, PAGE_SIZE - in_page);
		struct page *page;
		void *fsdata;
		void *kaddr;
		int ret;

		ret = pagecache_write_begin(NULL, mapping, pos, chunk, 0,
					    &page, &fsdata);
		if (ret)
			return ret;

		kaddr = kmap_atomic(page);
		memcpy(kaddr + in_page, buf, chunk);
		kunmap_atomic(kaddr);

		ret = pagecache_write_end(NULL, mapping, pos, chunk, chunk,
					  page, fsdata);
		if (ret < 0)
			return ret;
		if (ret != chunk)
			return -EIO;

		buf += chunk;
		pos += chunk;
		count -= chunk;
	}

	return 0;
}
/*
 * Prepare @filp's inode for having verity enabled.
 *
 * Attaches the jbd inode, initializes quotas, converts inline data, checks
 * that the file is extent-based, truncates blocks past EOF, and finally adds
 * the inode to the orphan list and marks it EXT4_STATE_VERITY_IN_PROGRESS.
 *
 * Returns 0 on success, -EBUSY if verity is already being enabled on the
 * inode, -EOPNOTSUPP for non-extent files, or the error of whichever setup
 * step failed.  The in-progress state is only set if ext4_orphan_add()
 * succeeds.
 */
static int ext4_begin_enable_verity(struct file *filp)
{
struct inode *inode = file_inode(filp);
const int credits = 2; /* superblock and inode for ext4_orphan_add() */
handle_t *handle;
int err;
if (ext4_verity_in_progress(inode))
return -EBUSY;
/*
* Since the file was opened readonly, we have to initialize the jbd
* inode and quotas here and not rely on ->open() doing it. This must
* be done before evicting the inline data.
*/
err = ext4_inode_attach_jinode(inode);
if (err)
return err;
err = dquot_initialize(inode);
if (err)
return err;
err = ext4_convert_inline_data(inode);
if (err)
return err;
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ext4_warning_inode(inode,
"verity is only allowed on extent-based files");
return -EOPNOTSUPP;
}
/*
* ext4 uses the last allocated block to find the verity descriptor, so
* we must remove any other blocks past EOF which might confuse things.
*/
err = ext4_truncate(inode);
if (err)
return err;
handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
/* the handle is stopped regardless of whether the orphan add worked */
err = ext4_orphan_add(handle, inode);
if (err == 0)
ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
ext4_journal_stop(handle);
return err;
}
/*
 * Write the verity descriptor and its size trailer.
 *
 * Layout: the descriptor starts on the first filesystem block boundary at or
 * after the end of the Merkle tree.  Its size is stored as a little-endian
 * 32-bit value in the last 4 bytes of the last allocated filesystem block,
 * which is the block where the descriptor ends or, if fewer than 4 bytes
 * remain there, the block after it.
 *
 * The descriptor can't simply live in an xattr because it *must* be
 * encrypted when ext4 encryption is used, and ext4 encryption doesn't
 * encrypt xattrs.  A descriptor carrying a large signature blob may also be
 * too large for an xattr without the EA_INODE feature.
 *
 * Returns 0 on success or the error from pagecache_write().
 */
static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
					size_t desc_size, u64 merkle_tree_size)
{
	const unsigned int blocksize = i_blocksize(inode);
	const __le32 desc_size_disk = cpu_to_le32(desc_size);
	u64 desc_pos;
	u64 desc_end;
	u64 desc_size_pos;
	int err;

	desc_pos = round_up(ext4_verity_metadata_pos(inode) + merkle_tree_size,
			    blocksize);
	desc_end = desc_pos + desc_size;

	/* Trailer sits at the very end of the block that can hold it. */
	desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
				 blocksize) -
			sizeof(desc_size_disk);

	err = pagecache_write(inode, desc, desc_size, desc_pos);
	if (!err)
		err = pagecache_write(inode, &desc_size_disk,
				      sizeof(desc_size_disk), desc_size_pos);

	return err;
}
/*
 * Finish (or abort) enabling verity on @filp's inode.
 *
 * @desc:             verity descriptor to store, or NULL if the caller is
 *                    reporting that enabling verity failed
 * @desc_size:        size of @desc in bytes
 * @merkle_tree_size: size of the already-written Merkle tree in bytes
 *
 * With a non-NULL @desc the descriptor is written and the mapping is
 * flushed; on a NULL @desc or any error so far, ext4_truncate() is called to
 * drop what was written past i_size.  In every case the in-progress state is
 * cleared and the inode is removed from the orphan list.  Only if everything
 * succeeded is EXT4_INODE_VERITY set, in the same transaction as the orphan
 * removal.
 *
 * Returns 0 on success; otherwise the first error recorded in err, falling
 * back to the ext4_orphan_del() error.
 *
 * NOTE(review): the return value of ext4_truncate() on the failure path is
 * not checked, and when ext4_journal_start() fails its error is returned
 * even if an earlier error was already stored in err.
 */
static int ext4_end_enable_verity(struct file *filp, const void *desc,
size_t desc_size, u64 merkle_tree_size)
{
struct inode *inode = file_inode(filp);
const int credits = 2; /* superblock and inode for ext4_orphan_del() */
handle_t *handle;
int err = 0;
int err2;
if (desc != NULL) {
/* Succeeded; write the verity descriptor. */
err = ext4_write_verity_descriptor(inode, desc, desc_size,
merkle_tree_size);
/* Write all pages before clearing VERITY_IN_PROGRESS. */
if (!err)
err = filemap_write_and_wait(inode->i_mapping);
}
/* If we failed, truncate anything we wrote past i_size. */
if (desc == NULL || err)
ext4_truncate(inode);
/*
* We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
* deleting the inode from the orphan list, even if something failed.
* If everything succeeded, we'll also set the verity bit in the same
* transaction.
*/
ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
if (IS_ERR(handle)) {
/* no transaction available: call ext4_orphan_del() with a NULL handle */
ext4_orphan_del(NULL, inode);
return PTR_ERR(handle);
}
err2 = ext4_orphan_del(handle, inode);
if (err2)
goto out_stop;
if (desc != NULL && !err) {
struct ext4_iloc iloc;
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
goto out_stop;
ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
ext4_set_inode_flags(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
out_stop:
ext4_journal_stop(handle);
/* prefer the earlier error; otherwise report the orphan-delete result */
return err ?: err2;
}
/*
 * Locate the verity descriptor of @inode.
 *
 * Finds the end of the last extent, reads the little-endian 32-bit
 * descriptor size stored in the final 4 bytes before that point, and derives
 * the descriptor's start from it.
 *
 * @desc_size_ret: set to the descriptor size in bytes on success
 * @desc_pos_ret:  set to the descriptor's byte offset in the file on success
 *
 * Returns 0 on success, -EFSCORRUPTED if the file does not use extents, has
 * no extents, or the size/position checks fail, or the error from
 * ext4_find_extent() / pagecache_read().
 */
static int ext4_get_verity_descriptor_location(struct inode *inode,
size_t *desc_size_ret,
u64 *desc_pos_ret)
{
struct ext4_ext_path *path;
struct ext4_extent *last_extent;
u32 end_lblk;
u64 desc_size_pos;
__le32 desc_size_disk;
u32 desc_size;
u64 desc_pos;
int err;
/*
* Descriptor size is in last 4 bytes of last allocated block.
* See ext4_write_verity_descriptor().
*/
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
return -EFSCORRUPTED;
}
/* look up the highest logical block to land on the last extent */
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
if (IS_ERR(path))
return PTR_ERR(path);
last_extent = path[path->p_depth].p_ext;
if (!last_extent) {
EXT4_ERROR_INODE(inode, "verity file has no extents");
ext4_ext_drop_refs(path);
kfree(path);
return -EFSCORRUPTED;
}
/* first logical block past the last extent, converted to a byte offset */
end_lblk = le32_to_cpu(last_extent->ee_block) +
ext4_ext_get_actual_len(last_extent);
desc_size_pos = (u64)end_lblk << inode->i_blkbits;
ext4_ext_drop_refs(path);
kfree(path);
/* guard the subtraction below against underflow */
if (desc_size_pos < sizeof(desc_size_disk))
goto bad;
desc_size_pos -= sizeof(desc_size_disk);
err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
desc_size_pos);
if (err)
return err;
desc_size = le32_to_cpu(desc_size_disk);
/*
* The descriptor is stored just before the desc_size_disk, but starting
* on a filesystem block boundary.
*/
if (desc_size > INT_MAX || desc_size > desc_size_pos)
goto bad;
desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
/* the descriptor may not start before the verity metadata area */
if (desc_pos < ext4_verity_metadata_pos(inode))
goto bad;
*desc_size_ret = desc_size;
*desc_pos_ret = desc_pos;
return 0;
bad:
EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
return -EFSCORRUPTED;
}
/*
 * Fetch the verity descriptor of @inode.
 *
 * @buf:      destination buffer; not touched when @buf_size is 0
 * @buf_size: capacity of @buf in bytes, or 0 to only query the size
 *
 * Returns the descriptor size on success, -ERANGE if a non-zero @buf_size is
 * smaller than the descriptor, or the error from locating or reading it.
 */
static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
				      size_t buf_size)
{
	size_t desc_size = 0;
	u64 desc_pos = 0;
	int err;

	err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
	if (err)
		return err;

	/* Size query only: nothing to copy. */
	if (!buf_size)
		return desc_size;

	if (desc_size > buf_size)
		return -ERANGE;

	err = pagecache_read(inode, buf, desc_size, desc_pos);
	if (err)
		return err;

	return desc_size;
}
/*
 * Read page @index of the Merkle tree.  @index is relative to the start of
 * the verity metadata, so it is shifted by the metadata's page offset before
 * the page cache lookup.
 */
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index)
{
	const pgoff_t metadata_first_page =
		ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;

	return read_mapping_page(inode->i_mapping,
				 index + metadata_first_page, NULL);
}
/*
 * Write Merkle tree block number @index, of size (1 << @log_blocksize)
 * bytes, at its position inside the verity metadata area of @inode.
 *
 * Returns the result of pagecache_write().
 */
static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
					u64 index, int log_blocksize)
{
	loff_t pos;

	pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);

	return pagecache_write(inode, buf, 1 << log_blocksize, pos);
}
/*
 * ext4's fsverity_operations: wires the fs-verity hooks to the ext4
 * implementations defined in this file.
 */
const struct fsverity_operations ext4_verityops = {
.begin_enable_verity = ext4_begin_enable_verity,
.end_enable_verity = ext4_end_enable_verity,
.get_verity_descriptor = ext4_get_verity_descriptor,
.read_merkle_tree_page = ext4_read_merkle_tree_page,
.write_merkle_tree_block = ext4_write_merkle_tree_block,
};

View File

@@ -1,6 +1,7 @@
config F2FS_FS
tristate "F2FS filesystem support"
depends on BLOCK
select NLS
select CRYPTO
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
@@ -59,7 +60,9 @@ config F2FS_FS_SECURITY
Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
Linux. This option enables an extended attribute handler for file
security labels in the f2fs filesystem, so that it requires enabling
the extended attribute support in advance.
the extended attribute support in advance. In particular you need this
option if you use the setcap command to assign initial process
capabilities to executables (the security.* extended attributes).
If you are not using a security module, say N.

View File

@@ -8,3 +8,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
f2fs-$(CONFIG_FS_VERITY) += verity.o

View File

@@ -75,6 +75,7 @@ static enum count_type __read_io_type(struct page *page)
enum bio_post_read_step {
STEP_INITIAL = 0,
STEP_DECRYPT,
STEP_VERITY,
};
struct bio_post_read_ctx {
@@ -121,8 +122,23 @@ static void decrypt_work(struct work_struct *work)
bio_post_read_processing(ctx);
}
/*
 * Work handler for STEP_VERITY: run fsverity_verify_bio() on the bio owned
 * by the enclosing context, then continue with the next post-read step.
 */
static void verity_work(struct work_struct *work)
{
	struct bio_post_read_ctx *pr_ctx;

	pr_ctx = container_of(work, struct bio_post_read_ctx, work);

	fsverity_verify_bio(pr_ctx->bio);
	bio_post_read_processing(pr_ctx);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
/*
* We use different work queues for decryption and for verity because
* verity may require reading metadata pages that need decryption, and
* we shouldn't recurse to the same workqueue.
*/
switch (++ctx->cur_step) {
case STEP_DECRYPT:
if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
@@ -132,6 +148,14 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
}
ctx->cur_step++;
/* fall-through */
case STEP_VERITY:
if (ctx->enabled_steps & (1 << STEP_VERITY)) {
INIT_WORK(&ctx->work, verity_work);
fsverity_enqueue_verify_work(&ctx->work);
return;
}
ctx->cur_step++;
/* fall-through */
default:
__read_end_io(ctx->bio);
}
@@ -269,26 +293,25 @@ static bool __same_bdev(struct f2fs_sb_info *sbi,
/*
* Low-level block read/write IO operations.
*/
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
struct writeback_control *wbc,
int npages, bool is_read,
enum page_type type, enum temp_type temp)
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
struct f2fs_sb_info *sbi = fio->sbi;
struct bio *bio;
bio = f2fs_bio_alloc(sbi, npages, true);
f2fs_target_device(sbi, blk_addr, bio);
if (is_read) {
f2fs_target_device(sbi, fio->new_blkaddr, bio);
if (is_read_io(fio->op)) {
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = NULL;
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
}
if (wbc)
wbc_init_bio(wbc, bio);
if (fio->io_wbc)
wbc_init_bio(fio->io_wbc, bio);
return bio;
}
@@ -305,6 +328,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
if (F2FS_IO_ALIGNED(sbi))
goto submit_io;
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
start %= F2FS_IO_SIZE(sbi);
@@ -497,8 +523,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
1, is_read_io(fio->op), fio->type, fio->temp);
bio = __bio_alloc(fio, 1);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
@@ -520,6 +545,43 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
/*
 * A page can be appended to @bio only if its block address directly follows
 * the last block already in the bio and __same_bdev() accepts the new
 * address for that bio.
 */
static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	const bool contiguous = (last_blkaddr + 1 == cur_blkaddr);

	return contiguous && __same_bdev(sbi, cur_blkaddr, bio);
}
/*
 * Two requests have a mergeable I/O type when both the operation and the
 * operation flags recorded in @io match those of @fio.
 */
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
					struct f2fs_io_info *fio)
{
	return io->fio.op == fio->op &&
	       io->fio.op_flags == fio->op_flags;
}
/*
 * Decide whether the page described by @fio may be merged into @bio.
 *
 * In IO-aligned mode, DATA and NODE pages are refused when the bio currently
 * holds a whole multiple of the IO size and has fewer free vectors left than
 * one more IO unit needs.  Otherwise the page must be block-contiguous on
 * the same device and share the bio's operation and flags.
 */
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int nr_filled =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int nr_per_io = F2FS_IO_SIZE(sbi);
		unsigned int vecs_left = bio->bi_max_vecs - bio->bi_vcnt;

		/* bio is aligned right now and can't take another full unit */
		if (nr_filled % nr_per_io == 0 && vecs_left < nr_per_io)
			return false;
	}

	return page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr) &&
	       io_type_is_mergeable(io, fio);
}
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
@@ -533,15 +595,14 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
if (bio && (*fio->last_block + 1 != fio->new_blkaddr ||
!__same_bdev(fio->sbi, fio->new_blkaddr, bio))) {
if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
fio->new_blkaddr)) {
__submit_bio(fio->sbi, bio, fio->type);
bio = NULL;
}
alloc_new:
if (!bio) {
bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
BIO_MAX_PAGES, false, fio->type, fio->temp);
bio = __bio_alloc(fio, BIO_MAX_PAGES);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
}
@@ -607,21 +668,19 @@ next:
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
io->last_block_in_bio, fio->new_blkaddr))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
if ((fio->type == DATA || fio->type == NODE) &&
if (F2FS_IO_ALIGNED(sbi) &&
(fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
dec_page_count(sbi, WB_DATA_TYPE(bio_page));
fio->retry = true;
goto skip;
}
io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
BIO_MAX_PAGES, false,
fio->type, fio->temp);
io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
io->fio = *fio;
}
@@ -642,13 +701,20 @@ skip:
goto next;
out:
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
f2fs_is_checkpoint_ready(sbi))
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
up_write(&io->io_rwsem);
}
/*
 * Return true if the page at index @idx of @inode must go through fs-verity
 * verification: verity has to be active on the inode and the page index has
 * to lie below DIV_ROUND_UP(i_size, PAGE_SIZE).
 */
static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
{
	if (!fsverity_active(inode))
		return false;

	return idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag)
unsigned nr_pages, unsigned op_flag,
pgoff_t first_idx)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -664,6 +730,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
if (post_read_steps) {
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
if (!ctx) {
@@ -685,7 +755,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -1026,7 +1096,7 @@ alloc:
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
/*
* i_size will be updated by direct_IO. Otherwise, we'll get stale
@@ -1203,10 +1273,10 @@ next_block:
if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create) {
err = __allocate_data_block(&dn, map->m_seg_type);
if (!err) {
blkaddr = dn.data_blkaddr;
set_inode_flag(inode, FI_APPEND_WRITE);
}
if (err)
goto sync_out;
blkaddr = dn.data_blkaddr;
set_inode_flag(inode, FI_APPEND_WRITE);
}
} else {
if (create) {
@@ -1411,7 +1481,7 @@ static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type(inode->i_write_hint),
true);
IS_SWAPFILE(inode) ? false : true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
@@ -1542,7 +1612,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
goto out;
}
if (f2fs_has_inline_data(inode)) {
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
if (ret != -EAGAIN)
goto out;
@@ -1608,6 +1678,15 @@ out:
return ret;
}
/*
 * Upper byte limit used by the read path for @inode.
 *
 * With CONFIG_FS_VERITY enabled, an inode that is a verity file or has a
 * verity build in progress is limited by the superblock's s_maxbytes
 * instead of i_size; every other inode is limited by i_size.
 */
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
	if (!IS_ENABLED(CONFIG_FS_VERITY))
		return i_size_read(inode);

	if (IS_VERITY(inode) || f2fs_verity_in_progress(inode))
		return inode->i_sb->s_maxbytes;

	return i_size_read(inode);
}
static int f2fs_read_single_page(struct inode *inode, struct page *page,
unsigned nr_pages,
struct f2fs_map_blocks *map,
@@ -1626,7 +1705,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
block_in_file = (sector_t)page_index(page);
last_block = block_in_file + nr_pages;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
@@ -1671,6 +1750,11 @@ got_it:
} else {
zero_out:
zero_user_segment(page, 0, PAGE_SIZE);
if (f2fs_need_verity(inode, page->index) &&
!fsverity_verify_page(page)) {
ret = -EIO;
goto out;
}
if (!PageUptodate(page))
SetPageUptodate(page);
unlock_page(page);
@@ -1681,15 +1765,15 @@ zero_out:
* This page will go to BIO. Do we need to send this
* BIO off first?
*/
if (bio && (*last_block_in_bio != block_nr - 1 ||
!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
*last_block_in_bio, block_nr)) {
submit_and_realloc:
__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0);
is_readahead ? REQ_RAHEAD : 0, page->index);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2091,7 +2175,7 @@ static int __write_data_page(struct page *page, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (page->index < end_index)
if (page->index < end_index || f2fs_verity_in_progress(inode))
goto write;
/*
@@ -2466,7 +2550,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
if (to > i_size) {
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
@@ -2497,7 +2582,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
* the block addresses when there is no need to fill the page.
*/
if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
!is_inode_flag_set(inode, FI_NO_PREALLOC))
!is_inode_flag_set(inode, FI_NO_PREALLOC) &&
!f2fs_verity_in_progress(inode))
return 0;
/* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -2588,9 +2674,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
}
trace_f2fs_write_begin(inode, pos, len, flags);
err = f2fs_is_checkpoint_ready(sbi);
if (err)
if (!f2fs_is_checkpoint_ready(sbi)) {
err = -ENOSPC;
goto fail;
}
if ((f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
@@ -2646,7 +2733,8 @@ repeat:
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
zero_user_segment(page, len, PAGE_SIZE);
return 0;
}
@@ -2710,7 +2798,8 @@ static int f2fs_write_end(struct file *file,
set_page_dirty(page);
if (pos + copied > i_size_read(inode))
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode))
f2fs_i_size_write(inode, pos + copied);
unlock_out:
f2fs_put_page(page, 1);
@@ -3184,7 +3273,9 @@ void f2fs_clear_radix_tree_dirty_tag(struct page *page)
int __init f2fs_init_post_read_processing(void)
{
bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
bio_post_read_ctx_cache =
kmem_cache_create("f2fs_bio_post_read_ctx",
sizeof(struct bio_post_read_ctx), 0, 0, NULL);
if (!bio_post_read_ctx_cache)
goto fail;
bio_post_read_ctx_pool =

View File

@@ -67,7 +67,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
if (SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
@@ -75,7 +75,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->flush_list_empty =
llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
if (SM_I(sbi)->dcc_info) {
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =

View File

@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/signal.h>
#include <linux/unicode.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
@@ -81,7 +82,8 @@ static unsigned long dir_block_index(unsigned int level,
return bidx;
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
struct fscrypt_name *fname,
f2fs_hash_t namehash,
int *max_slots,
@@ -93,7 +95,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(NULL, &d, dentry_blk);
make_dentry_ptr_block(dir, &d, dentry_blk);
de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
@@ -101,14 +103,116 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
return de;
}
#ifdef CONFIG_UNICODE
/*
 * Case-insensitive comparison of the name being looked up against a
 * directory entry name.
 *
 * @parent: directory inode; its superblock supplies the encoding
 * @name:   name being searched for
 * @entry:  name stored in the directory entry
 * @quick:  when true, use utf8_strncasecmp_folded() instead of
 *          utf8_strncasecmp()
 *
 * Returns: 0 if the directory entry matches, more than 0 if it
 * doesn't match or less than zero on error.
 */
int f2fs_ci_compare(const struct inode *parent, const struct qstr *name,
				const struct qstr *entry, bool quick)
{
	const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb);
	const struct unicode_map *um = sbi->s_encoding;
	int res = quick ? utf8_strncasecmp_folded(um, name, entry) :
			  utf8_strncasecmp(um, name, entry);

	if (res >= 0)
		return res;

	/*
	 * The comparison helper reported an invalid character sequence:
	 * strict mode turns that into an error, otherwise the names are
	 * compared as opaque byte sequences.
	 */
	if (f2fs_has_strict_mode(sbi))
		return -EINVAL;

	if (name->len != entry->len)
		return 1;

	return memcmp(name->name, entry->name, name->len) != 0;
}
/*
 * Prepare a casefolded copy of @iname in @cf_name for lookups in @dir.
 *
 * On return cf_name->name is NULL when @dir is not casefolded, when the
 * buffer allocation fails, or when utf8_casefold() yields a length <= 0.
 * Otherwise it points to an F2FS_NAME_LEN-byte buffer holding the folded
 * name with cf_name->len set to the folded length.
 */
static void f2fs_fname_setup_ci_filename(struct inode *dir,
					       const struct qstr *iname,
					       struct fscrypt_str *cf_name)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);

	cf_name->name = IS_CASEFOLDED(dir) ?
			f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS) : NULL;
	if (!cf_name->name)
		return;

	cf_name->len = utf8_casefold(sbi->s_encoding, iname,
				     cf_name->name, F2FS_NAME_LEN);
	if ((int)cf_name->len > 0)
		return;

	/* folding failed or produced nothing: drop the buffer */
	kvfree(cf_name->name);
	cf_name->name = NULL;
}
#endif
/*
 * Decide whether directory entry @de, stored at slot @bit_pos of @d, is the
 * entry being looked up.
 *
 * @d:        dentry block/inline descriptor holding the filenames
 * @de:       candidate directory entry
 * @fname:    name being searched for
 * @cf_str:   pre-casefolded form of the searched name; ->name is NULL when
 *            no folded name is available
 * @bit_pos:  slot index of @de inside @d
 * @namehash: hash of the searched name
 *
 * The hash is checked first. With CONFIG_UNICODE, when the superblock has an
 * encoding and the parent directory is casefolded, the stored name is
 * compared case-insensitively (against @cf_str if present, else against the
 * user-supplied name); any non-zero result from f2fs_ci_compare(), including
 * an error, counts as "no match". Otherwise fscrypt_match_name() decides.
 *
 * Return: true if the entry matches, false otherwise.
 */
static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
					struct f2fs_dir_entry *de,
					struct fscrypt_name *fname,
					struct fscrypt_str *cf_str,
					unsigned long bit_pos,
					f2fs_hash_t namehash)
{
#ifdef CONFIG_UNICODE
	struct inode *parent = d->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(parent);
	struct qstr entry;
#endif

	if (de->hash_code != namehash)
		return false;

#ifdef CONFIG_UNICODE
	entry.name = d->filename[bit_pos];
	/*
	 * name_len is a little-endian on-disk field; convert it exactly as
	 * the fscrypt_match_name() call below does, otherwise the length is
	 * wrong on big-endian hosts.
	 */
	entry.len = le16_to_cpu(de->name_len);

	if (sbi->s_encoding && IS_CASEFOLDED(parent)) {
		if (cf_str->name) {
			struct qstr cf = {.name = cf_str->name,
					  .len = cf_str->len};
			return !f2fs_ci_compare(parent, &cf, &entry, true);
		}
		return !f2fs_ci_compare(parent, fname->usr_fname, &entry,
					false);
	}
#endif
	return fscrypt_match_name(fname, d->filename[bit_pos],
				  le16_to_cpu(de->name_len));
}
struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
struct fscrypt_str cf_str = { .name = NULL, .len = 0 };
unsigned long bit_pos = 0;
int max_len = 0;
#ifdef CONFIG_UNICODE
f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str);
#endif
if (max_slots)
*max_slots = 0;
while (bit_pos < d->max) {
@@ -125,9 +229,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
continue;
}
if (de->hash_code == namehash &&
fscrypt_match_name(fname, d->filename[bit_pos],
le16_to_cpu(de->name_len)))
if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash))
goto found;
if (max_slots && max_len > *max_slots)
@@ -141,6 +243,10 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
found:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
#ifdef CONFIG_UNICODE
kvfree(cf_str.name);
#endif
return de;
}
@@ -157,7 +263,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname);
f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -179,8 +285,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
de = find_in_block(dentry_page, fname, namehash, &max_slots,
res_page);
de = find_in_block(dir, dentry_page, fname, namehash,
&max_slots, res_page);
if (de)
break;
@@ -250,6 +356,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct fscrypt_name fname;
int err;
#ifdef CONFIG_UNICODE
if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) &&
utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) {
*res_page = ERR_PTR(-EINVAL);
return NULL;
}
#endif
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
if (err == -ENOENT)
@@ -504,7 +618,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
level = 0;
slots = GET_DENTRY_SLOTS(new_name->len);
dentry_hash = f2fs_dentry_hash(new_name, NULL);
dentry_hash = f2fs_dentry_hash(dir, new_name, NULL);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -568,6 +682,11 @@ add_dentry:
if (inode) {
f2fs_i_pino_write(inode, dir->i_ino);
/* synchronize inode page's data from inode cache */
if (is_inode_flag_set(inode, FI_NEW_INODE))
f2fs_update_inode(inode, page);
f2fs_put_page(page, 1);
}
@@ -943,3 +1062,50 @@ const struct file_operations f2fs_dir_operations = {
.compat_ioctl = f2fs_compat_ioctl,
#endif
};
#ifdef CONFIG_UNICODE
/*
 * ->d_compare hook: compare the name (@str, @len) with @name.
 *
 * If the parent directory of @dentry is not casefolded, the names are
 * compared byte for byte: -1 when the lengths differ, otherwise the
 * memcmp() result (0 on equality). If it is casefolded, the result of
 * f2fs_ci_compare() is returned.
 */
static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
			  const char *str, const struct qstr *name)
{
	struct qstr qstr = {.name = str, .len = len };
	const struct inode *parent = dentry->d_parent->d_inode;

	if (!IS_CASEFOLDED(parent)) {
		if (len != name->len)
			return -1;
		/*
		 * Compare against the name bytes, not against the qstr
		 * structure itself.
		 */
		return memcmp(str, name->name, len);
	}

	return f2fs_ci_compare(parent, name, &qstr, false);
}
/*
 * ->d_hash hook: when the inode of @dentry is casefolded, replace str->hash
 * with the hash of the casefolded form of @str.
 *
 * Return: 0 on success, also when the inode is not casefolded or when
 * folding fails outside strict mode (str->hash is then left untouched);
 * -ENOMEM if the scratch buffer cannot be allocated; -EINVAL if folding
 * fails in strict mode.
 */
static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
	const struct unicode_map *um = sbi->s_encoding;
	unsigned char *folded;
	int folded_len;
	int err = 0;

	if (!IS_CASEFOLDED(dentry->d_inode))
		return 0;

	folded = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
	if (!folded)
		return -ENOMEM;

	folded_len = utf8_casefold(um, str, folded, PATH_MAX);
	if (folded_len >= 0)
		str->hash = full_name_hash(dentry, folded, folded_len);
	else if (f2fs_has_strict_mode(sbi))
		err = -EINVAL;

	kvfree(folded);
	return err;
}
/*
 * Dentry operations table wiring in the casefold-aware hash and compare
 * hooks defined above (f2fs_d_hash and f2fs_d_compare).
 */
const struct dentry_operations f2fs_dentry_ops = {
.d_hash = f2fs_d_hash,
.d_compare = f2fs_d_compare,
};
#endif

View File

@@ -25,6 +25,7 @@
#include <crypto/hash.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -151,8 +152,9 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
#define F2FS_FEATURE_VERITY 0x0400
#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define F2FS_FEATURE_CASEFOLD 0x1000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -417,6 +419,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL
#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
@@ -630,7 +635,7 @@ enum {
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
#define FADVISE_VERITY_BIT 0x40
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
@@ -650,6 +655,8 @@ enum {
#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
#define DEF_DIR_LEVEL 0
@@ -1169,6 +1176,10 @@ struct f2fs_sb_info {
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_UNICODE
struct unicode_map *s_encoding;
__u16 s_encoding_flags;
#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1641,6 +1652,7 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
unsigned long flags;
unsigned char *nat_bits;
/*
* In order to re-enable nat_bits we need to call fsck.f2fs by
@@ -1651,10 +1663,12 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
if (lock)
spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
kvfree(NM_I(sbi)->nat_bits);
nat_bits = NM_I(sbi)->nat_bits;
NM_I(sbi)->nat_bits = NULL;
if (lock)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
kvfree(nat_bits);
}
static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
@@ -1761,7 +1775,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
goto release_quota;
}
/*
@@ -1806,6 +1820,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
enospc:
percpu_counter_sub(&sbi->alloc_valid_block_count, release);
release_quota:
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
@@ -2360,13 +2375,16 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */
/* Flags that should be inherited by new inodes from their parent. */
#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)
F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
F2FS_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
F2FS_CASEFOLD_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
@@ -2413,6 +2431,7 @@ enum {
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2452,6 +2471,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag)
__mark_inode_dirty_flag(inode, flag, false);
}
static inline bool f2fs_verity_in_progress(struct inode *inode)
{
return IS_ENABLED(CONFIG_FS_VERITY) &&
is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS);
}
static inline void set_acl_inode(struct inode *inode, umode_t mode)
{
F2FS_I(inode)->i_acl_mode = mode;
@@ -2926,6 +2951,11 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
extern int f2fs_ci_compare(const struct inode *parent,
const struct qstr *name,
const struct qstr *entry,
bool quick);
/*
* dir.c
*/
@@ -2989,8 +3019,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
*/
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
struct fscrypt_name *fname);
f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
const struct qstr *name_info, struct fscrypt_name *fname);
/*
* node.c
@@ -3433,6 +3463,9 @@ static inline void f2fs_destroy_root_stats(void) { }
#endif
extern const struct file_operations f2fs_dir_operations;
#ifdef CONFIG_UNICODE
extern const struct dentry_operations f2fs_dentry_ops;
#endif
extern const struct file_operations f2fs_file_operations;
extern const struct inode_operations f2fs_file_inode_operations;
extern const struct address_space_operations f2fs_dblock_aops;
@@ -3522,6 +3555,9 @@ void f2fs_exit_sysfs(void);
int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi);
/* verity.c */
extern const struct fsverity_operations f2fs_verityops;
/*
* crypto support
*/
@@ -3544,7 +3580,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
*/
static inline bool f2fs_post_read_required(struct inode *inode)
{
return f2fs_encrypted_file(inode);
return f2fs_encrypted_file(inode) || fsverity_active(inode);
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -3562,7 +3598,9 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
F2FS_FEATURE_FUNCS(verity, VERITY);
F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
#ifdef CONFIG_BLK_DEV_ZONED
static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
@@ -3681,11 +3719,14 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
*/
if (f2fs_sb_has_blkzoned(sbi))
return true;
if (test_opt(sbi, LFS) && (rw == WRITE) &&
block_unaligned_IO(inode, iocb, iter))
return true;
if (test_opt(sbi, LFS) && (rw == WRITE)) {
if (block_unaligned_IO(inode, iocb, iter))
return true;
if (F2FS_IO_ALIGNED(sbi))
return true;
}
if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
!(inode->i_flags & S_SWAPFILE))
!IS_SWAPFILE(inode))
return true;
return false;

View File

@@ -20,6 +20,7 @@
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
#include <linux/nls.h>
#include "f2fs.h"
#include "node.h"
@@ -58,6 +59,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
goto err;
}
if (!f2fs_is_checkpoint_ready(sbi)) {
err = -ENOSPC;
goto err;
}
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -505,6 +511,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
{
int err = fscrypt_file_open(inode, filp);
if (err)
return err;
err = fsverity_file_open(inode, filp);
if (err)
return err;
@@ -790,6 +800,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;
err = fsverity_prepare_setattr(dentry, attr);
if (err)
return err;
if (is_quota_modification(inode, attr)) {
err = dquot_initialize(inode);
if (err)
@@ -820,14 +834,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
bool to_smaller = (attr->ia_size <= i_size_read(inode));
loff_t old_size = i_size_read(inode);
if (attr->ia_size > MAX_INLINE_DATA(inode)) {
/*
* should convert inline inode before i_size_write to
* keep smaller than inline_data size with inline flag.
*/
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
if (to_smaller)
if (attr->ia_size <= old_size)
err = f2fs_truncate(inode);
/*
* do not trim all blocks after i_size if target size is
@@ -835,21 +859,11 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
*/
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
inode->i_mtime = inode->i_ctime = current_time(inode);
}
down_write(&F2FS_I(inode)->i_sem);
inode->i_mtime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
up_write(&F2FS_I(inode)->i_sem);
}
@@ -1042,7 +1056,7 @@ next_dnode:
if (test_opt(sbi, LFS)) {
f2fs_put_dnode(&dn);
return -ENOTSUPP;
return -EOPNOTSUPP;
}
/* do not invalidate this block address */
@@ -1579,6 +1593,8 @@ static long f2fs_fallocate(struct file *file, int mode,
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
return -ENOSPC;
/* f2fs only support ->fallocate for regular file */
if (!S_ISREG(inode->i_mode))
@@ -1670,6 +1686,13 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (IS_NOQUOTA(inode))
return -EPERM;
if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) {
if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
if (!f2fs_empty_dir(inode))
return -ENOTEMPTY;
}
fi->i_flags = iflags | (fi->i_flags & ~mask);
if (fi->i_flags & F2FS_PROJINHERIT_FL)
@@ -1704,6 +1727,7 @@ static const struct {
{ F2FS_INDEX_FL, FS_INDEX_FL },
{ F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
{ F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
{ F2FS_CASEFOLD_FL, FS_CASEFOLD_FL },
};
#define F2FS_GETTABLE_FS_FL ( \
@@ -1717,7 +1741,9 @@ static const struct {
FS_PROJINHERIT_FL | \
FS_ENCRYPT_FL | \
FS_INLINE_DATA_FL | \
FS_NOCOW_FL)
FS_NOCOW_FL | \
FS_VERITY_FL | \
FS_CASEFOLD_FL)
#define F2FS_SETTABLE_FS_FL ( \
FS_SYNC_FL | \
@@ -1726,7 +1752,8 @@ static const struct {
FS_NODUMP_FL | \
FS_NOATIME_FL | \
FS_DIRSYNC_FL | \
FS_PROJINHERIT_FL)
FS_PROJINHERIT_FL | \
FS_CASEFOLD_FL)
/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
@@ -1762,6 +1789,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
if (IS_ENCRYPTED(inode))
fsflags |= FS_ENCRYPT_FL;
if (IS_VERITY(inode))
fsflags |= FS_VERITY_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
fsflags |= FS_INLINE_DATA_FL;
if (is_inode_flag_set(inode, FI_PIN_FILE))
@@ -1823,6 +1852,8 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
if (!inode_owner_or_capable(inode))
@@ -1831,6 +1862,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (filp->f_flags & O_DIRECT)
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1862,6 +1896,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
}
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(&fi->inmem_ilist))
list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
/* add inode in inmem_list first and set atomic_file */
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1903,11 +1943,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
goto err_out;
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
}
if (!ret)
f2fs_drop_inmem_pages(inode);
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
@@ -2196,6 +2233,49 @@ out_err:
return err;
}
/*
 * FS_IOC_GET_ENCRYPTION_POLICY_EX handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the encrypt feature,
 * otherwise the result of fscrypt_ioctl_get_policy_ex().
 */
static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
						unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_encrypt(sbi))
		return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);

	return -EOPNOTSUPP;
}
/*
 * FS_IOC_ADD_ENCRYPTION_KEY handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the encrypt feature,
 * otherwise the result of fscrypt_ioctl_add_key().
 */
static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_encrypt(sbi))
		return fscrypt_ioctl_add_key(filp, (void __user *)arg);

	return -EOPNOTSUPP;
}
/*
 * FS_IOC_REMOVE_ENCRYPTION_KEY handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the encrypt feature,
 * otherwise the result of fscrypt_ioctl_remove_key().
 */
static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_encrypt(sbi))
		return fscrypt_ioctl_remove_key(filp, (void __user *)arg);

	return -EOPNOTSUPP;
}
/*
 * FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the encrypt feature,
 * otherwise the result of fscrypt_ioctl_remove_key_all_users().
 */
static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
							unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_encrypt(sbi))
		return fscrypt_ioctl_remove_key_all_users(filp,
							  (void __user *)arg);

	return -EOPNOTSUPP;
}
/*
 * FS_IOC_GET_ENCRYPTION_KEY_STATUS handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the encrypt feature,
 * otherwise the result of fscrypt_ioctl_get_key_status().
 */
static int f2fs_ioc_get_encryption_key_status(struct file *filp,
						unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_encrypt(sbi))
		return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);

	return -EOPNOTSUPP;
}
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2250,9 +2330,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
return -EROFS;
end = range.start + range.len;
if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
end >= MAX_BLKADDR(sbi))
return -EINVAL;
}
ret = mnt_want_write_file(filp);
if (ret)
@@ -2376,8 +2456,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
map.m_lblk += map.m_len;
}
if (!fragmented)
if (!fragmented) {
total = 0;
goto out;
}
sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
@@ -2407,7 +2489,7 @@ do_map:
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
map.m_lblk = next_pgofs;
continue;
goto check;
}
set_inode_flag(inode, FI_DO_DEFRAG);
@@ -2431,8 +2513,8 @@ do_map:
}
map.m_lblk = idx;
if (idx < pg_end && cnt < blk_per_seg)
check:
if (map.m_lblk < pg_end && cnt < blk_per_seg)
goto do_map;
clear_inode_flag(inode, FI_DO_DEFRAG);
@@ -3072,10 +3154,98 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
return ret;
}
/*
 * FS_IOC_ENABLE_VERITY handler.
 *
 * Refreshes the REQ_TIME timestamp, then hands the request to
 * fsverity_ioctl_enable(). If the filesystem lacks the verity feature, a
 * warning is logged and -EOPNOTSUPP is returned instead.
 */
static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	f2fs_update_time(sbi, REQ_TIME);

	if (f2fs_sb_has_verity(sbi))
		return fsverity_ioctl_enable(filp, (const void __user *)arg);

	f2fs_warn(sbi,
		  "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n",
		  inode->i_ino);
	return -EOPNOTSUPP;
}
/*
 * FS_IOC_MEASURE_VERITY handler.
 *
 * Return: -EOPNOTSUPP when the filesystem lacks the verity feature,
 * otherwise the result of fsverity_ioctl_measure().
 */
static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));

	if (f2fs_sb_has_verity(sbi))
		return fsverity_ioctl_measure(filp, (void __user *)arg);

	return -EOPNOTSUPP;
}
/*
 * F2FS_IOC_GET_VOLUME_NAME handler: convert the superblock's UTF-16LE
 * volume name to UTF-8 and copy it to the user buffer at @arg.
 *
 * The conversion runs under sb_lock held for reading. At most FSLABEL_MAX
 * bytes are copied out, and only as many as the conversion produced.
 *
 * Return: 0 on success, -ENOMEM if the scratch buffer cannot be allocated,
 * -EFAULT if the copy to user space fails.
 */
static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	char *label;
	int nbytes;
	int ret;

	label = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
	if (!label)
		return -ENOMEM;

	down_read(&sbi->sb_lock);
	nbytes = utf16s_to_utf8s(sbi->raw_super->volume_name,
				 ARRAY_SIZE(sbi->raw_super->volume_name),
				 UTF16_LITTLE_ENDIAN, label, MAX_VOLUME_NAME);
	up_read(&sbi->sb_lock);

	ret = copy_to_user((char __user *)arg, label,
			   min(FSLABEL_MAX, nbytes)) ? -EFAULT : 0;

	kvfree(label);
	return ret;
}
/*
 * F2FS_IOC_SET_VOLUME_NAME handler: replace the superblock volume name with
 * the UTF-8 string supplied by user space at @arg.
 *
 * Requires CAP_SYS_ADMIN. The string is duplicated from user space (bounded
 * by FSLABEL_MAX), write access to the mount is taken, and under sb_lock
 * held for writing the stored name is cleared, rewritten as UTF-16LE and
 * committed with f2fs_commit_super().
 *
 * Return: 0 on success, -EPERM without the capability, or the error from
 * strndup_user(), mnt_want_write_file() or f2fs_commit_super().
 */
static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	char *label;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	label = strndup_user((const char __user *)arg, FSLABEL_MAX);
	if (IS_ERR(label))
		return PTR_ERR(label);

	ret = mnt_want_write_file(filp);
	if (!ret) {
		down_write(&sbi->sb_lock);

		memset(sbi->raw_super->volume_name, 0,
		       sizeof(sbi->raw_super->volume_name));
		utf8s_to_utf16s(label, strlen(label), UTF16_LITTLE_ENDIAN,
				sbi->raw_super->volume_name,
				ARRAY_SIZE(sbi->raw_super->volume_name));

		ret = f2fs_commit_super(sbi, false);

		up_write(&sbi->sb_lock);

		mnt_drop_write_file(filp);
	}

	kfree(label);
	return ret;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
return -EIO;
if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
return -ENOSPC;
switch (cmd) {
case F2FS_IOC_GETFLAGS:
@@ -3104,6 +3274,16 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_get_encryption_policy(filp, arg);
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
return f2fs_ioc_get_encryption_policy_ex(filp, arg);
case FS_IOC_ADD_ENCRYPTION_KEY:
return f2fs_ioc_add_encryption_key(filp, arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY:
return f2fs_ioc_remove_encryption_key(filp, arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
return f2fs_ioc_get_encryption_key_status(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
@@ -3130,6 +3310,14 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_precache_extents(filp, arg);
case F2FS_IOC_RESIZE_FS:
return f2fs_ioc_resize_fs(filp, arg);
case FS_IOC_ENABLE_VERITY:
return f2fs_ioc_enable_verity(filp, arg);
case FS_IOC_MEASURE_VERITY:
return f2fs_ioc_measure_verity(filp, arg);
case F2FS_IOC_GET_VOLUME_NAME:
return f2fs_get_volume_name(filp, arg);
case F2FS_IOC_SET_VOLUME_NAME:
return f2fs_set_volume_name(filp, arg);
default:
return -ENOTTY;
}
@@ -3146,16 +3334,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) {
ret = -EINVAL;
goto out;
}
if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode)) {
ret = -EAGAIN;
goto out;
}
} else {
inode_lock(inode);
}
@@ -3231,6 +3415,11 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_SET_ENCRYPTION_POLICY:
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
case F2FS_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
case F2FS_IOC_WRITE_CHECKPOINT:
@@ -3244,6 +3433,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
case F2FS_IOC_RESIZE_FS:
case FS_IOC_ENABLE_VERITY:
case FS_IOC_MEASURE_VERITY:
case F2FS_IOC_GET_VOLUME_NAME:
case F2FS_IOC_SET_VOLUME_NAME:
break;
default:
return -ENOIOCTLCMD;

View File

@@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
#ifdef CONFIG_F2FS_CHECK_FS
/*
* skip selecting the invalid segno (that is failed due to block
* validity check failure during GC) to avoid endless GC loop in
* such cases.
*/
if (test_bit(segno, sm->invalid_segmap))
goto next;
#endif
secno = GET_SEC_FROM_SEG(sbi, segno);
if (sec_usage_check(sbi, secno))
@@ -627,8 +637,21 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr)
if (source_blkaddr != blkaddr) {
#ifdef CONFIG_F2FS_CHECK_FS
unsigned int segno = GET_SEGNO(sbi, blkaddr);
unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
if (unlikely(check_valid_map(sbi, segno, offset))) {
if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
blkaddr, source_blkaddr, segno);
f2fs_bug_on(sbi, 1);
}
}
#endif
return false;
}
return true;
}
@@ -1303,7 +1326,7 @@ gc_more:
round++;
}
if (gc_type == FG_GC)
if (gc_type == FG_GC && seg_freed)
sbi->cur_victim_sec = NULL_SEGNO;
if (sync)

View File

@@ -14,6 +14,7 @@
#include <linux/f2fs_fs.h>
#include <linux/cryptohash.h>
#include <linux/pagemap.h>
#include <linux/unicode.h>
#include "f2fs.h"
@@ -67,7 +68,7 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
struct fscrypt_name *fname)
{
__u32 hash;
@@ -103,3 +104,37 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
return f2fs_hash;
}
/*
 * Compute the dentry hash of @name_info for directory @dir.
 *
 * @dir:       directory the name belongs to
 * @name_info: name to hash
 * @fname:     passed through unchanged to __f2fs_dentry_hash()
 *
 * With CONFIG_UNICODE, a non-empty name in a casefolded directory is first
 * case-folded with the superblock's unicode map and the folded bytes are
 * hashed. If folding fails, or the directory is not casefolded, or the name
 * is empty, the name is hashed as an opaque byte sequence. Without
 * CONFIG_UNICODE the name is always hashed as is.
 *
 * NOTE(review): when the PATH_MAX scratch buffer cannot be allocated this
 * returns -ENOMEM converted to f2fs_hash_t. The signature has no error
 * channel, so the value is indistinguishable from a hash; that behaviour is
 * kept unchanged here -- confirm how callers should handle it.
 */
f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
		const struct qstr *name_info, struct fscrypt_name *fname)
{
#ifdef CONFIG_UNICODE
	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
	const struct unicode_map *um = sbi->s_encoding;
	f2fs_hash_t hash;	/* keep the hash in its own type, not int */
	int dlen;
	unsigned char *buff;
	struct qstr folded;

	if (!name_info->len || !IS_CASEFOLDED(dir))
		goto opaque_seq;

	buff = f2fs_kzalloc(sbi, PATH_MAX, GFP_KERNEL);
	if (!buff)
		return -ENOMEM;

	dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
	if (dlen < 0) {
		/* folding failed: fall back to hashing the raw bytes */
		kvfree(buff);
		goto opaque_seq;
	}

	folded.name = buff;
	folded.len = dlen;
	hash = __f2fs_dentry_hash(&folded, fname);

	kvfree(buff);
	return hash;

opaque_seq:
#endif
	return __f2fs_dentry_hash(name_info, fname);
}

View File

@@ -149,6 +149,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
if (err) {
f2fs_truncate_data_blocks_range(dn, 1);
f2fs_put_dnode(dn);
return err;
}
@@ -338,7 +339,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
return NULL;
}
namehash = f2fs_dentry_hash(&name, fname);
namehash = f2fs_dentry_hash(dir, &name, fname);
inline_dentry = inline_data_addr(dir, ipage);
@@ -598,7 +599,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
name_hash = f2fs_dentry_hash(dir, new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -606,6 +607,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
/* we don't need to mark_inode_dirty now */
if (inode) {
f2fs_i_pino_write(inode, dir->i_ino);
/* synchronize inode page's data from inode cache */
if (is_inode_flag_set(inode, FI_NEW_INODE))
f2fs_update_inode(inode, page);
f2fs_put_page(page, 1);
}
@@ -722,7 +728,13 @@ int f2fs_inline_data_fiemap(struct inode *inode,
if (IS_ERR(ipage))
return PTR_ERR(ipage);
if (!f2fs_has_inline_data(inode)) {
if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!f2fs_has_inline_data(inode)) {
err = -EAGAIN;
goto out;
}
if (S_ISDIR(inode->i_mode) && !f2fs_has_inline_dentry(inode)) {
err = -EAGAIN;
goto out;
}

View File

@@ -46,9 +46,13 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
if (file_is_encrypt(inode))
new_fl |= S_ENCRYPTED;
if (file_is_verity(inode))
new_fl |= S_VERITY;
if (flags & F2FS_CASEFOLD_FL)
new_fl |= S_CASEFOLD;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
S_ENCRYPTED);
S_ENCRYPTED|S_VERITY|S_CASEFOLD);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -614,7 +618,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
if (f2fs_is_checkpoint_ready(sbi))
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
/*
@@ -693,7 +697,8 @@ retry:
if (err) {
f2fs_update_inode_page(inode);
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
}
sb_end_intwrite(inode->i_sb);
no_delete:
@@ -703,7 +708,7 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
else
@@ -733,6 +738,7 @@ no_delete:
}
out_clear:
fscrypt_put_encryption_info(inode);
fsverity_cleanup_inode(inode);
clear_inode(inode);
}

View File

@@ -272,9 +272,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
if (err)
@@ -321,9 +320,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = fscrypt_prepare_link(old_dentry, dir, dentry);
if (err)
@@ -489,6 +487,17 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out_iput;
}
out_splice:
#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
* from being cached.
*/
trace_f2fs_lookup_end(dir, dentry, ino, err);
return NULL;
}
#endif
new = d_splice_alias(inode, dentry);
err = PTR_ERR_OR_ZERO(new);
trace_f2fs_lookup_end(dir, dentry, ino, err);
@@ -537,6 +546,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
goto fail;
}
f2fs_delete_entry(de, page, dir, inode);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, along with returning the
* negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(dir))
@@ -571,9 +590,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
&disk_link);
@@ -703,9 +721,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
if (err)
@@ -804,6 +821,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) {
int err = fscrypt_get_encryption_info(dir);
@@ -840,9 +859,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1035,9 +1053,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = f2fs_is_checkpoint_ready(sbi);
if (err)
return err;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1250,6 +1267,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
.fiemap = f2fs_fiemap,
};
const struct inode_operations f2fs_symlink_inode_operations = {

View File

@@ -1524,7 +1524,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->sync_mode == WB_SYNC_NONE &&
if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
wbc->sync_mode == WB_SYNC_NONE &&
IS_DNODE(page) && is_cold_node(page))
goto redirty_out;
@@ -1762,6 +1763,47 @@ out:
return ret ? -EIO: 0;
}
/*
 * Match callback handed to find_inode_nowait() by flush_dirty_inode().
 *
 * @inode: candidate in-memory inode
 * @ino:   inode number being searched for
 * @data:  unused
 *
 * Accepts @inode only if its number is @ino, it carries FI_DIRTY_INODE, and
 * it is still linked on a gdirty_list (checked under the DIRTY_META inode
 * lock). A reference is taken with igrab() before accepting.
 *
 * Returns 1 with a reference held on a match, 0 otherwise.
 */
static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	bool on_dirty_list;

	if (inode->i_ino != ino || !is_inode_flag_set(inode, FI_DIRTY_INODE))
		return 0;

	spin_lock(&sbi->inode_lock[DIRTY_META]);
	on_dirty_list = !list_empty(&F2FS_I(inode)->gdirty_list);
	spin_unlock(&sbi->inode_lock[DIRTY_META]);

	if (!on_dirty_list)
		return 0;

	/* igrab() yields NULL when no reference could be taken */
	return igrab(inode) ? 1 : 0;
}
/*
 * Write the in-memory state of a dirty inode into its node page.
 *
 * @page: inode node page; the inode number is taken from ino_of_node()
 *
 * Looks up a dirty in-memory inode for that number via f2fs_match_ino().
 * When one is found, the page is refreshed with f2fs_update_inode(), the
 * page is unlocked and the inode reference is dropped.
 *
 * Returns true if the page was updated (and has been unlocked here), false
 * if no matching dirty inode was found and the page was left untouched.
 */
static bool flush_dirty_inode(struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	nid_t ino = ino_of_node(page);
	struct inode *dirty;

	dirty = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
	if (dirty == NULL)
		return false;

	f2fs_update_inode(dirty, page);
	unlock_page(page);

	/* release the reference f2fs_match_ino() took with igrab() */
	iput(dirty);
	return true;
}
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
@@ -1785,6 +1827,7 @@ next_step:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
bool submitted = false;
bool may_dirty = true;
/* give a priority to WB_SYNC threads */
if (atomic_read(&sbi->wb_sync_req[NODE]) &&
@@ -1832,6 +1875,13 @@ continue_unlock:
goto lock_node;
}
/* flush dirty inode */
if (IS_INODE(page) && may_dirty) {
may_dirty = false;
if (flush_dirty_inode(page))
goto lock_node;
}
f2fs_wait_on_page_writeback(page, NODE, true, true);
if (!clear_page_dirty_for_io(page))
@@ -1860,7 +1910,8 @@ continue_unlock:
}
if (step < 2) {
if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
wbc->sync_mode == WB_SYNC_NONE && step == 1)
goto out;
step++;
goto next_step;
@@ -2964,7 +3015,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
F2FS_RESERVED_NODE_NUM;
nm_i->nid_cnt[FREE_NID] = 0;
nm_i->nid_cnt[PREALLOC_NID] = 0;
nm_i->nat_cnt = 0;

View File

@@ -185,8 +185,6 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
f2fs_trace_pid(page);
@@ -200,15 +198,11 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
INIT_LIST_HEAD(&new->list);
/* increase reference count with clean state */
mutex_lock(&fi->inmem_lock);
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(&fi->inmem_ilist))
list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_lock(&F2FS_I(inode)->inmem_lock);
list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
mutex_unlock(&fi->inmem_lock);
mutex_unlock(&F2FS_I(inode)->inmem_lock);
trace_f2fs_register_inmem_page(page, INMEM);
}
@@ -330,19 +324,17 @@ void f2fs_drop_inmem_pages(struct inode *inode)
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, true);
if (list_empty(&fi->inmem_pages)) {
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
mutex_unlock(&fi->inmem_lock);
}
clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -471,11 +463,6 @@ int f2fs_commit_inmem_pages(struct inode *inode)
mutex_lock(&fi->inmem_lock);
err = __f2fs_commit_inmem_pages(inode);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -501,7 +488,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi);
if (f2fs_is_checkpoint_ready(sbi))
if (!f2fs_is_checkpoint_ready(sbi))
return;
/*
@@ -817,9 +804,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
if (get_valid_blocks(sbi, segno, true) == 0)
if (get_valid_blocks(sbi, segno, true) == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
}
}
}
@@ -2084,6 +2075,13 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
f2fs_stop_discard_thread(sbi);
/*
* Recovery can cache discard commands, so in error path of
* fill_super(), it needs to give a chance to handle them.
*/
if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
f2fs_issue_discard_timeout(sbi);
kvfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2156,9 +2154,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
/* don't overwrite by SSR to keep node chain */
if (IS_NODESEG(se->type) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
/*
* SSR should never reuse block which is checkpointed
* or newly invalidated.
*/
if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks++;
}
@@ -3116,12 +3116,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
f2fs_inode_chksum_set(sbi, page);
}
if (F2FS_IO_ALIGNED(sbi))
fio->retry = false;
if (add_list) {
struct f2fs_bio_info *io;
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
fio->retry = false;
io = sbi->write_io[fio->type] + fio->temp;
spin_lock(&io->io_lock);
list_add_tail(&fio->list, &io->io_list);
@@ -3936,8 +3938,8 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap;
unsigned int bitmap_size;
char *src_bitmap, *bitmap;
unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
/* allocate memory for SIT information */
sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -3953,33 +3955,37 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
if (!sit_i->sentries)
return -ENOMEM;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
#else
bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
#endif
sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
if (!sit_i->bitmap)
return -ENOMEM;
bitmap = sit_i->bitmap;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
sit_i->sentries[start].cur_valid_map
= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
sit_i->sentries[start].ckpt_valid_map
= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].cur_valid_map ||
!sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
sit_i->sentries[start].cur_valid_map = bitmap;
bitmap += SIT_VBLOCK_MAP_SIZE;
sit_i->sentries[start].ckpt_valid_map = bitmap;
bitmap += SIT_VBLOCK_MAP_SIZE;
#ifdef CONFIG_F2FS_CHECK_FS
sit_i->sentries[start].cur_valid_map_mir
= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].cur_valid_map_mir)
return -ENOMEM;
sit_i->sentries[start].cur_valid_map_mir = bitmap;
bitmap += SIT_VBLOCK_MAP_SIZE;
#endif
sit_i->sentries[start].discard_map
= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
GFP_KERNEL);
if (!sit_i->sentries[start].discard_map)
return -ENOMEM;
sit_i->sentries[start].discard_map = bitmap;
bitmap += SIT_VBLOCK_MAP_SIZE;
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -3999,17 +4005,23 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
/* setup SIT bitmap from checkpoint pack */
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
sit_bitmap_size, GFP_KERNEL);
if (!sit_i->sit_bitmap_mir)
return -ENOMEM;
sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
main_bitmap_size, GFP_KERNEL);
if (!sit_i->invalid_segmap)
return -ENOMEM;
#endif
/* init SIT information */
@@ -4018,7 +4030,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = 0;
sit_i->bitmap_size = bitmap_size;
sit_i->bitmap_size = sit_bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
@@ -4156,7 +4168,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (start >= MAIN_SEGS(sbi)) {
f2fs_err(sbi, "Wrong journal entry on segno %u",
start);
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EFSCORRUPTED;
break;
}
@@ -4196,7 +4207,6 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (!err && total_node_blocks != valid_node_count(sbi)) {
f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
total_node_blocks, valid_node_count(sbi));
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EFSCORRUPTED;
}
@@ -4487,21 +4497,12 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int start;
if (!sit_i)
return;
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
kvfree(sit_i->sentries[start].cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
kvfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
kvfree(sit_i->sentries[start].ckpt_valid_map);
kvfree(sit_i->sentries[start].discard_map);
}
}
if (sit_i->sentries)
kvfree(sit_i->bitmap);
kvfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
@@ -4512,6 +4513,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
kvfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
kvfree(sit_i);
}

View File

@@ -226,9 +226,13 @@ struct sit_info {
block_t sit_base_addr; /* start block address of SIT area */
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *bitmap; /* all bitmaps pointer */
char *sit_bitmap; /* SIT bitmap pointer */
#ifdef CONFIG_F2FS_CHECK_FS
char *sit_bitmap_mir; /* SIT bitmap mirror */
/* bitmap of segments to be ignored by GC in case of errors */
unsigned long *invalid_segmap;
#endif
unsigned int bitmap_size; /* SIT bitmap size */
@@ -582,13 +586,13 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
reserved_sections(sbi) + needed);
}
static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
{
if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
return true;
if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
return 0;
return -ENOSPC;
return true;
return false;
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)

View File

@@ -23,6 +23,7 @@
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>
#include <linux/quota.h>
#include <linux/unicode.h>
#include "f2fs.h"
#include "node.h"
@@ -222,6 +223,36 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
va_end(args);
}
#ifdef CONFIG_UNICODE
/*
 * Table of filename encodings that a superblock may request.
 * f2fs_sb_read_encoding() searches it by @magic.
 */
static const struct f2fs_sb_encodings {
/* value compared against the CPU-endian superblock s_encoding field */
__u16 magic;
/* charset name, used in mount-time log messages */
char *name;
/* version string, handed to utf8_load() at mount time */
char *version;
} f2fs_sb_encoding_map[] = {
{F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
};
/*
 * Resolve the encoding recorded in an on-disk superblock.
 *
 * @sb:       raw superblock to read s_encoding / s_encoding_flags from
 * @encoding: out: matching entry of f2fs_sb_encoding_map
 * @flags:    out: CPU-endian copy of s_encoding_flags
 *
 * Returns 0 and fills both outputs when the encoding magic is known,
 * -EINVAL (outputs untouched) when it is not.
 */
static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
				 const struct f2fs_sb_encodings **encoding,
				 __u16 *flags)
{
	__u16 magic = le16_to_cpu(sb->s_encoding);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(f2fs_sb_encoding_map); idx++) {
		if (f2fs_sb_encoding_map[idx].magic != magic)
			continue;

		*encoding = &f2fs_sb_encoding_map[idx];
		*flags = le16_to_cpu(sb->s_encoding_flags);
		return 0;
	}

	return -EINVAL;
}
#endif
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
block_t limit = min((sbi->user_block_count << 1) / 1000,
@@ -798,6 +829,13 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
}
#endif
#ifndef CONFIG_UNICODE
if (f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
@@ -873,7 +911,21 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
static int f2fs_drop_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
/*
* during filesystem shutdown, if checkpoint is disabled,
* drop useless meta/node dirty pages.
*/
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi)) {
trace_f2fs_drop_inode(inode, 1);
return 1;
}
}
/*
* This is to avoid a deadlock condition like below.
* writeback_single_inode(inode)
@@ -913,6 +965,8 @@ static int f2fs_drop_inode(struct inode *inode)
return 0;
}
ret = generic_drop_inode(inode);
if (!ret)
ret = fscrypt_drop_inode(inode);
trace_f2fs_drop_inode(inode, ret);
return ret;
}
@@ -1097,6 +1151,9 @@ static void f2fs_put_super(struct super_block *sb)
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
kvfree(sbi);
}
@@ -1222,8 +1279,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
else
buf->f_bavail = 0;
avail_node_count = sbi->total_node_count - sbi->nquota_files -
F2FS_RESERVED_NODE_NUM;
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
if (avail_node_count > user_block_count) {
buf->f_files = user_block_count;
@@ -1530,6 +1586,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
@@ -1609,6 +1666,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
err = -EINVAL;
f2fs_warn(sbi, "switch io_bits option is not allowed");
goto restore_opts;
}
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -1987,6 +2050,12 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
struct inode *inode;
int err;
/* if quota sysfile exists, deny enabling quota with specific file */
if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) {
f2fs_err(F2FS_SB(sb), "quota sysfile already exists");
return -EBUSY;
}
err = f2fs_quota_sync(sb, type);
if (err)
return err;
@@ -2006,7 +2075,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
return 0;
}
static int f2fs_quota_off(struct super_block *sb, int type)
static int __f2fs_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
int err;
@@ -2032,13 +2101,30 @@ out_put:
return err;
}
/*
 * Turn quota of the given @type off on @sb.
 *
 * Delegates the actual work to __f2fs_quota_off() and returns its result.
 * Independently of that result, SBI_QUOTA_NEED_REPAIR is set whenever
 * journalled quota is in use.
 */
static int f2fs_quota_off(struct super_block *sb, int type)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int ret = __f2fs_quota_off(sb, type);

	/*
	 * Journalled quota can be shut down through quotactl; updates made
	 * afterwards leave the quota records inconsistent with the fs data.
	 * Tag the flag so that fsck is aware of it.
	 */
	if (is_journalled_quota(sbi))
		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);

	return ret;
}
void f2fs_quota_off_umount(struct super_block *sb)
{
int type;
int err;
for (type = 0; type < MAXQUOTAS; type++) {
err = f2fs_quota_off(sb, type);
err = __f2fs_quota_off(sb, type);
if (err) {
int ret = dquot_quota_off(sb, type);
@@ -2623,8 +2709,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
}
valid_node_count = le32_to_cpu(ckpt->valid_node_count);
avail_node_count = sbi->total_node_count - sbi->nquota_files -
F2FS_RESERVED_NODE_NUM;
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
if (valid_node_count > avail_node_count) {
f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
valid_node_count, avail_node_count);
@@ -2666,7 +2751,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u",
f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u",
i, j,
le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
@@ -3042,6 +3127,53 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
return 0;
}
/*
 * Set up case-insensitive lookup support for a filesystem being mounted.
 *
 * With CONFIG_UNICODE: if the superblock has the casefold feature and no
 * encoding has been loaded yet, resolve the encoding named by the
 * superblock, load its unicode map, and record it (with its flags) in @sbi;
 * the superblock's dentry operations are switched to f2fs_dentry_ops.
 * Without CONFIG_UNICODE: a superblock with the casefold feature is refused.
 *
 * Returns 0 when nothing needs doing or setup succeeded, -EINVAL for an
 * unsupported combination or unknown encoding, or the error from
 * utf8_load().
 */
static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_UNICODE
/* skip when casefold is not enabled or the map is already loaded */
if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
const struct f2fs_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
/* casefold together with the encrypt feature is rejected here */
if (f2fs_sb_has_encrypt(sbi)) {
f2fs_err(sbi,
"Can't mount with encoding and encryption");
return -EINVAL;
}
/* map the on-disk encoding magic to a table entry and read the flags */
if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
&encoding_flags)) {
f2fs_err(sbi,
"Encoding requested by superblock is unknown");
return -EINVAL;
}
/* load the unicode map for the version string of the table entry */
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
f2fs_err(sbi,
"can't mount with superblock charset: %s-%s "
"not supported by the kernel. flags: 0x%x.",
encoding_info->name, encoding_info->version,
encoding_flags);
return PTR_ERR(encoding);
}
/*
 * "?:" substitutes "\b" if the version string were NULL.
 * NOTE(review): the only table entry visible here has a non-NULL
 * version, so the fallback looks unreachable -- confirm.
 */
f2fs_info(sbi, "Using encoding defined by superblock: "
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
/* publish the loaded map; this function does not unload it */
sbi->s_encoding = encoding;
sbi->s_encoding_flags = encoding_flags;
sbi->sb->s_d_op = &f2fs_dentry_ops;
}
#else
/* no unicode support built in: a casefold filesystem cannot be mounted */
if (f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
#endif
return 0;
}
static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_i = SM_I(sbi);
@@ -3138,6 +3270,10 @@ try_onemore:
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
err = f2fs_setup_casefold(sbi);
if (err)
goto free_options;
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
sb->s_qcop = &f2fs_quotactl_ops;
@@ -3154,6 +3290,9 @@ try_onemore:
sb->s_op = &f2fs_sops;
#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &f2fs_cryptops;
#endif
#ifdef CONFIG_FS_VERITY
sb->s_vop = &f2fs_verityops;
#endif
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
@@ -3213,7 +3352,7 @@ try_onemore:
if (err)
goto free_bio_info;
if (F2FS_IO_SIZE(sbi) > 1) {
if (F2FS_IO_ALIGNED(sbi)) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
if (!sbi->write_io_dummy) {
@@ -3488,6 +3627,10 @@ free_percpu:
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)

View File

@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
#include <linux/unicode.h>
#include "f2fs.h"
#include "segment.h"
@@ -81,6 +82,19 @@ static ssize_t unusable_show(struct f2fs_attr *a,
(unsigned long long)unusable);
}
/*
 * sysfs show handler for the "encoding" attribute.
 *
 * @a:   attribute descriptor (unused)
 * @sbi: filesystem instance
 * @buf: PAGE_SIZE output buffer
 *
 * With CONFIG_UNICODE and the casefold feature present, prints the charset
 * name and the major.minor.revision triple unpacked from the loaded unicode
 * map's version word. Otherwise prints "(none)". Both forms end with a
 * newline so that the attribute's output is terminated consistently.
 *
 * Returns the value of snprintf().
 */
static ssize_t encoding_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
#ifdef CONFIG_UNICODE
	if (f2fs_sb_has_casefold(sbi))
		return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
			sbi->s_encoding->charset,
			(sbi->s_encoding->version >> 16) & 0xff,
			(sbi->s_encoding->version >> 8) & 0xff,
			sbi->s_encoding->version & 0xff);
#endif
	/* newline added to match the casefold branch above */
	return snprintf(buf, PAGE_SIZE, "(none)\n");
}
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
@@ -131,9 +145,15 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
if (f2fs_sb_has_verity(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "verity");
if (f2fs_sb_has_sb_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "sb_checksum");
if (f2fs_sb_has_casefold(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "casefold");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
@@ -364,7 +384,9 @@ enum feat_id {
FEAT_QUOTA_INO,
FEAT_INODE_CRTIME,
FEAT_LOST_FOUND,
FEAT_VERITY,
FEAT_SB_CHECKSUM,
FEAT_CASEFOLD,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -381,7 +403,9 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_QUOTA_INO:
case FEAT_INODE_CRTIME:
case FEAT_LOST_FOUND:
case FEAT_VERITY:
case FEAT_SB_CHECKSUM:
case FEAT_CASEFOLD:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -455,6 +479,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -470,7 +495,11 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
#ifdef CONFIG_FS_VERITY
F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
#endif
F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -515,6 +544,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
ATTR_LIST(current_reserved_blocks),
ATTR_LIST(encoding),
NULL,
};
@@ -533,7 +563,11 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(quota_ino),
ATTR_LIST(inode_crtime),
ATTR_LIST(lost_found),
#ifdef CONFIG_FS_VERITY
ATTR_LIST(verity),
#endif
ATTR_LIST(sb_checksum),
ATTR_LIST(casefold),
NULL,
};

247
fs/f2fs/verity.c Normal file
View File

@@ -0,0 +1,247 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/verity.c: fs-verity support for f2fs
*
* Copyright 2019 Google LLC
*/
/*
* Implementation of fsverity_operations for f2fs.
*
* Like ext4, f2fs stores the verity metadata (Merkle tree and
* fsverity_descriptor) past the end of the file, starting at the first 64K
* boundary beyond i_size. This approach works because (a) verity files are
* readonly, and (b) pages fully beyond i_size aren't visible to userspace but
* can be read/written internally by f2fs with only some relatively small
* changes to f2fs. Extended attributes cannot be used because (a) f2fs limits
* the total size of an inode's xattr entries to 4096 bytes, which wouldn't be
* enough for even a single Merkle tree block, and (b) f2fs encryption doesn't
* encrypt xattrs, yet the verity metadata *must* be encrypted when the file is
* because it contains hashes of the plaintext data.
*
* Using a 64K boundary rather than a 4K one keeps things ready for
* architectures with 64K pages, and it doesn't necessarily waste space on-disk
* since there can be a hole between i_size and the start of the Merkle tree.
*/
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "xattr.h"
/*
 * Byte offset at which the verity metadata starts: i_size rounded up to the
 * next 64K (65536-byte) boundary.
 */
static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
{
	const loff_t data_end = inode->i_size;

	return round_up(data_end, 65536);
}
/*
 * Copy @count bytes of verity metadata, starting at file offset @pos, from
 * the inode's page cache into @buf.  __vfs_read() can't be used because the
 * data lives beyond i_size.
 *
 * Returns 0 on success, or the error from read_mapping_page().
 */
static int pagecache_read(struct inode *inode, void *buf, size_t count,
			  loff_t pos)
{
	for (; count != 0; ) {
		/* Never cross a page boundary within a single copy. */
		const size_t in_page = offset_in_page(pos);
		const size_t chunk = min_t(size_t, count, PAGE_SIZE - in_page);
		struct page *page;
		void *kaddr;

		page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
					 NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);

		kaddr = kmap_atomic(page);
		memcpy(buf, kaddr + in_page, chunk);
		kunmap_atomic(kaddr);

		put_page(page);

		count -= chunk;
		pos += chunk;
		buf += chunk;
	}

	return 0;
}
/*
 * Copy @count bytes of verity metadata from @buf into the inode's page cache
 * at file offset @pos, on behalf of FS_IOC_ENABLE_VERITY.  kernel_write()
 * can't be used because the file descriptor is readonly.
 *
 * Returns 0 on success, -EFBIG if the write would end beyond s_maxbytes,
 * -EIO on a short write, or the error from pagecache_write_begin() /
 * pagecache_write_end().
 */
static int pagecache_write(struct inode *inode, const void *buf, size_t count,
			   loff_t pos)
{
	if (pos + count > inode->i_sb->s_maxbytes)
		return -EFBIG;

	for (; count != 0; ) {
		/* Each iteration writes at most up to the end of one page. */
		const size_t in_page = offset_in_page(pos);
		const size_t n = min_t(size_t, count, PAGE_SIZE - in_page);
		struct page *page;
		void *fsdata;
		void *kaddr;
		int res;

		res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
					    &page, &fsdata);
		if (res)
			return res;

		kaddr = kmap_atomic(page);
		memcpy(kaddr + in_page, buf, n);
		kunmap_atomic(kaddr);

		res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
					  page, fsdata);
		if (res < 0)
			return res;
		if (res != n)
			return -EIO;

		count -= n;
		pos += n;
		buf += n;
	}

	return 0;
}
/*
 * Format of f2fs verity xattr. This points to the location of the verity
 * descriptor within the file data rather than containing it directly because
 * the verity descriptor *must* be encrypted when f2fs encryption is used. But,
 * f2fs encryption does not encrypt xattrs.
 *
 * All fields are stored little-endian.
 */
struct fsverity_descriptor_location {
/* Format version; written as 1 and any other value is rejected on read. */
__le32 version;
/* Size of the verity descriptor, in bytes. */
__le32 size;
/* Byte offset of the verity descriptor within the file. */
__le64 pos;
};
/*
 * ->begin_enable_verity(): prepare the inode for receiving verity metadata.
 *
 * Returns -EBUSY if enabling is already in progress on this inode,
 * -EOPNOTSUPP for atomic or volatile files, the error from quota setup or
 * inline-data conversion, or 0 after setting FI_VERITY_IN_PROGRESS.
 */
static int f2fs_begin_enable_verity(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	int err;

	if (f2fs_verity_in_progress(inode))
		return -EBUSY;

	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
		return -EOPNOTSUPP;

	/*
	 * The file was opened readonly, so ->open() cannot be relied on to
	 * have initialized the quotas; do it here.  This has to precede the
	 * eviction of the inline data below.
	 */
	err = dquot_initialize(inode);
	if (!err)
		err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	set_inode_flag(inode, FI_VERITY_IN_PROGRESS);
	return 0;
}
/*
 * ->end_enable_verity(): finish (or abort) enabling verity on a file.
 *
 * @filp:             the file verity is being enabled on
 * @desc:             the verity descriptor to append, or NULL if building the
 *                    Merkle tree failed and only clean-up is needed
 * @desc_size:        size of @desc in bytes
 * @merkle_tree_size: size of the Merkle tree already written, in bytes
 *
 * On success the descriptor is written right after the Merkle tree, all
 * pages are flushed, the verity xattr recording the descriptor location is
 * created and the verity inode flag is set.
 *
 * On any failure -- including a failure to set the xattr, which previously
 * left the metadata behind -- everything written past i_size is truncated.
 *
 * Returns 0 on success, otherwise the first error encountered (or the
 * truncation error if clean-up itself failed).
 */
static int f2fs_end_enable_verity(struct file *filp, const void *desc,
				  size_t desc_size, u64 merkle_tree_size)
{
	struct inode *inode = file_inode(filp);
	u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
	struct fsverity_descriptor_location dloc = {
		.version = cpu_to_le32(1),
		.size = cpu_to_le32(desc_size),
		.pos = cpu_to_le64(desc_pos),
	};
	int err = 0;
	int err2;

	/* desc == NULL signals that an error already occurred. */
	if (desc == NULL)
		goto cleanup;

	/* Succeeded so far; append the verity descriptor. */
	err = pagecache_write(inode, desc, desc_size, desc_pos);
	if (err)
		goto cleanup;

	/* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
	err = filemap_write_and_wait(inode->i_mapping);
	if (err)
		goto cleanup;

	/* Record where the descriptor lives. */
	err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
			    F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
			    NULL, XATTR_CREATE);
	if (err)
		goto cleanup;

	/* Finally, mark the inode as a verity file. */
	file_set_verity(inode);
	f2fs_set_inode_flags(inode);
	f2fs_mark_inode_dirty_sync(inode, true);

	clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
	return 0;

cleanup:
	/*
	 * Verity was not enabled: discard anything we wrote past i_size.  The
	 * metadata was written through the page cache, so drop the cached
	 * pages as well as the on-disk blocks.
	 *
	 * NOTE(review): upstream additionally holds i_gc_rwsem[WRITE] around
	 * this truncation to keep garbage collection from re-instantiating
	 * the pages -- confirm whether that is needed in this tree.
	 */
	truncate_inode_pages(inode->i_mapping, inode->i_size);
	err2 = f2fs_truncate(inode);
	clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
	return err ? err : err2;
}
/*
 * ->get_verity_descriptor(): look up the verity descriptor via the verity
 * xattr and, when @buf_size is non-zero, copy it into @buf.
 *
 * Returns the descriptor size on success.  Returns -EINVAL for an xattr of
 * unexpected size or version, -EFSCORRUPTED for an implausible location,
 * -ERANGE if @buf is too small, or an error from the xattr/page-cache read.
 */
static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
				      size_t buf_size)
{
	struct fsverity_descriptor_location dloc;
	u64 pos, end;
	u32 size;
	int res;

	/* Fetch the descriptor location from the xattr. */
	res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY,
			    F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
	if (res < 0 && res != -ERANGE)
		return res;
	if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
		f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
		return -EINVAL;
	}

	size = le32_to_cpu(dloc.size);
	pos = le64_to_cpu(dloc.pos);
	end = pos + size;

	/*
	 * Sanity-check the location: no wrap-around, within the maximum file
	 * size, not before the metadata area, and a size that fits in an int.
	 */
	if (end < pos || end > inode->i_sb->s_maxbytes ||
	    pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
		f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
		return -EFSCORRUPTED;
	}

	/* A zero buf_size means the caller only wants the size. */
	if (!buf_size)
		return size;

	if (size > buf_size)
		return -ERANGE;

	res = pagecache_read(inode, buf, size, pos);
	if (res)
		return res;

	return size;
}
/*
 * ->read_merkle_tree_page(): read page @index of the Merkle tree, which is
 * stored in the file's mapping starting at the verity metadata position.
 */
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index)
{
	const pgoff_t first_tree_page =
		f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;

	return read_mapping_page(inode->i_mapping, index + first_tree_page,
				 NULL);
}
/*
 * ->write_merkle_tree_block(): store Merkle tree block @index (of size
 * 1 << @log_blocksize bytes) at its offset within the verity metadata area.
 */
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
					u64 index, int log_blocksize)
{
	loff_t pos = f2fs_verity_metadata_pos(inode);

	pos += index << log_blocksize;
	return pagecache_write(inode, buf, 1 << log_blocksize, pos);
}
/*
 * fs-verity callbacks implemented by f2fs. The superblock setup code
 * installs this table as sb->s_vop when CONFIG_FS_VERITY is enabled.
 */
const struct fsverity_operations f2fs_verityops = {
.begin_enable_verity = f2fs_begin_enable_verity,
.end_enable_verity = f2fs_end_enable_verity,
.get_verity_descriptor = f2fs_get_verity_descriptor,
.read_merkle_tree_page = f2fs_read_merkle_tree_page,
.write_merkle_tree_block = f2fs_write_merkle_tree_block,
};

View File

@@ -21,6 +21,7 @@
#include <linux/posix_acl_xattr.h>
#include "f2fs.h"
#include "xattr.h"
#include "segment.h"
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
@@ -729,6 +730,11 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(inode);
if (err)
return err;

View File

@@ -34,8 +34,10 @@
#define F2FS_XATTR_INDEX_ADVISE 7
/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */
#define F2FS_XATTR_INDEX_ENCRYPTION 9
#define F2FS_XATTR_INDEX_VERITY 11
#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
#define F2FS_XATTR_NAME_VERITY "v"
struct f2fs_xattr_header {
__le32 h_magic; /* magic number for identification */

View File

@@ -32,6 +32,7 @@
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
@@ -288,6 +289,7 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
security_sb_free(s);
fscrypt_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);

View File

@@ -205,6 +205,21 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
#endif
}
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
return fscrypt_ioctl_get_policy_ex(file, (void __user *)arg);
case FS_IOC_ADD_ENCRYPTION_KEY:
return fscrypt_ioctl_add_key(file, (void __user *)arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY:
return fscrypt_ioctl_remove_key(file, (void __user *)arg);
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
return fscrypt_ioctl_remove_key_all_users(file,
(void __user *)arg);
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
return fscrypt_ioctl_get_key_status(file, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -222,6 +237,11 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case FS_IOC_SET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
break;
default:
return -ENOIOCTLCMD;

View File

@@ -336,6 +336,16 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
/*
 * ->drop_inode(): drop the inode if the generic policy says so; otherwise
 * defer the decision to fscrypt.
 */
static int ubifs_drop_inode(struct inode *inode)
{
	int generic = generic_drop_inode(inode);

	if (generic)
		return generic;

	return fscrypt_drop_inode(inode);
}
static void ubifs_evict_inode(struct inode *inode)
{
int err;
@@ -1925,6 +1935,7 @@ const struct super_operations ubifs_super_operations = {
.destroy_inode = ubifs_destroy_inode,
.put_super = ubifs_put_super,
.write_inode = ubifs_write_inode,
.drop_inode = ubifs_drop_inode,
.evict_inode = ubifs_evict_inode,
.statfs = ubifs_statfs,
.dirty_inode = ubifs_dirty_inode,

2
fs/unicode/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
mkutf8data
utf8data.h

13
fs/unicode/Kconfig Normal file
View File

@@ -0,0 +1,13 @@
#
# UTF-8 normalization
#
config UNICODE
bool "UTF-8 normalization and casefolding support"
help
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
depends on UNICODE
default n

38
fs/unicode/Makefile Normal file
View File

@@ -0,0 +1,38 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_UNICODE) += unicode.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
unicode-y := utf8-norm.o utf8-core.o
$(obj)/utf8-norm.o: $(obj)/utf8data.h
# In the normal build, the checked-in utf8data.h is just shipped.
#
# To generate utf8data.h from UCD, put *.txt files in this directory
# and pass REGENERATE_UTF8DATA=1 from the command line.
ifdef REGENERATE_UTF8DATA
quiet_cmd_utf8data = GEN $@
cmd_utf8data = $< \
-a $(srctree)/$(src)/DerivedAge.txt \
-c $(srctree)/$(src)/DerivedCombiningClass.txt \
-p $(srctree)/$(src)/DerivedCoreProperties.txt \
-d $(srctree)/$(src)/UnicodeData.txt \
-f $(srctree)/$(src)/CaseFolding.txt \
-n $(srctree)/$(src)/NormalizationCorrections.txt \
-t $(srctree)/$(src)/NormalizationTest.txt \
-o $@
$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
$(call if_changed,utf8data)
else
$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
$(call if_changed,shipped)
endif
targets += utf8data.h
hostprogs-y += mkutf8data

View File

@@ -0,0 +1,57 @@
The utf8data.h file in this directory is generated from the Unicode
Character Database for version 12.1.0 of the Unicode standard.
The full set of files can be found here:
http://www.unicode.org/Public/12.1.0/ucd/
Individual source links:
https://www.unicode.org/Public/12.1.0/ucd/CaseFolding.txt
https://www.unicode.org/Public/12.1.0/ucd/DerivedAge.txt
https://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedCombiningClass.txt
https://www.unicode.org/Public/12.1.0/ucd/DerivedCoreProperties.txt
https://www.unicode.org/Public/12.1.0/ucd/NormalizationCorrections.txt
https://www.unicode.org/Public/12.1.0/ucd/NormalizationTest.txt
https://www.unicode.org/Public/12.1.0/ucd/UnicodeData.txt
md5sums (verify by running "md5sum -c README.utf8data"):
900e76da1d822a160fd6b8c0b1d70094 CaseFolding.txt
131256380bff4fea8ad4a851616f2f10 DerivedAge.txt
e731a4089b30002144e107e3d6f8d1fa DerivedCombiningClass.txt
a47c9fbd7ff92a9b261ba9831e68778a DerivedCoreProperties.txt
fcab6dad15e440879d92f315978f93d3 NormalizationCorrections.txt
f9ff1c55a60decf436100f791b44aa98 NormalizationTest.txt
755f6af699f8c8d2d958da411f78f6c6 UnicodeData.txt
sha1sums (verify by running "sha1sum -c README.utf8data"):
dc9245f6803c4ac99555c361f5052e0b13eb779b CaseFolding.txt
3281104f237184cdb5d869e86eb8573678ada7da DerivedAge.txt
2f5f995ccb96e0fa84b15151b35d5e2681535175 DerivedCombiningClass.txt
5b8698a3fcd5018e1987f296b02e2c17e696415e DerivedCoreProperties.txt
cd83935fbc012345d8792d2c704f69497e753835 NormalizationCorrections.txt
ea419aae505b337b0d99a83fa83fe58ddff7c19f NormalizationTest.txt
dc973c0fc93d6f09d9ab9f70d1c9f89c447f0526 UnicodeData.txt
To update to a newer version of the Unicode standard, download the
latest released version of the UCD, which can be found here:
http://www.unicode.org/Public/UCD/latest/
Then, build under fs/unicode/ with REGENERATE_UTF8DATA=1:
make REGENERATE_UTF8DATA=1 fs/unicode/
After sanity checking the newly generated utf8data.h file (the
version generated from the 12.1.0 UCD should be 4,109 lines long, and
have a total size of 324k) and/or comparing it with the older version
of utf8data.h_shipped, rename it to utf8data.h_shipped.
If you are a kernel developer updating to a newer version of the
Unicode Character Database, please update this README.utf8data file
with the version of the UCD that was used, the md5sum and sha1sums of
the *.txt files, before checking in the new versions of the utf8data.h
and README.utf8data files.

3419
fs/unicode/mkutf8data.c Normal file

File diff suppressed because it is too large Load Diff

215
fs/unicode/utf8-core.c Normal file
View File

@@ -0,0 +1,215 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/errno.h>
#include <linux/unicode.h>
#include "utf8n.h"
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{
const struct utf8data *data = utf8nfdi(um->version);
if (utf8nlen(data, str->name, str->len) < 0)
return -1;
return 0;
}
EXPORT_SYMBOL(utf8_validate);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;
do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);
if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);
return 0;
}
EXPORT_SYMBOL(utf8_strncmp);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
return -EINVAL;
do {
c1 = utf8byte(&cur1);
c2 = utf8byte(&cur2);
if (c1 < 0 || c2 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);
return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp);
/*
 * Compare an already casefolded string against the casefolded form of @s1.
 *
 * @um: unicode map selecting the Unicode version
 * @cf: string expected to be valid, already casefolded UTF-8
 * @s1: string to casefold on the fly and compare against @cf
 *
 * Returns 0 if they are equal, 1 if they differ, and -EINVAL if @s1 cannot
 * be set up for or fails casefolding.
 */
int utf8_strncasecmp_folded(const struct unicode_map *um,
			    const struct qstr *cf,
			    const struct qstr *s1)
{
	const struct utf8data *data = utf8nfdicf(um->version);
	struct utf8cursor cur1;
	int c1, c2;
	unsigned int i = 0;

	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
		return -EINVAL;

	do {
		c1 = utf8byte(&cur1);
		/*
		 * Never read beyond cf->len: the qstr is not guaranteed to be
		 * NUL-terminated, so treat its end as the terminator.
		 */
		c2 = i < cf->len ? cf->name[i++] : 0;
		if (c1 < 0)
			return -EINVAL;
		if (c1 != c2)
			return 1;
	} while (c1);

	return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp_folded);
/*
 * Write the casefolded (NFD+CF) form of @str, including its terminating
 * NUL, into @dest, which holds @dlen bytes.
 *
 * Returns the length of the result excluding the NUL, or -EINVAL if the
 * input is invalid or the result does not fit in @dlen bytes.
 */
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
		  unsigned char *dest, size_t dlen)
{
	const struct utf8data *data = utf8nfdicf(um->version);
	struct utf8cursor cur;
	size_t out = 0;

	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
		return -EINVAL;

	while (out < dlen) {
		const int byte = utf8byte(&cur);

		dest[out] = byte;
		if (byte == 0)
			return out;
		if (byte == -1)
			return -EINVAL;
		out++;
	}

	/* Ran out of room before reaching the terminator. */
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_casefold);
/*
 * Write the NFD-normalized form of @str, including its terminating NUL,
 * into @dest, which holds @dlen bytes.
 *
 * Returns the length of the result excluding the NUL, or -EINVAL if the
 * input is invalid or the result does not fit in @dlen bytes.
 */
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
		   unsigned char *dest, size_t dlen)
{
	const struct utf8data *data = utf8nfdi(um->version);
	struct utf8cursor cur;
	ssize_t out = 0;

	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
		return -EINVAL;

	while (out < dlen) {
		const int byte = utf8byte(&cur);

		dest[out] = byte;
		if (byte == 0)
			return out;
		if (byte == -1)
			return -EINVAL;
		out++;
	}

	/* Ran out of room before reaching the terminator. */
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_normalize);
/*
 * Parse a "<maj>.<min>.<rev>" version string.
 *
 * @version: NUL-terminated version string
 * @maj, @min, @rev: receive the three parsed components
 *
 * Returns 0 on success, or -EINVAL if the string is too long for the local
 * buffer or does not match the expected format.
 */
static int utf8_parse_version(const char *version, unsigned int *maj,
			      unsigned int *min, unsigned int *rev)
{
	substring_t args[3];
	char version_string[12];
	static const struct match_token token[] = {
		{1, "%d.%d.%d"},
		{0, NULL}
	};

	/*
	 * strncpy() would leave the buffer unterminated for inputs of 12 or
	 * more characters; strscpy() always terminates and reports the
	 * truncation, which we treat as a malformed version.
	 */
	if (strscpy(version_string, version, sizeof(version_string)) < 0)
		return -EINVAL;

	if (match_token(version_string, token, args) != 1)
		return -EINVAL;

	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
	    match_int(&args[2], rev))
		return -EINVAL;

	return 0;
}
/*
 * Allocate a unicode_map for the requested UTF-8 version.
 *
 * @version: "<maj>.<min>.<rev>" string, or NULL to use the latest supported
 *           version (a warning is logged in that case)
 *
 * Returns the new map, which the caller releases with utf8_unload(), or an
 * ERR_PTR: -EINVAL for a malformed or unsupported version, -ENOMEM if the
 * allocation fails.
 */
struct unicode_map *utf8_load(const char *version)
{
	struct unicode_map *um = NULL;
	int unicode_version;

	if (version) {
		unsigned int maj, min, rev;

		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
			return ERR_PTR(-EINVAL);

		/*
		 * utf8version_is_supported() takes u8 arguments, so larger
		 * components would be silently truncated there yet used in
		 * full by UNICODE_AGE() below.  Reject them up front.
		 */
		if (maj > 0xff || min > 0xff || rev > 0xff)
			return ERR_PTR(-EINVAL);

		if (!utf8version_is_supported(maj, min, rev))
			return ERR_PTR(-EINVAL);

		unicode_version = UNICODE_AGE(maj, min, rev);
	} else {
		unicode_version = utf8version_latest();
		printk(KERN_WARNING"UTF-8 version not specified. "
		       "Assuming latest supported version (%d.%d.%d).\n",
		       (unicode_version >> 16) & 0xff,
		       (unicode_version >> 8) & 0xff,
		       (unicode_version & 0xff));
	}

	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
	if (!um)
		return ERR_PTR(-ENOMEM);

	um->charset = "UTF-8";
	um->version = unicode_version;

	return um;
}
EXPORT_SYMBOL(utf8_load);
/*
 * Release a unicode_map obtained from utf8_load(). The map is freed with
 * kfree(), matching the kzalloc() in utf8_load().
 */
void utf8_unload(struct unicode_map *um)
{
kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
MODULE_LICENSE("GPL v2");

801
fs/unicode/utf8-norm.c Normal file
View File

@@ -0,0 +1,801 @@
/*
* Copyright (c) 2014 SGI.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "utf8n.h"
/* Selects one trie within the generated utf8data[] table. */
struct utf8data {
/* Newest unicode age accepted; leaf ages above this are filtered out. */
unsigned int maxage;
/* Offset of the trie's root within utf8data[]. */
unsigned int offset;
};
#define __INCLUDED_FROM_UTF8NORM_C__
#include "utf8data.h"
#undef __INCLUDED_FROM_UTF8NORM_C__
/*
 * Report whether unicode version maj.min.rev appears in utf8agetab[].
 * The table is scanned from its last entry backwards, stopping at the first
 * zero entry.
 *
 * Returns 1 if the version is found, 0 otherwise.
 */
int utf8version_is_supported(u8 maj, u8 min, u8 rev)
{
	const unsigned int wanted = UNICODE_AGE(maj, min, rev);
	int idx;

	for (idx = ARRAY_SIZE(utf8agetab) - 1; idx >= 0; idx--) {
		if (utf8agetab[idx] == 0)
			break;
		if (utf8agetab[idx] == wanted)
			return 1;
	}

	return 0;
}
EXPORT_SYMBOL(utf8version_is_supported);
/*
 * Return utf8vers, which utf8_load() uses as the latest supported version
 * when no version string is given.
 */
int utf8version_latest(void)
{
return utf8vers;
}
EXPORT_SYMBOL(utf8version_latest);
/*
* UTF-8 valid ranges.
*
* The UTF-8 encoding spreads the bits of a 32bit word over several
* bytes. This table gives the ranges that can be held and how they'd
* be represented.
*
* 0x00000000 0x0000007F: 0xxxxxxx
* 0x00000000 0x000007FF: 110xxxxx 10xxxxxx
* 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
* 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*
* There is an additional requirement on UTF-8, in that only the
* shortest representation of a 32bit value is to be used. A decoder
* must not decode sequences that do not satisfy this requirement.
* Thus the allowed ranges have a lower bound.
*
* 0x00000000 0x0000007F: 0xxxxxxx
* 0x00000080 0x000007FF: 110xxxxx 10xxxxxx
* 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
* 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*
* Actual unicode characters are limited to the range 0x0 - 0x10FFFF,
* 17 planes of 65536 values. This limits the sequences actually seen
* even more, to just the following.
*
* 0 - 0x7F: 0 - 0x7F
* 0x80 - 0x7FF: 0xC2 0x80 - 0xDF 0xBF
* 0x800 - 0xFFFF: 0xE0 0xA0 0x80 - 0xEF 0xBF 0xBF
* 0x10000 - 0x10FFFF: 0xF0 0x90 0x80 0x80 - 0xF4 0x8F 0xBF 0xBF
*
* Within those ranges the surrogates 0xD800 - 0xDFFF are not allowed.
*
* Note that the longest sequence seen with valid usage is 4 bytes,
* the same as a single UTF-32 character. This makes the UTF-8
* representation of Unicode strictly smaller than UTF-32.
*
* The shortest sequence requirement was introduced by:
* Corrigendum #1: UTF-8 Shortest Form
* It can be found here:
* http://www.unicode.org/versions/corrigendum1.html
*
*/
/*
 * Length in bytes of the UTF-8 sequence starting at @s, derived from the
 * lead byte alone.  The caller must pass a pointer to the first byte of a
 * valid sequence.
 */
static inline int utf8clen(const char *s)
{
	const unsigned char lead = (unsigned char)*s;

	if (lead < 0xC0)
		return 1;
	if (lead < 0xE0)
		return 2;
	if (lead < 0xF0)
		return 3;
	return 4;
}
/*
 * Decode the 3-byte UTF-8 sequence at @str into its code point: 4 payload
 * bits from the first byte, 6 from each of the following two.
 */
static unsigned int
utf8decode3(const char *str)
{
	const unsigned int top = str[0] & 0x0F;
	const unsigned int mid = str[1] & 0x3F;
	const unsigned int low = str[2] & 0x3F;

	return (top << 12) | (mid << 6) | low;
}
/*
 * Encode @val as a 3-byte UTF-8 sequence into @str (no terminator is
 * written).  Returns the number of bytes stored, which is always 3.
 */
static int
utf8encode3(char *str, unsigned int val)
{
	/* Lead byte: 1110xxxx carrying the bits above the low 12. */
	str[0] = (val >> 12) | 0xE0;
	/* Continuation bytes: 10xxxxxx carrying 6 bits each. */
	str[1] = ((val >> 6) & 0x3F) | 0x80;
	str[2] = (val & 0x3F) | 0x80;

	return 3;
}
/*
* utf8trie_t
*
* A compact binary tree, used to decode UTF-8 characters.
*
* Internal nodes are one byte for the node itself, and up to three
* bytes for an offset into the tree. The first byte contains the
* following information:
* NEXTBYTE - flag - advance to next byte if set
* BITNUM - 3 bit field - the bit number to be tested
* OFFLEN - 2 bit field - number of bytes in the offset
* if offlen == 0 (non-branching node)
* RIGHTPATH - 1 bit field - set if the following node is for the
* right-hand path (tested bit is set)
* TRIENODE - 1 bit field - set if the following node is an internal
* node, otherwise it is a leaf node
* if offlen != 0 (branching node)
* LEFTNODE - 1 bit field - set if the left-hand node is internal
* RIGHTNODE - 1 bit field - set if the right-hand node is internal
*
* Due to the way utf8 works, there cannot be branching nodes with
* NEXTBYTE set, and moreover those nodes always have a righthand
* descendant.
*/
/* A trie is addressed as a read-only byte array. */
typedef const unsigned char utf8trie_t;
/* Masks applied to the first byte of an internal node. */
/* Number of the input-byte bit this node tests. */
#define BITNUM 0x07
/* Advance to the next input byte before testing. */
#define NEXTBYTE 0x08
/* Number of offset bytes following the node byte (0 = non-branching). */
#define OFFLEN 0x30
#define OFFLEN_SHIFT 4
/* Non-branching nodes (offlen == 0): */
/* The single following node is on the right-hand (bit set) path. */
#define RIGHTPATH 0x40
/* The following node is an internal node rather than a leaf. */
#define TRIENODE 0x80
/* Branching nodes (offlen != 0) reuse the same two bits: */
/* The right-hand node is internal. */
#define RIGHTNODE 0x40
/* The left-hand node is internal. */
#define LEFTNODE 0x80
/*
* utf8leaf_t
*
* The leaves of the trie are embedded in the trie, and so the same
* underlying datatype: unsigned char.
*
* leaf[0]: The unicode version, stored as a generation number that is
* an index into utf8agetab[]. With this we can filter code
* points based on the unicode version in which they were
* defined. The CCC of a non-defined code point is 0.
* leaf[1]: Canonical Combining Class. During normalization, we need
* to do a stable sort into ascending order of all characters
* with a non-zero CCC that occur between two characters with
* a CCC of 0, or at the begin or end of a string.
* The unicode standard guarantees that all CCC values are
* between 0 and 254 inclusive, which leaves 255 available as
* a special value.
* Code points with CCC 0 are known as stoppers.
* leaf[2]: Decomposition. If leaf[1] == 255, then leaf[2] is the
* start of a NUL-terminated string that is the decomposition
* of the character.
* The CCC of a decomposable character is the same as the CCC
* of the first character of its decomposition.
* Some characters decompose as the empty string: these are
* characters with the Default_Ignorable_Code_Point property.
* These do affect normalization, as they all have CCC 0.
*
* The decompositions in the trie have been fully expanded, with the
* exception of Hangul syllables, which are decomposed algorithmically.
*
* Casefolding, if applicable, is also done using decompositions.
*
* The trie is constructed in such a way that leaves exist for all
* UTF-8 sequences that match the criteria from the "UTF-8 valid
* ranges" comment above, and only for those sequences. Therefore a
* lookup in the trie can be used to validate the UTF-8 input.
*/
/* Leaves live inside the trie, so they share its byte-array type. */
typedef const unsigned char utf8leaf_t;
/* Generation number: index into utf8agetab[]. */
#define LEAF_GEN(LEAF) ((LEAF)[0])
/* Canonical Combining Class, or DECOMPOSE. */
#define LEAF_CCC(LEAF) ((LEAF)[1])
/* NUL-terminated decomposition; meaningful when LEAF_CCC is DECOMPOSE. */
#define LEAF_STR(LEAF) ((const char *)((LEAF) + 2))
/* Range of real CCC values. */
#define MINCCC (0)
#define MAXCCC (254)
/* CCC of a stopper code point. */
#define STOPPER (0)
/* Special CCC value marking a leaf that carries a decomposition. */
#define DECOMPOSE (255)
/* Marker for hangul syllable decomposition. */
#define HANGUL ((char)(255))
/*
 * Size of the synthesized leaf used for Hangul syllable decomposition:
 * 2 header bytes, up to three 3-byte sequences, and a terminating NUL.
 */
#define UTF8HANGULLEAF (12)
/*
* Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
*
* AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
* D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
*
* SBase = 0xAC00
* LBase = 0x1100
* VBase = 0x1161
* TBase = 0x11A7
* LCount = 19
* VCount = 21
* TCount = 28
* NCount = 588 (VCount * TCount)
* SCount = 11172 (LCount * NCount)
*
* Decomposition:
* SIndex = s - SBase
*
* LV (Canonical/Full)
* LIndex = SIndex / NCount
* VIndex = (Sindex % NCount) / TCount
* LPart = LBase + LIndex
* VPart = VBase + VIndex
*
* LVT (Canonical)
* LVIndex = (SIndex / TCount) * TCount
* TIndex = (Sindex % TCount)
* LVPart = SBase + LVIndex
* TPart = TBase + TIndex
*
* LVT (Full)
* LIndex = SIndex / NCount
* VIndex = (Sindex % NCount) / TCount
* TIndex = (Sindex % TCount)
* LPart = LBase + LIndex
* VPart = VBase + VIndex
* if (TIndex == 0) {
* d = <LPart, VPart>
* } else {
* TPart = TBase + TIndex
* d = <LPart, VPart, TPart>
* }
*/
/* Constants for the Hangul decomposition algorithm described above. */
/* SBase: first Hangul syllable code point. */
#define SB (0xAC00)
/* LBase, VBase, TBase: base code points of the L, V and T parts. */
#define LB (0x1100)
#define VB (0x1161)
#define TB (0x11A7)
/* LCount, VCount, TCount. */
#define LC (19)
#define VC (21)
#define TC (28)
/* NCount = VCount * TCount (588). */
#define NC (VC * TC)
/* SCount = LCount * NCount (11172). */
#define SC (LC * NC)
/*
 * Build a synthetic leaf holding the algorithmic decomposition of the Hangul
 * syllable whose 3-byte UTF-8 encoding starts at @str.
 *
 * @hangul must have room for UTF8HANGULLEAF bytes; it is filled in and
 * returned as the leaf.
 */
static utf8leaf_t *
utf8hangul(const char *str, unsigned char *hangul)
{
	/* Split the syllable index into its L, V and T indices. */
	const unsigned int sindex = utf8decode3(str) - SB;
	const unsigned int lindex = sindex / NC;
	const unsigned int vindex = (sindex % NC) / TC;
	const unsigned int tindex = sindex % TC;
	unsigned char *out = hangul;

	/* Leaf header: generation and the "has decomposition" CCC marker. */
	LEAF_GEN(out) = 2;
	LEAF_CCC(out) = DECOMPOSE;
	out += 2;

	/* LPart and VPart are always present, each a 3-byte sequence. */
	out += utf8encode3((char *)out, lindex + LB);
	out += utf8encode3((char *)out, vindex + VB);

	/* TPart is only emitted for a non-zero T index. */
	if (tindex != 0)
		out += utf8encode3((char *)out, tindex + TB);

	/* NUL-terminate the decomposition string. */
	*out = '\0';

	return hangul;
}
/*
 * Use trie to scan s, touching at most len bytes.
 * Returns the leaf if one exists, NULL otherwise.
 *
 * data   : selects the trie (via data->offset into utf8data[]); NULL
 *          yields NULL.
 * hangul : scratch buffer that receives a synthesized leaf when the
 *          sequence is a Hangul syllable; the returned pointer then
 *          points into it.
 * s      : start of the UTF-8 sequence to look up.
 * len    : number of bytes available at s; 0 yields NULL.
 *
 * A non-NULL return guarantees that the UTF-8 sequence starting at s
 * is well-formed and corresponds to a known unicode code point. The
 * shorthand for this will be "is valid UTF-8 unicode".
 */
static utf8leaf_t *utf8nlookup(const struct utf8data *data,
unsigned char *hangul, const char *s, size_t len)
{
utf8trie_t *trie = NULL;
int offlen;
int offset;
int mask;
int node;
if (!data)
return NULL;
if (len == 0)
return NULL;
/* Start at the root of the trie selected by data. */
trie = utf8data + data->offset;
/* node stays non-zero while trie points at an internal node. */
node = 1;
while (node) {
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
if (*trie & NEXTBYTE) {
/* Input exhausted in the middle of a sequence. */
if (--len == 0)
return NULL;
s++;
}
/* Test the bit of the current input byte named by this node. */
mask = 1 << (*trie & BITNUM);
if (*s & mask) {
/* Right leg */
if (offlen) {
/* Right node at offset of trie */
node = (*trie & RIGHTNODE);
/*
 * Assemble the offset from the offlen bytes after the node
 * byte; trie[offlen] is the most significant byte.
 */
offset = trie[offlen];
while (--offlen) {
offset <<= 8;
offset |= trie[offlen];
}
trie += offset;
} else if (*trie & RIGHTPATH) {
/* Right node after this node */
node = (*trie & TRIENODE);
trie++;
} else {
/* No right node. */
return NULL;
}
} else {
/* Left leg */
if (offlen) {
/* Left node after this node. */
node = (*trie & LEFTNODE);
/* Skip the node byte and its offset bytes. */
trie += offlen + 1;
} else if (*trie & RIGHTPATH) {
/* No left node. */
return NULL;
} else {
/* Left node after this node */
node = (*trie & TRIENODE);
trie++;
}
}
}
/*
 * Hangul decomposition is done algorithmically. These are the
 * codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is
 * always 3 bytes long, so s has been advanced twice, and the
 * start of the sequence is at s-2.
 */
if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL)
trie = utf8hangul(s - 2, hangul);
return trie;
}
/*
 * Use trie to scan s.
 * Returns the leaf if one exists, NULL otherwise.
 *
 * Forwards to utf8nlookup() with the largest possible length, so the
 * lookup is not limited by a byte count.
 */
static utf8leaf_t *utf8lookup(const struct utf8data *data,
unsigned char *hangul, const char *s)
{
return utf8nlookup(data, hangul, s, (size_t)-1);
}
/*
 * Newest age among the characters of the NUL-terminated string s,
 * ignoring characters whose age exceeds data->maxage.
 * Return -1 if data is NULL or s is not valid UTF-8 unicode.
 * Return 0 if only non-assigned code points are used.
 */
int utf8agemax(const struct utf8data *data, const char *s)
{
	unsigned char hangul[UTF8HANGULLEAF];
	int newest = 0;

	if (!data)
		return -1;

	for (; *s; s += utf8clen(s)) {
		utf8leaf_t *leaf = utf8lookup(data, hangul, s);
		int leaf_age;

		if (!leaf)
			return -1;

		leaf_age = utf8agetab[LEAF_GEN(leaf)];
		if (leaf_age > data->maxage)
			continue;
		if (leaf_age > newest)
			newest = leaf_age;
	}

	return newest;
}
EXPORT_SYMBOL(utf8agemax);
/*
 * Oldest age among the characters of the NUL-terminated string s,
 * starting from data->maxage and ignoring characters newer than that.
 * Return -1 if data is NULL or s is not valid UTF-8 unicode.
 * Return 0 if non-assigned code points are used.
 */
int utf8agemin(const struct utf8data *data, const char *s)
{
	unsigned char hangul[UTF8HANGULLEAF];
	int oldest;

	if (!data)
		return -1;

	oldest = data->maxage;
	for (; *s; s += utf8clen(s)) {
		utf8leaf_t *leaf = utf8lookup(data, hangul, s);
		int leaf_age;

		if (!leaf)
			return -1;

		leaf_age = utf8agetab[LEAF_GEN(leaf)];
		if (leaf_age > data->maxage)
			continue;
		if (leaf_age < oldest)
			oldest = leaf_age;
	}

	return oldest;
}
EXPORT_SYMBOL(utf8agemin);
/*
 * Newest age among the characters of s, touching at most len bytes and
 * stopping early at a NUL.  Characters whose age exceeds data->maxage
 * are ignored.
 * Return -1 if data is NULL or s is not valid UTF-8 unicode.
 */
int utf8nagemax(const struct utf8data *data, const char *s, size_t len)
{
	unsigned char hangul[UTF8HANGULLEAF];
	int newest = 0;

	if (!data)
		return -1;

	while (len && *s) {
		utf8leaf_t *leaf = utf8nlookup(data, hangul, s, len);
		int leaf_age;
		int step;

		if (!leaf)
			return -1;

		leaf_age = utf8agetab[LEAF_GEN(leaf)];
		if (leaf_age <= data->maxage && leaf_age > newest)
			newest = leaf_age;

		/* Advance past the sequence that was just looked up. */
		step = utf8clen(s);
		len -= step;
		s += step;
	}

	return newest;
}
EXPORT_SYMBOL(utf8nagemax);
/*
* Maximum age of any character in s, touch at most len bytes.
* Return -1 if s is not valid UTF-8 unicode.
*/
int utf8nagemin(const struct utf8data *data, const char *s, size_t len)
{
	unsigned char hangul[UTF8HANGULLEAF];
	int oldest;

	if (!data)
		return -1;

	/* Start from the table's maxage and only ever move downwards. */
	oldest = data->maxage;
	/* Stop at the byte budget or at a NUL, whichever comes first. */
	while (len && *s) {
		utf8leaf_t *leaf = utf8nlookup(data, hangul, s, len);
		int cur;

		if (!leaf)
			return -1;

		cur = utf8agetab[LEAF_GEN(leaf)];
		/* Ages above the table's maxage do not contribute. */
		if (cur < oldest && cur <= data->maxage)
			oldest = cur;

		/* Consume the character from both the budget and the string. */
		len -= utf8clen(s);
		s += utf8clen(s);
	}
	return oldest;
}
EXPORT_SYMBOL(utf8nagemin);
/*
* Length of the normalization of s.
* Return -1 if s is not valid UTF-8 unicode.
*
* A string of Default_Ignorable_Code_Point has length 0.
*/
ssize_t utf8len(const struct utf8data *data, const char *s)
{
	unsigned char hangul[UTF8HANGULLEAF];
	size_t total = 0;

	if (!data)
		return -1;

	for (; *s; s += utf8clen(s)) {
		utf8leaf_t *leaf = utf8lookup(data, hangul, s);

		if (!leaf)
			return -1;

		/*
		 * Only a character that is not too new for this table and
		 * is marked DECOMPOSE contributes the length of its
		 * replacement string; everything else counts as-is.
		 */
		if (utf8agetab[LEAF_GEN(leaf)] <= data->maxage &&
		    LEAF_CCC(leaf) == DECOMPOSE)
			total += strlen(LEAF_STR(leaf));
		else
			total += utf8clen(s);
	}
	return total;
}
EXPORT_SYMBOL(utf8len);
/*
* Length of the normalization of s, touch at most len bytes.
* Return -1 if s is not valid UTF-8 unicode.
*/
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
{
	unsigned char hangul[UTF8HANGULLEAF];
	size_t total = 0;

	if (!data)
		return -1;

	/* Stop at the byte budget or at a NUL, whichever comes first. */
	while (len && *s) {
		utf8leaf_t *leaf = utf8nlookup(data, hangul, s, len);

		if (!leaf)
			return -1;

		/*
		 * Only a character that is not too new for this table and
		 * is marked DECOMPOSE contributes the length of its
		 * replacement string; everything else counts as-is.
		 */
		if (utf8agetab[LEAF_GEN(leaf)] <= data->maxage &&
		    LEAF_CCC(leaf) == DECOMPOSE)
			total += strlen(LEAF_STR(leaf));
		else
			total += utf8clen(s);

		/* Consume the character from both the budget and the string. */
		len -= utf8clen(s);
		s += utf8clen(s);
	}
	return total;
}
EXPORT_SYMBOL(utf8nlen);
/*
* Set up an utf8cursor for use by utf8byte().
*
* u8c : pointer to cursor.
* data : const struct utf8data to use for normalization.
* s : string.
* len : length of s.
*
* Returns -1 on error, 0 on success.
*/
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
		const char *s, size_t len)
{
	if (!data || !s)
		return -1;

	/* Input position and remaining length. */
	u8c->data = data;
	u8c->s = s;
	u8c->len = len;

	/* No decomposition and no repeating scan in progress yet. */
	u8c->p = NULL;
	u8c->ss = NULL;
	u8c->sp = NULL;
	u8c->slen = 0;
	u8c->ccc = STOPPER;
	u8c->nccc = STOPPER;

	/* Reject a length that did not survive the store into u8c->len. */
	if (u8c->len != len)
		return -1;

	/* A string must not begin with a UTF-8 continuation byte. */
	if (len > 0 && (*s & 0xC0) == 0x80)
		return -1;

	return 0;
}
EXPORT_SYMBOL(utf8ncursor);
/*
* Set up an utf8cursor for use by utf8byte().
*
* u8c : pointer to cursor.
* data : const struct utf8data to use for normalization.
* s : NUL-terminated string.
*
* Returns -1 on error, 0 on success.
*/
int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
	       const char *s)
{
	/* NUL-terminated input: pass the largest unsigned int as the bound. */
	const unsigned int no_limit = (unsigned int)-1;

	return utf8ncursor(u8c, data, s, no_limit);
}
EXPORT_SYMBOL(utf8cursor);
/*
* Get one byte from the normalized form of the string described by u8c.
*
* Returns the byte cast to an unsigned char on success, and -1 on failure.
*
* The cursor keeps track of the location in the string in u8c->s.
* When a character is decomposed, the current location is stored in
* u8c->p, and u8c->s is set to the start of the decomposition. Note
* that bytes from a decomposition do not count against u8c->len.
*
* Characters are emitted if they match the current CCC in u8c->ccc.
* Hitting end-of-string while u8c->ccc == STOPPER means we're done,
* and the function returns 0 in that case.
*
* Sorting by CCC is done by repeatedly scanning the string. The
* values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
* the start of the scan. The first pass finds the lowest CCC to be
* emitted and stores it in u8c->nccc, the second pass emits the
* characters with this CCC and finds the next lowest CCC. This limits
* the number of passes to 1 + the number of different CCCs in the
* sequence being scanned.
*
* Therefore:
* u8c->p != NULL -> a decomposition is being scanned.
* u8c->ss != NULL -> this is a repeating scan.
* u8c->ccc == -1 -> this is the first scan of a repeating scan.
*/
int utf8byte(struct utf8cursor *u8c)
{
	utf8leaf_t *leaf;
	int ccc;

	/*
	 * Each pass through the loop either returns (a byte, 0 for end of
	 * normalization, or -1 for invalid input) or adjusts the cursor
	 * state and goes around again.
	 */
	for (;;) {
		/* Check for the end of a decomposed character. */
		if (u8c->p && *u8c->s == '\0') {
			/* Resume in the input string where we left off. */
			u8c->s = u8c->p;
			u8c->p = NULL;
		}

		/* Check for end-of-string. */
		if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
			/* There is no next byte. */
			if (u8c->ccc == STOPPER)
				return 0;
			/* End-of-string during a scan counts as a stopper. */
			ccc = STOPPER;
			goto ccc_mismatch;
		} else if ((*u8c->s & 0xC0) == 0x80) {
			/* This is a continuation of the current character. */
			/* Only bytes read from the input count against len. */
			if (!u8c->p)
				u8c->len--;
			return (unsigned char)*u8c->s++;
		}

		/* Look up the data for the current character. */
		if (u8c->p) {
			/* Inside a decomposition: NUL-terminated, no bound. */
			leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
		} else {
			leaf = utf8nlookup(u8c->data, u8c->hangul,
					   u8c->s, u8c->len);
		}

		/* No leaf found implies that the input is a binary blob. */
		if (!leaf)
			return -1;

		ccc = LEAF_CCC(leaf);
		/* Characters that are too new have CCC 0. */
		if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
			ccc = STOPPER;
		} else if (ccc == DECOMPOSE) {
			/*
			 * Account for the whole source character now, remember
			 * where the input continues, and switch to reading the
			 * decomposition string.
			 */
			u8c->len -= utf8clen(u8c->s);
			u8c->p = u8c->s + utf8clen(u8c->s);
			u8c->s = LEAF_STR(leaf);
			/* Empty decomposition implies CCC 0. */
			if (*u8c->s == '\0') {
				if (u8c->ccc == STOPPER)
					continue;
				ccc = STOPPER;
				goto ccc_mismatch;
			}

			/* Classify the first character of the decomposition. */
			leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
			if (!leaf)
				return -1;
			ccc = LEAF_CCC(leaf);
		}

		/*
		 * If this is not a stopper, then see if it updates
		 * the next canonical class to be emitted.
		 */
		if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
			u8c->nccc = ccc;

		/*
		 * Return the current byte if this is the current
		 * combining class.
		 */
		if (ccc == u8c->ccc) {
			/* Only bytes read from the input count against len. */
			if (!u8c->p)
				u8c->len--;
			return (unsigned char)*u8c->s++;
		}

		/* Current combining class mismatch. */
ccc_mismatch:
		if (u8c->nccc == STOPPER) {
			/*
			 * Scan forward for the first canonical class
			 * to be emitted.  Save the position from
			 * which to restart.
			 */
			u8c->ccc = MINCCC - 1;
			u8c->nccc = ccc;
			u8c->sp = u8c->p;
			u8c->ss = u8c->s;
			u8c->slen = u8c->len;
			if (!u8c->p)
				u8c->len -= utf8clen(u8c->s);
			u8c->s += utf8clen(u8c->s);
		} else if (ccc != STOPPER) {
			/* Not a stopper, and not the ccc we're emitting. */
			if (!u8c->p)
				u8c->len -= utf8clen(u8c->s);
			u8c->s += utf8clen(u8c->s);
		} else if (u8c->nccc != MAXCCC + 1) {
			/* At a stopper, restart for next ccc. */
			u8c->ccc = u8c->nccc;
			u8c->nccc = MAXCCC + 1;
			/* Rewind to the position saved when the scan began. */
			u8c->s = u8c->ss;
			u8c->p = u8c->sp;
			u8c->len = u8c->slen;
		} else {
			/* All done, proceed from here. */
			u8c->ccc = STOPPER;
			u8c->nccc = STOPPER;
			u8c->sp = NULL;
			u8c->ss = NULL;
			u8c->slen = 0;
		}
	}
}
EXPORT_SYMBOL(utf8byte);
/*
 * Find the NFD + ignorable-removal table for a unicode version.
 *
 * maxage : version encoded as by UNICODE_AGE().
 *
 * Returns the entry of utf8nfdidata[] whose maxage equals the requested
 * one, or NULL if there is no such entry.
 */
const struct utf8data *utf8nfdi(unsigned int maxage)
{
	int i = ARRAY_SIZE(utf8nfdidata) - 1;

	/*
	 * Walk down from the newest entry.  The lower bound keeps the scan
	 * inside the table even if no entry has maxage <= the request.
	 */
	while (i >= 0 && maxage < utf8nfdidata[i].maxage)
		i--;
	/* Only an exact version match is accepted. */
	if (i < 0 || maxage > utf8nfdidata[i].maxage)
		return NULL;
	return &utf8nfdidata[i];
}
EXPORT_SYMBOL(utf8nfdi);
/*
 * Find the NFD + ignorable-removal + casefold table for a unicode version.
 *
 * maxage : version encoded as by UNICODE_AGE().
 *
 * Returns the entry of utf8nfdicfdata[] whose maxage equals the requested
 * one, or NULL if there is no such entry.
 */
const struct utf8data *utf8nfdicf(unsigned int maxage)
{
	int i = ARRAY_SIZE(utf8nfdicfdata) - 1;

	/*
	 * Walk down from the newest entry.  The lower bound keeps the scan
	 * inside the table even if no entry has maxage <= the request.
	 */
	while (i >= 0 && maxage < utf8nfdicfdata[i].maxage)
		i--;
	/* Only an exact version match is accepted. */
	if (i < 0 || maxage > utf8nfdicfdata[i].maxage)
		return NULL;
	return &utf8nfdicfdata[i];
}
EXPORT_SYMBOL(utf8nfdicf);

320
fs/unicode/utf8-selftest.c Normal file
View File

@@ -0,0 +1,320 @@
/*
* Kernel module for testing utf-8 support.
*
* Copyright 2017 Collabora Ltd.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/unicode.h>
#include <linux/dcache.h>
#include "utf8n.h"
/*
 * Test counters, updated by the _test() macro.  Kept file-local so these
 * generically named symbols cannot clash with other kernel code.
 */
static unsigned int failed_tests;
static unsigned int total_tests;
/* Tests will be based on this version. */
#define latest_maj 12
#define latest_min 1
#define latest_rev 0
/*
 * Record the outcome of one test.
 *
 * cond : expression that must be true for the test to pass.  It is
 *        parenthesized before negation so that callers may pass an
 *        unparenthesized expression such as "a == b".
 * func, line : location reported on failure.
 * fmt, ... : optional extra message printed on failure.
 *
 * test_f() reports a formatted message, test() reports only the condition.
 */
#define _test(cond, func, line, fmt, ...) do {				\
		total_tests++;						\
		if (!(cond)) {						\
			failed_tests++;					\
			pr_err("test %s:%d Failed: %s%s",		\
			       func, line, #cond, (fmt?":":"."));	\
			if (fmt)					\
				pr_err(fmt, ##__VA_ARGS__);		\
		}							\
	} while (0)
#define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define test(cond) _test(cond, __func__, __LINE__, "")
/*
 * Test vectors for canonical decomposition (nfdi): normalizing .str is
 * expected to yield .dec.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[10];
	unsigned char dec[10];
} nfdi_test_data[] = {
	/* Trivial sequence */
	{
		/* "aBba" decomposes to itself */
		.str = "aBba",
		.dec = "aBba",
	},
	/* Simple equivalent sequences */
	{
		/* 'VULGAR FRACTION ONE QUARTER' cannot decompose to
		   'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4' on
		   canonical decomposition */
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},
	{
		/* 'LATIN SMALL LETTER A WITH DIAERESIS' decomposes to
		   'LETTER A' + 'COMBINING DIAERESIS' */
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},
	{
		/* 'LATIN SMALL LETTER LJ' can't decompose to
		   'LETTER L' + 'LETTER J' on canonical decomposition */
		.str = {0xC7, 0x89, 0x00},
		.dec = {0xC7, 0x89, 0x00},
	},
	{
		/* GREEK ANO TELEIA decomposes to MIDDLE DOT */
		.str = {0xCE, 0x87, 0x00},
		.dec = {0xC2, 0xB7, 0x00}
	},
	/* Canonical ordering */
	{
		/* A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK' decomposes
		   to A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT' */
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x0},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x0},
	},
	{
		/* 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
		   decomposes to
		   'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS' */
		.str = {0xc3, 0xa4, 0xCC, 0xA8, 0x00},
		.dec = {0x61, 0xCC, 0xA8, 0xcc, 0x88, 0x00},
	},
};
/*
 * Test vectors for decomposition plus casefolding (nfdicf): normalizing
 * .str is expected to yield .ncf.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[30];
	unsigned char ncf[30];
} nfdicf_test_data[] = {
	/* Trivial sequences */
	{
		/* "ABba" folds to lowercase */
		.str = {0x41, 0x42, 0x62, 0x61, 0x00},
		.ncf = {0x61, 0x62, 0x62, 0x61, 0x00},
	},
	{
		/* All ASCII folds to lower-case */
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},
	{
		/* LATIN SMALL LETTER SHARP S folds to
		   LATIN SMALL LETTER S + LATIN SMALL LETTER S */
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},
	{
		/* LATIN CAPITAL LETTER A WITH RING ABOVE folds to
		   LATIN SMALL LETTER A + COMBINING RING ABOVE */
		.str = {0xC3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},
	/* Introduced by Unicode 8.0.0. */
	/* Cherokee letters are interesting test-cases because they fold
	   to upper-case.  Before 8.0.0, Cherokee lowercase were
	   undefined, thus, the folding from LC is not stable between
	   7.0.0 -> 8.0.0, but it is from UC. */
	{
		/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A */
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},
	{
		/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE */
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},
	{
		/* OLD HUNGARIAN CAPITAL LETTER AMB folds to
		   OLD HUNGARIAN SMALL LETTER AMB */
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},
	/* Introduced by Unicode 9.0.0. */
	{
		/* OSAGE CAPITAL LETTER CHA folds to
		   OSAGE SMALL LETTER CHA */
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},
	{
		/* LATIN CAPITAL LETTER SMALL CAPITAL I folds to
		   LATIN LETTER SMALL CAPITAL I */
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},
	/* Introduced by Unicode 11.0.0. */
	{
		/* GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
		   GEORGIAN LETTER AN (U+10D0) */
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	}
};
/*
 * Check the nfdi normalization against nfdi_test_data[]: the reported
 * normalized length (utf8len/utf8nlen) and every byte produced by
 * utf8byte() must match the expected decomposition.
 */
static void check_utf8_nfdi(void)
{
	int i;
	struct utf8cursor u8c;
	const struct utf8data *data;

	data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
	if (!data) {
		pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
		       __func__, latest_maj, latest_min, latest_rev);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
		int len = strlen(nfdi_test_data[i].str);
		int nlen = strlen(nfdi_test_data[i].dec);
		int j = 0;
		/*
		 * utf8byte() returns an int and uses -1 for errors; an
		 * unsigned char would turn that into 255 and never stop.
		 */
		int c;

		test((utf8len(data, nfdi_test_data[i].str) == nlen));
		test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));

		/* Do not walk a cursor that failed to initialize. */
		if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0) {
			pr_err("can't create cursor\n");
			continue;
		}

		/* Stops on end of normalization (0) and on error (-1). */
		while ((c = utf8byte(&u8c)) > 0) {
			test_f((c == nfdi_test_data[i].dec[j]),
			       "Unexpected byte 0x%x should be 0x%x\n",
			       c, nfdi_test_data[i].dec[j]);
			j++;
		}

		test((j == nlen));
	}
}
/*
 * Check the nfdicf normalization against nfdicf_test_data[]: the reported
 * normalized length (utf8len/utf8nlen) and every byte produced by
 * utf8byte() must match the expected casefolded form.
 */
static void check_utf8_nfdicf(void)
{
	int i;
	struct utf8cursor u8c;
	const struct utf8data *data;

	data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
	if (!data) {
		pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
		       __func__, latest_maj, latest_min, latest_rev);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
		int len = strlen(nfdicf_test_data[i].str);
		int nlen = strlen(nfdicf_test_data[i].ncf);
		int j = 0;
		/*
		 * utf8byte() returns an int and uses -1 for errors; an
		 * unsigned char would turn that into 255 and never stop.
		 */
		int c;

		test((utf8len(data, nfdicf_test_data[i].str) == nlen));
		test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));

		/* Do not walk a cursor that failed to initialize. */
		if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0) {
			pr_err("can't create cursor\n");
			continue;
		}

		/* Stops on end of normalization (0) and on error (-1). */
		while ((c = utf8byte(&u8c)) > 0) {
			test_f((c == nfdicf_test_data[i].ncf[j]),
			       "Unexpected byte 0x%x should be 0x%x\n",
			       c, nfdicf_test_data[i].ncf[j]);
			j++;
		}

		test((j == nlen));
	}
}
/*
 * Check the string comparison helpers: every test string must compare
 * equal to its expected normalized form (utf8_strncmp for the nfdi
 * vectors, utf8_strncasecmp for the nfdicf vectors).
 */
static void check_utf8_comparisons(void)
{
	int i;
	/*
	 * NOTE(review): the version string is hard-coded here while the
	 * error message below prints latest_maj/min/rev; keep them in sync.
	 */
	struct unicode_map *table = utf8_load("12.1.0");

	if (IS_ERR(table)) {
		pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
		       __func__, latest_maj, latest_min, latest_rev);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
		/*
		 * .len is the full array size, not the string length;
		 * presumably the comparison stops at the terminating NUL
		 * -- TODO confirm against utf8_strncmp().
		 */
		const struct qstr s1 = {.name = nfdi_test_data[i].str,
					.len = sizeof(nfdi_test_data[i].str)};
		const struct qstr s2 = {.name = nfdi_test_data[i].dec,
					.len = sizeof(nfdi_test_data[i].dec)};

		/* A zero result means the two strings are equivalent. */
		test_f(!utf8_strncmp(table, &s1, &s2),
		       "%s %s comparison mismatch\n", s1.name, s2.name);
	}

	for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
		/* Same full-array length convention as above. */
		const struct qstr s1 = {.name = nfdicf_test_data[i].str,
					.len = sizeof(nfdicf_test_data[i].str)};
		const struct qstr s2 = {.name = nfdicf_test_data[i].ncf,
					.len = sizeof(nfdicf_test_data[i].ncf)};

		/* A zero result means the two strings are equivalent. */
		test_f(!utf8_strncasecmp(table, &s1, &s2),
		       "%s %s comparison mismatch\n", s1.name, s2.name);
	}

	utf8_unload(table);
}
/*
 * Check utf8version_is_supported() for versions that should be accepted
 * and for versions that should be rejected.
 */
static void check_supported_versions(void)
{
	/* Unicode 7.0.0 should be supported. */
	test(utf8version_is_supported(7, 0, 0));

	/* Unicode 9.0.0 should be supported. */
	test(utf8version_is_supported(9, 0, 0));

	/* The latest version (latest_maj.latest_min.latest_rev) should be supported. */
	test(utf8version_is_supported(latest_maj, latest_min, latest_rev));

	/* Next versions don't exist. */
	test(!utf8version_is_supported(13, 0, 0));

	test(!utf8version_is_supported(0, 0, 0));
	/* The parameters are u8, so each -1 is passed as 255. */
	test(!utf8version_is_supported(-1, -1, -1));
}
/*
 * Module entry point: run every check and log a summary.
 */
static int __init init_test_ucd(void)
{
	/* Start each run from clean counters. */
	total_tests = 0;
	failed_tests = 0;

	check_supported_versions();
	check_utf8_nfdi();
	check_utf8_nfdicf();
	check_utf8_comparisons();

	if (failed_tests)
		pr_err("%u out of %u tests failed\n", failed_tests,
		       total_tests);
	else
		pr_info("All %u tests passed\n", total_tests);

	/* Test failures are only logged; module load itself succeeds. */
	return 0;
}
/* Module exit point: intentionally empty. */
static void __exit exit_test_ucd(void)
{
}
module_init(init_test_ucd);
module_exit(exit_test_ucd);
MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>");
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff

117
fs/unicode/utf8n.h Normal file
View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2014 SGI.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef UTF8NORM_H
#define UTF8NORM_H
#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>
/* Encoding a unicode version number as a single unsigned int. */
#define UNICODE_MAJ_SHIFT (16)
#define UNICODE_MIN_SHIFT (8)
#define UNICODE_AGE(MAJ, MIN, REV) \
(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
((unsigned int)(REV)))
/* Highest unicode version supported by the data tables. */
extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
extern int utf8version_latest(void);
/*
* Look for the correct const struct utf8data for a unicode version.
* Returns NULL if the version requested is too new.
*
* Two normalization forms are supported: nfdi and nfdicf.
*
* nfdi:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
*
* nfdicf:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
* - Apply a full casefold (C + F).
*/
extern const struct utf8data *utf8nfdi(unsigned int maxage);
extern const struct utf8data *utf8nfdicf(unsigned int maxage);
/*
* Determine the maximum age of any unicode character in the string.
* Returns 0 if only unassigned code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern int utf8agemax(const struct utf8data *data, const char *s);
extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
/*
* Determine the minimum age of any unicode character in the string.
* Returns 0 if any unassigned code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern int utf8agemin(const struct utf8data *data, const char *s);
extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
/*
* Determine the length of the normalized form of the string,
* excluding any terminating NULL byte.
* Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
extern ssize_t utf8len(const struct utf8data *data, const char *s);
extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
/* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12)
/*
* Cursor structure used by the normalizer.
*/
struct utf8cursor {
	/* Normalization tables used for every lookup. */
	const struct utf8data *data;
	/* Current read position: in the input, or in a decomposition. */
	const char *s;
	/* Where to resume in the input while a decomposition is scanned; NULL otherwise. */
	const char *p;
	/* Values of s and p saved at the start of a repeating scan. */
	const char *ss;
	const char *sp;
	/* Input bytes remaining; decomposition bytes do not count. */
	unsigned int len;
	/* Value of len saved at the start of a repeating scan. */
	unsigned int slen;
	/* Canonical combining class currently being emitted. */
	short int ccc;
	/* Lowest combining class found so far that is still to be emitted. */
	short int nccc;
	/* Scratch buffer handed to the leaf lookup functions. */
	unsigned char hangul[UTF8HANGULLEAF];
};
/*
* Initialize a utf8cursor to normalize a string.
* Returns 0 on success.
* Returns -1 on failure.
*/
extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
const char *s);
extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
const char *s, size_t len);
/*
* Get the next byte in the normalization.
* Returns a value > 0 && < 256 on success.
* Returns 0 when the end of the normalization is reached.
* Returns -1 if the string being normalized is not valid UTF-8.
*/
extern int utf8byte(struct utf8cursor *u8c);
#endif /* UTF8NORM_H */

55
fs/verity/Kconfig Normal file
View File

@@ -0,0 +1,55 @@
# SPDX-License-Identifier: GPL-2.0
config FS_VERITY
bool "FS Verity (read-only file-based authenticity protection)"
select CRYPTO
# SHA-256 is selected as it's intended to be the default hash algorithm.
# To avoid bloat, other wanted algorithms must be selected explicitly.
select CRYPTO_SHA256
help
This option enables fs-verity. fs-verity is the dm-verity
mechanism implemented at the file level. On supported
filesystems (currently EXT4 and F2FS), userspace can use an
ioctl to enable verity for a file, which causes the filesystem
to build a Merkle tree for the file. The filesystem will then
transparently verify any data read from the file against the
Merkle tree. The file is also made read-only.
This serves as an integrity check, but the availability of the
Merkle tree root hash also allows efficiently supporting
various use cases where normally the whole file would need to
be hashed at once, such as: (a) auditing (logging the file's
hash), or (b) authenticity verification (comparing the hash
against a known good value, e.g. from a digital signature).
fs-verity is especially useful on large files where not all
the contents may actually be needed. Also, fs-verity verifies
data each time it is paged back in, which provides better
protection against malicious disks vs. an ahead-of-time hash.
If unsure, say N.
config FS_VERITY_DEBUG
bool "FS Verity debugging"
depends on FS_VERITY
help
Enable debugging messages related to fs-verity by default.
Say N unless you are an fs-verity developer.
config FS_VERITY_BUILTIN_SIGNATURES
bool "FS Verity builtin signature support"
depends on FS_VERITY
select SYSTEM_DATA_VERIFICATION
help
Support verifying signatures of verity files against the X.509
certificates that have been loaded into the ".fs-verity"
kernel keyring.
This is meant as a relatively simple mechanism that can be
used to provide an authenticity guarantee for verity files, as
an alternative to IMA appraisal. Userspace programs still
need to check that the verity bit is set in order to get an
authenticity guarantee.
If unsure, say N.

10
fs/verity/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_FS_VERITY) += enable.o \
hash_algs.o \
init.o \
measure.o \
open.o \
verify.o
obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o

377
fs/verity/enable.c Normal file
View File

@@ -0,0 +1,377 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/enable.c: ioctl to enable verity on a file
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <crypto/hash.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
/*
 * Build one level of the Merkle tree.
 *
 * Hashes @num_blocks_to_hash source blocks: data pages of the file when
 * @level is 0, otherwise hash blocks of level @level - 1.  The digests are
 * packed into @pending_hashes and written out through
 * ->write_merkle_tree_block() each time a block fills up or the last source
 * block has been hashed.  When @level equals params->num_levels the single
 * resulting digest is the root hash; it is left in @pending_hashes and
 * nothing is written.
 *
 * Return: 0 on success, -errno on failure (-EINTR if a fatal signal is
 * pending).
 */
static int build_merkle_tree_level(struct inode *inode, unsigned int level,
				   u64 num_blocks_to_hash,
				   const struct merkle_tree_params *params,
				   u8 *pending_hashes,
				   struct ahash_request *req)
{
	const struct fsverity_operations *vops = inode->i_sb->s_vop;
	unsigned int pending_size = 0;
	u64 dst_block_num;
	u64 i;
	int err;

	if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
		return -EINVAL;

	if (level < params->num_levels) {
		/* Regular level: output starts at this level's first block. */
		dst_block_num = params->level_start[level];
	} else {
		/* Root hash computation: exactly one block is hashed. */
		if (WARN_ON(num_blocks_to_hash != 1))
			return -EINVAL;
		dst_block_num = 0; /* unused */
	}

	for (i = 0; i < num_blocks_to_hash; i++) {
		struct page *src_page;

		/* Progress message every 10000 blocks and for the last one. */
		if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
			pr_debug("Hashing block %llu of %llu for level %u\n",
				 i + 1, num_blocks_to_hash, level);

		if (level == 0) {
			/* Leaf: hashing a data block */
			src_page = read_mapping_page(inode->i_mapping, i, NULL);
			if (IS_ERR(src_page)) {
				err = PTR_ERR(src_page);
				fsverity_err(inode,
					     "Error %d reading data page %llu",
					     err, i);
				return err;
			}
		} else {
			/* Non-leaf: hashing hash block from level below */
			src_page = vops->read_merkle_tree_page(inode,
					params->level_start[level - 1] + i);
			if (IS_ERR(src_page)) {
				err = PTR_ERR(src_page);
				fsverity_err(inode,
					     "Error %d reading Merkle tree page %llu",
					     err, params->level_start[level - 1] + i);
				return err;
			}
		}

		/* Append this block's digest to the pending hash block. */
		err = fsverity_hash_page(params, inode, req, src_page,
					 &pending_hashes[pending_size]);
		put_page(src_page);
		if (err)
			return err;
		pending_size += params->digest_size;

		if (level == params->num_levels) /* Root hash? */
			return 0;

		/* Flush when no further digest fits or the input is exhausted. */
		if (pending_size + params->digest_size > params->block_size ||
		    i + 1 == num_blocks_to_hash) {
			/* Flush the pending hash block */
			/* Zero-pad the unused tail of the block first. */
			memset(&pending_hashes[pending_size], 0,
			       params->block_size - pending_size);
			err = vops->write_merkle_tree_block(inode,
					pending_hashes,
					dst_block_num,
					params->log_blocksize);
			if (err) {
				fsverity_err(inode,
					     "Error %d writing Merkle tree block %llu",
					     err, dst_block_num);
				return err;
			}
			dst_block_num++;
			pending_size = 0;
		}

		/* Stay killable and preemptible on large files. */
		if (fatal_signal_pending(current))
			return -EINTR;
		cond_resched();
	}
	return 0;
}
/*
 * Build the Merkle tree for the given inode using the given parameters, and
 * return the root hash in @root_hash.
 *
 * The tree is written to a filesystem-specific location as determined by the
 * ->write_merkle_tree_block() method.  However, the blocks that comprise the
 * tree are the same for all filesystems.
 *
 * @root_hash must have room for params->digest_size bytes.
 *
 * Return: 0 on success, -ENOMEM if the scratch buffers cannot be allocated,
 * or the error returned by build_merkle_tree_level().
 */
static int build_merkle_tree(struct inode *inode,
			     const struct merkle_tree_params *params,
			     u8 *root_hash)
{
	u8 *pending_hashes;
	struct ahash_request *req;
	u64 blocks;
	unsigned int level;
	int err = -ENOMEM;

	if (inode->i_size == 0) {
		/* Empty file is a special case; root hash is all 0's */
		memset(root_hash, 0, params->digest_size);
		return 0;
	}

	/* Both results are checked together; the frees at 'out' accept NULL. */
	pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
	req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
	if (!pending_hashes || !req)
		goto out;

	/*
	 * Build each level of the Merkle tree, starting at the leaf level
	 * (level 0) and ascending to the root node (level 'num_levels - 1').
	 * Then at the end (level 'num_levels'), calculate the root hash.
	 *
	 * i_size is signed, so round up in u64 arithmetic: adding
	 * block_size - 1 to a size close to the signed maximum would
	 * otherwise overflow.
	 */
	blocks = ((u64)inode->i_size + params->block_size - 1) >>
		 params->log_blocksize;
	for (level = 0; level <= params->num_levels; level++) {
		err = build_merkle_tree_level(inode, level, blocks, params,
					      pending_hashes, req);
		if (err)
			goto out;
		/* Number of hash blocks produced, i.e. input of the next level. */
		blocks = (blocks + params->hashes_per_block - 1) >>
			 params->log_arity;
	}
	/* The final pass left the root hash at the start of the buffer. */
	memcpy(root_hash, pending_hashes, params->digest_size);
	err = 0;
out:
	kfree(pending_hashes);
	ahash_request_free(req);
	return err;
}
/*
 * Enable verity on the file behind @filp using the already-validated
 * arguments in @arg.
 *
 * Builds the fsverity_descriptor (including the optional salt and signature
 * copied from userspace), asks the filesystem to begin enabling verity,
 * builds the Merkle tree, creates the fsverity_info, and finally asks the
 * filesystem to finish enabling verity.  If a step between
 * ->begin_enable_verity() and ->end_enable_verity() fails, the 'rollback'
 * path calls ->end_enable_verity() with a NULL descriptor.
 *
 * Return: 0 on success, -errno on failure.
 */
static int enable_verity(struct file *filp,
			 const struct fsverity_enable_arg *arg)
{
	struct inode *inode = file_inode(filp);
	const struct fsverity_operations *vops = inode->i_sb->s_vop;
	struct merkle_tree_params params = { };
	struct fsverity_descriptor *desc;
	/* The signature is stored directly after the fixed-size descriptor. */
	size_t desc_size = sizeof(*desc) + arg->sig_size;
	struct fsverity_info *vi;
	int err;

	/* Start initializing the fsverity_descriptor */
	desc = kzalloc(desc_size, GFP_KERNEL);
	if (!desc)
		return -ENOMEM;
	desc->version = 1;
	desc->hash_algorithm = arg->hash_algorithm;
	desc->log_blocksize = ilog2(arg->block_size);

	/* Get the salt if the user provided one */
	if (arg->salt_size &&
	    copy_from_user(desc->salt,
			   (const u8 __user *)(uintptr_t)arg->salt_ptr,
			   arg->salt_size)) {
		err = -EFAULT;
		goto out;
	}
	desc->salt_size = arg->salt_size;

	/* Get the signature if the user provided one */
	if (arg->sig_size &&
	    copy_from_user(desc->signature,
			   (const u8 __user *)(uintptr_t)arg->sig_ptr,
			   arg->sig_size)) {
		err = -EFAULT;
		goto out;
	}
	desc->sig_size = cpu_to_le32(arg->sig_size);

	desc->data_size = cpu_to_le64(inode->i_size);

	/* Prepare the Merkle tree parameters */
	err = fsverity_init_merkle_tree_params(&params, inode,
					       arg->hash_algorithm,
					       desc->log_blocksize,
					       desc->salt, desc->salt_size);
	if (err)
		goto out;

	/*
	 * Start enabling verity on this file, serialized by the inode lock.
	 * Fail if verity is already enabled or is already being enabled.
	 */
	inode_lock(inode);
	if (IS_VERITY(inode))
		err = -EEXIST;
	else
		err = vops->begin_enable_verity(filp);
	inode_unlock(inode);
	if (err)
		goto out;

	/*
	 * Build the Merkle tree.  Don't hold the inode lock during this, since
	 * on huge files this may take a very long time and we don't want to
	 * force unrelated syscalls like chown() to block forever.  We don't
	 * need the inode lock here because deny_write_access() already prevents
	 * the file from being written to or truncated, and we still serialize
	 * ->begin_enable_verity() and ->end_enable_verity() using the inode
	 * lock and only allow one process to be here at a time on a given file.
	 */
	pr_debug("Building Merkle tree...\n");
	BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
	err = build_merkle_tree(inode, &params, desc->root_hash);
	if (err) {
		fsverity_err(inode, "Error %d building Merkle tree", err);
		goto rollback;
	}
	pr_debug("Done building Merkle tree.  Root hash is %s:%*phN\n",
		 params.hash_alg->name, params.digest_size, desc->root_hash);

	/*
	 * Create the fsverity_info.  Don't bother trying to save work by
	 * reusing the merkle_tree_params from above.  Instead, just create the
	 * fsverity_info from the fsverity_descriptor as if it were just loaded
	 * from disk.  This is simpler, and it serves as an extra check that the
	 * metadata we're writing is valid before actually enabling verity.
	 */
	vi = fsverity_create_info(inode, desc, desc_size);
	if (IS_ERR(vi)) {
		err = PTR_ERR(vi);
		goto rollback;
	}

	if (arg->sig_size)
		pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n",
			 arg->sig_size);

	/*
	 * Tell the filesystem to finish enabling verity on the file.
	 * Serialized with ->begin_enable_verity() by the inode lock.
	 */
	inode_lock(inode);
	err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
	inode_unlock(inode);
	if (err) {
		fsverity_err(inode, "%ps() failed with err %d",
			     vops->end_enable_verity, err);
		fsverity_free_info(vi);
	} else if (WARN_ON(!IS_VERITY(inode))) {
		/* The filesystem reported success but did not set the verity flag. */
		err = -EINVAL;
		fsverity_free_info(vi);
	} else {
		/* Successfully enabled verity */

		/*
		 * Readers can start using ->i_verity_info immediately, so it
		 * can't be rolled back once set.  So don't set it until just
		 * after the filesystem has successfully enabled verity.
		 */
		fsverity_set_info(inode, vi);
	}
out:
	/* Common exit: release the hash state and the descriptor. */
	kfree(params.hashstate);
	kfree(desc);
	return err;

rollback:
	/*
	 * A NULL descriptor presumably tells the filesystem to abort the
	 * operation -- confirm against the ->end_enable_verity() contract.
	 * Its return value is deliberately ignored; 'err' keeps the
	 * original failure.
	 */
	inode_lock(inode);
	(void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
	inode_unlock(inode);
	goto out;
}
/**
* fsverity_ioctl_enable() - enable verity on a file
*
* Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
* Documentation/filesystems/fsverity.rst for the documentation.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
{
	struct inode *inode = file_inode(filp);
	struct fsverity_enable_arg arg;
	int err;

	if (copy_from_user(&arg, uarg, sizeof(arg)))
		return -EFAULT;

	/* Validate the argument structure before touching the file. */
	if (arg.version != 1)
		return -EINVAL;

	/* Reserved fields must be zero. */
	if (arg.__reserved1 ||
	    memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
		return -EINVAL;

	/* Only a Merkle tree block size equal to the page size is accepted. */
	if (arg.block_size != PAGE_SIZE)
		return -EINVAL;

	/* The salt must fit in the descriptor's fixed-size salt field. */
	if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
		return -EMSGSIZE;

	if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
		return -EMSGSIZE;

	/*
	 * Require a regular file with write access.  But the actual fd must
	 * still be readonly so that we can lock out all writers.  This is
	 * needed to guarantee that no writable fds exist to the file once it
	 * has verity enabled, and to stabilize the data being hashed.
	 */

	err = inode_permission(inode, MAY_WRITE);
	if (err)
		return err;

	if (IS_APPEND(inode))
		return -EPERM;

	if (S_ISDIR(inode->i_mode))
		return -EISDIR;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* From here on, each acquisition is undone at the labels below. */
	err = mnt_want_write_file(filp);
	if (err) /* -EROFS */
		return err;

	err = deny_write_access(filp);
	if (err) /* -ETXTBSY */
		goto out_drop_write;

	err = enable_verity(filp, &arg);
	if (err)
		goto out_allow_write_access;

	/*
	 * Some pages of the file may have been evicted from pagecache after
	 * being used in the Merkle tree construction, then read into pagecache
	 * again by another process reading from the file concurrently.  Since
	 * these pages didn't undergo verification against the file measurement
	 * which fs-verity now claims to be enforcing, we have to wipe the
	 * pagecache to ensure that all future reads are verified.
	 */
	/* The results of these two calls are not checked. */
	filemap_write_and_wait(inode->i_mapping);
	invalidate_inode_pages2(inode->i_mapping);

	/*
	 * allow_write_access() is needed to pair with deny_write_access().
	 * Regardless, the filesystem won't allow writing to verity files.
	 */
out_allow_write_access:
	allow_write_access(filp);
out_drop_write:
	mnt_drop_write_file(filp);
	return err;
}
EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);

View File

@@ -0,0 +1,185 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs-verity: read-only file-based authenticity protection
*
* Copyright 2019 Google LLC
*/
#ifndef _FSVERITY_PRIVATE_H
#define _FSVERITY_PRIVATE_H
#ifdef CONFIG_FS_VERITY_DEBUG
#define DEBUG
#endif
#define pr_fmt(fmt) "fs-verity: " fmt
#include <crypto/sha.h>
#include <linux/fsverity.h>
struct ahash_request;
/*
* Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty;
* it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks.
*/
#define FS_VERITY_MAX_LEVELS 8
/*
* Largest digest size among all hash algorithms supported by fs-verity.
* Currently assumed to be <= size of fsverity_descriptor::root_hash.
*/
#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
/* A hash algorithm supported by fs-verity */
struct fsverity_hash_alg {
/*
 * NULL until fsverity_get_hash_alg() first needs this algorithm; that
 * function allocates a transform and publishes it here with cmpxchg().
 */
struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
const char *name; /* crypto API name, e.g. sha256 */
unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */
/* Also the unit the salt is zero-padded to in fsverity_prepare_hash_state() */
unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */
};
/* Merkle tree parameters: hash algorithm, initial hash state, and topology */
struct merkle_tree_params {
const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
/*
 * Result of fsverity_prepare_hash_state(): NULL when there is no salt,
 * otherwise a kmalloc()'ed buffer that is released with kfree().
 */
const u8 *hashstate; /* initial hash state or NULL */
unsigned int digest_size; /* same as hash_alg->digest_size */
unsigned int block_size; /* size of data and tree blocks */
unsigned int hashes_per_block; /* number of hashes per tree block */
unsigned int log_blocksize; /* log2(block_size) */
unsigned int log_arity; /* log2(hashes_per_block) */
unsigned int num_levels; /* number of levels in Merkle tree */
u64 tree_size; /* Merkle tree size in bytes */
/*
* Starting block index for each tree level, ordered from leaf level (0)
* to root level ('num_levels - 1')
*
* While fsverity_init_merkle_tree_params() is running, each entry briefly
* holds the number of blocks in that level before being converted to a
* starting index.
*/
u64 level_start[FS_VERITY_MAX_LEVELS];
};
/**
* fsverity_info - cached verity metadata for an inode
*
* When a verity file is first opened, an instance of this struct is allocated
* and stored in ->i_verity_info; it remains until the inode is evicted. It
* caches information about the Merkle tree that's needed to efficiently verify
* data read from the file. It also caches the file measurement. The Merkle
* tree pages themselves are not cached here, but the filesystem may cache them.
*/
struct fsverity_info {
/* Tree topology and hashing parameters derived from the descriptor */
struct merkle_tree_params tree_params;
/*
 * Root hash copied from the descriptor; only the first
 * tree_params.digest_size bytes are filled in, the rest stay zero.
 */
u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
/* Hash of the descriptor, computed by compute_file_measurement() */
u8 measurement[FS_VERITY_MAX_DIGEST_SIZE];
/* The inode this info was created for (used for log messages) */
const struct inode *inode;
};
/*
* Merkle tree properties. The file measurement is the hash of this structure
* excluding the signature and with the sig_size field set to 0.
*/
/*
 * Multi-byte fields have fixed little-endian byte order.  The fixed-size
 * fields add up to 256 bytes; 'signature' is a flexible array whose length is
 * given by 'sig_size'.  Field order and sizes must not change, since the
 * measurement is a hash over this exact layout.
 */
struct fsverity_descriptor {
__u8 version; /* must be 1 */
__u8 hash_algorithm; /* Merkle tree hash algorithm */
__u8 log_blocksize; /* log2 of size of data and tree blocks */
__u8 salt_size; /* size of salt in bytes; 0 if none */
__le32 sig_size; /* size of signature in bytes; 0 if none */
__le64 data_size; /* size of file the Merkle tree is built over */
__u8 root_hash[64]; /* Merkle tree root hash */
__u8 salt[32]; /* salt prepended to each hashed block */
__u8 __reserved[144]; /* must be 0's */
__u8 signature[]; /* optional PKCS#7 signature */
};
/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
sizeof(struct fsverity_descriptor))
/*
* Format in which verity file measurements are signed. This is the same as
* 'struct fsverity_digest', except here some magic bytes are prepended to
* provide some context about what is being signed in case the same key is used
* for non-fsverity purposes, and here the fields have fixed endianness.
*/
struct fsverity_signed_digest {
char magic[8]; /* must be "FSVerity" */
/* Index of the hash algorithm in fsverity_hash_algs[] */
__le16 digest_algorithm;
/* Length of 'digest' in bytes (the algorithm's digest size) */
__le16 digest_size;
/* The file measurement being signed */
__u8 digest[];
};
/* hash_algs.c */
extern struct fsverity_hash_alg fsverity_hash_algs[];
const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
unsigned int num);
const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
const u8 *salt, size_t salt_size);
int fsverity_hash_page(const struct merkle_tree_params *params,
const struct inode *inode,
struct ahash_request *req, struct page *page, u8 *out);
int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
const void *data, size_t size, u8 *out);
void __init fsverity_check_hash_algs(void);
/* init.c */
extern void __printf(3, 4) __cold
fsverity_msg(const struct inode *inode, const char *level,
const char *fmt, ...);
#define fsverity_warn(inode, fmt, ...) \
fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__)
#define fsverity_err(inode, fmt, ...) \
fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
/* open.c */
int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
const struct inode *inode,
unsigned int hash_algorithm,
unsigned int log_blocksize,
const u8 *salt, size_t salt_size);
struct fsverity_info *fsverity_create_info(const struct inode *inode,
void *desc, size_t desc_size);
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
void fsverity_free_info(struct fsverity_info *vi);
int __init fsverity_init_info_cache(void);
void __init fsverity_exit_info_cache(void);
/* signature.c */
#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
int fsverity_verify_signature(const struct fsverity_info *vi,
const struct fsverity_descriptor *desc,
size_t desc_size);
int __init fsverity_init_signature(void);
#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
/*
 * Stub used when CONFIG_FS_VERITY_BUILTIN_SIGNATURES is disabled: no signature
 * checking is performed and every descriptor is reported as acceptable.
 */
static inline int
fsverity_verify_signature(const struct fsverity_info *vi,
const struct fsverity_descriptor *desc,
size_t desc_size)
{
return 0;
}
/*
 * Stub used when CONFIG_FS_VERITY_BUILTIN_SIGNATURES is disabled: there is
 * nothing to set up, so initialization always succeeds.
 */
static inline int fsverity_init_signature(void)
{
return 0;
}
#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
/* verify.c */
int __init fsverity_init_workqueue(void);
void __init fsverity_exit_workqueue(void);
#endif /* _FSVERITY_PRIVATE_H */

280
fs/verity/hash_algs.c Normal file
View File

@@ -0,0 +1,280 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/hash_algs.c: fs-verity hash algorithms
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <crypto/hash.h>
#include <linux/scatterlist.h>
/*
 * Table of the hash algorithms fs-verity can use, indexed by the
 * FS_VERITY_HASH_ALG_* number.  Any slot not named below is zero-initialized
 * and so has a NULL ->name, which marks it as unsupported.  The ->tfm member
 * of each entry is left NULL here and filled in on first use by
 * fsverity_get_hash_alg().
 */
struct fsverity_hash_alg fsverity_hash_algs[] = {
	[FS_VERITY_HASH_ALG_SHA512] = {
		.block_size  = SHA512_BLOCK_SIZE,
		.digest_size = SHA512_DIGEST_SIZE,
		.name        = "sha512",
	},
	[FS_VERITY_HASH_ALG_SHA256] = {
		.block_size  = SHA256_BLOCK_SIZE,
		.digest_size = SHA256_DIGEST_SIZE,
		.name        = "sha256",
	},
};
/**
* fsverity_get_hash_alg() - validate and prepare a hash algorithm
* @inode: optional inode for logging purposes
* @num: the hash algorithm number
*
* Get the struct fsverity_hash_alg for the given hash algorithm number, and
* ensure it has a hash transform ready to go. The hash transforms are
* allocated on-demand so that we don't waste resources unnecessarily, and
* because the crypto modules may be initialized later than fs/verity/.
*
* Return: pointer to the hash alg on success, else an ERR_PTR()
*/
const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
						      unsigned int num)
{
	struct fsverity_hash_alg *alg;
	struct crypto_ahash *new_tfm;
	long alloc_err;

	/* Reject numbers past the table as well as unpopulated slots */
	if (num >= ARRAY_SIZE(fsverity_hash_algs) ||
	    fsverity_hash_algs[num].name == NULL) {
		fsverity_warn(inode, "Unknown hash algorithm number: %u", num);
		return ERR_PTR(-EINVAL);
	}
	alg = &fsverity_hash_algs[num];

	/*
	 * Fast path: a transform has already been installed.
	 * This READ_ONCE() pairs with the cmpxchg() below.
	 */
	if (likely(READ_ONCE(alg->tfm) != NULL))
		return alg;

	/*
	 * The ahash API is used rather than the (simpler) shash API because
	 * it allows crypto accelerators to be used.
	 */
	new_tfm = crypto_alloc_ahash(alg->name, 0, 0);
	if (IS_ERR(new_tfm)) {
		alloc_err = PTR_ERR(new_tfm);
		if (alloc_err != -ENOENT) {
			fsverity_err(inode,
				     "Error allocating hash algorithm \"%s\": %ld",
				     alg->name, alloc_err);
			return ERR_CAST(new_tfm);
		}
		/* The algorithm is known to fs-verity but not to the crypto API */
		fsverity_warn(inode,
			      "Missing crypto API support for hash algorithm \"%s\"",
			      alg->name);
		return ERR_PTR(-ENOPKG);
	}

	/* The static table must agree with what the crypto API reports */
	if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(new_tfm)) ||
	    WARN_ON(alg->block_size != crypto_ahash_blocksize(new_tfm))) {
		crypto_free_ahash(new_tfm);
		return ERR_PTR(-EINVAL);
	}

	pr_info("%s using implementation \"%s\"\n",
		alg->name, crypto_ahash_driver_name(new_tfm));

	/*
	 * Publish the transform.  If another task won the race, drop ours and
	 * use theirs.  This cmpxchg() pairs with the READ_ONCE() above.
	 */
	if (cmpxchg(&alg->tfm, NULL, new_tfm) != NULL)
		crypto_free_ahash(new_tfm);
	return alg;
}
/**
* fsverity_prepare_hash_state() - precompute the initial hash state
* @alg: hash algorithm
* @salt: a salt which is to be prepended to all data to be hashed
* @salt_size: salt size in bytes, possibly 0
*
* Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed
* initial hash state on success or an ERR_PTR() on failure.
*/
const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
				      const u8 *salt, size_t salt_size)
{
	struct ahash_request *req = NULL;
	u8 *padded_salt = NULL;
	u8 *state;
	size_t padded_salt_size;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* Without a salt there is no state to precompute */
	if (salt_size == 0)
		return NULL;

	state = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL);
	if (state == NULL)
		return ERR_PTR(-ENOMEM);

	/* Both allocations below fail with -ENOMEM */
	err = -ENOMEM;
	req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
	if (req == NULL)
		goto out;

	/*
	 * The salt is zero-padded up to a multiple of the input size of the
	 * hash algorithm's compression function (e.g. 64 bytes for SHA-256,
	 * 128 bytes for SHA-512).  That way the algorithm has no bytes
	 * buffered internally once the salt has been consumed, which keeps
	 * salted hashing as fast as unsalted hashing.
	 */
	padded_salt_size = round_up(salt_size, alg->block_size);
	padded_salt = kzalloc(padded_salt_size, GFP_KERNEL);
	if (padded_salt == NULL)
		goto out;
	memcpy(padded_salt, salt, salt_size);

	sg_init_one(&sg, padded_salt, padded_salt_size);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
					CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, NULL, padded_salt_size);

	/* init -> update(padded salt) -> export; stop at the first failure */
	err = crypto_wait_req(crypto_ahash_init(req), &wait);
	if (!err)
		err = crypto_wait_req(crypto_ahash_update(req), &wait);
	if (!err)
		err = crypto_ahash_export(req, state);
out:
	if (err) {
		kfree(state);
		state = ERR_PTR(err);
	}
	ahash_request_free(req);
	kfree(padded_salt);
	return state;
}
/**
* fsverity_hash_page() - hash a single data or hash page
* @params: the Merkle tree's parameters
* @inode: inode for which the hashing is being done
* @req: preallocated hash request
* @page: the page to hash
* @out: output digest, size 'params->digest_size' bytes
*
* Hash a single data or hash block, assuming block_size == PAGE_SIZE.
* The hash is salted if a salt is specified in the Merkle tree parameters.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_hash_page(const struct merkle_tree_params *params,
		       const struct inode *inode,
		       struct ahash_request *req, struct page *page, u8 *out)
{
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg;
	int err;

	/* Only whole-page blocks are supported by this helper */
	if (WARN_ON(params->block_size != PAGE_SIZE))
		return -EINVAL;

	/* Describe the single page to the crypto layer */
	sg_init_table(&sg, 1);
	sg_set_page(&sg, page, PAGE_SIZE, 0);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
					CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, out, PAGE_SIZE);

	if (!params->hashstate) {
		/* Unsalted: one-shot digest from the default initial state */
		err = crypto_ahash_digest(req);
	} else {
		/* Salted: resume from the precomputed state, then finish */
		err = crypto_ahash_import(req, params->hashstate);
		if (err) {
			fsverity_err(inode,
				     "Error %d importing hash state", err);
			return err;
		}
		err = crypto_ahash_finup(req);
	}

	/* The operation may complete asynchronously; wait for the result */
	err = crypto_wait_req(err, &wait);
	if (err)
		fsverity_err(inode, "Error %d computing page hash", err);
	return err;
}
/**
* fsverity_hash_buffer() - hash some data
* @alg: the hash algorithm to use
* @data: the data to hash
* @size: size of data to hash, in bytes
* @out: output digest, size 'alg->digest_size' bytes
*
* Hash some data which is located in physically contiguous memory (i.e. memory
* allocated by kmalloc(), not by vmalloc()). No salt is used.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
			 const void *data, size_t size, u8 *out)
{
	struct ahash_request *req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg;
	int ret;

	if (req == NULL)
		return -ENOMEM;

	/* A single scatterlist entry covers the whole buffer */
	sg_init_one(&sg, data, size);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
					CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, out, size);

	/* One-shot digest; block until the (possibly async) request is done */
	ret = crypto_wait_req(crypto_ahash_digest(req), &wait);

	ahash_request_free(req);
	return ret;
}
/*
 * Boot-time sanity check of the fsverity_hash_algs[] table.  This could be a
 * build-time check, but the values live in an array.
 */
void __init fsverity_check_hash_algs(void)
{
	const struct fsverity_hash_alg *const end =
		fsverity_hash_algs + ARRAY_SIZE(fsverity_hash_algs);
	const struct fsverity_hash_alg *alg;

	for (alg = fsverity_hash_algs; alg != end; alg++) {
		/* Unpopulated slots have no name and nothing to check */
		if (alg->name == NULL)
			continue;

		/* Digests must fit in the fixed-size buffers used elsewhere */
		BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);

		/*
		 * The implementation currently relies, for efficiency, on the
		 * digest and block sizes being powers of 2.  Other values
		 * would require the code to be updated first.
		 */
		BUG_ON(!is_power_of_2(alg->digest_size));
		BUG_ON(!is_power_of_2(alg->block_size));
	}
}

61
fs/verity/init.c Normal file
View File

@@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/init.c: fs-verity module initialization and logging
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <linux/ratelimit.h>
void fsverity_msg(const struct inode *inode, const char *level,
const char *fmt, ...)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
struct va_format vaf;
va_list args;
if (!__ratelimit(&rs))
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (inode)
printk("%sfs-verity (%s, inode %lu): %pV\n",
level, inode->i_sb->s_id, inode->i_ino, &vaf);
else
printk("%sfs-verity: %pV\n", level, &vaf);
va_end(args);
}
/*
 * Initialize fs-verity: check the hash algorithm table, then set up the info
 * cache, the workqueue and signature support, in that order.  If a step
 * fails, the steps that already succeeded are undone in reverse order.
 */
static int __init fsverity_init(void)
{
	int ret;

	fsverity_check_hash_algs();

	ret = fsverity_init_info_cache();
	if (ret)
		return ret;

	ret = fsverity_init_workqueue();
	if (ret) {
		fsverity_exit_info_cache();
		return ret;
	}

	ret = fsverity_init_signature();
	if (ret) {
		fsverity_exit_workqueue();
		fsverity_exit_info_cache();
		return ret;
	}

	pr_debug("Initialized fs-verity\n");
	return 0;
}
late_initcall(fsverity_init)

57
fs/verity/measure.c Normal file
View File

@@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/measure.c: ioctl to get a verity file's measurement
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <linux/uaccess.h>
/**
* fsverity_ioctl_measure() - get a verity file's measurement
*
* Retrieve the file measurement that the kernel is enforcing for reads from a
* verity file. See the "FS_IOC_MEASURE_VERITY" section of
* Documentation/filesystems/fsverity.rst for the documentation.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_ioctl_measure(struct file *filp, void __user *_uarg)
{
	const struct inode *inode = file_inode(filp);
	struct fsverity_digest __user *uarg = _uarg;
	const struct fsverity_info *vi = fsverity_get_info(inode);
	const struct fsverity_hash_alg *hash_alg;
	struct fsverity_digest arg;
	unsigned int digest_size;

	if (vi == NULL)
		return -ENODATA; /* not a verity file */

	hash_alg = vi->tree_params.hash_alg;
	digest_size = hash_alg->digest_size;

	/*
	 * Userspace tells us how many digest bytes its buffer can hold.  The
	 * digest is returned only if it fits; the size written back is the
	 * real digest size, which may be smaller than what was offered.
	 */
	if (get_user(arg.digest_size, &uarg->digest_size))
		return -EFAULT;
	if (arg.digest_size < digest_size)
		return -EOVERFLOW;

	/* Clear the whole header before filling it in and copying it out */
	memset(&arg, 0, sizeof(arg));
	arg.digest_size = digest_size;
	arg.digest_algorithm = hash_alg - fsverity_hash_algs;

	/* Header first, then the digest bytes that follow it */
	if (copy_to_user(uarg, &arg, sizeof(arg)) ||
	    copy_to_user(uarg->digest, vi->measurement, digest_size))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);

356
fs/verity/open.c Normal file
View File

@@ -0,0 +1,356 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/open.c: opening fs-verity files
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <linux/slab.h>
static struct kmem_cache *fsverity_info_cachep;
/**
 * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
 * @params: the parameters struct to initialize
 * @inode: the inode for which the Merkle tree is being built
 * @hash_algorithm: number of hash algorithm to use
 * @log_blocksize: log base 2 of block size to use
 * @salt: pointer to salt (optional)
 * @salt_size: size of salt, possibly 0
 *
 * Validate the hash algorithm and block size, then compute the tree topology
 * (num levels, num blocks in each level, etc.) and initialize @params.
 *
 * On success, @params->hashstate is either NULL (no salt) or a kmalloc()'ed
 * buffer that the caller must eventually kfree().  On failure, @params is
 * zeroed and nothing is left allocated.
 *
 * Return: 0 on success, -errno on failure
 */
int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
				     const struct inode *inode,
				     unsigned int hash_algorithm,
				     unsigned int log_blocksize,
				     const u8 *salt, size_t salt_size)
{
	const struct fsverity_hash_alg *hash_alg;
	int err;
	u64 blocks;
	u64 offset;
	int level;

	memset(params, 0, sizeof(*params));

	hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
	if (IS_ERR(hash_alg))
		return PTR_ERR(hash_alg);
	params->hash_alg = hash_alg;
	params->digest_size = hash_alg->digest_size;

	params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
							salt_size);
	if (IS_ERR(params->hashstate)) {
		err = PTR_ERR(params->hashstate);
		/* Don't let out_err try to kfree() an ERR_PTR() */
		params->hashstate = NULL;
		fsverity_err(inode, "Error %d preparing hash state", err);
		goto out_err;
	}

	/* Only block size == page size is currently supported */
	if (log_blocksize != PAGE_SHIFT) {
		fsverity_warn(inode, "Unsupported log_blocksize: %u",
			      log_blocksize);
		err = -EINVAL;
		goto out_err;
	}
	params->log_blocksize = log_blocksize;
	params->block_size = 1 << log_blocksize;

	/* ilog2() below is only exact for power-of-2 digest sizes */
	if (WARN_ON(!is_power_of_2(params->digest_size))) {
		err = -EINVAL;
		goto out_err;
	}
	/* Each tree block must hold at least two hashes */
	if (params->block_size < 2 * params->digest_size) {
		fsverity_warn(inode,
			      "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
			      params->block_size, hash_alg->name);
		err = -EINVAL;
		goto out_err;
	}
	params->log_arity = params->log_blocksize - ilog2(params->digest_size);
	params->hashes_per_block = 1 << params->log_arity;

	pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
		 hash_alg->name, params->block_size, params->hashes_per_block,
		 (int)salt_size, salt);

	/*
	 * Compute the number of levels in the Merkle tree and create a map from
	 * level to the starting block of that level.  Level 'num_levels - 1' is
	 * the root and is stored first.  Level 0 is the level directly "above"
	 * the data blocks and is stored last.
	 */

	/*
	 * Compute number of levels and the number of blocks in each level.
	 *
	 * i_size is a signed type, so rounding it up in signed arithmetic
	 * overflows for sizes close to S64_MAX and yields a bogus block count
	 * (and then a spurious "Too many levels" failure).  Do the arithmetic
	 * in u64 instead.
	 */
	blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize;
	pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
	while (blocks > 1) {
		if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
			fsverity_err(inode, "Too many levels in Merkle tree");
			err = -EINVAL;
			goto out_err;
		}
		blocks = (blocks + params->hashes_per_block - 1) >>
			 params->log_arity;
		/* temporarily using level_start[] to store blocks in level */
		params->level_start[params->num_levels++] = blocks;
	}

	/*
	 * Compute the starting block of each level, walking from the root
	 * (stored first) down to the leaf level (stored last).
	 */
	offset = 0;
	for (level = (int)params->num_levels - 1; level >= 0; level--) {
		blocks = params->level_start[level];
		params->level_start[level] = offset;
		pr_debug("Level %d is %llu blocks starting at index %llu\n",
			 level, blocks, offset);
		offset += blocks;
	}

	params->tree_size = offset << log_blocksize;
	return 0;

out_err:
	kfree(params->hashstate);
	memset(params, 0, sizeof(*params));
	return err;
}
/*
* Compute the file measurement by hashing the fsverity_descriptor excluding the
* signature and with the sig_size field set to 0.
*/
static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
				    struct fsverity_descriptor *desc,
				    u8 *measurement)
{
	const __le32 saved_sig_size = desc->sig_size;
	int ret;

	/* The measurement is defined over the descriptor with sig_size == 0 */
	desc->sig_size = 0;
	/* Only the fixed-size part is hashed; the trailing signature is not */
	ret = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
	/* Restore the caller's descriptor whether or not hashing succeeded */
	desc->sig_size = saved_sig_size;

	return ret;
}
/*
* Validate the given fsverity_descriptor and create a new fsverity_info from
* it. The signature (if present) is also checked.
*/
struct fsverity_info *fsverity_create_info(const struct inode *inode,
					   void *_desc, size_t desc_size)
{
	struct fsverity_descriptor *desc = _desc;
	struct fsverity_info *vi;
	int err;

	/* --- Sanity-check the descriptor before allocating anything --- */

	if (desc_size < sizeof(*desc)) {
		fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
			     desc_size);
		return ERR_PTR(-EINVAL);
	}

	if (desc->version != 1) {
		fsverity_err(inode, "Unrecognized descriptor version: %u",
			     desc->version);
		return ERR_PTR(-EINVAL);
	}

	if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
		fsverity_err(inode, "Reserved bits set in descriptor");
		return ERR_PTR(-EINVAL);
	}

	if (desc->salt_size > sizeof(desc->salt)) {
		fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
		return ERR_PTR(-EINVAL);
	}

	/* The tree must have been built over exactly the current file size */
	if (le64_to_cpu(desc->data_size) != inode->i_size) {
		fsverity_err(inode,
			     "Wrong data_size: %llu (desc) != %lld (inode)",
			     le64_to_cpu(desc->data_size), inode->i_size);
		return ERR_PTR(-EINVAL);
	}

	/* --- Build the fsverity_info --- */

	vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
	if (vi == NULL)
		return ERR_PTR(-ENOMEM);
	vi->inode = inode;

	err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
					       desc->hash_algorithm,
					       desc->log_blocksize,
					       desc->salt, desc->salt_size);
	if (err) {
		fsverity_err(inode,
			     "Error %d initializing Merkle tree parameters",
			     err);
		goto fail;
	}

	/* Only digest_size bytes of the 64-byte root_hash field are used */
	memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);

	err = compute_file_measurement(vi->tree_params.hash_alg, desc,
				       vi->measurement);
	if (err) {
		fsverity_err(inode, "Error %d computing file measurement", err);
		goto fail;
	}
	pr_debug("Computed file measurement: %s:%*phN\n",
		 vi->tree_params.hash_alg->name,
		 vi->tree_params.digest_size, vi->measurement);

	/* Last step: check the signature (if any) over the measurement */
	err = fsverity_verify_signature(vi, desc, desc_size);
	if (err)
		goto fail;

	return vi;

fail:
	fsverity_free_info(vi);
	return ERR_PTR(err);
}
/*
 * Attach @vi to @inode unless the inode already has a verity info, in which
 * case @vi is freed.  Either way the caller must not use @vi afterwards.
 */
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
{
	struct fsverity_info *existing;

	/*
	 * Several tasks can race to populate ->i_verity_info, hence cmpxchg:
	 * only the first one installs its info.  This pairs with the
	 * READ_ONCE() in fsverity_get_info().
	 */
	existing = cmpxchg(&inode->i_verity_info, NULL, vi);
	if (existing != NULL)
		fsverity_free_info(vi);
}
void fsverity_free_info(struct fsverity_info *vi)
{
if (!vi)
return;
kfree(vi->tree_params.hashstate);
kmem_cache_free(fsverity_info_cachep, vi);
}
/* Ensure the inode has an ->i_verity_info */
static int ensure_verity_info(struct inode *inode)
{
	struct fsverity_descriptor *desc;
	struct fsverity_info *vi;
	int res;

	/* Nothing to do if some earlier open already set it up */
	if (fsverity_get_info(inode) != NULL)
		return 0;

	/* First call: NULL buffer / zero length asks only for the size */
	res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
	if (res < 0) {
		fsverity_err(inode,
			     "Error %d getting verity descriptor size", res);
		return res;
	}
	if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
		fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
			     res);
		return -EMSGSIZE;
	}

	desc = kmalloc(res, GFP_KERNEL);
	if (desc == NULL)
		return -ENOMEM;

	/* Second call: actually read the descriptor into our buffer */
	res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
	if (res < 0) {
		fsverity_err(inode, "Error %d reading verity descriptor", res);
		goto out_free_desc;
	}

	vi = fsverity_create_info(inode, desc, res);
	if (IS_ERR(vi)) {
		res = PTR_ERR(vi);
	} else {
		/* Ownership of vi passes to the inode (or it is freed) */
		fsverity_set_info(inode, vi);
		res = 0;
	}

out_free_desc:
	/* The descriptor is only needed while building the info */
	kfree(desc);
	return res;
}
/**
* fsverity_file_open() - prepare to open a verity file
* @inode: the inode being opened
* @filp: the struct file being set up
*
* When opening a verity file, deny the open if it is for writing. Otherwise,
* set up the inode's ->i_verity_info if not already done.
*
* When combined with fscrypt, this must be called after fscrypt_file_open().
* Otherwise, we won't have the key set up to decrypt the verity metadata.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_file_open(struct inode *inode, struct file *filp)
{
	/* Non-verity files are not our business */
	if (!IS_VERITY(inode))
		return 0;

	/* Read-only opens just need the verity info to be present */
	if (!(filp->f_mode & FMODE_WRITE))
		return ensure_verity_info(inode);

	/* Verity files can never be opened for writing */
	pr_debug("Denying opening verity file (ino %lu) for write\n",
		 inode->i_ino);
	return -EPERM;
}
EXPORT_SYMBOL_GPL(fsverity_file_open);
/**
* fsverity_prepare_setattr() - prepare to change a verity inode's attributes
* @dentry: dentry through which the inode is being changed
* @attr: attributes to change
*
* Verity files are immutable, so deny truncates. This isn't covered by the
* open-time check because sys_truncate() takes a path, not a file descriptor.
*
* Return: 0 on success, -errno on failure
*/
int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
{
if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
pr_debug("Denying truncate of verity file (ino %lu)\n",
d_inode(dentry)->i_ino);
return -EPERM;
}
return 0;
}
EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
/**
* fsverity_cleanup_inode() - free the inode's verity info, if present
*
* Filesystems must call this on inode eviction to free ->i_verity_info.
*/
void fsverity_cleanup_inode(struct inode *inode)
{
/* fsverity_free_info() accepts NULL, so no "is it set?" check is needed */
fsverity_free_info(inode->i_verity_info);
/* Clear the pointer so the freed info cannot be reached via the inode */
inode->i_verity_info = NULL;
}
EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
/*
 * Create the slab cache that struct fsverity_info objects are allocated from.
 * The 'measurement' member is declared as the usercopy region of the cache.
 *
 * Return: 0 on success, -ENOMEM if the cache could not be created
 */
int __init fsverity_init_info_cache(void)
{
	fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
						   SLAB_RECLAIM_ACCOUNT,
						   measurement);
	return fsverity_info_cachep ? 0 : -ENOMEM;
}
/*
 * Undo fsverity_init_info_cache().  Called from the error path of
 * fsverity_init() when a later initialization step fails.
 */
void __init fsverity_exit_info_cache(void)
{
kmem_cache_destroy(fsverity_info_cachep);
/* Don't leave a dangling pointer to the destroyed cache */
fsverity_info_cachep = NULL;
}

164
fs/verity/signature.c Normal file
View File

@@ -0,0 +1,164 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/signature.c: verification of builtin signatures
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <linux/cred.h>
#include <linux/key.h>
#include <linux/slab.h>
#include <linux/verification.h>
/*
* /proc/sys/fs/verity/require_signatures
* If 1, all verity files must have a valid builtin signature.
*/
static int fsverity_require_signatures;
/*
* Keyring that contains the trusted X.509 certificates.
*
* Only root (kuid=0) can modify this. Also, root may use
* keyctl_restrict_keyring() to prevent any more additions.
*/
static struct key *fsverity_keyring;
/**
* fsverity_verify_signature() - check a verity file's signature
*
* If the file's fs-verity descriptor includes a signature of the file
* measurement, verify it against the certificates in the fs-verity keyring.
*
* Return: 0 on success (signature valid or not required); -errno on failure
*/
int fsverity_verify_signature(const struct fsverity_info *vi,
			      const struct fsverity_descriptor *desc,
			      size_t desc_size)
{
	const struct inode *inode = vi->inode;
	const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg;
	const u32 sig_size = le32_to_cpu(desc->sig_size);
	const size_t signed_len = sizeof(struct fsverity_signed_digest) +
				  hash_alg->digest_size;
	struct fsverity_signed_digest *d;
	int err;

	/* Unsigned file: acceptable unless signatures are mandatory */
	if (sig_size == 0) {
		if (!fsverity_require_signatures)
			return 0;
		fsverity_err(inode,
			     "require_signatures=1, rejecting unsigned file!");
		return -EPERM;
	}

	/* The signature must lie entirely within the descriptor buffer */
	if (sig_size > desc_size - sizeof(*desc)) {
		fsverity_err(inode, "Signature overflows verity descriptor");
		return -EBADMSG;
	}

	/* Build the exact byte string that is expected to have been signed */
	d = kzalloc(signed_len, GFP_KERNEL);
	if (d == NULL)
		return -ENOMEM;
	memcpy(d->magic, "FSVerity", 8);
	d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs);
	d->digest_size = cpu_to_le16(hash_alg->digest_size);
	memcpy(d->digest, vi->measurement, hash_alg->digest_size);

	err = verify_pkcs7_signature(d, signed_len,
				     desc->signature, sig_size,
				     fsverity_keyring,
				     VERIFYING_UNSPECIFIED_SIGNATURE,
				     NULL, NULL);
	kfree(d);

	if (err == 0) {
		pr_debug("Valid signature for file measurement %s:%*phN\n",
			 hash_alg->name, hash_alg->digest_size,
			 vi->measurement);
		return 0;
	}

	/* Give the most common failures a human-readable explanation */
	switch (err) {
	case -ENOKEY:
		fsverity_err(inode,
			     "File's signing cert isn't in the fs-verity keyring");
		break;
	case -EKEYREJECTED:
		fsverity_err(inode, "Incorrect file signature");
		break;
	case -EBADMSG:
		fsverity_err(inode, "Malformed file signature");
		break;
	default:
		fsverity_err(inode, "Error %d verifying file signature",
			     err);
		break;
	}
	return err;
}
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *fsverity_sysctl_header;
/*
 * Directory the sysctl table is registered under: "fs" then "verity", i.e.
 * /proc/sys/fs/verity/.  The empty entry terminates the list.
 */
static const struct ctl_path fsverity_sysctl_path[] = {
{ .procname = "fs", },
{ .procname = "verity", },
{ }
};
/* shared constants to be used in various sysctls */
static int sysctl_vals[] = { 0, 1, INT_MAX };
#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
#define SYSCTL_ONE ((void *)&sysctl_vals[1])
/* NOTE(review): SYSCTL_INT_MAX is not referenced by the table below */
#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
/*
 * The fs-verity sysctls.  "require_signatures" is backed by the
 * fsverity_require_signatures int and is range-checked by
 * proc_dointvec_minmax with extra1/extra2 as bounds, so only 0 and 1 are
 * accepted.  The empty entry terminates the table.
 */
static struct ctl_table fsverity_sysctl_table[] = {
{
.procname = "require_signatures",
.data = &fsverity_require_signatures,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
/*
 * Register the fs-verity sysctl table under fsverity_sysctl_path.
 *
 * Return: 0 on success, -ENOMEM if registration failed
 */
static int __init fsverity_sysctl_init(void)
{
	fsverity_sysctl_header = register_sysctl_paths(fsverity_sysctl_path,
						       fsverity_sysctl_table);
	if (fsverity_sysctl_header)
		return 0;

	pr_err("sysctl registration failed!\n");
	return -ENOMEM;
}
#else /* !CONFIG_SYSCTL */
/* Without CONFIG_SYSCTL there is nothing to register; always succeeds. */
static inline int __init fsverity_sysctl_init(void)
{
return 0;
}
#endif /* !CONFIG_SYSCTL */
/*
 * Set up builtin signature support: allocate the ".fs-verity" keyring and
 * register the sysctls.  The keyring is only published in fsverity_keyring
 * once both steps have succeeded.
 *
 * Return: 0 on success, -errno on failure
 */
int __init fsverity_init_signature(void)
{
	struct key *ring;
	int err;

	ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
			     current_cred(), KEY_POS_SEARCH |
				KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
				KEY_USR_SEARCH | KEY_USR_SETATTR,
			     KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
	if (IS_ERR(ring))
		return PTR_ERR(ring);

	err = fsverity_sysctl_init();
	if (err) {
		/* Drop our reference to the keyring we just allocated */
		key_put(ring);
		return err;
	}

	fsverity_keyring = ring;
	return 0;
}

281
fs/verity/verify.c Normal file
View File

@@ -0,0 +1,281 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages()
*
* Copyright 2019 Google LLC
*/
#include "fsverity_private.h"
#include <crypto/hash.h>
#include <linux/bio.h>
#include <linux/ratelimit.h>
static struct workqueue_struct *fsverity_read_workqueue;
/**
* hash_at_level() - compute the location of the block's hash at the given level
*
* @params: (in) the Merkle tree parameters
* @dindex: (in) the index of the data block being verified
* @level: (in) the level of hash we want (0 is leaf level)
* @hindex: (out) the index of the hash block containing the wanted hash
* @hoffset: (out) the byte offset to the wanted hash within the hash block
*/
static void hash_at_level(const struct merkle_tree_params *params,
			  pgoff_t dindex, unsigned int level, pgoff_t *hindex,
			  unsigned int *hoffset)
{
	const unsigned int log_arity = params->log_arity;
	/* log2 of the size of one hash: block size divided by hashes/block */
	const unsigned int log_hash_size = params->log_blocksize - log_arity;
	/* Position of the wanted hash within this level, counted in hashes */
	const pgoff_t pos = dindex >> (level * log_arity);

	/* Hash block holding it, as an index into the tree as a whole */
	*hindex = params->level_start[level] + (pos >> log_arity);

	/* Byte offset of the hash inside that block */
	*hoffset = (pos & ((1 << log_arity) - 1)) << log_hash_size;
}
/* Extract a hash from a hash page */
static void extract_hash(struct page *hpage, unsigned int hoffset,
			 unsigned int hsize, u8 *out)
{
	/* Map the page just long enough to copy @hsize bytes at @hoffset */
	u8 *base = kmap_atomic(hpage);

	memcpy(out, base + hoffset, hsize);
	kunmap_atomic(base);
}
/*
 * Compare the expected hash against the one actually computed.  A mismatch is
 * logged together with the data page index and tree level it was found at.
 *
 * Return: 0 if the hashes match, -EBADMSG otherwise
 */
static inline int cmp_hashes(const struct fsverity_info *vi,
			     const u8 *want_hash, const u8 *real_hash,
			     pgoff_t index, int level)
{
	const unsigned int hsize = vi->tree_params.digest_size;

	if (memcmp(want_hash, real_hash, hsize) != 0) {
		fsverity_err(vi->inode,
			     "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
			     index, level,
			     vi->tree_params.hash_alg->name, hsize, want_hash,
			     vi->tree_params.hash_alg->name, hsize, real_hash);
		return -EBADMSG;
	}
	return 0;
}
/*
 * Verify a single data page against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node. However,
 * for efficiency the filesystem may cache the hash pages. Therefore we need
 * only ascend the tree until an already-verified page is seen, as indicated by
 * the PageChecked bit being set; then verify the path to that page.
 *
 * This code currently only supports the case where the verity block size is
 * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we
 * wouldn't be able to use the PageChecked bit.
 *
 * Note that multiple processes may race to verify a hash page and mark it
 * Checked, but it doesn't matter; the result will be the same either way.
 *
 * @inode:     inode the data page belongs to; its superblock's ->s_vop is
 *             used to read Merkle tree pages
 * @vi:        verity info of the file (tree parameters and root hash)
 * @req:       preallocated hash request, passed to fsverity_hash_page()
 * @data_page: the data page to verify; must be locked and not yet uptodate
 *
 * Return: true if the page is valid, else false.
 */
static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
struct ahash_request *req, struct page *data_page)
{
const struct merkle_tree_params *params = &vi->tree_params;
const unsigned int hsize = params->digest_size;
const pgoff_t index = data_page->index;
int level;
/* Scratch buffer for a hash copied out of a hash page */
u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
/* Points at either _want_hash or vi->root_hash */
const u8 *want_hash;
u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
/* Hash pages saved on the way up, with the offset of the wanted hash in each */
struct page *hpages[FS_VERITY_MAX_LEVELS];
unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
int err;
/* Reject (and warn once about) pages not in the expected state */
if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
return false;
pr_debug_ratelimited("Verifying data page %lu...\n", index);
/*
 * Starting at the leaf level, ascend the tree saving hash pages along
 * the way until we find a verified hash page, indicated by PageChecked;
 * or until we reach the root.
 */
for (level = 0; level < params->num_levels; level++) {
pgoff_t hindex;
unsigned int hoffset;
struct page *hpage;
hash_at_level(params, index, level, &hindex, &hoffset);
pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
level, hindex, hoffset);
hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
hindex);
if (IS_ERR(hpage)) {
err = PTR_ERR(hpage);
fsverity_err(inode,
"Error %d reading Merkle tree page %lu",
err, hindex);
/* 'out' releases the pages saved at levels below this one */
goto out;
}
if (PageChecked(hpage)) {
/*
 * Already-verified hash page: take the wanted hash from it and
 * drop the reference right away, since this page is not saved
 * in hpages[] and 'level' is not incremented for it.
 */
extract_hash(hpage, hoffset, hsize, _want_hash);
want_hash = _want_hash;
put_page(hpage);
pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
params->hash_alg->name,
hsize, want_hash);
goto descend;
}
pr_debug_ratelimited("Hash page not yet checked\n");
hpages[level] = hpage;
hoffsets[level] = hoffset;
}
/*
 * No checked hash page was found on the way up (level == num_levels
 * here), so the topmost saved page is compared against the root hash.
 */
want_hash = vi->root_hash;
pr_debug("Want root hash: %s:%*phN\n",
params->hash_alg->name, hsize, want_hash);
descend:
/* Descend the tree verifying hash pages */
/* 'level' is the number of saved pages still holding a reference */
for (; level > 0; level--) {
struct page *hpage = hpages[level - 1];
unsigned int hoffset = hoffsets[level - 1];
err = fsverity_hash_page(params, inode, req, hpage, real_hash);
if (err)
goto out;
err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
if (err)
goto out;
/* Mark the page verified only after its hash has matched */
SetPageChecked(hpage);
/* The hash stored in this page is what the next level down must match */
extract_hash(hpage, hoffset, hsize, _want_hash);
want_hash = _want_hash;
put_page(hpage);
pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
level - 1, params->hash_alg->name, hsize, want_hash);
}
/* Finally, verify the data page */
err = fsverity_hash_page(params, inode, req, data_page, real_hash);
if (err)
goto out;
/* -1 is passed as the level reported in the error message for the data page */
err = cmp_hashes(vi, want_hash, real_hash, index, -1);
out:
/* Release any saved hash pages that the descend loop did not consume */
for (; level > 0; level--)
put_page(hpages[level - 1]);
return err == 0;
}
/**
 * fsverity_verify_page() - verify a data page
 * @page: the page to verify
 *
 * Check a page that has just been read from a verity file against the file's
 * Merkle tree.  The page must be a pagecache page that is still locked and
 * not yet uptodate.  A hash request is allocated with GFP_NOFS for the
 * duration of the call; allocation failure is reported as an invalid page.
 *
 * Return: true if the page is valid, else false.
 */
bool fsverity_verify_page(struct page *page)
{
	struct inode *inode = page->mapping->host;
	const struct fsverity_info *vi = inode->i_verity_info;
	struct ahash_request *req =
		ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
	bool ok = false;

	if (likely(req)) {
		ok = verify_page(inode, vi, req, page);
		ahash_request_free(req);
	}

	return ok;
}
EXPORT_SYMBOL_GPL(fsverity_verify_page);
#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio whose pages are to be verified
 *
 * Verify a set of pages that have just been read from a verity file.  The
 * pages must be pagecache pages that are still locked and not yet uptodate.
 * Pages that fail verification are set to the Error state.  Verification is
 * skipped for pages already in the Error state, e.g. due to fscrypt
 * decryption failure.  If the hash request cannot be allocated, every page
 * in the bio is set to the Error state.
 *
 * This is a helper function for use by the ->readpages() method of
 * filesystems that issue bios to read data directly into the page cache.
 * Filesystems that populate the page cache without issuing bios (e.g. non
 * block-based filesystems) must instead call fsverity_verify_page() directly
 * on each page.  All filesystems must also call fsverity_verify_page() on
 * holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_page_all(bio)->mapping->host;
	const struct fsverity_info *vi = inode->i_verity_info;
	struct ahash_request *req;
	struct bio_vec *bv;
	int i;

	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
	if (unlikely(!req)) {
		/* Nothing can be verified without a request: fail every page */
		bio_for_each_segment_all(bv, bio, i)
			SetPageError(bv->bv_page);
		return;
	}

	bio_for_each_segment_all(bv, bio, i) {
		struct page *pg = bv->bv_page;

		/* Leave pages that already failed earlier in the read path */
		if (PageError(pg))
			continue;

		if (!verify_page(inode, vi, req, pg))
			SetPageError(pg);
	}

	ahash_request_free(req);
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */
/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work item to queue
 *
 * Enqueue verification work for asynchronous processing. The work is queued
 * on fsverity_read_workqueue; the return value of queue_work() is not
 * propagated to the caller.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
/*
 * Allocate the workqueue used for fs-verity read verification work.
 *
 * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
 */
int __init fsverity_init_workqueue(void)
{
	/*
	 * The queue is unbound so that bios can be verified in parallel even
	 * when they happen to complete on the same CPU; hashing is
	 * CPU-intensive, so losing locality is an acceptable price.
	 *
	 * It is also high-priority: verification work blocks reads from
	 * completing, so it is prioritized over regular application tasks.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_UNBOUND | WQ_HIGHPRI,
						  num_online_cpus());

	return fsverity_read_workqueue ? 0 : -ENOMEM;
}
/*
 * Destroy the fs-verity read workqueue and clear the global pointer to it.
 *
 * NOTE(review): this teardown helper is marked __init, so presumably it is
 * only called from an initialization error path -- confirm against callers.
 */
void __init fsverity_exit_workqueue(void)
{
destroy_workqueue(fsverity_read_workqueue);
fsverity_read_workqueue = NULL;
}

View File

@@ -36,11 +36,17 @@
#define F2FS_MAX_QUOTAS 3
/*
 * Filename encoding id and encoding flag bit.
 * NOTE(review): presumably the values stored in the superblock's s_encoding
 * and s_encoding_flags fields -- confirm against the users of these macros.
 */
#define F2FS_ENC_UTF8_12_1	1
#define F2FS_ENC_STRICT_MODE_FL	(1 << 0)
/*
 * Evaluates to non-zero when F2FS_ENC_STRICT_MODE_FL is set in
 * sbi->s_encoding_flags.  The argument is parenthesized so that any
 * pointer-valued expression (e.g. "&array[i]" or "base + 1") expands
 * correctly instead of binding "->" to only its last operand.
 */
#define f2fs_has_strict_mode(sbi) \
	((sbi)->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL)
/*
 * Write I/O size helpers, all derived from the write_io_size_bits option.
 * The same power of two is expressed in blocks, KB (shift + 2) and bytes
 * (shift + 12), i.e. the conversions correspond to 4 KB (2^12 byte) blocks.
 * F2FS_IO_ALIGNED() is true when the I/O size is larger than a single block.
 */
#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */
#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */
#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1)
/* This flag is used by node and meta inodes, and by recovery */
#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
@@ -109,7 +115,9 @@ struct f2fs_super_block {
struct f2fs_device devs[MAX_DEVICES]; /* device list */
__le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
__u8 hot_ext_count; /* # of hot file extension */
__u8 reserved[310]; /* valid reserved region */
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__u8 reserved[306]; /* valid reserved region */
__le32 crc; /* checksum of superblock */
} __packed;

View File

@@ -61,6 +61,8 @@ struct workqueue_struct;
struct iov_iter;
struct fscrypt_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -692,6 +694,10 @@ struct inode {
struct fscrypt_info *i_crypt_info;
#endif
#ifdef CONFIG_FS_VERITY
struct fsverity_info *i_verity_info;
#endif
void *i_private; /* fs or device private pointer */
} __randomize_layout;
@@ -1390,6 +1396,10 @@ struct super_block {
const struct xattr_handler **s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
const struct fscrypt_operations *s_cop;
struct key *s_master_keys; /* master crypto keys in use */
#endif
#ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
@@ -1927,6 +1937,8 @@ struct super_operations {
#define S_DAX 0 /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD 32768 /* Casefolded file */
#define S_VERITY 65536 /* Verity file (using fs/verity/) */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1967,6 +1979,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD)
#define IS_VERITY(inode) ((inode)->i_flags & S_VERITY)
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)

View File

@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <uapi/linux/fscrypt.h>
#define FS_CRYPTO_BLOCK_SIZE 16
@@ -42,7 +43,7 @@ struct fscrypt_name {
#define fname_len(p) ((p)->disk_name.len)
/* Maximum value for the third parameter of fscrypt_operations.set_context(). */
#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28
#define FSCRYPT_SET_CONTEXT_MAX_SIZE 40
#ifdef CONFIG_FS_ENCRYPTION
/*
@@ -134,13 +135,23 @@ extern void fscrypt_free_bounce_page(struct page *bounce_page);
/* policy.c */
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
/* keyinfo.c */
/* keyring.c */
extern void fscrypt_sb_free(struct super_block *sb);
extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
extern int fscrypt_ioctl_remove_key_all_users(struct file *filp,
void __user *arg);
extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
/* keysetup.c */
extern int fscrypt_get_encryption_info(struct inode *);
extern void fscrypt_put_encryption_info(struct inode *);
extern void fscrypt_free_inode(struct inode *);
extern int fscrypt_drop_inode(struct inode *inode);
/* fname.c */
extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
@@ -348,6 +359,12 @@ static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
return -EOPNOTSUPP;
}
/*
 * Stub that unconditionally reports the operation as unsupported.
 * NOTE(review): presumably the variant compiled when CONFIG_FS_ENCRYPTION is
 * disabled -- the enclosing #else is outside this view; confirm.
 */
static inline int fscrypt_ioctl_get_policy_ex(struct file *filp,
void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_has_permitted_context(struct inode *parent,
struct inode *child)
{
@@ -361,7 +378,34 @@ static inline int fscrypt_inherit_context(struct inode *parent,
return -EOPNOTSUPP;
}
/* keyinfo.c */
/* keyring.c */
/*
 * Stubs for the keyring interface: the superblock hook does nothing and
 * every ioctl handler returns -EOPNOTSUPP.
 * NOTE(review): presumably the variants compiled when CONFIG_FS_ENCRYPTION
 * is disabled -- the enclosing #else is outside this view; confirm.
 */
static inline void fscrypt_sb_free(struct super_block *sb)
{
}
static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_ioctl_remove_key_all_users(struct file *filp,
void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_ioctl_get_key_status(struct file *filp,
void __user *arg)
{
return -EOPNOTSUPP;
}
/* keysetup.c */
static inline int fscrypt_get_encryption_info(struct inode *inode)
{
return -EOPNOTSUPP;
@@ -376,6 +420,11 @@ static inline void fscrypt_free_inode(struct inode *inode)
{
}
/*
 * Stub that always returns 0.
 * NOTE(review): presumably the variant compiled when CONFIG_FS_ENCRYPTION is
 * disabled, with 0 meaning "no fscrypt reason to drop the inode" -- confirm.
 */
static inline int fscrypt_drop_inode(struct inode *inode)
{
return 0;
}
/* fname.c */
static inline int fscrypt_setup_filename(struct inode *dir,
const struct qstr *iname,

211
include/linux/fsverity.h Normal file
View File

@@ -0,0 +1,211 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs-verity: read-only file-based authenticity protection
*
* This header declares the interface between the fs/verity/ support layer and
* filesystems that support fs-verity.
*
* Copyright 2019 Google LLC
*/
#ifndef _LINUX_FSVERITY_H
#define _LINUX_FSVERITY_H
#include <linux/fs.h>
#include <uapi/linux/fsverity.h>
/*
 * Verity operations for filesystems.
 *
 * Table of callbacks through which the fs/verity/ support layer asks the
 * filesystem to prepare for and finish enabling verity, to return the stored
 * verity descriptor, and to read and write Merkle tree data.
 */
struct fsverity_operations {
/**
 * Begin enabling verity on the given file.
 *
 * @filp: a readonly file descriptor for the file
 *
 * The filesystem must do any needed filesystem-specific preparations
 * for enabling verity, e.g. evicting inline data. It also must return
 * -EBUSY if verity is already being enabled on the given file.
 *
 * i_rwsem is held for write.
 *
 * Return: 0 on success, -errno on failure
 */
int (*begin_enable_verity)(struct file *filp);
/**
 * End enabling verity on the given file.
 *
 * @filp: a readonly file descriptor for the file
 * @desc: the verity descriptor to write, or NULL on failure
 * @desc_size: size of verity descriptor, or 0 on failure
 * @merkle_tree_size: total bytes the Merkle tree took up
 *
 * If desc == NULL, then enabling verity failed and the filesystem only
 * must do any necessary cleanups. Else, it must also store the given
 * verity descriptor to a fs-specific location associated with the inode
 * and do any fs-specific actions needed to mark the inode as a verity
 * inode, e.g. setting a bit in the on-disk inode. The filesystem is
 * also responsible for setting the S_VERITY flag in the VFS inode.
 *
 * i_rwsem is held for write, but it may have been dropped between
 * ->begin_enable_verity() and ->end_enable_verity().
 *
 * Return: 0 on success, -errno on failure
 */
int (*end_enable_verity)(struct file *filp, const void *desc,
size_t desc_size, u64 merkle_tree_size);
/**
 * Get the verity descriptor of the given inode.
 *
 * @inode: an inode with the S_VERITY flag set
 * @buf: buffer in which to place the verity descriptor
 * @bufsize: size of @buf, or 0 to retrieve the size only
 *
 * If bufsize == 0, then the size of the verity descriptor is returned.
 * Otherwise the verity descriptor is written to 'buf' and its actual
 * size is returned; -ERANGE is returned if it's too large. This may be
 * called by multiple processes concurrently on the same inode.
 *
 * Return: the size on success, -errno on failure
 */
int (*get_verity_descriptor)(struct inode *inode, void *buf,
size_t bufsize);
/**
 * Read a Merkle tree page of the given inode.
 *
 * @inode: the inode
 * @index: 0-based index of the page within the Merkle tree
 *
 * This can be called at any time on an open verity file, as well as
 * between ->begin_enable_verity() and ->end_enable_verity(). It may be
 * called by multiple processes concurrently, even with the same page.
 *
 * Note that this must retrieve a *page*, not necessarily a *block*.
 *
 * Return: the page on success, ERR_PTR() on failure
 */
struct page *(*read_merkle_tree_page)(struct inode *inode,
pgoff_t index);
/**
 * Write a Merkle tree block to the given inode.
 *
 * @inode: the inode for which the Merkle tree is being built
 * @buf: block to write
 * @index: 0-based index of the block within the Merkle tree
 * @log_blocksize: log base 2 of the Merkle tree block size
 *
 * This is only called between ->begin_enable_verity() and
 * ->end_enable_verity().
 *
 * Return: 0 on success, -errno on failure
 */
int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
u64 index, int log_blocksize);
};
#ifdef CONFIG_FS_VERITY
/*
 * Return the inode's ->i_verity_info pointer, read with READ_ONCE().
 * The result is NULL when no verity info has been set on the inode
 * (fsverity_active() relies on this).
 */
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
/* pairs with the cmpxchg() in fsverity_set_info() */
return READ_ONCE(inode->i_verity_info);
}
/* enable.c */
extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
/* measure.c */
extern int fsverity_ioctl_measure(struct file *filp, void __user *arg);
/* open.c */
extern int fsverity_file_open(struct inode *inode, struct file *filp);
extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
extern void fsverity_cleanup_inode(struct inode *inode);
/* verify.c */
extern bool fsverity_verify_page(struct page *page);
extern void fsverity_verify_bio(struct bio *bio);
extern void fsverity_enqueue_verify_work(struct work_struct *work);
#else /* !CONFIG_FS_VERITY */
/*
 * !CONFIG_FS_VERITY stubs: no inode ever has verity info, and the verity
 * ioctls are reported as unsupported.
 */
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
return NULL;
}
/* enable.c */
static inline int fsverity_ioctl_enable(struct file *filp,
const void __user *arg)
{
return -EOPNOTSUPP;
}
/* measure.c */
static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
}
/* open.c */
/*
 * !CONFIG_FS_VERITY stubs.  Opening or changing the attributes of an inode
 * that carries S_VERITY is refused with -EOPNOTSUPP; all other inodes are
 * unaffected.  Inode cleanup has nothing to do.
 */
static inline int fsverity_file_open(struct inode *inode, struct file *filp)
{
	if (IS_VERITY(inode))
		return -EOPNOTSUPP;

	return 0;
}
static inline int fsverity_prepare_setattr(struct dentry *dentry,
					   struct iattr *attr)
{
	if (IS_VERITY(d_inode(dentry)))
		return -EOPNOTSUPP;

	return 0;
}
static inline void fsverity_cleanup_inode(struct inode *inode)
{
}
/* verify.c */
/*
 * !CONFIG_FS_VERITY stubs for the verification entry points.  Each one
 * triggers WARN_ON(1) when called; fsverity_verify_page() additionally
 * reports the page as invalid.
 */
static inline bool fsverity_verify_page(struct page *page)
{
WARN_ON(1);
return false;
}
static inline void fsverity_verify_bio(struct bio *bio)
{
WARN_ON(1);
}
static inline void fsverity_enqueue_verify_work(struct work_struct *work)
{
WARN_ON(1);
}
#endif /* !CONFIG_FS_VERITY */
/**
 * fsverity_active() - do reads from the inode need to go through fs-verity?
 * @inode: the inode to check
 *
 * This checks whether ->i_verity_info has been set.
 *
 * Filesystems call this from ->readpages() to decide whether the pages need
 * to be verified.  IS_VERITY() must not be used for that purpose: it is
 * subject to a race where the file is being read concurrently with
 * FS_IOC_ENABLE_VERITY completing.  (S_VERITY is set before
 * ->i_verity_info.)
 *
 * Return: true if verity info is present on the inode, else false.
 */
static inline bool fsverity_active(const struct inode *inode)
{
	const struct fsverity_info *vi = fsverity_get_info(inode);

	return vi != NULL;
}
#endif /* _LINUX_FSVERITY_H */

33
include/linux/unicode.h Normal file
View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_UNICODE_H
#define _LINUX_UNICODE_H
#include <linux/init.h>
#include <linux/dcache.h>
/*
 * Handle passed as the first argument to the utf8_*() helpers declared in
 * this header; obtained from utf8_load() and released with utf8_unload().
 */
struct unicode_map {
/* NOTE(review): presumably the name of the character set -- confirm */
const char *charset;
/* NOTE(review): presumably an encoded Unicode version number -- confirm */
int version;
};
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
struct unicode_map *utf8_load(const char *version);
void utf8_unload(struct unicode_map *um);
#endif /* _LINUX_UNICODE_H */

View File

@@ -13,6 +13,9 @@
#include <linux/limits.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#ifndef __KERNEL__
#include <linux/fscrypt.h>
#endif
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -258,57 +261,6 @@ struct fsxattr {
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
/*
* File system encryption support
*/
/* Policy provided via an ioctl on the topmost directory */
#define FS_KEY_DESCRIPTOR_SIZE 8
#define FS_POLICY_FLAGS_PAD_4 0x00
#define FS_POLICY_FLAGS_PAD_8 0x01
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */
#define FS_POLICY_FLAGS_VALID 0x07
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
#define FS_ENCRYPTION_MODE_AES_256_XTS 1
#define FS_ENCRYPTION_MODE_AES_256_GCM 2
#define FS_ENCRYPTION_MODE_AES_256_CBC 3
#define FS_ENCRYPTION_MODE_AES_256_CTS 4
#define FS_ENCRYPTION_MODE_AES_128_CBC 5
#define FS_ENCRYPTION_MODE_AES_128_CTS 6
#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */
#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */
#define FS_ENCRYPTION_MODE_ADIANTUM 9
struct fscrypt_policy {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
};
#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
/* Parameters for passing an encryption key into the kernel keyring */
#define FS_KEY_DESC_PREFIX "fscrypt:"
#define FS_KEY_DESC_PREFIX_SIZE 8
/* Structure that userspace passes to the kernel keyring */
#define FS_MAX_KEY_SIZE 64
struct fscrypt_key {
__u32 mode;
__u8 raw[FS_MAX_KEY_SIZE];
__u32 size;
};
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*
@@ -352,11 +304,13 @@ struct fscrypt_key {
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL 0x00080000 /* Extents */
#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */

View File

@@ -0,0 +1,181 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* fscrypt user API
*
* These ioctls can be used on filesystems that support fscrypt. See the
* "User API" section of Documentation/filesystems/fscrypt.rst.
*/
#ifndef _UAPI_LINUX_FSCRYPT_H
#define _UAPI_LINUX_FSCRYPT_H
#include <linux/types.h>
/* Encryption policy flags */
#define FSCRYPT_POLICY_FLAGS_PAD_4 0x00
#define FSCRYPT_POLICY_FLAGS_PAD_8 0x01
#define FSCRYPT_POLICY_FLAGS_PAD_16 0x02
#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
#define FSCRYPT_POLICY_FLAGS_VALID 0x07
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
#define FSCRYPT_MODE_AES_256_CTS 4
#define FSCRYPT_MODE_AES_128_CBC 5
#define FSCRYPT_MODE_AES_128_CTS 6
#define FSCRYPT_MODE_ADIANTUM 9
#define __FSCRYPT_MODE_MAX 9
/*
 * Legacy policy version; ad-hoc KDF and no key verification.
 * For new encrypted directories, use fscrypt_policy_v2 instead.
 *
 * Careful: the .version field for this is actually 0, not 1.
 *
 * The structure is 12 bytes: four single-byte fields followed by the
 * FSCRYPT_KEY_DESCRIPTOR_SIZE-byte master key descriptor.  The old name
 * "fscrypt_policy" is kept as an alias for it.
 */
#define FSCRYPT_POLICY_V1 0
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
struct fscrypt_policy_v1 {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
};
#define fscrypt_policy fscrypt_policy_v1
/*
 * Process-subscribed "logon" key description prefix and payload format.
 * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead.
 *
 * The payload holds up to FSCRYPT_MAX_KEY_SIZE raw bytes.
 * NOTE(review): presumably 'size' is the number of valid bytes in 'raw'
 * -- confirm.
 */
#define FSCRYPT_KEY_DESC_PREFIX "fscrypt:"
#define FSCRYPT_KEY_DESC_PREFIX_SIZE 8
#define FSCRYPT_MAX_KEY_SIZE 64
struct fscrypt_key {
__u32 mode;
__u8 raw[FSCRYPT_MAX_KEY_SIZE];
__u32 size;
};
/*
 * New policy version with HKDF and key verification (recommended).
 *
 * The first four fields have the same names and order as in
 * fscrypt_policy_v1.  They are followed by 4 reserved bytes and a
 * FSCRYPT_KEY_IDENTIFIER_SIZE-byte master key identifier instead of the
 * v1 key descriptor.
 */
#define FSCRYPT_POLICY_V2 2
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
struct fscrypt_policy_v2 {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 __reserved[4];
__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
};
/*
 * Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX.
 *
 * The 'policy' union overlays the v1 and v2 policy structs; both begin with
 * a one-byte version field, which is also exposed as policy.version.
 */
struct fscrypt_get_policy_ex_arg {
__u64 policy_size; /* input/output */
union {
__u8 version;
struct fscrypt_policy_v1 v1;
struct fscrypt_policy_v2 v2;
} policy; /* output */
};
/*
 * v1 policy keys are specified by an arbitrary 8-byte key "descriptor",
 * matching fscrypt_policy_v1::master_key_descriptor.
 */
#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1
/*
 * v2 policy keys are specified by a 16-byte key "identifier" which the kernel
 * calculates as a cryptographic hash of the key itself,
 * matching fscrypt_policy_v2::master_key_identifier.
 */
#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2
/*
 * Specifies a key, either for v1 or v2 policies. This doesn't contain the
 * actual key itself; this is just the "name" of the key.
 *
 * The union is padded to 32 bytes by its __reserved member, so the struct
 * has the same size whichever of 'descriptor' or 'identifier' is used.
 */
struct fscrypt_key_specifier {
__u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */
__u32 __reserved;
union {
__u8 __reserved[32]; /* reserve some extra space */
__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
__u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
} u;
};
/*
 * Struct passed to FS_IOC_ADD_ENCRYPTION_KEY.
 *
 * 'raw' is a flexible array member, so the key bytes follow the fixed-size
 * header directly.
 * NOTE(review): presumably 'raw_size' is the length of 'raw' in bytes
 * -- confirm.
 */
struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 __reserved[9];
__u8 raw[];
};
/*
 * Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY (and, per the ioctl
 * definitions in this header, FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS).
 *
 * removal_status_flags is an output field; the
 * FSCRYPT_KEY_REMOVAL_STATUS_FLAG_* bits are defined next to it.
 */
struct fscrypt_remove_key_arg {
struct fscrypt_key_specifier key_spec;
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002
__u32 removal_status_flags; /* output */
__u32 __reserved[5];
};
/*
 * Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS.
 *
 * The struct is split into an input part (key_spec plus reserved words) and
 * an output part (status, status_flags, user_count plus reserved words).
 * The constants for 'status' and 'status_flags' are defined inline next to
 * the fields they apply to.
 */
struct fscrypt_get_key_status_arg {
/* input */
struct fscrypt_key_specifier key_spec;
__u32 __reserved[6];
/* output */
#define FSCRYPT_KEY_STATUS_ABSENT 1
#define FSCRYPT_KEY_STATUS_PRESENT 2
#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3
__u32 status;
#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001
__u32 status_flags;
__u32 user_count;
__u32 __out_reserved[13];
};
#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
#define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */
#define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg)
#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg)
#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg)
#define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg)
/**********************************************************************/
/* old names; don't add anything new here! */
#ifndef __KERNEL__
#define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE
#define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4
#define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8
#define FS_POLICY_FLAGS_PAD_16 FSCRYPT_POLICY_FLAGS_PAD_16
#define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32
#define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK
#define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY
#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID
#define FS_ENCRYPTION_MODE_INVALID 0 /* never used */
#define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS
#define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */
#define FS_ENCRYPTION_MODE_AES_256_CBC 3 /* never used */
#define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS
#define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC
#define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS
#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */
#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */
#define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM
#define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX
#define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE
#define FS_MAX_KEY_SIZE FSCRYPT_MAX_KEY_SIZE
#endif /* !__KERNEL__ */
#endif /* _UAPI_LINUX_FSCRYPT_H */

View File

@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* fs-verity user API
*
* These ioctls can be used on filesystems that support fs-verity. See the
* "User API" section of Documentation/filesystems/fsverity.rst.
*
* Copyright 2019 Google LLC
*/
#ifndef _UAPI_LINUX_FSVERITY_H
#define _UAPI_LINUX_FSVERITY_H
#include <linux/ioctl.h>
#include <linux/types.h>
#define FS_VERITY_HASH_ALG_SHA256 1
#define FS_VERITY_HASH_ALG_SHA512 2
/*
 * Argument of FS_IOC_ENABLE_VERITY.
 *
 * NOTE(review): presumably hash_algorithm takes one of the
 * FS_VERITY_HASH_ALG_* values, and salt_ptr / sig_ptr are userspace pointers
 * carried as 64-bit integers with salt_size / sig_size giving their lengths
 * -- confirm against Documentation/filesystems/fsverity.rst.
 */
struct fsverity_enable_arg {
__u32 version;
__u32 hash_algorithm;
__u32 block_size;
__u32 salt_size;
__u64 salt_ptr;
__u32 sig_size;
__u32 __reserved1;
__u64 sig_ptr;
__u64 __reserved2[11];
};
/*
 * Argument of FS_IOC_MEASURE_VERITY.
 *
 * 'digest' is a flexible array member that follows the 4-byte header.
 * NOTE(review): presumably digest_size is the buffer capacity on input and
 * the actual digest length on output -- confirm.
 */
struct fsverity_digest {
__u16 digest_algorithm;
__u16 digest_size; /* input/output */
__u8 digest[];
};
#define FS_IOC_ENABLE_VERITY _IOW('f', 133, struct fsverity_enable_arg)
#define FS_IOC_MEASURE_VERITY _IOWR('f', 134, struct fsverity_digest)
#endif /* _UAPI_LINUX_FSVERITY_H */