mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 02:50:49 +09:00
Merge remote-tracking branch 'aosp/upstream-f2fs-stable-linux-5.10.y' into android13-5.10
* aosp/upstream-f2fs-stable-linux-5.10.y: fscrypt: fix derivation of SipHash keys on big endian CPUs fscrypt: don't ignore minor_hash when hash is 0 erofs: remove useless cache strategy of DELAYEDALLOC erofs: fix unsafe pagevec reuse of hooked pclusters erofs: don't trigger WARN() when decompression fails erofs: get rid of ->lru usage erofs: lzma compression support erofs: rename some generic methods in decompressor lib/xz, lib/decompress_unxz.c: Fix spelling in comments lib/xz: Add MicroLZMA decoder lib/xz: Move s->lzma.len = 0 initialization to lzma_reset() lib/xz: Validate the value before assigning it to an enum variable lib/xz: Avoid overlapping memcpy() with invalid input with in-place decompression erofs: introduce the secondary compression head erofs: get compression algorithms directly on mapping erofs: add multiple device support erofs: decouple basic mount options from fs_context erofs: remove the fast path of per-CPU buffer decompression fscrypt: improve a few comments fscrypt: allow 256-bit master keys with AES-256-XTS fscrypt: improve documentation for inline encryption fscrypt: clean up comments in bio.c fscrypt: remove fscrypt_operations::max_namelen fs-verity: fix signed integer overflow with i_size near S64_MAX fscrypt: document struct fscrypt_operations fscrypt: align Base64 encoding with RFC 4648 base64url fscrypt: remove mention of symlink st_size quirk from documentation f2fs: report correct st_size for encrypted symlinks ext4: report correct st_size for encrypted symlinks fscrypt: add fscrypt_symlink_getattr() for computing st_size Bug: 199807319 Signed-off-by: Jaegeuk Kim <jaegeuk@google.com> Change-Id: Iddcef4e9c4be2e1185f57695d7a42d969dc36823
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
.. _inline_encryption:
|
||||
|
||||
=================
|
||||
Inline Encryption
|
||||
=================
|
||||
|
||||
@@ -19,9 +19,10 @@ It is designed as a better filesystem solution for the following scenarios:
|
||||
immutable and bit-for-bit identical to the official golden image for
|
||||
their releases due to security and other considerations and
|
||||
|
||||
- hope to save some extra storage space with guaranteed end-to-end performance
|
||||
by using reduced metadata and transparent file compression, especially
|
||||
for those embedded devices with limited memory (ex, smartphone);
|
||||
- hope to minimize extra storage space with guaranteed end-to-end performance
|
||||
by using compact layout, transparent file compression and direct access,
|
||||
especially for those embedded devices with limited memory and high-density
|
||||
hosts with numerous containers;
|
||||
|
||||
Here are the main features of EROFS:
|
||||
|
||||
@@ -51,7 +52,9 @@ Here is the main features of EROFS:
|
||||
- Support POSIX.1e ACLs by using xattrs;
|
||||
|
||||
- Support transparent data compression as an option:
|
||||
LZ4 algorithm with the fixed-sized output compression for high performance.
|
||||
LZ4 algorithm with the fixed-sized output compression for high performance;
|
||||
|
||||
- Multiple device support for multi-layer container images.
|
||||
|
||||
The following git tree provides the file system user-space tools under
|
||||
development (e.g. the formatting tool mkfs.erofs):
|
||||
@@ -87,6 +90,7 @@ cache_strategy=%s Select a strategy for cached decompression from now on:
|
||||
dax={always,never} Use direct access (no page cache). See
|
||||
Documentation/filesystems/dax.rst.
|
||||
dax A legacy option which is an alias for ``dax=always``.
|
||||
device=%s Specify a path to an extra device to be used together.
|
||||
=================== =========================================================
|
||||
|
||||
On-disk details
|
||||
|
||||
@@ -77,11 +77,11 @@ Side-channel attacks
|
||||
|
||||
fscrypt is only resistant to side-channel attacks, such as timing or
|
||||
electromagnetic attacks, to the extent that the underlying Linux
|
||||
Cryptographic API algorithms are. If a vulnerable algorithm is used,
|
||||
such as a table-based implementation of AES, it may be possible for an
|
||||
attacker to mount a side channel attack against the online system.
|
||||
Side channel attacks may also be mounted against applications
|
||||
consuming decrypted data.
|
||||
Cryptographic API algorithms or inline encryption hardware are. If a
|
||||
vulnerable algorithm is used, such as a table-based implementation of
|
||||
AES, it may be possible for an attacker to mount a side channel attack
|
||||
against the online system. Side channel attacks may also be mounted
|
||||
against applications consuming decrypted data.
|
||||
|
||||
Unauthorized file access
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -1063,11 +1063,6 @@ astute users may notice some differences in behavior:
|
||||
|
||||
- DAX (Direct Access) is not supported on encrypted files.
|
||||
|
||||
- The st_size of an encrypted symlink will not necessarily give the
|
||||
length of the symlink target as required by POSIX. It will actually
|
||||
give the length of the ciphertext, which will be slightly longer
|
||||
than the plaintext due to NUL-padding and an extra 2-byte overhead.
|
||||
|
||||
- The maximum length of an encrypted symlink is 2 bytes shorter than
|
||||
the maximum length of an unencrypted symlink. For example, on an
|
||||
EXT4 filesystem with a 4K block size, unencrypted symlinks can be up
|
||||
@@ -1140,6 +1135,50 @@ where applications may later write sensitive data. It is recommended
|
||||
that systems implementing a form of "verified boot" take advantage of
|
||||
this by validating all top-level encryption policies prior to access.
|
||||
|
||||
Inline encryption support
|
||||
=========================
|
||||
|
||||
By default, fscrypt uses the kernel crypto API for all cryptographic
|
||||
operations (other than HKDF, which fscrypt partially implements
|
||||
itself). The kernel crypto API supports hardware crypto accelerators,
|
||||
but only ones that work in the traditional way where all inputs and
|
||||
outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can
|
||||
take advantage of such hardware, but the traditional acceleration
|
||||
model isn't particularly efficient and fscrypt hasn't been optimized
|
||||
for it.
|
||||
|
||||
Instead, many newer systems (especially mobile SoCs) have *inline
|
||||
encryption hardware* that can encrypt/decrypt data while it is on its
|
||||
way to/from the storage device. Linux supports inline encryption
|
||||
through a set of extensions to the block layer called *blk-crypto*.
|
||||
blk-crypto allows filesystems to attach encryption contexts to bios
|
||||
(I/O requests) to specify how the data will be encrypted or decrypted
|
||||
in-line. For more information about blk-crypto, see
|
||||
:ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
|
||||
|
||||
On supported filesystems (currently ext4 and f2fs), fscrypt can use
|
||||
blk-crypto instead of the kernel crypto API to encrypt/decrypt file
|
||||
contents. To enable this, set CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y in
|
||||
the kernel configuration, and specify the "inlinecrypt" mount option
|
||||
when mounting the filesystem.
|
||||
|
||||
Note that the "inlinecrypt" mount option just specifies to use inline
|
||||
encryption when possible; it doesn't force its use. fscrypt will
|
||||
still fall back to using the kernel crypto API on files where the
|
||||
inline encryption hardware doesn't have the needed crypto capabilities
|
||||
(e.g. support for the needed encryption algorithm and data unit size)
|
||||
and where blk-crypto-fallback is unusable. (For blk-crypto-fallback
|
||||
to be usable, it must be enabled in the kernel configuration with
|
||||
CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.)
|
||||
|
||||
Currently fscrypt always uses the filesystem block size (which is
|
||||
usually 4096 bytes) as the data unit size. Therefore, it can only use
|
||||
inline encryption hardware that supports that data unit size.
|
||||
|
||||
Inline encryption doesn't affect the ciphertext or other aspects of
|
||||
the on-disk format, so users may freely switch back and forth between
|
||||
using "inlinecrypt" and not using "inlinecrypt".
|
||||
|
||||
Implementation details
|
||||
======================
|
||||
|
||||
@@ -1189,6 +1228,13 @@ keys`_ and `DIRECT_KEY policies`_.
|
||||
Data path changes
|
||||
-----------------
|
||||
|
||||
When inline encryption is used, filesystems just need to associate
|
||||
encryption contexts with bios to specify how the block layer or the
|
||||
inline encryption hardware will encrypt/decrypt the file contents.
|
||||
|
||||
When inline encryption isn't used, filesystems must encrypt/decrypt
|
||||
the file contents themselves, as described below:
|
||||
|
||||
For the read path (->readpage()) of regular files, filesystems can
|
||||
read the ciphertext into the page cache and decrypt it in-place. The
|
||||
page lock must be held until decryption has finished, to prevent the
|
||||
@@ -1202,18 +1248,6 @@ buffer. Some filesystems, such as UBIFS, already use temporary
|
||||
buffers regardless of encryption. Other filesystems, such as ext4 and
|
||||
F2FS, have to allocate bounce pages specially for encryption.
|
||||
|
||||
Fscrypt is also able to use inline encryption hardware instead of the
|
||||
kernel crypto API for en/decryption of file contents. When possible,
|
||||
and if directed to do so (by specifying the 'inlinecrypt' mount option
|
||||
for an ext4/F2FS filesystem), it adds encryption contexts to bios and
|
||||
uses blk-crypto to perform the en/decryption instead of making use of
|
||||
the above read/write path changes. Of course, even if directed to
|
||||
make use of inline encryption, fscrypt will only be able to do so if
|
||||
either hardware inline encryption support is available for the
|
||||
selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
|
||||
is selected. If neither is the case, fscrypt will fall back to using
|
||||
the above mentioned read/write path changes for en/decryption.
|
||||
|
||||
Filename hashing and encoding
|
||||
-----------------------------
|
||||
|
||||
@@ -1235,12 +1269,12 @@ the user-supplied name to get the ciphertext.
|
||||
|
||||
Lookups without the key are more complicated. The raw ciphertext may
|
||||
contain the ``\0`` and ``/`` characters, which are illegal in
|
||||
filenames. Therefore, readdir() must base64-encode the ciphertext for
|
||||
presentation. For most filenames, this works fine; on ->lookup(), the
|
||||
filesystem just base64-decodes the user-supplied name to get back to
|
||||
the raw ciphertext.
|
||||
filenames. Therefore, readdir() must base64url-encode the ciphertext
|
||||
for presentation. For most filenames, this works fine; on ->lookup(),
|
||||
the filesystem just base64url-decodes the user-supplied name to get
|
||||
back to the raw ciphertext.
|
||||
|
||||
However, for very long filenames, base64 encoding would cause the
|
||||
However, for very long filenames, base64url encoding would cause the
|
||||
filename length to exceed NAME_MAX. To prevent this, readdir()
|
||||
actually presents long filenames in an abbreviated form which encodes
|
||||
a strong "hash" of the ciphertext filename, along with the optional
|
||||
|
||||
@@ -1,23 +1,10 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* This contains encryption functions for per-file encryption.
|
||||
* Utility functions for file contents encryption/decryption on
|
||||
* block device-based filesystems.
|
||||
*
|
||||
* Copyright (C) 2015, Google, Inc.
|
||||
* Copyright (C) 2015, Motorola Mobility
|
||||
*
|
||||
* Written by Michael Halcrow, 2014.
|
||||
*
|
||||
* Filename encryption additions
|
||||
* Uday Savagaonkar, 2014
|
||||
* Encryption policy handling additions
|
||||
* Ildar Muslukhov, 2014
|
||||
* Add fscrypt_pullback_bio_page()
|
||||
* Jaegeuk Kim, 2015.
|
||||
*
|
||||
* This has not yet undergone a rigorous security audit.
|
||||
*
|
||||
* The usage of AES-XTS should conform to recommendations in NIST
|
||||
* Special Publication 800-38E and IEEE P1619/D16.
|
||||
*/
|
||||
|
||||
#include <linux/pagemap.h>
|
||||
@@ -26,6 +13,21 @@
|
||||
#include <linux/namei.h>
|
||||
#include "fscrypt_private.h"
|
||||
|
||||
/**
|
||||
* fscrypt_decrypt_bio() - decrypt the contents of a bio
|
||||
* @bio: the bio to decrypt
|
||||
*
|
||||
* Decrypt the contents of a "read" bio following successful completion of the
|
||||
* underlying disk read. The bio must be reading a whole number of blocks of an
|
||||
* encrypted file directly into the page cache. If the bio is reading the
|
||||
* ciphertext into bounce pages instead of the page cache (for example, because
|
||||
* the file is also compressed, so decompression is required after decryption),
|
||||
* then this function isn't applicable. This function may sleep, so it must be
|
||||
* called from a workqueue rather than from the bio's bi_end_io callback.
|
||||
*
|
||||
* This function sets PG_error on any pages that contain any blocks that failed
|
||||
* to be decrypted. The filesystem must not mark such pages uptodate.
|
||||
*/
|
||||
void fscrypt_decrypt_bio(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bv;
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
* it to find the directory entry again if requested. Naively, that would just
|
||||
* mean using the ciphertext filenames. However, since the ciphertext filenames
|
||||
* can contain illegal characters ('\0' and '/'), they must be encoded in some
|
||||
* way. We use base64. But that can cause names to exceed NAME_MAX (255
|
||||
* way. We use base64url. But that can cause names to exceed NAME_MAX (255
|
||||
* bytes), so we also need to use a strong hash to abbreviate long names.
|
||||
*
|
||||
* The filesystem may also need another kind of hash, the "dirhash", to quickly
|
||||
@@ -38,7 +38,7 @@
|
||||
* casefolded directories use this type of dirhash. At least in these cases,
|
||||
* each no-key name must include the name's dirhash too.
|
||||
*
|
||||
* To meet all these requirements, we base64-encode the following
|
||||
* To meet all these requirements, we base64url-encode the following
|
||||
* variable-length structure. It contains the dirhash, or 0's if the filesystem
|
||||
* didn't provide one; up to 149 bytes of the ciphertext name; and for
|
||||
* ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes.
|
||||
@@ -52,15 +52,19 @@ struct fscrypt_nokey_name {
|
||||
u32 dirhash[2];
|
||||
u8 bytes[149];
|
||||
u8 sha256[SHA256_DIGEST_SIZE];
|
||||
}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */
|
||||
}; /* 189 bytes => 252 bytes base64url-encoded, which is <= NAME_MAX (255) */
|
||||
|
||||
/*
|
||||
* Decoded size of max-size nokey name, i.e. a name that was abbreviated using
|
||||
* Decoded size of max-size no-key name, i.e. a name that was abbreviated using
|
||||
* the strong hash and thus includes the 'sha256' field. This isn't simply
|
||||
* sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included.
|
||||
*/
|
||||
#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256)
|
||||
|
||||
/* Encoded size of max-size no-key name */
|
||||
#define FSCRYPT_NOKEY_NAME_MAX_ENCODED \
|
||||
FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX)
|
||||
|
||||
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
|
||||
{
|
||||
if (str->len == 1 && str->name[0] == '.')
|
||||
@@ -175,62 +179,82 @@ static int fname_decrypt(const struct inode *inode,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char lookup_table[65] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
|
||||
static const char base64url_table[65] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
||||
|
||||
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
|
||||
#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
|
||||
|
||||
/**
|
||||
* base64_encode() - base64-encode some bytes
|
||||
* @src: the bytes to encode
|
||||
* @len: number of bytes to encode
|
||||
* @dst: (output) the base64-encoded string. Not NUL-terminated.
|
||||
* fscrypt_base64url_encode() - base64url-encode some binary data
|
||||
* @src: the binary data to encode
|
||||
* @srclen: the length of @src in bytes
|
||||
* @dst: (output) the base64url-encoded string. Not NUL-terminated.
|
||||
*
|
||||
* Encodes the input string using characters from the set [A-Za-z0-9+,].
|
||||
* The encoded string is roughly 4/3 times the size of the input string.
|
||||
* Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL
|
||||
* and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used,
|
||||
* as it's unneeded and not required by the RFC. base64url is used instead of
|
||||
* base64 to avoid the '/' character, which isn't allowed in filenames.
|
||||
*
|
||||
* Return: length of the encoded string
|
||||
* Return: the length of the resulting base64url-encoded string in bytes.
|
||||
* This will be equal to FSCRYPT_BASE64URL_CHARS(srclen).
|
||||
*/
|
||||
static int base64_encode(const u8 *src, int len, char *dst)
|
||||
static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst)
|
||||
{
|
||||
int i, bits = 0, ac = 0;
|
||||
u32 ac = 0;
|
||||
int bits = 0;
|
||||
int i;
|
||||
char *cp = dst;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
ac += src[i] << bits;
|
||||
for (i = 0; i < srclen; i++) {
|
||||
ac = (ac << 8) | src[i];
|
||||
bits += 8;
|
||||
do {
|
||||
*cp++ = lookup_table[ac & 0x3f];
|
||||
ac >>= 6;
|
||||
bits -= 6;
|
||||
*cp++ = base64url_table[(ac >> bits) & 0x3f];
|
||||
} while (bits >= 6);
|
||||
}
|
||||
if (bits)
|
||||
*cp++ = lookup_table[ac & 0x3f];
|
||||
*cp++ = base64url_table[(ac << (6 - bits)) & 0x3f];
|
||||
return cp - dst;
|
||||
}
|
||||
|
||||
static int base64_decode(const char *src, int len, u8 *dst)
|
||||
/**
|
||||
* fscrypt_base64url_decode() - base64url-decode a string
|
||||
* @src: the string to decode. Doesn't need to be NUL-terminated.
|
||||
* @srclen: the length of @src in bytes
|
||||
* @dst: (output) the decoded binary data
|
||||
*
|
||||
* Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with
|
||||
* URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't
|
||||
* accepted, nor are non-encoding characters such as whitespace.
|
||||
*
|
||||
* This implementation hasn't been optimized for performance.
|
||||
*
|
||||
* Return: the length of the resulting decoded binary data in bytes,
|
||||
* or -1 if the string isn't a valid base64url string.
|
||||
*/
|
||||
static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst)
|
||||
{
|
||||
int i, bits = 0, ac = 0;
|
||||
const char *p;
|
||||
u8 *cp = dst;
|
||||
u32 ac = 0;
|
||||
int bits = 0;
|
||||
int i;
|
||||
u8 *bp = dst;
|
||||
|
||||
for (i = 0; i < srclen; i++) {
|
||||
const char *p = strchr(base64url_table, src[i]);
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
p = strchr(lookup_table, src[i]);
|
||||
if (p == NULL || src[i] == 0)
|
||||
return -2;
|
||||
ac += (p - lookup_table) << bits;
|
||||
return -1;
|
||||
ac = (ac << 6) | (p - base64url_table);
|
||||
bits += 6;
|
||||
if (bits >= 8) {
|
||||
*cp++ = ac & 0xff;
|
||||
ac >>= 8;
|
||||
bits -= 8;
|
||||
*bp++ = (u8)(ac >> bits);
|
||||
}
|
||||
}
|
||||
if (ac)
|
||||
if (ac & ((1 << bits) - 1))
|
||||
return -1;
|
||||
return cp - dst;
|
||||
return bp - dst;
|
||||
}
|
||||
|
||||
bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
|
||||
@@ -263,10 +287,8 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
|
||||
int fscrypt_fname_alloc_buffer(u32 max_encrypted_len,
|
||||
struct fscrypt_str *crypto_str)
|
||||
{
|
||||
const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX);
|
||||
u32 max_presented_len;
|
||||
|
||||
max_presented_len = max(max_encoded_len, max_encrypted_len);
|
||||
u32 max_presented_len = max_t(u32, FSCRYPT_NOKEY_NAME_MAX_ENCODED,
|
||||
max_encrypted_len);
|
||||
|
||||
crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS);
|
||||
if (!crypto_str->name)
|
||||
@@ -342,7 +364,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
|
||||
offsetof(struct fscrypt_nokey_name, bytes));
|
||||
BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) !=
|
||||
offsetof(struct fscrypt_nokey_name, sha256));
|
||||
BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX);
|
||||
BUILD_BUG_ON(FSCRYPT_NOKEY_NAME_MAX_ENCODED > NAME_MAX);
|
||||
|
||||
nokey_name.dirhash[0] = hash;
|
||||
nokey_name.dirhash[1] = minor_hash;
|
||||
@@ -358,7 +380,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
|
||||
nokey_name.sha256);
|
||||
size = FSCRYPT_NOKEY_NAME_MAX;
|
||||
}
|
||||
oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name);
|
||||
oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size,
|
||||
oname->name);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
|
||||
@@ -406,8 +429,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
|
||||
|
||||
if (fscrypt_has_encryption_key(dir)) {
|
||||
if (!fscrypt_fname_encrypted_size(&dir->i_crypt_info->ci_policy,
|
||||
iname->len,
|
||||
dir->i_sb->s_cop->max_namelen,
|
||||
iname->len, NAME_MAX,
|
||||
&fname->crypto_buf.len))
|
||||
return -ENAMETOOLONG;
|
||||
fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
|
||||
@@ -432,14 +454,15 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
|
||||
* user-supplied name
|
||||
*/
|
||||
|
||||
if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX))
|
||||
if (iname->len > FSCRYPT_NOKEY_NAME_MAX_ENCODED)
|
||||
return -ENOENT;
|
||||
|
||||
fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL);
|
||||
if (fname->crypto_buf.name == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name);
|
||||
ret = fscrypt_base64url_decode(iname->name, iname->len,
|
||||
fname->crypto_buf.name);
|
||||
if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) ||
|
||||
(ret > offsetof(struct fscrypt_nokey_name, sha256) &&
|
||||
ret != FSCRYPT_NOKEY_NAME_MAX)) {
|
||||
|
||||
@@ -20,6 +20,11 @@
|
||||
|
||||
#define FSCRYPT_FILE_NONCE_SIZE 16
|
||||
|
||||
/*
|
||||
* Minimum size of an fscrypt master key. Note: a longer key will be required
|
||||
* if ciphers with a 256-bit security strength are used. This is just the
|
||||
* absolute minimum, which applies when only 128-bit encryption is used.
|
||||
*/
|
||||
#define FSCRYPT_MIN_KEY_SIZE 16
|
||||
|
||||
#define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE 128
|
||||
@@ -437,7 +442,11 @@ struct fscrypt_master_key_secret {
|
||||
*/
|
||||
struct fscrypt_hkdf hkdf;
|
||||
|
||||
/* Size of the raw key in bytes. Set even if ->raw isn't set. */
|
||||
/*
|
||||
* Size of the raw key in bytes. This remains set even if ->raw was
|
||||
* zeroized due to no longer being needed. I.e. we still remember the
|
||||
* size of the key even if we don't need to remember the key itself.
|
||||
*/
|
||||
u32 size;
|
||||
|
||||
/* True if the key in ->raw is a hardware-wrapped key. */
|
||||
|
||||
@@ -122,8 +122,9 @@ err_free_tfm:
|
||||
|
||||
/*
|
||||
* Prepare the crypto transform object or blk-crypto key in @prep_key, given the
|
||||
* raw key, encryption mode, and flag indicating which encryption implementation
|
||||
* (fs-layer or blk-crypto) will be used.
|
||||
* raw key, encryption mode (@ci->ci_mode), flag indicating which encryption
|
||||
* implementation (fs-layer or blk-crypto) will be used (@ci->ci_inlinecrypt),
|
||||
* and IV generation method (@ci->ci_policy.flags).
|
||||
*/
|
||||
int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
|
||||
const u8 *raw_key, unsigned int raw_key_size,
|
||||
|
||||
@@ -6,16 +6,22 @@ config EROFS_FS
|
||||
select FS_IOMAP
|
||||
select LIBCRC32C
|
||||
help
|
||||
EROFS (Enhanced Read-Only File System) is a lightweight
|
||||
read-only file system with modern designs (eg. page-sized
|
||||
blocks, inline xattrs/data, etc.) for scenarios which need
|
||||
high-performance read-only requirements, e.g. Android OS
|
||||
for mobile phones and LIVECDs.
|
||||
EROFS (Enhanced Read-Only File System) is a lightweight read-only
|
||||
file system with modern designs (e.g. no buffer heads, inline
|
||||
xattrs/data, chunk-based deduplication, multiple devices, etc.) for
|
||||
scenarios which need high-performance read-only solutions, e.g.
|
||||
smartphones with Android OS, LiveCDs and high-density hosts with
|
||||
numerous containers.
|
||||
|
||||
It also provides fixed-sized output compression support,
|
||||
which improves storage density, keeps relatively higher
|
||||
compression ratios, which is more useful to achieve high
|
||||
performance for embedded devices with limited memory.
|
||||
It also provides fixed-sized output compression support in order to
|
||||
improve storage density as well as keep relatively higher compression
|
||||
ratios and implements in-place decompression to reuse the file page
|
||||
for compressed data temporarily with proper strategies, which is
|
||||
quite useful to ensure guaranteed end-to-end runtime decompression
|
||||
performance under extreme memory pressure without extra cost.
|
||||
|
||||
See the documentation at <file:Documentation/filesystems/erofs.rst>
|
||||
for more details.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
@@ -76,3 +82,19 @@ config EROFS_FS_ZIP
|
||||
Enable fixed-sized output compression for EROFS.
|
||||
|
||||
If you don't want to enable compression feature, say N.
|
||||
|
||||
config EROFS_FS_ZIP_LZMA
|
||||
bool "EROFS LZMA compressed data support"
|
||||
depends on EROFS_FS_ZIP
|
||||
select XZ_DEC
|
||||
select XZ_DEC_MICROLZMA
|
||||
help
|
||||
Saying Y here includes support for reading EROFS file systems
|
||||
containing LZMA compressed data, specifically called microLZMA. It
|
||||
gives better compression ratios than the LZ4 algorithm, at the
|
||||
expense of more CPU overhead.
|
||||
|
||||
LZMA support is an experimental feature for now and so most file
|
||||
systems will be readable without selecting this option.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
@@ -4,3 +4,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
|
||||
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
|
||||
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
|
||||
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
|
||||
|
||||
@@ -8,11 +8,6 @@
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
enum {
|
||||
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
|
||||
Z_EROFS_COMPRESSION_RUNTIME_MAX
|
||||
};
|
||||
|
||||
struct z_erofs_decompress_req {
|
||||
struct super_block *sb;
|
||||
struct page **in, **out;
|
||||
@@ -25,6 +20,12 @@ struct z_erofs_decompress_req {
|
||||
bool inplace_io, partial_decoding;
|
||||
};
|
||||
|
||||
struct z_erofs_decompressor {
|
||||
int (*decompress)(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool);
|
||||
char *name;
|
||||
};
|
||||
|
||||
/* some special page->private (unsigned long, see below) */
|
||||
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
|
||||
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
|
||||
@@ -63,7 +64,7 @@ static inline bool z_erofs_is_shortlived_page(struct page *page)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
|
||||
static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
|
||||
struct page *page)
|
||||
{
|
||||
if (!z_erofs_is_shortlived_page(page))
|
||||
@@ -74,13 +75,22 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
|
||||
put_page(page);
|
||||
} else {
|
||||
/* follow the pcluster rule above. */
|
||||
set_page_private(page, 0);
|
||||
list_add(&page->lru, pagepool);
|
||||
erofs_pagepool_add(pagepool, page);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool);
|
||||
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
|
||||
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
|
||||
struct page *page)
|
||||
{
|
||||
return page->mapping == MNGD_MAPPING(sbi);
|
||||
}
|
||||
|
||||
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool);
|
||||
|
||||
/* prototypes for specific algorithms */
|
||||
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool);
|
||||
#endif
|
||||
|
||||
@@ -90,6 +90,7 @@ static int erofs_map_blocks(struct inode *inode,
|
||||
erofs_off_t pos;
|
||||
int err = 0;
|
||||
|
||||
map->m_deviceid = 0;
|
||||
if (map->m_la >= inode->i_size) {
|
||||
/* leave out-of-bound access unmapped */
|
||||
map->m_flags = 0;
|
||||
@@ -136,14 +137,8 @@ static int erofs_map_blocks(struct inode *inode,
|
||||
map->m_flags = 0;
|
||||
break;
|
||||
default:
|
||||
/* only one device is supported for now */
|
||||
if (idx->device_id) {
|
||||
erofs_err(sb, "invalid device id %u @ %llu for nid %llu",
|
||||
le16_to_cpu(idx->device_id),
|
||||
chunknr, vi->nid);
|
||||
err = -EFSCORRUPTED;
|
||||
goto out_unlock;
|
||||
}
|
||||
map->m_deviceid = le16_to_cpu(idx->device_id) &
|
||||
EROFS_SB(sb)->device_id_mask;
|
||||
map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
|
||||
map->m_flags = EROFS_MAP_MAPPED;
|
||||
break;
|
||||
@@ -156,11 +151,55 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
|
||||
{
|
||||
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
|
||||
struct erofs_device_info *dif;
|
||||
int id;
|
||||
|
||||
/* primary device by default */
|
||||
map->m_bdev = sb->s_bdev;
|
||||
map->m_daxdev = EROFS_SB(sb)->dax_dev;
|
||||
|
||||
if (map->m_deviceid) {
|
||||
down_read(&devs->rwsem);
|
||||
dif = idr_find(&devs->tree, map->m_deviceid - 1);
|
||||
if (!dif) {
|
||||
up_read(&devs->rwsem);
|
||||
return -ENODEV;
|
||||
}
|
||||
map->m_bdev = dif->bdev;
|
||||
map->m_daxdev = dif->dax_dev;
|
||||
up_read(&devs->rwsem);
|
||||
} else if (devs->extra_devices) {
|
||||
down_read(&devs->rwsem);
|
||||
idr_for_each_entry(&devs->tree, dif, id) {
|
||||
erofs_off_t startoff, length;
|
||||
|
||||
if (!dif->mapped_blkaddr)
|
||||
continue;
|
||||
startoff = blknr_to_addr(dif->mapped_blkaddr);
|
||||
length = blknr_to_addr(dif->blocks);
|
||||
|
||||
if (map->m_pa >= startoff &&
|
||||
map->m_pa < startoff + length) {
|
||||
map->m_pa -= startoff;
|
||||
map->m_bdev = dif->bdev;
|
||||
map->m_daxdev = dif->dax_dev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
up_read(&devs->rwsem);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
|
||||
{
|
||||
int ret;
|
||||
struct erofs_map_blocks map;
|
||||
struct erofs_map_dev mdev;
|
||||
|
||||
map.m_la = offset;
|
||||
map.m_llen = length;
|
||||
@@ -169,8 +208,16 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
|
||||
mdev = (struct erofs_map_dev) {
|
||||
.m_deviceid = map.m_deviceid,
|
||||
.m_pa = map.m_pa,
|
||||
};
|
||||
ret = erofs_map_dev(inode->i_sb, &mdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
iomap->bdev = mdev.m_bdev;
|
||||
iomap->dax_dev = mdev.m_daxdev;
|
||||
iomap->offset = map.m_la;
|
||||
iomap->length = map.m_llen;
|
||||
iomap->flags = 0;
|
||||
@@ -189,15 +236,15 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
|
||||
iomap->type = IOMAP_INLINE;
|
||||
ipage = erofs_get_meta_page(inode->i_sb,
|
||||
erofs_blknr(map.m_pa));
|
||||
erofs_blknr(mdev.m_pa));
|
||||
if (IS_ERR(ipage))
|
||||
return PTR_ERR(ipage);
|
||||
iomap->inline_data = page_address(ipage) +
|
||||
erofs_blkoff(map.m_pa);
|
||||
erofs_blkoff(mdev.m_pa);
|
||||
iomap->private = ipage;
|
||||
} else {
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
iomap->addr = map.m_pa;
|
||||
iomap->addr = mdev.m_pa;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -16,17 +16,6 @@
|
||||
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
|
||||
#endif
|
||||
|
||||
struct z_erofs_decompressor {
|
||||
/*
|
||||
* if destpages have sparsed pages, fill them with bounce pages.
|
||||
* it also check whether destpages indicate continuous physical memory.
|
||||
*/
|
||||
int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool);
|
||||
int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
|
||||
char *name;
|
||||
};
|
||||
|
||||
int z_erofs_load_lz4_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb,
|
||||
struct z_erofs_lz4_cfgs *lz4, int size)
|
||||
@@ -63,8 +52,12 @@ int z_erofs_load_lz4_config(struct super_block *sb,
|
||||
return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
|
||||
}
|
||||
|
||||
static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool)
|
||||
/*
|
||||
* Fill all gaps with bounce pages if it's a sparse page list. Also check if
|
||||
* all physical pages are consecutive, which can be seen for moderate CR.
|
||||
*/
|
||||
static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool)
|
||||
{
|
||||
const unsigned int nr =
|
||||
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
|
||||
@@ -119,7 +112,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
|
||||
return kaddr ? 1 : 0;
|
||||
}
|
||||
|
||||
static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
|
||||
static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
|
||||
void *inpage, unsigned int *inputmargin, int *maptype,
|
||||
bool support_0padding)
|
||||
{
|
||||
@@ -189,7 +182,8 @@ docopy:
|
||||
return src;
|
||||
}
|
||||
|
||||
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
|
||||
static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
|
||||
u8 *out)
|
||||
{
|
||||
unsigned int inputmargin;
|
||||
u8 *headpage, *src;
|
||||
@@ -216,8 +210,8 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
|
||||
}
|
||||
|
||||
rq->inputsize -= inputmargin;
|
||||
src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype,
|
||||
support_0padding);
|
||||
src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin,
|
||||
&maptype, support_0padding);
|
||||
if (IS_ERR(src))
|
||||
return PTR_ERR(src);
|
||||
|
||||
@@ -241,6 +235,8 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
|
||||
if (ret >= 0)
|
||||
memset(out + ret, 0, rq->outputsize - ret);
|
||||
ret = -EIO;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (maptype == 0) {
|
||||
@@ -256,86 +252,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct z_erofs_decompressor decompressors[] = {
|
||||
[Z_EROFS_COMPRESSION_SHIFTED] = {
|
||||
.name = "shifted"
|
||||
},
|
||||
[Z_EROFS_COMPRESSION_LZ4] = {
|
||||
.prepare_destpages = z_erofs_lz4_prepare_destpages,
|
||||
.decompress = z_erofs_lz4_decompress,
|
||||
.name = "lz4"
|
||||
},
|
||||
};
|
||||
|
||||
static void copy_from_pcpubuf(struct page **out, const char *dst,
|
||||
unsigned short pageofs_out,
|
||||
unsigned int outputsize)
|
||||
{
|
||||
const char *end = dst + outputsize;
|
||||
const unsigned int righthalf = PAGE_SIZE - pageofs_out;
|
||||
const char *cur = dst - pageofs_out;
|
||||
|
||||
while (cur < end) {
|
||||
struct page *const page = *out++;
|
||||
|
||||
if (page) {
|
||||
char *buf = kmap_atomic(page);
|
||||
|
||||
if (cur >= dst) {
|
||||
memcpy(buf, cur, min_t(uint, PAGE_SIZE,
|
||||
end - cur));
|
||||
} else {
|
||||
memcpy(buf + pageofs_out, cur + pageofs_out,
|
||||
min_t(uint, righthalf, end - cur));
|
||||
}
|
||||
kunmap_atomic(buf);
|
||||
}
|
||||
cur += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool)
|
||||
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool)
|
||||
{
|
||||
const unsigned int nrpages_out =
|
||||
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
|
||||
const struct z_erofs_decompressor *alg = decompressors + rq->alg;
|
||||
unsigned int dst_maptype;
|
||||
void *dst;
|
||||
int ret;
|
||||
|
||||
/* two optimized fast paths only for non bigpcluster cases yet */
|
||||
if (rq->inputsize <= PAGE_SIZE) {
|
||||
if (nrpages_out == 1 && !rq->inplace_io) {
|
||||
DBG_BUGON(!*rq->out);
|
||||
dst = kmap_atomic(*rq->out);
|
||||
dst_maptype = 0;
|
||||
goto dstmap_out;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the case of small output size (especially much less
|
||||
* than PAGE_SIZE), memcpy the decompressed data rather than
|
||||
* compressed data is preferred.
|
||||
*/
|
||||
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
|
||||
dst = erofs_get_pcpubuf(1);
|
||||
if (IS_ERR(dst))
|
||||
return PTR_ERR(dst);
|
||||
|
||||
rq->inplace_io = false;
|
||||
ret = alg->decompress(rq, dst);
|
||||
if (!ret)
|
||||
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
|
||||
rq->outputsize);
|
||||
|
||||
erofs_put_pcpubuf(dst);
|
||||
return ret;
|
||||
}
|
||||
/* one optimized fast path only for non bigpcluster cases yet */
|
||||
if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) {
|
||||
DBG_BUGON(!*rq->out);
|
||||
dst = kmap_atomic(*rq->out);
|
||||
dst_maptype = 0;
|
||||
goto dstmap_out;
|
||||
}
|
||||
|
||||
/* general decoding path which can be used for all cases */
|
||||
ret = alg->prepare_destpages(rq, pagepool);
|
||||
ret = z_erofs_lz4_prepare_dstpages(rq, pagepool);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret) {
|
||||
@@ -350,7 +285,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
|
||||
dst_maptype = 2;
|
||||
|
||||
dstmap_out:
|
||||
ret = alg->decompress(rq, dst + rq->pageofs_out);
|
||||
ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out);
|
||||
|
||||
if (!dst_maptype)
|
||||
kunmap_atomic(dst);
|
||||
@@ -359,8 +294,8 @@ dstmap_out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool)
|
||||
static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool)
|
||||
{
|
||||
const unsigned int nrpages_out =
|
||||
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
|
||||
@@ -398,10 +333,25 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct z_erofs_decompressor decompressors[] = {
|
||||
[Z_EROFS_COMPRESSION_SHIFTED] = {
|
||||
.decompress = z_erofs_shifted_transform,
|
||||
.name = "shifted"
|
||||
},
|
||||
[Z_EROFS_COMPRESSION_LZ4] = {
|
||||
.decompress = z_erofs_lz4_decompress,
|
||||
.name = "lz4"
|
||||
},
|
||||
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
|
||||
[Z_EROFS_COMPRESSION_LZMA] = {
|
||||
.decompress = z_erofs_lzma_decompress,
|
||||
.name = "lzma"
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct list_head *pagepool)
|
||||
struct page **pagepool)
|
||||
{
|
||||
if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
|
||||
return z_erofs_shifted_transform(rq, pagepool);
|
||||
return z_erofs_decompress_generic(rq, pagepool);
|
||||
return decompressors[rq->alg].decompress(rq, pagepool);
|
||||
}
|
||||
|
||||
290
fs/erofs/decompressor_lzma.c
Normal file
290
fs/erofs/decompressor_lzma.c
Normal file
@@ -0,0 +1,290 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#include <linux/xz.h>
|
||||
#include <linux/module.h>
|
||||
#include "compress.h"
|
||||
|
||||
struct z_erofs_lzma {
|
||||
struct z_erofs_lzma *next;
|
||||
struct xz_dec_microlzma *state;
|
||||
struct xz_buf buf;
|
||||
u8 bounce[PAGE_SIZE];
|
||||
};
|
||||
|
||||
/* considering the LZMA performance, no need to use a lockless list for now */
|
||||
static DEFINE_SPINLOCK(z_erofs_lzma_lock);
|
||||
static unsigned int z_erofs_lzma_max_dictsize;
|
||||
static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms;
|
||||
static struct z_erofs_lzma *z_erofs_lzma_head;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);
|
||||
|
||||
module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);
|
||||
|
||||
void z_erofs_lzma_exit(void)
|
||||
{
|
||||
/* there should be no running fs instance */
|
||||
while (z_erofs_lzma_avail_strms) {
|
||||
struct z_erofs_lzma *strm;
|
||||
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
strm = z_erofs_lzma_head;
|
||||
if (!strm) {
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
DBG_BUGON(1);
|
||||
return;
|
||||
}
|
||||
z_erofs_lzma_head = NULL;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
|
||||
while (strm) {
|
||||
struct z_erofs_lzma *n = strm->next;
|
||||
|
||||
if (strm->state)
|
||||
xz_dec_microlzma_end(strm->state);
|
||||
kfree(strm);
|
||||
--z_erofs_lzma_avail_strms;
|
||||
strm = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int z_erofs_lzma_init(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* by default, use # of possible CPUs instead */
|
||||
if (!z_erofs_lzma_nstrms)
|
||||
z_erofs_lzma_nstrms = num_possible_cpus();
|
||||
|
||||
for (i = 0; i < z_erofs_lzma_nstrms; ++i) {
|
||||
struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL);
|
||||
|
||||
if (!strm) {
|
||||
z_erofs_lzma_exit();
|
||||
return -ENOMEM;
|
||||
}
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
strm->next = z_erofs_lzma_head;
|
||||
z_erofs_lzma_head = strm;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
++z_erofs_lzma_avail_strms;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int z_erofs_load_lzma_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb,
|
||||
struct z_erofs_lzma_cfgs *lzma, int size)
|
||||
{
|
||||
static DEFINE_MUTEX(lzma_resize_mutex);
|
||||
unsigned int dict_size, i;
|
||||
struct z_erofs_lzma *strm, *head = NULL;
|
||||
int err;
|
||||
|
||||
if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) {
|
||||
erofs_err(sb, "invalid lzma cfgs, size=%u", size);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (lzma->format) {
|
||||
erofs_err(sb, "unidentified lzma format %x, please check kernel version",
|
||||
le16_to_cpu(lzma->format));
|
||||
return -EINVAL;
|
||||
}
|
||||
dict_size = le32_to_cpu(lzma->dict_size);
|
||||
if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) {
|
||||
erofs_err(sb, "unsupported lzma dictionary size %u",
|
||||
dict_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!");
|
||||
|
||||
/* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */
|
||||
mutex_lock(&lzma_resize_mutex);
|
||||
|
||||
if (z_erofs_lzma_max_dictsize >= dict_size) {
|
||||
mutex_unlock(&lzma_resize_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* 1. collect/isolate all streams for the following check */
|
||||
for (i = 0; i < z_erofs_lzma_avail_strms; ++i) {
|
||||
struct z_erofs_lzma *last;
|
||||
|
||||
again:
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
strm = z_erofs_lzma_head;
|
||||
if (!strm) {
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
wait_event(z_erofs_lzma_wq,
|
||||
READ_ONCE(z_erofs_lzma_head));
|
||||
goto again;
|
||||
}
|
||||
z_erofs_lzma_head = NULL;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
|
||||
for (last = strm; last->next; last = last->next)
|
||||
++i;
|
||||
last->next = head;
|
||||
head = strm;
|
||||
}
|
||||
|
||||
err = 0;
|
||||
/* 2. walk each isolated stream and grow max dict_size if needed */
|
||||
for (strm = head; strm; strm = strm->next) {
|
||||
if (strm->state)
|
||||
xz_dec_microlzma_end(strm->state);
|
||||
strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size);
|
||||
if (!strm->state)
|
||||
err = -ENOMEM;
|
||||
}
|
||||
|
||||
/* 3. push back all to the global list and update max dict_size */
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
DBG_BUGON(z_erofs_lzma_head);
|
||||
z_erofs_lzma_head = head;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
|
||||
z_erofs_lzma_max_dictsize = dict_size;
|
||||
mutex_unlock(&lzma_resize_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool)
|
||||
{
|
||||
const unsigned int nrpages_out =
|
||||
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
|
||||
const unsigned int nrpages_in =
|
||||
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
|
||||
unsigned int inputmargin, inlen, outlen, pageofs;
|
||||
struct z_erofs_lzma *strm;
|
||||
u8 *kin;
|
||||
bool bounced = false;
|
||||
int no, ni, j, err = 0;
|
||||
|
||||
/* 1. get the exact LZMA compressed size */
|
||||
kin = kmap(*rq->in);
|
||||
inputmargin = 0;
|
||||
while (!kin[inputmargin & ~PAGE_MASK])
|
||||
if (!(++inputmargin & ~PAGE_MASK))
|
||||
break;
|
||||
|
||||
if (inputmargin >= PAGE_SIZE) {
|
||||
kunmap(*rq->in);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
rq->inputsize -= inputmargin;
|
||||
|
||||
/* 2. get an available lzma context */
|
||||
again:
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
strm = z_erofs_lzma_head;
|
||||
if (!strm) {
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head));
|
||||
goto again;
|
||||
}
|
||||
z_erofs_lzma_head = strm->next;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
|
||||
/* 3. multi-call decompress */
|
||||
inlen = rq->inputsize;
|
||||
outlen = rq->outputsize;
|
||||
xz_dec_microlzma_reset(strm->state, inlen, outlen,
|
||||
!rq->partial_decoding);
|
||||
pageofs = rq->pageofs_out;
|
||||
strm->buf.in = kin + inputmargin;
|
||||
strm->buf.in_pos = 0;
|
||||
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
|
||||
inlen -= strm->buf.in_size;
|
||||
strm->buf.out = NULL;
|
||||
strm->buf.out_pos = 0;
|
||||
strm->buf.out_size = 0;
|
||||
|
||||
for (ni = 0, no = -1;;) {
|
||||
enum xz_ret xz_err;
|
||||
|
||||
if (strm->buf.out_pos == strm->buf.out_size) {
|
||||
if (strm->buf.out) {
|
||||
kunmap(rq->out[no]);
|
||||
strm->buf.out = NULL;
|
||||
}
|
||||
|
||||
if (++no >= nrpages_out || !outlen) {
|
||||
erofs_err(rq->sb, "decompressed buf out of bound");
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
strm->buf.out_pos = 0;
|
||||
strm->buf.out_size = min_t(u32, outlen,
|
||||
PAGE_SIZE - pageofs);
|
||||
outlen -= strm->buf.out_size;
|
||||
if (rq->out[no])
|
||||
strm->buf.out = kmap(rq->out[no]) + pageofs;
|
||||
pageofs = 0;
|
||||
} else if (strm->buf.in_pos == strm->buf.in_size) {
|
||||
kunmap(rq->in[ni]);
|
||||
|
||||
if (++ni >= nrpages_in || !inlen) {
|
||||
erofs_err(rq->sb, "compressed buf out of bound");
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
strm->buf.in_pos = 0;
|
||||
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
|
||||
inlen -= strm->buf.in_size;
|
||||
kin = kmap(rq->in[ni]);
|
||||
strm->buf.in = kin;
|
||||
bounced = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle overlapping: Use bounced buffer if the compressed
|
||||
* data is under processing; Otherwise, Use short-lived pages
|
||||
* from the on-stack pagepool where pages share with the same
|
||||
* request.
|
||||
*/
|
||||
if (!bounced && rq->out[no] == rq->in[ni]) {
|
||||
memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
|
||||
strm->buf.in = strm->bounce;
|
||||
bounced = true;
|
||||
}
|
||||
for (j = ni + 1; j < nrpages_in; ++j) {
|
||||
struct page *tmppage;
|
||||
|
||||
if (rq->out[no] != rq->in[j])
|
||||
continue;
|
||||
|
||||
DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
|
||||
rq->in[j]));
|
||||
tmppage = erofs_allocpage(pagepool,
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
|
||||
copy_highpage(tmppage, rq->in[j]);
|
||||
rq->in[j] = tmppage;
|
||||
}
|
||||
xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
|
||||
DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
|
||||
DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);
|
||||
|
||||
if (xz_err != XZ_OK) {
|
||||
if (xz_err == XZ_STREAM_END && !outlen)
|
||||
break;
|
||||
erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
|
||||
xz_err, rq->inputsize, rq->outputsize);
|
||||
err = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (no < nrpages_out && strm->buf.out)
|
||||
kunmap(rq->in[no]);
|
||||
if (ni < nrpages_in)
|
||||
kunmap(rq->in[ni]);
|
||||
/* 4. push back LZMA stream context to the global list */
|
||||
spin_lock(&z_erofs_lzma_lock);
|
||||
strm->next = z_erofs_lzma_head;
|
||||
z_erofs_lzma_head = strm;
|
||||
spin_unlock(&z_erofs_lzma_lock);
|
||||
wake_up(&z_erofs_lzma_wq);
|
||||
return err;
|
||||
}
|
||||
@@ -21,14 +21,29 @@
|
||||
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
|
||||
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
|
||||
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
|
||||
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
|
||||
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
|
||||
#define EROFS_ALL_FEATURE_INCOMPAT \
|
||||
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
|
||||
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
|
||||
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
|
||||
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
|
||||
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
|
||||
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
|
||||
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
|
||||
|
||||
#define EROFS_SB_EXTSLOT_SIZE 16
|
||||
|
||||
struct erofs_deviceslot {
|
||||
union {
|
||||
u8 uuid[16]; /* used for device manager later */
|
||||
u8 userdata[64]; /* digest(sha256), etc. */
|
||||
} u;
|
||||
__le32 blocks; /* total fs blocks of this device */
|
||||
__le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
|
||||
u8 reserved[56];
|
||||
};
|
||||
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
|
||||
|
||||
/* erofs on-disk super block (currently 128 bytes) */
|
||||
struct erofs_super_block {
|
||||
__le32 magic; /* file system magic number */
|
||||
@@ -54,7 +69,9 @@ struct erofs_super_block {
|
||||
/* customized sliding window size instead of 64k by default */
|
||||
__le16 lz4_max_distance;
|
||||
} __packed u1;
|
||||
__u8 reserved2[42];
|
||||
__le16 extra_devices; /* # of devices besides the primary device */
|
||||
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
|
||||
__u8 reserved2[38];
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -238,7 +255,7 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
|
||||
/* 8-byte inode chunk indexes */
|
||||
struct erofs_inode_chunk_index {
|
||||
__le16 advise; /* always 0, don't care for now */
|
||||
__le16 device_id; /* back-end storage id, always 0 for now */
|
||||
__le16 device_id; /* back-end storage id (with bits masked) */
|
||||
__le32 blkaddr; /* start block address of this inode chunk */
|
||||
};
|
||||
|
||||
@@ -247,10 +264,11 @@ struct erofs_inode_chunk_index {
|
||||
|
||||
/* available compression algorithm types (for h_algorithmtype) */
|
||||
enum {
|
||||
Z_EROFS_COMPRESSION_LZ4 = 0,
|
||||
Z_EROFS_COMPRESSION_LZ4 = 0,
|
||||
Z_EROFS_COMPRESSION_LZMA = 1,
|
||||
Z_EROFS_COMPRESSION_MAX
|
||||
};
|
||||
#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1))
|
||||
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
|
||||
|
||||
/* 14 bytes (+ length field = 16 bytes) */
|
||||
struct z_erofs_lz4_cfgs {
|
||||
@@ -259,6 +277,15 @@ struct z_erofs_lz4_cfgs {
|
||||
u8 reserved[10];
|
||||
} __packed;
|
||||
|
||||
/* 14 bytes (+ length field = 16 bytes) */
|
||||
struct z_erofs_lzma_cfgs {
|
||||
__le32 dict_size;
|
||||
__le16 format;
|
||||
u8 reserved[8];
|
||||
} __packed;
|
||||
|
||||
#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
|
||||
|
||||
/*
|
||||
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
|
||||
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
|
||||
@@ -288,35 +315,34 @@ struct z_erofs_map_header {
|
||||
#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
|
||||
|
||||
/*
|
||||
* Fixed-sized output compression ondisk Logical Extent cluster type:
|
||||
* 0 - literal (uncompressed) cluster
|
||||
* 1 - compressed cluster (for the head logical cluster)
|
||||
* 2 - compressed cluster (for the other logical clusters)
|
||||
* Fixed-sized output compression on-disk logical cluster type:
|
||||
* 0 - literal (uncompressed) lcluster
|
||||
* 1,3 - compressed lcluster (for HEAD lclusters)
|
||||
* 2 - compressed lcluster (for NONHEAD lclusters)
|
||||
*
|
||||
* In detail,
|
||||
* 0 - literal (uncompressed) cluster,
|
||||
* 0 - literal (uncompressed) lcluster,
|
||||
* di_advise = 0
|
||||
* di_clusterofs = the literal data offset of the cluster
|
||||
* di_blkaddr = the blkaddr of the literal cluster
|
||||
* di_clusterofs = the literal data offset of the lcluster
|
||||
* di_blkaddr = the blkaddr of the literal pcluster
|
||||
*
|
||||
* 1 - compressed cluster (for the head logical cluster)
|
||||
* di_advise = 1
|
||||
* di_clusterofs = the decompressed data offset of the cluster
|
||||
* di_blkaddr = the blkaddr of the compressed cluster
|
||||
* 1,3 - compressed lcluster (for HEAD lclusters)
|
||||
* di_advise = 1 or 3
|
||||
* di_clusterofs = the decompressed data offset of the lcluster
|
||||
* di_blkaddr = the blkaddr of the compressed pcluster
|
||||
*
|
||||
* 2 - compressed cluster (for the other logical clusters)
|
||||
* 2 - compressed lcluster (for NONHEAD lclusters)
|
||||
* di_advise = 2
|
||||
* di_clusterofs =
|
||||
* the decompressed data offset in its own head cluster
|
||||
* di_u.delta[0] = distance to its corresponding head cluster
|
||||
* di_u.delta[1] = distance to its corresponding tail cluster
|
||||
* (di_advise could be 0, 1 or 2)
|
||||
* the decompressed data offset in its own HEAD lcluster
|
||||
* di_u.delta[0] = distance to this HEAD lcluster
|
||||
* di_u.delta[1] = distance to the next HEAD lcluster
|
||||
*/
|
||||
enum {
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 = 1,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 = 3,
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_MAX
|
||||
};
|
||||
|
||||
@@ -384,6 +410,7 @@ static inline void erofs_check_ondisk_layout_definitions(void)
|
||||
/* keep in sync between 2 index structures for better extendibility */
|
||||
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
|
||||
sizeof(struct z_erofs_vle_decompressed_index));
|
||||
BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128);
|
||||
|
||||
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
|
||||
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
|
||||
|
||||
@@ -192,7 +192,7 @@ static struct page *erofs_read_inode(struct inode *inode,
|
||||
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
|
||||
|
||||
inode->i_flags &= ~S_DAX;
|
||||
if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
|
||||
if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
|
||||
vi->datalayout == EROFS_INODE_FLAT_PLAIN)
|
||||
inode->i_flags |= S_DAX;
|
||||
if (!nblks)
|
||||
|
||||
@@ -47,7 +47,16 @@ typedef u64 erofs_off_t;
|
||||
/* data type for filesystem-wide blocks number */
|
||||
typedef u32 erofs_blk_t;
|
||||
|
||||
struct erofs_fs_context {
|
||||
struct erofs_device_info {
|
||||
char *path;
|
||||
struct block_device *bdev;
|
||||
struct dax_device *dax_dev;
|
||||
|
||||
u32 blocks;
|
||||
u32 mapped_blkaddr;
|
||||
};
|
||||
|
||||
struct erofs_mount_opts {
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
/* current strategy of how to use managed cache */
|
||||
unsigned char cache_strategy;
|
||||
@@ -60,6 +69,18 @@ struct erofs_fs_context {
|
||||
unsigned int mount_opt;
|
||||
};
|
||||
|
||||
struct erofs_dev_context {
|
||||
struct idr tree;
|
||||
struct rw_semaphore rwsem;
|
||||
|
||||
unsigned int extra_devices;
|
||||
};
|
||||
|
||||
struct erofs_fs_context {
|
||||
struct erofs_mount_opts opt;
|
||||
struct erofs_dev_context *devs;
|
||||
};
|
||||
|
||||
/* all filesystem-wide lz4 configurations */
|
||||
struct erofs_sb_lz4_info {
|
||||
/* # of pages needed for EROFS lz4 rolling decompression */
|
||||
@@ -69,6 +90,7 @@ struct erofs_sb_lz4_info {
|
||||
};
|
||||
|
||||
struct erofs_sb_info {
|
||||
struct erofs_mount_opts opt; /* options */
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
/* list for all registered superblocks, mainly for shrinker */
|
||||
struct list_head list;
|
||||
@@ -85,12 +107,16 @@ struct erofs_sb_info {
|
||||
|
||||
struct erofs_sb_lz4_info lz4;
|
||||
#endif /* CONFIG_EROFS_FS_ZIP */
|
||||
struct erofs_dev_context *devs;
|
||||
struct dax_device *dax_dev;
|
||||
u32 blocks;
|
||||
u64 total_blocks;
|
||||
u32 primarydevice_blocks;
|
||||
|
||||
u32 meta_blkaddr;
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
u32 xattr_blkaddr;
|
||||
#endif
|
||||
u16 device_id_mask; /* valid bits of device id to be used */
|
||||
|
||||
/* inode slot unit size in bit shift */
|
||||
unsigned char islotbits;
|
||||
@@ -108,8 +134,6 @@ struct erofs_sb_info {
|
||||
u8 volume_name[16]; /* volume name */
|
||||
u32 feature_compat;
|
||||
u32 feature_incompat;
|
||||
|
||||
struct erofs_fs_context ctx; /* options */
|
||||
};
|
||||
|
||||
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
|
||||
@@ -121,9 +145,9 @@ struct erofs_sb_info {
|
||||
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
|
||||
#define EROFS_MOUNT_DAX_NEVER 0x00000080
|
||||
|
||||
#define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
|
||||
#define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option)
|
||||
#define test_opt(ctx, option) ((ctx)->mount_opt & EROFS_MOUNT_##option)
|
||||
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
|
||||
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
|
||||
#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option)
|
||||
|
||||
enum {
|
||||
EROFS_ZIP_CACHE_DISABLED,
|
||||
@@ -237,6 +261,7 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
|
||||
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
|
||||
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
|
||||
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
|
||||
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
|
||||
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
|
||||
|
||||
/* atomic flag definitions */
|
||||
@@ -338,7 +363,7 @@ extern const struct address_space_operations z_erofs_aops;
|
||||
* of the corresponding uncompressed data in the file.
|
||||
*/
|
||||
enum {
|
||||
BH_Zipped = BH_PrivateStart,
|
||||
BH_Encoded = BH_PrivateStart,
|
||||
BH_FullMapped,
|
||||
};
|
||||
|
||||
@@ -346,8 +371,8 @@ enum {
|
||||
#define EROFS_MAP_MAPPED (1 << BH_Mapped)
|
||||
/* Located in metadata (could be copied from bd_inode) */
|
||||
#define EROFS_MAP_META (1 << BH_Meta)
|
||||
/* The extent has been compressed */
|
||||
#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
|
||||
/* The extent is encoded */
|
||||
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
|
||||
/* The length of extent is full */
|
||||
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
|
||||
|
||||
@@ -355,6 +380,8 @@ struct erofs_map_blocks {
|
||||
erofs_off_t m_pa, m_la;
|
||||
u64 m_plen, m_llen;
|
||||
|
||||
unsigned short m_deviceid;
|
||||
char m_algorithmformat;
|
||||
unsigned int m_flags;
|
||||
|
||||
struct page *mpage;
|
||||
@@ -367,6 +394,13 @@ struct erofs_map_blocks {
|
||||
* approach instead if possible since it's more metadata lightweight.)
|
||||
*/
|
||||
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
|
||||
/* Used to map the whole extent if non-negligible data is requested for LZMA */
|
||||
#define EROFS_GET_BLOCKS_READMORE 0x0004
|
||||
|
||||
enum {
|
||||
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
|
||||
Z_EROFS_COMPRESSION_RUNTIME_MAX
|
||||
};
|
||||
|
||||
/* zmap.c */
|
||||
extern const struct iomap_ops z_erofs_iomap_report_ops;
|
||||
@@ -386,9 +420,18 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
|
||||
}
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP */
|
||||
|
||||
struct erofs_map_dev {
|
||||
struct block_device *m_bdev;
|
||||
struct dax_device *m_daxdev;
|
||||
|
||||
erofs_off_t m_pa;
|
||||
unsigned int m_deviceid;
|
||||
};
|
||||
|
||||
/* data.c */
|
||||
extern const struct file_operations erofs_file_fops;
|
||||
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
|
||||
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
|
||||
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len);
|
||||
|
||||
@@ -442,7 +485,14 @@ void erofs_pcpubuf_init(void);
|
||||
void erofs_pcpubuf_exit(void);
|
||||
|
||||
/* utils.c / zdata.c */
|
||||
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
|
||||
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp);
|
||||
static inline void erofs_pagepool_add(struct page **pagepool,
|
||||
struct page *page)
|
||||
{
|
||||
set_page_private(page, (unsigned long)*pagepool);
|
||||
*pagepool = page;
|
||||
}
|
||||
void erofs_release_pages(struct page **pagepool);
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
int erofs_workgroup_put(struct erofs_workgroup *grp);
|
||||
@@ -482,6 +532,26 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
|
||||
}
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP */
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
|
||||
int z_erofs_lzma_init(void);
|
||||
void z_erofs_lzma_exit(void);
|
||||
int z_erofs_load_lzma_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb,
|
||||
struct z_erofs_lzma_cfgs *lzma, int size);
|
||||
#else
|
||||
static inline int z_erofs_lzma_init(void) { return 0; }
|
||||
static inline int z_erofs_lzma_exit(void) { return 0; }
|
||||
static inline int z_erofs_load_lzma_config(struct super_block *sb,
|
||||
struct erofs_super_block *dsb,
|
||||
struct z_erofs_lzma_cfgs *lzma, int size) {
|
||||
if (lzma) {
|
||||
erofs_err(sb, "lzma algorithm isn't enabled");
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP_LZMA */
|
||||
|
||||
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
|
||||
|
||||
#endif /* __EROFS_INTERNAL_H */
|
||||
|
||||
@@ -49,7 +49,7 @@ int erofs_pcpubuf_growsize(unsigned int nrpages)
|
||||
{
|
||||
static DEFINE_MUTEX(pcb_resize_mutex);
|
||||
static unsigned int pcb_nrpages;
|
||||
LIST_HEAD(pagepool);
|
||||
struct page *pagepool = NULL;
|
||||
int delta, cpu, ret, i;
|
||||
|
||||
mutex_lock(&pcb_resize_mutex);
|
||||
@@ -102,13 +102,13 @@ int erofs_pcpubuf_growsize(unsigned int nrpages)
|
||||
vunmap(old_ptr);
|
||||
free_pagearray:
|
||||
while (i)
|
||||
list_add(&oldpages[--i]->lru, &pagepool);
|
||||
erofs_pagepool_add(&pagepool, oldpages[--i]);
|
||||
kfree(oldpages);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
pcb_nrpages = nrpages;
|
||||
put_pages_list(&pagepool);
|
||||
erofs_release_pages(&pagepool);
|
||||
out:
|
||||
mutex_unlock(&pcb_resize_mutex);
|
||||
return ret;
|
||||
|
||||
231
fs/erofs/super.c
231
fs/erofs/super.c
@@ -225,6 +225,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
|
||||
case Z_EROFS_COMPRESSION_LZ4:
|
||||
ret = z_erofs_load_lz4_config(sb, dsb, data, size);
|
||||
break;
|
||||
case Z_EROFS_COMPRESSION_LZMA:
|
||||
ret = z_erofs_load_lzma_config(sb, dsb, data, size);
|
||||
break;
|
||||
default:
|
||||
DBG_BUGON(1);
|
||||
ret = -EFAULT;
|
||||
@@ -252,6 +255,79 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int erofs_init_devices(struct super_block *sb,
|
||||
struct erofs_super_block *dsb)
|
||||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
||||
unsigned int ondisk_extradevs;
|
||||
erofs_off_t pos;
|
||||
struct page *page = NULL;
|
||||
struct erofs_device_info *dif;
|
||||
struct erofs_deviceslot *dis;
|
||||
void *ptr;
|
||||
int id, err = 0;
|
||||
|
||||
sbi->total_blocks = sbi->primarydevice_blocks;
|
||||
if (!erofs_sb_has_device_table(sbi))
|
||||
ondisk_extradevs = 0;
|
||||
else
|
||||
ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
|
||||
|
||||
if (ondisk_extradevs != sbi->devs->extra_devices) {
|
||||
erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
|
||||
ondisk_extradevs, sbi->devs->extra_devices);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!ondisk_extradevs)
|
||||
return 0;
|
||||
|
||||
sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
|
||||
pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
|
||||
down_read(&sbi->devs->rwsem);
|
||||
idr_for_each_entry(&sbi->devs->tree, dif, id) {
|
||||
erofs_blk_t blk = erofs_blknr(pos);
|
||||
struct block_device *bdev;
|
||||
|
||||
if (!page || page->index != blk) {
|
||||
if (page) {
|
||||
kunmap(page);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
page = erofs_get_meta_page(sb, blk);
|
||||
if (IS_ERR(page)) {
|
||||
up_read(&sbi->devs->rwsem);
|
||||
return PTR_ERR(page);
|
||||
}
|
||||
ptr = kmap(page);
|
||||
}
|
||||
dis = ptr + erofs_blkoff(pos);
|
||||
|
||||
bdev = blkdev_get_by_path(dif->path,
|
||||
FMODE_READ | FMODE_EXCL,
|
||||
sb->s_type);
|
||||
if (IS_ERR(bdev)) {
|
||||
err = PTR_ERR(bdev);
|
||||
goto err_out;
|
||||
}
|
||||
dif->bdev = bdev;
|
||||
dif->dax_dev = fs_dax_get_by_bdev(bdev);
|
||||
dif->blocks = le32_to_cpu(dis->blocks);
|
||||
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
|
||||
sbi->total_blocks += dif->blocks;
|
||||
pos += EROFS_DEVT_SLOT_SIZE;
|
||||
}
|
||||
err_out:
|
||||
up_read(&sbi->devs->rwsem);
|
||||
if (page) {
|
||||
kunmap(page);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int erofs_read_superblock(struct super_block *sb)
|
||||
{
|
||||
struct erofs_sb_info *sbi;
|
||||
@@ -303,7 +379,7 @@ static int erofs_read_superblock(struct super_block *sb)
|
||||
sbi->sb_size);
|
||||
goto out;
|
||||
}
|
||||
sbi->blocks = le32_to_cpu(dsb->blocks);
|
||||
sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
|
||||
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
|
||||
@@ -330,6 +406,11 @@ static int erofs_read_superblock(struct super_block *sb)
|
||||
ret = erofs_load_compr_cfgs(sb, dsb);
|
||||
else
|
||||
ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* handle multiple devices */
|
||||
ret = erofs_init_devices(sb, dsb);
|
||||
out:
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
@@ -340,15 +421,15 @@ out:
|
||||
static void erofs_default_options(struct erofs_fs_context *ctx)
|
||||
{
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
|
||||
ctx->max_sync_decompress_pages = 3;
|
||||
ctx->readahead_sync_decompress = false;
|
||||
ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
|
||||
ctx->opt.max_sync_decompress_pages = 3;
|
||||
ctx->opt.readahead_sync_decompress = false;
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
set_opt(ctx, XATTR_USER);
|
||||
set_opt(&ctx->opt, XATTR_USER);
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
set_opt(ctx, POSIX_ACL);
|
||||
set_opt(&ctx->opt, POSIX_ACL);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -358,6 +439,7 @@ enum {
|
||||
Opt_cache_strategy,
|
||||
Opt_dax,
|
||||
Opt_dax_enum,
|
||||
Opt_device,
|
||||
Opt_err
|
||||
};
|
||||
|
||||
@@ -381,6 +463,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
|
||||
erofs_param_cache_strategy),
|
||||
fsparam_flag("dax", Opt_dax),
|
||||
fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
|
||||
fsparam_string("device", Opt_device),
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -392,12 +475,12 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
|
||||
switch (mode) {
|
||||
case EROFS_MOUNT_DAX_ALWAYS:
|
||||
warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
|
||||
set_opt(ctx, DAX_ALWAYS);
|
||||
clear_opt(ctx, DAX_NEVER);
|
||||
set_opt(&ctx->opt, DAX_ALWAYS);
|
||||
clear_opt(&ctx->opt, DAX_NEVER);
|
||||
return true;
|
||||
case EROFS_MOUNT_DAX_NEVER:
|
||||
set_opt(ctx, DAX_NEVER);
|
||||
clear_opt(ctx, DAX_ALWAYS);
|
||||
set_opt(&ctx->opt, DAX_NEVER);
|
||||
clear_opt(&ctx->opt, DAX_ALWAYS);
|
||||
return true;
|
||||
default:
|
||||
DBG_BUGON(1);
|
||||
@@ -412,9 +495,10 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
|
||||
static int erofs_fc_parse_param(struct fs_context *fc,
|
||||
struct fs_parameter *param)
|
||||
{
|
||||
struct erofs_fs_context *ctx __maybe_unused = fc->fs_private;
|
||||
struct erofs_fs_context *ctx = fc->fs_private;
|
||||
struct fs_parse_result result;
|
||||
int opt;
|
||||
struct erofs_device_info *dif;
|
||||
int opt, ret;
|
||||
|
||||
opt = fs_parse(fc, erofs_fs_parameters, param, &result);
|
||||
if (opt < 0)
|
||||
@@ -424,9 +508,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
|
||||
case Opt_user_xattr:
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
if (result.boolean)
|
||||
set_opt(ctx, XATTR_USER);
|
||||
set_opt(&ctx->opt, XATTR_USER);
|
||||
else
|
||||
clear_opt(ctx, XATTR_USER);
|
||||
clear_opt(&ctx->opt, XATTR_USER);
|
||||
#else
|
||||
errorfc(fc, "{,no}user_xattr options not supported");
|
||||
#endif
|
||||
@@ -434,16 +518,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
|
||||
case Opt_acl:
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
if (result.boolean)
|
||||
set_opt(ctx, POSIX_ACL);
|
||||
set_opt(&ctx->opt, POSIX_ACL);
|
||||
else
|
||||
clear_opt(ctx, POSIX_ACL);
|
||||
clear_opt(&ctx->opt, POSIX_ACL);
|
||||
#else
|
||||
errorfc(fc, "{,no}acl options not supported");
|
||||
#endif
|
||||
break;
|
||||
case Opt_cache_strategy:
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
ctx->cache_strategy = result.uint_32;
|
||||
ctx->opt.cache_strategy = result.uint_32;
|
||||
#else
|
||||
errorfc(fc, "compression not supported, cache_strategy ignored");
|
||||
#endif
|
||||
@@ -456,6 +540,25 @@ static int erofs_fc_parse_param(struct fs_context *fc,
|
||||
if (!erofs_fc_set_dax_mode(fc, result.uint_32))
|
||||
return -EINVAL;
|
||||
break;
|
||||
case Opt_device:
|
||||
dif = kzalloc(sizeof(*dif), GFP_KERNEL);
|
||||
if (!dif)
|
||||
return -ENOMEM;
|
||||
dif->path = kstrdup(param->string, GFP_KERNEL);
|
||||
if (!dif->path) {
|
||||
kfree(dif);
|
||||
return -ENOMEM;
|
||||
}
|
||||
down_write(&ctx->devs->rwsem);
|
||||
ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
|
||||
up_write(&ctx->devs->rwsem);
|
||||
if (ret < 0) {
|
||||
kfree(dif->path);
|
||||
kfree(dif);
|
||||
return ret;
|
||||
}
|
||||
++ctx->devs->extra_devices;
|
||||
break;
|
||||
default:
|
||||
return -ENOPARAM;
|
||||
}
|
||||
@@ -540,15 +643,19 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||
return -ENOMEM;
|
||||
|
||||
sb->s_fs_info = sbi;
|
||||
sbi->opt = ctx->opt;
|
||||
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
|
||||
sbi->devs = ctx->devs;
|
||||
ctx->devs = NULL;
|
||||
|
||||
err = erofs_read_superblock(sb);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (test_opt(ctx, DAX_ALWAYS) &&
|
||||
if (test_opt(&sbi->opt, DAX_ALWAYS) &&
|
||||
!bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
|
||||
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
|
||||
clear_opt(ctx, DAX_ALWAYS);
|
||||
clear_opt(&sbi->opt, DAX_ALWAYS);
|
||||
}
|
||||
sb->s_flags |= SB_RDONLY | SB_NOATIME;
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
@@ -557,13 +664,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||
sb->s_op = &erofs_sops;
|
||||
sb->s_xattr = erofs_xattr_handlers;
|
||||
|
||||
if (test_opt(ctx, POSIX_ACL))
|
||||
if (test_opt(&sbi->opt, POSIX_ACL))
|
||||
sb->s_flags |= SB_POSIXACL;
|
||||
else
|
||||
sb->s_flags &= ~SB_POSIXACL;
|
||||
|
||||
sbi->ctx = *ctx;
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
xa_init(&sbi->managed_pslots);
|
||||
#endif
|
||||
@@ -607,20 +712,44 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
|
||||
|
||||
DBG_BUGON(!sb_rdonly(sb));
|
||||
|
||||
if (test_opt(ctx, POSIX_ACL))
|
||||
if (test_opt(&ctx->opt, POSIX_ACL))
|
||||
fc->sb_flags |= SB_POSIXACL;
|
||||
else
|
||||
fc->sb_flags &= ~SB_POSIXACL;
|
||||
|
||||
sbi->ctx = *ctx;
|
||||
sbi->opt = ctx->opt;
|
||||
|
||||
fc->sb_flags |= SB_RDONLY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int erofs_release_device_info(int id, void *ptr, void *data)
|
||||
{
|
||||
struct erofs_device_info *dif = ptr;
|
||||
|
||||
fs_put_dax(dif->dax_dev);
|
||||
if (dif->bdev)
|
||||
blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
|
||||
kfree(dif->path);
|
||||
kfree(dif);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void erofs_free_dev_context(struct erofs_dev_context *devs)
|
||||
{
|
||||
if (!devs)
|
||||
return;
|
||||
idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
|
||||
idr_destroy(&devs->tree);
|
||||
kfree(devs);
|
||||
}
|
||||
|
||||
static void erofs_fc_free(struct fs_context *fc)
|
||||
{
|
||||
kfree(fc->fs_private);
|
||||
struct erofs_fs_context *ctx = fc->fs_private;
|
||||
|
||||
erofs_free_dev_context(ctx->devs);
|
||||
kfree(ctx);
|
||||
}
|
||||
|
||||
static const struct fs_context_operations erofs_context_ops = {
|
||||
@@ -632,15 +761,21 @@ static const struct fs_context_operations erofs_context_ops = {
|
||||
|
||||
static int erofs_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
fc->fs_private = kzalloc(sizeof(struct erofs_fs_context), GFP_KERNEL);
|
||||
if (!fc->fs_private)
|
||||
struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
|
||||
if (!ctx->devs) {
|
||||
kfree(ctx);
|
||||
return -ENOMEM;
|
||||
}
|
||||
fc->fs_private = ctx;
|
||||
|
||||
/* set default mount options */
|
||||
erofs_default_options(fc->fs_private);
|
||||
|
||||
idr_init(&ctx->devs->tree);
|
||||
init_rwsem(&ctx->devs->rwsem);
|
||||
erofs_default_options(ctx);
|
||||
fc->ops = &erofs_context_ops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -659,6 +794,8 @@ static void erofs_kill_sb(struct super_block *sb)
|
||||
sbi = EROFS_SB(sb);
|
||||
if (!sbi)
|
||||
return;
|
||||
|
||||
erofs_free_dev_context(sbi->devs);
|
||||
fs_put_dax(sbi->dax_dev);
|
||||
kfree(sbi);
|
||||
sb->s_fs_info = NULL;
|
||||
@@ -706,6 +843,10 @@ static int __init erofs_module_init(void)
|
||||
if (err)
|
||||
goto shrinker_err;
|
||||
|
||||
err = z_erofs_lzma_init();
|
||||
if (err)
|
||||
goto lzma_err;
|
||||
|
||||
erofs_pcpubuf_init();
|
||||
err = z_erofs_init_zip_subsystem();
|
||||
if (err)
|
||||
@@ -720,6 +861,8 @@ static int __init erofs_module_init(void)
|
||||
fs_err:
|
||||
z_erofs_exit_zip_subsystem();
|
||||
zip_err:
|
||||
z_erofs_lzma_exit();
|
||||
lzma_err:
|
||||
erofs_exit_shrinker();
|
||||
shrinker_err:
|
||||
kmem_cache_destroy(erofs_inode_cachep);
|
||||
@@ -730,11 +873,13 @@ icache_err:
|
||||
static void __exit erofs_module_exit(void)
|
||||
{
|
||||
unregister_filesystem(&erofs_fs_type);
|
||||
z_erofs_exit_zip_subsystem();
|
||||
erofs_exit_shrinker();
|
||||
|
||||
/* Ensure all RCU free inodes are safe before cache is destroyed. */
|
||||
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
|
||||
rcu_barrier();
|
||||
|
||||
z_erofs_exit_zip_subsystem();
|
||||
z_erofs_lzma_exit();
|
||||
erofs_exit_shrinker();
|
||||
kmem_cache_destroy(erofs_inode_cachep);
|
||||
erofs_pcpubuf_exit();
|
||||
}
|
||||
@@ -748,7 +893,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
|
||||
buf->f_type = sb->s_magic;
|
||||
buf->f_bsize = EROFS_BLKSIZ;
|
||||
buf->f_blocks = sbi->blocks;
|
||||
buf->f_blocks = sbi->total_blocks;
|
||||
buf->f_bfree = buf->f_bavail = 0;
|
||||
|
||||
buf->f_files = ULLONG_MAX;
|
||||
@@ -763,31 +908,31 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
|
||||
struct erofs_fs_context *ctx = &sbi->ctx;
|
||||
struct erofs_mount_opts *opt = &sbi->opt;
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
if (test_opt(ctx, XATTR_USER))
|
||||
if (test_opt(opt, XATTR_USER))
|
||||
seq_puts(seq, ",user_xattr");
|
||||
else
|
||||
seq_puts(seq, ",nouser_xattr");
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
if (test_opt(ctx, POSIX_ACL))
|
||||
if (test_opt(opt, POSIX_ACL))
|
||||
seq_puts(seq, ",acl");
|
||||
else
|
||||
seq_puts(seq, ",noacl");
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
if (ctx->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
|
||||
if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
|
||||
seq_puts(seq, ",cache_strategy=disabled");
|
||||
else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
|
||||
else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
|
||||
seq_puts(seq, ",cache_strategy=readahead");
|
||||
else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
|
||||
else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
|
||||
seq_puts(seq, ",cache_strategy=readaround");
|
||||
#endif
|
||||
if (test_opt(ctx, DAX_ALWAYS))
|
||||
if (test_opt(opt, DAX_ALWAYS))
|
||||
seq_puts(seq, ",dax=always");
|
||||
if (test_opt(ctx, DAX_NEVER))
|
||||
if (test_opt(opt, DAX_NEVER))
|
||||
seq_puts(seq, ",dax=never");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -6,20 +6,29 @@
|
||||
#include "internal.h"
|
||||
#include <linux/pagevec.h>
|
||||
|
||||
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
|
||||
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
|
||||
{
|
||||
struct page *page;
|
||||
struct page *page = *pagepool;
|
||||
|
||||
if (!list_empty(pool)) {
|
||||
page = lru_to_page(pool);
|
||||
if (page) {
|
||||
DBG_BUGON(page_ref_count(page) != 1);
|
||||
list_del(&page->lru);
|
||||
*pagepool = (struct page *)page_private(page);
|
||||
} else {
|
||||
page = alloc_page(gfp);
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
void erofs_release_pages(struct page **pagepool)
|
||||
{
|
||||
while (*pagepool) {
|
||||
struct page *page = *pagepool;
|
||||
|
||||
*pagepool = (struct page *)page_private(page);
|
||||
put_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
/* global shrink count (for all mounted EROFS instances) */
|
||||
static atomic_long_t erofs_global_shrink_cnt;
|
||||
|
||||
@@ -429,7 +429,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
|
||||
|
||||
static bool erofs_xattr_user_list(struct dentry *dentry)
|
||||
{
|
||||
return test_opt(&EROFS_SB(dentry->d_sb)->ctx, XATTR_USER);
|
||||
return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
|
||||
}
|
||||
|
||||
static bool erofs_xattr_trusted_list(struct dentry *dentry)
|
||||
@@ -477,7 +477,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
|
||||
|
||||
switch (handler->flags) {
|
||||
case EROFS_XATTR_INDEX_USER:
|
||||
if (!test_opt(&sbi->ctx, XATTR_USER))
|
||||
if (!test_opt(&sbi->opt, XATTR_USER))
|
||||
return -EOPNOTSUPP;
|
||||
break;
|
||||
case EROFS_XATTR_INDEX_TRUSTED:
|
||||
|
||||
112
fs/erofs/zdata.c
112
fs/erofs/zdata.c
@@ -96,16 +96,9 @@ static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
|
||||
DBG_BUGON(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* a compressed_pages[] placeholder in order to avoid
|
||||
* being filled with file pages for in-place decompression.
|
||||
*/
|
||||
#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D)
|
||||
|
||||
/* how to allocate cached pages for a pcluster */
|
||||
enum z_erofs_cache_alloctype {
|
||||
DONTALLOC, /* don't allocate any cached pages */
|
||||
DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */
|
||||
/*
|
||||
* try to use cached I/O if page allocation succeeds or fallback
|
||||
* to in-place I/O instead to avoid any direct reclaim.
|
||||
@@ -236,7 +229,7 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
|
||||
static void preload_compressed_pages(struct z_erofs_collector *clt,
|
||||
struct address_space *mc,
|
||||
enum z_erofs_cache_alloctype type,
|
||||
struct list_head *pagepool)
|
||||
struct page **pagepool)
|
||||
{
|
||||
struct z_erofs_pcluster *pcl = clt->pcl;
|
||||
bool standalone = true;
|
||||
@@ -267,10 +260,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
|
||||
/* I/O is needed, not possible to decompress directly */
|
||||
standalone = false;
|
||||
switch (type) {
|
||||
case DELAYEDALLOC:
|
||||
t = tagptr_init(compressed_page_t,
|
||||
PAGE_UNALLOCATED);
|
||||
break;
|
||||
case TRYALLOC:
|
||||
newpage = erofs_allocpage(pagepool, gfp);
|
||||
if (!newpage)
|
||||
@@ -287,12 +276,10 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
|
||||
if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
|
||||
continue;
|
||||
|
||||
if (page) {
|
||||
if (page)
|
||||
put_page(page);
|
||||
} else if (newpage) {
|
||||
set_page_private(newpage, 0);
|
||||
list_add(&newpage->lru, pagepool);
|
||||
}
|
||||
else if (newpage)
|
||||
erofs_pagepool_add(pagepool, newpage);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -373,8 +360,8 @@ static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
|
||||
|
||||
/* callers must be with collection lock held */
|
||||
static int z_erofs_attach_page(struct z_erofs_collector *clt,
|
||||
struct page *page,
|
||||
enum z_erofs_page_type type)
|
||||
struct page *page, enum z_erofs_page_type type,
|
||||
bool pvec_safereuse)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -384,9 +371,9 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
|
||||
z_erofs_try_inplace_io(clt, page))
|
||||
return 0;
|
||||
|
||||
ret = z_erofs_pagevec_enqueue(&clt->vector, page, type);
|
||||
ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
|
||||
pvec_safereuse);
|
||||
clt->cl->vcnt += (unsigned int)ret;
|
||||
|
||||
return ret ? 0 : -EAGAIN;
|
||||
}
|
||||
|
||||
@@ -476,6 +463,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
|
||||
struct erofs_workgroup *grp;
|
||||
int err;
|
||||
|
||||
if (!(map->m_flags & EROFS_MAP_ENCODED)) {
|
||||
DBG_BUGON(1);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
/* no available pcluster, let's allocate one */
|
||||
pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
|
||||
if (IS_ERR(pcl))
|
||||
@@ -483,16 +475,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
|
||||
|
||||
atomic_set(&pcl->obj.refcount, 1);
|
||||
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
|
||||
|
||||
pcl->algorithmformat = map->m_algorithmformat;
|
||||
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
|
||||
(map->m_flags & EROFS_MAP_FULL_MAPPED ?
|
||||
Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
|
||||
|
||||
if (map->m_flags & EROFS_MAP_ZIPPED)
|
||||
pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
|
||||
else
|
||||
pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
|
||||
|
||||
/* new pclusters should be claimed as type 1, primary and followed */
|
||||
pcl->next = clt->owned_head;
|
||||
clt->mode = COLLECT_PRIMARY_FOLLOWED;
|
||||
@@ -643,7 +630,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
|
||||
}
|
||||
|
||||
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
|
||||
struct page *page, struct list_head *pagepool)
|
||||
struct page *page, struct page **pagepool)
|
||||
{
|
||||
struct inode *const inode = fe->inode;
|
||||
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
|
||||
@@ -695,7 +682,7 @@ restart_now:
|
||||
goto err_out;
|
||||
|
||||
/* preload all compressed pages (maybe downgrade role if necessary) */
|
||||
if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
|
||||
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
|
||||
cache_strategy = TRYALLOC;
|
||||
else
|
||||
cache_strategy = DONTALLOC;
|
||||
@@ -729,7 +716,8 @@ hitted:
|
||||
tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
|
||||
|
||||
retry:
|
||||
err = z_erofs_attach_page(clt, page, page_type);
|
||||
err = z_erofs_attach_page(clt, page, page_type,
|
||||
clt->mode >= COLLECT_PRIMARY_FOLLOWED);
|
||||
/* should allocate an additional short-lived page for pagevec */
|
||||
if (err == -EAGAIN) {
|
||||
struct page *const newpage =
|
||||
@@ -737,7 +725,7 @@ retry:
|
||||
|
||||
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
|
||||
err = z_erofs_attach_page(clt, newpage,
|
||||
Z_EROFS_PAGE_TYPE_EXCLUSIVE);
|
||||
Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
@@ -796,7 +784,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
|
||||
/* Use workqueue and sync decompression for atomic contexts only */
|
||||
if (in_atomic() || irqs_disabled()) {
|
||||
queue_work(z_erofs_workqueue, &io->u.work);
|
||||
sbi->ctx.readahead_sync_decompress = true;
|
||||
sbi->opt.readahead_sync_decompress = true;
|
||||
return;
|
||||
}
|
||||
z_erofs_decompressqueue_work(&io->u.work);
|
||||
@@ -836,7 +824,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
|
||||
|
||||
static int z_erofs_decompress_pcluster(struct super_block *sb,
|
||||
struct z_erofs_pcluster *pcl,
|
||||
struct list_head *pagepool)
|
||||
struct page **pagepool)
|
||||
{
|
||||
struct erofs_sb_info *const sbi = EROFS_SB(sb);
|
||||
struct z_erofs_pagevec_ctor ctor;
|
||||
@@ -1036,7 +1024,7 @@ out:
|
||||
}
|
||||
|
||||
static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
|
||||
struct list_head *pagepool)
|
||||
struct page **pagepool)
|
||||
{
|
||||
z_erofs_next_pcluster_t owned = io->head;
|
||||
|
||||
@@ -1060,18 +1048,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
|
||||
{
|
||||
struct z_erofs_decompressqueue *bgq =
|
||||
container_of(work, struct z_erofs_decompressqueue, u.work);
|
||||
LIST_HEAD(pagepool);
|
||||
struct page *pagepool = NULL;
|
||||
|
||||
DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
|
||||
z_erofs_decompress_queue(bgq, &pagepool);
|
||||
|
||||
put_pages_list(&pagepool);
|
||||
erofs_release_pages(&pagepool);
|
||||
kvfree(bgq);
|
||||
}
|
||||
|
||||
static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
|
||||
unsigned int nr,
|
||||
struct list_head *pagepool,
|
||||
struct page **pagepool,
|
||||
struct address_space *mc,
|
||||
gfp_t gfp)
|
||||
{
|
||||
@@ -1091,15 +1079,6 @@ repeat:
|
||||
if (!page)
|
||||
goto out_allocpage;
|
||||
|
||||
/*
|
||||
* the cached page has not been allocated and
|
||||
* an placeholder is out there, prepare it now.
|
||||
*/
|
||||
if (page == PAGE_UNALLOCATED) {
|
||||
tocache = true;
|
||||
goto out_allocpage;
|
||||
}
|
||||
|
||||
/* process the target tagged pointer */
|
||||
t = tagptr_init(compressed_page_t, page);
|
||||
justfound = tagptr_unfold_tags(t);
|
||||
@@ -1173,7 +1152,7 @@ repeat:
|
||||
out_allocpage:
|
||||
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
|
||||
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
|
||||
list_add(&page->lru, pagepool);
|
||||
erofs_pagepool_add(pagepool, page);
|
||||
cond_resched();
|
||||
goto repeat;
|
||||
}
|
||||
@@ -1257,7 +1236,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
|
||||
|
||||
static void z_erofs_submit_queue(struct super_block *sb,
|
||||
struct z_erofs_decompress_frontend *f,
|
||||
struct list_head *pagepool,
|
||||
struct page **pagepool,
|
||||
struct z_erofs_decompressqueue *fgq,
|
||||
bool *force_fg)
|
||||
{
|
||||
@@ -1266,8 +1245,9 @@ static void z_erofs_submit_queue(struct super_block *sb,
|
||||
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
|
||||
void *bi_private;
|
||||
z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
|
||||
/* since bio will be NULL, no need to initialize last_index */
|
||||
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
|
||||
pgoff_t last_index;
|
||||
struct block_device *last_bdev;
|
||||
unsigned int nr_bios = 0;
|
||||
struct bio *bio = NULL;
|
||||
|
||||
@@ -1279,6 +1259,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
|
||||
q[JQ_SUBMIT]->head = owned_head;
|
||||
|
||||
do {
|
||||
struct erofs_map_dev mdev;
|
||||
struct z_erofs_pcluster *pcl;
|
||||
pgoff_t cur, end;
|
||||
unsigned int i = 0;
|
||||
@@ -1290,7 +1271,13 @@ static void z_erofs_submit_queue(struct super_block *sb,
|
||||
|
||||
pcl = container_of(owned_head, struct z_erofs_pcluster, next);
|
||||
|
||||
cur = pcl->obj.index;
|
||||
/* no device id here, thus it will always succeed */
|
||||
mdev = (struct erofs_map_dev) {
|
||||
.m_pa = blknr_to_addr(pcl->obj.index),
|
||||
};
|
||||
(void)erofs_map_dev(sb, &mdev);
|
||||
|
||||
cur = erofs_blknr(mdev.m_pa);
|
||||
end = cur + pcl->pclusterpages;
|
||||
|
||||
/* close the main owned chain at first */
|
||||
@@ -1306,7 +1293,8 @@ static void z_erofs_submit_queue(struct super_block *sb,
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
if (bio && cur != last_index + 1) {
|
||||
if (bio && (cur != last_index + 1 ||
|
||||
last_bdev != mdev.m_bdev)) {
|
||||
submit_bio_retry:
|
||||
submit_bio(bio);
|
||||
bio = NULL;
|
||||
@@ -1316,7 +1304,9 @@ submit_bio_retry:
|
||||
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
||||
|
||||
bio->bi_end_io = z_erofs_decompressqueue_endio;
|
||||
bio_set_dev(bio, sb->s_bdev);
|
||||
|
||||
bio_set_dev(bio, mdev.m_bdev);
|
||||
last_bdev = mdev.m_bdev;
|
||||
bio->bi_iter.bi_sector = (sector_t)cur <<
|
||||
LOG_SECTORS_PER_BLOCK;
|
||||
bio->bi_private = bi_private;
|
||||
@@ -1355,7 +1345,7 @@ submit_bio_retry:
|
||||
|
||||
static void z_erofs_runqueue(struct super_block *sb,
|
||||
struct z_erofs_decompress_frontend *f,
|
||||
struct list_head *pagepool, bool force_fg)
|
||||
struct page **pagepool, bool force_fg)
|
||||
{
|
||||
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
|
||||
|
||||
@@ -1381,8 +1371,8 @@ static int z_erofs_readpage(struct file *file, struct page *page)
|
||||
{
|
||||
struct inode *const inode = page->mapping->host;
|
||||
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
|
||||
struct page *pagepool = NULL;
|
||||
int err;
|
||||
LIST_HEAD(pagepool);
|
||||
|
||||
trace_erofs_readpage(page, false);
|
||||
|
||||
@@ -1400,8 +1390,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
|
||||
if (f.map.mpage)
|
||||
put_page(f.map.mpage);
|
||||
|
||||
/* clean up the remaining free pages */
|
||||
put_pages_list(&pagepool);
|
||||
erofs_release_pages(&pagepool);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1411,11 +1400,10 @@ static void z_erofs_readahead(struct readahead_control *rac)
|
||||
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
|
||||
|
||||
unsigned int nr_pages = readahead_count(rac);
|
||||
bool sync = (sbi->ctx.readahead_sync_decompress &&
|
||||
nr_pages <= sbi->ctx.max_sync_decompress_pages);
|
||||
bool sync = (sbi->opt.readahead_sync_decompress &&
|
||||
nr_pages <= sbi->opt.max_sync_decompress_pages);
|
||||
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
|
||||
struct page *page, *head = NULL;
|
||||
LIST_HEAD(pagepool);
|
||||
struct page *pagepool = NULL, *head = NULL, *page;
|
||||
|
||||
trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
|
||||
|
||||
@@ -1457,9 +1445,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
|
||||
|
||||
if (f.map.mpage)
|
||||
put_page(f.map.mpage);
|
||||
|
||||
/* clean up the remaining free pages */
|
||||
put_pages_list(&pagepool);
|
||||
erofs_release_pages(&pagepool);
|
||||
}
|
||||
|
||||
const struct address_space_operations z_erofs_aops = {
|
||||
|
||||
@@ -94,13 +94,6 @@ struct z_erofs_decompressqueue {
|
||||
} u;
|
||||
};
|
||||
|
||||
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
|
||||
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
|
||||
struct page *page)
|
||||
{
|
||||
return page->mapping == MNGD_MAPPING(sbi);
|
||||
}
|
||||
|
||||
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
|
||||
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
|
||||
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
|
||||
@@ -186,4 +179,3 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
|
||||
#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
|
||||
{
|
||||
struct erofs_inode *const vi = EROFS_I(inode);
|
||||
struct super_block *const sb = inode->i_sb;
|
||||
int err;
|
||||
int err, headnr;
|
||||
erofs_off_t pos;
|
||||
struct page *page;
|
||||
void *kaddr;
|
||||
@@ -68,9 +68,11 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
|
||||
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
|
||||
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
|
||||
|
||||
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
|
||||
erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel",
|
||||
vi->z_algorithmtype[0], vi->nid);
|
||||
headnr = 0;
|
||||
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
|
||||
vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
|
||||
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
|
||||
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
|
||||
err = -EOPNOTSUPP;
|
||||
goto unmap_done;
|
||||
}
|
||||
@@ -111,7 +113,7 @@ struct z_erofs_maprecorder {
|
||||
|
||||
unsigned long lcn;
|
||||
/* compression extent information gathered */
|
||||
u8 type;
|
||||
u8 type, headtype;
|
||||
u16 clusterofs;
|
||||
u16 delta[2];
|
||||
erofs_blk_t pblk, compressedlcs;
|
||||
@@ -178,7 +180,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
|
||||
m->clusterofs = 1 << vi->z_logical_clusterbits;
|
||||
m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
|
||||
if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
|
||||
if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
|
||||
if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
|
||||
Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
|
||||
DBG_BUGON(1);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -189,7 +192,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
|
||||
m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
|
||||
break;
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
|
||||
m->clusterofs = le16_to_cpu(di->di_clusterofs);
|
||||
m->pblk = le32_to_cpu(di->di_u.blkaddr);
|
||||
break;
|
||||
@@ -445,9 +449,9 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
|
||||
}
|
||||
return z_erofs_extent_lookback(m, m->delta[0]);
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
|
||||
map->m_flags &= ~EROFS_MAP_ZIPPED;
|
||||
fallthrough;
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
|
||||
m->headtype = m->type;
|
||||
map->m_la = (lcn << lclusterbits) | m->clusterofs;
|
||||
break;
|
||||
default:
|
||||
@@ -470,13 +474,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
|
||||
int err;
|
||||
|
||||
DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
|
||||
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
|
||||
if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
|
||||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
|
||||
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 &&
|
||||
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD2);
|
||||
DBG_BUGON(m->type != m->headtype);
|
||||
|
||||
if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
|
||||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1) &&
|
||||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
|
||||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
|
||||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
|
||||
map->m_plen = 1 << lclusterbits;
|
||||
return 0;
|
||||
}
|
||||
|
||||
lcn = m->lcn + 1;
|
||||
if (m->compressedlcs)
|
||||
goto out;
|
||||
@@ -498,7 +507,8 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
|
||||
|
||||
switch (m->type) {
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
|
||||
/*
|
||||
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
|
||||
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
|
||||
@@ -553,7 +563,8 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
|
||||
DBG_BUGON(!m->delta[1] &&
|
||||
m->clusterofs != 1 << lclusterbits);
|
||||
} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
|
||||
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
|
||||
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 ||
|
||||
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
|
||||
/* go on until the next HEAD lcluster */
|
||||
if (lcn != headlcn)
|
||||
break;
|
||||
@@ -608,16 +619,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
|
||||
if (err)
|
||||
goto unmap_out;
|
||||
|
||||
map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */
|
||||
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
|
||||
end = (m.lcn + 1ULL) << lclusterbits;
|
||||
|
||||
switch (m.type) {
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
|
||||
if (endoff >= m.clusterofs)
|
||||
map->m_flags &= ~EROFS_MAP_ZIPPED;
|
||||
fallthrough;
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
|
||||
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
|
||||
if (endoff >= m.clusterofs) {
|
||||
m.headtype = m.type;
|
||||
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
|
||||
break;
|
||||
}
|
||||
@@ -649,13 +659,22 @@ int z_erofs_map_blocks_iter(struct inode *inode,
|
||||
|
||||
map->m_llen = end - map->m_la;
|
||||
map->m_pa = blknr_to_addr(m.pblk);
|
||||
map->m_flags |= EROFS_MAP_MAPPED;
|
||||
|
||||
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (flags & EROFS_GET_BLOCKS_FIEMAP) {
|
||||
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
|
||||
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
|
||||
else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2)
|
||||
map->m_algorithmformat = vi->z_algorithmtype[1];
|
||||
else
|
||||
map->m_algorithmformat = vi->z_algorithmtype[0];
|
||||
|
||||
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
|
||||
((flags & EROFS_GET_BLOCKS_READMORE) &&
|
||||
map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
|
||||
map->m_llen >= EROFS_BLKSIZ)) {
|
||||
err = z_erofs_get_extent_decompressedlen(&m);
|
||||
if (!err)
|
||||
map->m_flags |= EROFS_MAP_FULL_MAPPED;
|
||||
|
||||
@@ -106,11 +106,18 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
|
||||
|
||||
static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
|
||||
struct page *page,
|
||||
enum z_erofs_page_type type)
|
||||
enum z_erofs_page_type type,
|
||||
bool pvec_safereuse)
|
||||
{
|
||||
if (!ctor->next && type)
|
||||
if (ctor->index + 1 == ctor->nr)
|
||||
if (!ctor->next) {
|
||||
/* some pages cannot be reused as pvec safely without I/O */
|
||||
if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
|
||||
type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
|
||||
|
||||
if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
|
||||
ctor->index + 1 == ctor->nr)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ctor->index >= ctor->nr)
|
||||
z_erofs_pagevec_ctor_pagedown(ctor, false);
|
||||
|
||||
@@ -1599,7 +1599,6 @@ static const struct fscrypt_operations ext4_cryptops = {
|
||||
.set_context = ext4_set_context,
|
||||
.get_dummy_policy = ext4_get_dummy_policy,
|
||||
.empty_dir = ext4_empty_dir,
|
||||
.max_namelen = EXT4_NAME_LEN,
|
||||
.has_stable_inodes = ext4_has_stable_inodes,
|
||||
.get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
|
||||
};
|
||||
|
||||
@@ -3004,7 +3004,6 @@ static const struct fscrypt_operations f2fs_cryptops = {
|
||||
.set_context = f2fs_set_context,
|
||||
.get_dummy_policy = f2fs_get_dummy_policy,
|
||||
.empty_dir = f2fs_empty_dir,
|
||||
.max_namelen = F2FS_NAME_LEN,
|
||||
.has_stable_inodes = f2fs_has_stable_inodes,
|
||||
.get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
|
||||
.get_num_devices = f2fs_get_num_devices,
|
||||
|
||||
@@ -82,5 +82,4 @@ const struct fscrypt_operations ubifs_crypt_operations = {
|
||||
.get_context = ubifs_crypt_get_context,
|
||||
.set_context = ubifs_crypt_set_context,
|
||||
.empty_dir = ubifs_crypt_empty_dir,
|
||||
.max_namelen = UBIFS_MAX_NLEN,
|
||||
};
|
||||
|
||||
@@ -47,27 +47,125 @@ struct fscrypt_name {
|
||||
#define FSCRYPT_SET_CONTEXT_MAX_SIZE 40
|
||||
|
||||
#ifdef CONFIG_FS_ENCRYPTION
|
||||
|
||||
/*
|
||||
* fscrypt superblock flags
|
||||
* If set, the fscrypt bounce page pool won't be allocated (unless another
|
||||
* filesystem needs it). Set this if the filesystem always uses its own bounce
|
||||
* pages for writes and therefore won't need the fscrypt bounce page pool.
|
||||
*/
|
||||
#define FS_CFLG_OWN_PAGES (1U << 1)
|
||||
|
||||
/*
|
||||
* crypto operations for filesystems
|
||||
*/
|
||||
/* Crypto operations for filesystems */
|
||||
struct fscrypt_operations {
|
||||
|
||||
/* Set of optional flags; see above for allowed flags */
|
||||
unsigned int flags;
|
||||
|
||||
/*
|
||||
* If set, this is a filesystem-specific key description prefix that
|
||||
* will be accepted for "logon" keys for v1 fscrypt policies, in
|
||||
* addition to the generic prefix "fscrypt:". This functionality is
|
||||
* deprecated, so new filesystems shouldn't set this field.
|
||||
*/
|
||||
const char *key_prefix;
|
||||
|
||||
/*
|
||||
* Get the fscrypt context of the given inode.
|
||||
*
|
||||
* @inode: the inode whose context to get
|
||||
* @ctx: the buffer into which to get the context
|
||||
* @len: length of the @ctx buffer in bytes
|
||||
*
|
||||
* Return: On success, returns the length of the context in bytes; this
|
||||
* may be less than @len. On failure, returns -ENODATA if the
|
||||
* inode doesn't have a context, -ERANGE if the context is
|
||||
* longer than @len, or another -errno code.
|
||||
*/
|
||||
int (*get_context)(struct inode *inode, void *ctx, size_t len);
|
||||
|
||||
/*
|
||||
* Set an fscrypt context on the given inode.
|
||||
*
|
||||
* @inode: the inode whose context to set. The inode won't already have
|
||||
* an fscrypt context.
|
||||
* @ctx: the context to set
|
||||
* @len: length of @ctx in bytes (at most FSCRYPT_SET_CONTEXT_MAX_SIZE)
|
||||
* @fs_data: If called from fscrypt_set_context(), this will be the
|
||||
* value the filesystem passed to fscrypt_set_context().
|
||||
* Otherwise (i.e. when called from
|
||||
* FS_IOC_SET_ENCRYPTION_POLICY) this will be NULL.
|
||||
*
|
||||
* i_rwsem will be held for write.
|
||||
*
|
||||
* Return: 0 on success, -errno on failure.
|
||||
*/
|
||||
int (*set_context)(struct inode *inode, const void *ctx, size_t len,
|
||||
void *fs_data);
|
||||
|
||||
/*
|
||||
* Get the dummy fscrypt policy in use on the filesystem (if any).
|
||||
*
|
||||
* Filesystems only need to implement this function if they support the
|
||||
* test_dummy_encryption mount option.
|
||||
*
|
||||
* Return: A pointer to the dummy fscrypt policy, if the filesystem is
|
||||
* mounted with test_dummy_encryption; otherwise NULL.
|
||||
*/
|
||||
const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb);
|
||||
|
||||
/*
|
||||
* Check whether a directory is empty. i_rwsem will be held for write.
|
||||
*/
|
||||
bool (*empty_dir)(struct inode *inode);
|
||||
unsigned int max_namelen;
|
||||
|
||||
/*
|
||||
* Check whether the filesystem's inode numbers and UUID are stable,
|
||||
* meaning that they will never be changed even by offline operations
|
||||
* such as filesystem shrinking and therefore can be used in the
|
||||
* encryption without the possibility of files becoming unreadable.
|
||||
*
|
||||
* Filesystems only need to implement this function if they want to
|
||||
* support the FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags. These
|
||||
* flags are designed to work around the limitations of UFS and eMMC
|
||||
* inline crypto hardware, and they shouldn't be used in scenarios where
|
||||
* such hardware isn't being used.
|
||||
*
|
||||
* Leaving this NULL is equivalent to always returning false.
|
||||
*/
|
||||
bool (*has_stable_inodes)(struct super_block *sb);
|
||||
|
||||
/*
|
||||
* Get the number of bits that the filesystem uses to represent inode
|
||||
* numbers and file logical block numbers.
|
||||
*
|
||||
* By default, both of these are assumed to be 64-bit. This function
|
||||
* can be implemented to declare that either or both of these numbers is
|
||||
* shorter, which may allow the use of the
|
||||
* FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of
|
||||
* inline crypto hardware whose maximum DUN length is less than 64 bits
|
||||
* (e.g., eMMC v5.2 spec compliant hardware). This function only needs
|
||||
* to be implemented if support for one of these features is needed.
|
||||
*/
|
||||
void (*get_ino_and_lblk_bits)(struct super_block *sb,
|
||||
int *ino_bits_ret, int *lblk_bits_ret);
|
||||
|
||||
/*
|
||||
* Return the number of block devices to which the filesystem may write
|
||||
* encrypted file contents.
|
||||
*
|
||||
* If the filesystem can use multiple block devices (other than block
|
||||
* devices that aren't used for encrypted file contents, such as
|
||||
* external journal devices), and wants to support inline encryption,
|
||||
* then it must implement this function. Otherwise it's not needed.
|
||||
*/
|
||||
int (*get_num_devices)(struct super_block *sb);
|
||||
|
||||
/*
|
||||
* If ->get_num_devices() returns a value greater than 1, then this
|
||||
* function is called to get the array of request_queues that the
|
||||
* filesystem is using -- one per block device. (There may be duplicate
|
||||
* entries in this array, as block devices can share a request_queue.)
|
||||
*/
|
||||
void (*get_devices)(struct super_block *sb,
|
||||
struct request_queue **devs);
|
||||
|
||||
|
||||
@@ -233,6 +233,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
|
||||
|
||||
/*
|
||||
* Decompressor for MicroLZMA, an LZMA variant with a very minimal header.
|
||||
* See xz_dec_microlzma_alloc() below for details.
|
||||
*
|
||||
* These functions aren't used or available in preboot code and thus aren't
|
||||
* marked with XZ_EXTERN. This avoids warnings about static functions that
|
||||
* are never defined.
|
||||
*/
|
||||
/**
|
||||
* struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state
|
||||
*/
|
||||
struct xz_dec_microlzma;
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder
|
||||
* @mode: XZ_SINGLE or XZ_PREALLOC
|
||||
* @dict_size: LZMA dictionary size. This must be at least 4 KiB and
|
||||
* at most 3 GiB.
|
||||
*
|
||||
* In contrast to xz_dec_init(), this function only allocates the memory
|
||||
* and remembers the dictionary size. xz_dec_microlzma_reset() must be used
|
||||
* before calling xz_dec_microlzma_run().
|
||||
*
|
||||
* The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE.
|
||||
* With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated.
|
||||
*
|
||||
* On success, xz_dec_microlzma_alloc() returns a pointer to
|
||||
* struct xz_dec_microlzma. If memory allocation fails or
|
||||
* dict_size is invalid, NULL is returned.
|
||||
*
|
||||
* The compressed format supported by this decoder is a raw LZMA stream
|
||||
* whose first byte (always 0x00) has been replaced with bitwise-negation
|
||||
* of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is
|
||||
* 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00.
|
||||
* Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream
|
||||
* marker must not be used. The unused values are reserved for future use.
|
||||
* This MicroLZMA header format was created for use in EROFS but may be used
|
||||
* by others too.
|
||||
*/
|
||||
extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
|
||||
uint32_t dict_size);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state
|
||||
* @s: Decoder state allocated using xz_dec_microlzma_alloc()
|
||||
* @comp_size: Compressed size of the input stream
|
||||
* @uncomp_size: Uncompressed size of the input stream. A value smaller
|
||||
* than the real uncompressed size of the input stream can
|
||||
* be specified if uncomp_size_is_exact is set to false.
|
||||
* uncomp_size can never be set to a value larger than the
|
||||
* expected real uncompressed size because it would eventually
|
||||
* result in XZ_DATA_ERROR.
|
||||
* @uncomp_size_is_exact: This is an int instead of bool to avoid
|
||||
* requiring stdbool.h. This should normally be set to true.
|
||||
* When this is set to false, error detection is weaker.
|
||||
*/
|
||||
extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
|
||||
uint32_t comp_size, uint32_t uncomp_size,
|
||||
int uncomp_size_is_exact);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_run() - Run the MicroLZMA decoder
|
||||
* @s: Decoder state initialized using xz_dec_microlzma_reset()
|
||||
* @b: Input and output buffers
|
||||
*
|
||||
* This works similarly to xz_dec_run() with a few important differences.
|
||||
* Only the differences are documented here.
|
||||
*
|
||||
* The only possible return values are XZ_OK, XZ_STREAM_END, and
|
||||
* XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress
|
||||
* is possible due to lack of input data or output space, this function will
|
||||
* keep returning XZ_OK. Thus, the calling code must be written so that it
|
||||
* will eventually provide input and output space matching (or exceeding)
|
||||
* comp_size and uncomp_size arguments given to xz_dec_microlzma_reset().
|
||||
* If the caller cannot do this (for example, if the input file is truncated
|
||||
* or otherwise corrupt), the caller must detect this error by itself to
|
||||
* avoid an infinite loop.
|
||||
*
|
||||
* If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned.
|
||||
* This can happen also when incorrect dictionary, uncompressed, or
|
||||
* compressed sizes have been specified.
|
||||
*
|
||||
* With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over
|
||||
* uncompressed data. This way the caller doesn't need to provide a temporary
|
||||
* output buffer for the bytes that will be ignored.
|
||||
*
|
||||
* With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK
|
||||
* is also possible and thus XZ_SINGLE is actually a limited multi-call mode.
|
||||
* After XZ_OK the bytes decoded so far may be read from the output buffer.
|
||||
* It is possible to continue decoding but the variables b->out and b->out_pos
|
||||
* MUST NOT be changed by the caller. Increasing the value of b->out_size is
|
||||
* allowed to make more output space available; one doesn't need to provide
|
||||
* space for the whole uncompressed data on the first call. The input buffer
|
||||
* may be changed normally like with XZ_PREALLOC. This way input data can be
|
||||
* provided from non-contiguous memory.
|
||||
*/
|
||||
extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s,
|
||||
struct xz_buf *b);
|
||||
|
||||
/**
|
||||
* xz_dec_microlzma_end() - Free the memory allocated for the decoder state
|
||||
* @s: Decoder state allocated using xz_dec_microlzma_alloc().
|
||||
* If s is NULL, this function does nothing.
|
||||
*/
|
||||
extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s);
|
||||
|
||||
/*
|
||||
* Standalone build (userspace build or in-kernel build for boot time use)
|
||||
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
|
||||
|
||||
@@ -24,7 +24,7 @@ struct erofs_map_blocks;
|
||||
#define show_mflags(flags) __print_flags(flags, "", \
|
||||
{ EROFS_MAP_MAPPED, "M" }, \
|
||||
{ EROFS_MAP_META, "I" }, \
|
||||
{ EROFS_MAP_ZIPPED, "Z" })
|
||||
{ EROFS_MAP_ENCODED, "E" })
|
||||
|
||||
TRACE_EVENT(erofs_lookup,
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@
|
||||
*
|
||||
* The worst case for in-place decompression is that the beginning of
|
||||
* the file is compressed extremely well, and the rest of the file is
|
||||
* uncompressible. Thus, we must look for worst-case expansion when the
|
||||
* compressor is encoding uncompressible data.
|
||||
* incompressible. Thus, we must look for worst-case expansion when the
|
||||
* compressor is encoding incompressible data.
|
||||
*
|
||||
* The structure of the .xz file in case of a compressed kernel is as follows.
|
||||
* Sizes (as bytes) of the fields are in parentheses.
|
||||
@@ -58,7 +58,7 @@
|
||||
* uncompressed size of the payload is in practice never less than the
|
||||
* payload size itself. The LZMA2 format would allow uncompressed size
|
||||
* to be less than the payload size, but no sane compressor creates such
|
||||
* files. LZMA2 supports storing uncompressible data in uncompressed form,
|
||||
* files. LZMA2 supports storing incompressible data in uncompressed form,
|
||||
* so there's never a need to create payloads whose uncompressed size is
|
||||
* smaller than the compressed size.
|
||||
*
|
||||
|
||||
@@ -39,6 +39,19 @@ config XZ_DEC_SPARC
|
||||
default y
|
||||
select XZ_DEC_BCJ
|
||||
|
||||
config XZ_DEC_MICROLZMA
|
||||
bool "MicroLZMA decoder"
|
||||
default n
|
||||
help
|
||||
MicroLZMA is a header format variant where the first byte
|
||||
of a raw LZMA stream (without the end of stream marker) has
|
||||
been replaced with a bitwise-negation of the lc/lp/pb
|
||||
properties byte. MicroLZMA was created to be used in EROFS
|
||||
but can be used by other things too where wasting minimal
|
||||
amount of space for headers is important.
|
||||
|
||||
Unless you know that you need this, say N.
|
||||
|
||||
endif
|
||||
|
||||
config XZ_DEC_BCJ
|
||||
|
||||
@@ -248,6 +248,10 @@ struct lzma2_dec {
|
||||
* before the first LZMA chunk.
|
||||
*/
|
||||
bool need_props;
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
bool pedantic_microlzma;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct xz_dec_lzma2 {
|
||||
@@ -419,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
# define DICT_FLUSH_SUPPORTS_SKIPPING true
|
||||
#else
|
||||
# define DICT_FLUSH_SUPPORTS_SKIPPING false
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flush pending data from dictionary to b->out. It is assumed that there is
|
||||
* enough space in b->out. This is guaranteed because caller uses dict_limit()
|
||||
@@ -437,9 +447,14 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
|
||||
* decompression because in multi-call mode dict->buf
|
||||
* has been allocated by us in this file; it's not
|
||||
* provided by the caller like in single-call mode.
|
||||
*
|
||||
* With MicroLZMA, b->out can be NULL to skip bytes that
|
||||
* the caller doesn't need. This cannot be done with XZ
|
||||
* because it would break BCJ filters.
|
||||
*/
|
||||
memcpy(b->out + b->out_pos, dict->buf + dict->start,
|
||||
copy_size);
|
||||
if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
|
||||
memcpy(b->out + b->out_pos, dict->buf + dict->start,
|
||||
copy_size);
|
||||
}
|
||||
|
||||
dict->start = dict->pos;
|
||||
@@ -505,7 +520,7 @@ static __always_inline void rc_normalize(struct rc_dec *rc)
|
||||
* functions so that the compiler is supposed to be able to more easily avoid
|
||||
* an extra branch. In this particular version of the LZMA decoder, this
|
||||
* doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
|
||||
* on x86). Using a non-splitted version results in nicer looking code too.
|
||||
* on x86). Using a non-split version results in nicer looking code too.
|
||||
*
|
||||
* NOTE: This must return an int. Do not make it return a bool or the speed
|
||||
* of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
|
||||
@@ -791,6 +806,7 @@ static void lzma_reset(struct xz_dec_lzma2 *s)
|
||||
s->lzma.rep1 = 0;
|
||||
s->lzma.rep2 = 0;
|
||||
s->lzma.rep3 = 0;
|
||||
s->lzma.len = 0;
|
||||
|
||||
/*
|
||||
* All probabilities are initialized to the same value. This hack
|
||||
@@ -1174,8 +1190,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
|
||||
}
|
||||
}
|
||||
|
||||
s->lzma.len = 0;
|
||||
|
||||
s->lzma2.sequence = SEQ_CONTROL;
|
||||
s->lzma2.need_dict_reset = true;
|
||||
|
||||
@@ -1191,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
|
||||
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_MICROLZMA
|
||||
/*
* This is a wrapper struct to have a nice struct name in the public API.
* Its only member is the LZMA2 decoder state that the xz_dec_microlzma_*()
* functions operate on.
*/
|
||||
struct xz_dec_microlzma {
|
||||
struct xz_dec_lzma2 s;
|
||||
};
|
||||
|
||||
/*
* Decode MicroLZMA data from b->in into b->out, resuming from the state kept
* in s_ptr between calls.
*
* Returns XZ_STREAM_END once the remaining uncompressed byte count reaches
* zero, XZ_DATA_ERROR when the props byte, the compressed size, or the LZMA
* data itself is rejected, and XZ_OK when the call stops early because more
* input or more output space is needed.
*/
enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
|
||||
struct xz_buf *b)
|
||||
{
|
||||
struct xz_dec_lzma2 *s = &s_ptr->s;
|
||||

|
/*
|
||||
* sequence is SEQ_PROPERTIES before the first input byte,
|
||||
* SEQ_LZMA_PREPARE until a total of five bytes have been read,
|
||||
* and SEQ_LZMA_RUN for the rest of the input stream.
|
||||
*/
|
||||
if (s->lzma2.sequence != SEQ_LZMA_RUN) {
|
||||
if (s->lzma2.sequence == SEQ_PROPERTIES) {
|
||||
/* One byte is needed for the props. */
|
||||
if (b->in_pos >= b->in_size)
|
||||
return XZ_OK;
|
||||

|
/*
|
||||
* Don't increment b->in_pos here. The same byte is
|
||||
* also passed to rc_read_init() which will ignore it.
|
||||
*/
|
||||
if (!lzma_props(s, ~b->in[b->in_pos]))
|
||||
return XZ_DATA_ERROR;
|
||||

|
s->lzma2.sequence = SEQ_LZMA_PREPARE;
|
||||
}
|
||||

|
/*
|
||||
* xz_dec_microlzma_reset() doesn't validate the compressed
|
||||
* size so we do it here. We have to limit the maximum size
|
||||
* to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
|
||||
* round number and much more than users of this code should
|
||||
* ever need.
|
||||
*/
|
||||
if (s->lzma2.compressed < RC_INIT_BYTES
|
||||
|| s->lzma2.compressed > (3U << 30))
|
||||
return XZ_DATA_ERROR;
|
||||

|
if (!rc_read_init(&s->rc, b))
|
||||
return XZ_OK;
|
||||

|
s->lzma2.compressed -= RC_INIT_BYTES;
|
||||
s->lzma2.sequence = SEQ_LZMA_RUN;
|
||||

|
dict_reset(&s->dict, b);
|
||||
}
|
||||

|
/* This is to allow increasing b->out_size between calls. */
|
||||
if (DEC_IS_SINGLE(s->dict.mode))
|
||||
s->dict.end = b->out_size - b->out_pos;
|
||||

|
while (true) {
|
||||
dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
|
||||
s->lzma2.uncompressed));
|
||||

|
if (!lzma2_lzma(s, b))
|
||||
return XZ_DATA_ERROR;
|
||||

|
s->lzma2.uncompressed -= dict_flush(&s->dict, b);
|
||||

|
/* All of the expected uncompressed bytes have been flushed. */
if (s->lzma2.uncompressed == 0) {
|
||||
/*
* In pedantic mode the stream must end exactly here: leftover
* compressed bytes, a nonzero s->lzma.len, or an unfinished
* range decoder are all reported as corruption.
*/
if (s->lzma2.pedantic_microlzma) {
|
||||
if (s->lzma2.compressed > 0 || s->lzma.len > 0
|
||||
|| !rc_is_finished(&s->rc))
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||

|
return XZ_STREAM_END;
|
||||
}
|
||||

|
/* No output space is left; the caller has to provide more. */
if (b->out_pos == b->out_size)
|
||||
return XZ_OK;
|
||||

|
/*
* The input is used up and s->temp holds fewer bytes than are still
* expected (presumably buffered input -- confirm in lzma2_lzma()),
* so wait for the caller to supply more input.
*/
if (b->in_pos == b->in_size
|
||||
&& s->temp.size < s->lzma2.compressed)
|
||||
return XZ_OK;
|
||||
}
|
||||
}
|
||||
|
||||
struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
|
||||
uint32_t dict_size)
|
||||
{
|
||||
struct xz_dec_microlzma *s;
|
||||
|
||||
/* Restrict dict_size to the same range as in the LZMA2 code. */
|
||||
if (dict_size < 4096 || dict_size > (3U << 30))
|
||||
return NULL;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
s->s.dict.mode = mode;
|
||||
s->s.dict.size = dict_size;
|
||||
|
||||
if (DEC_IS_MULTI(mode)) {
|
||||
s->s.dict.end = dict_size;
|
||||
|
||||
s->s.dict.buf = vmalloc(dict_size);
|
||||
if (s->s.dict.buf == NULL) {
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
|
||||
uint32_t uncomp_size, int uncomp_size_is_exact)
|
||||
{
|
||||
/*
|
||||
* comp_size is validated in xz_dec_microlzma_run().
|
||||
* uncomp_size can safely be anything.
|
||||
*/
|
||||
s->s.lzma2.compressed = comp_size;
|
||||
s->s.lzma2.uncompressed = uncomp_size;
|
||||
s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
|
||||
|
||||
s->s.lzma2.sequence = SEQ_PROPERTIES;
|
||||
s->s.temp.size = 0;
|
||||
}
|
||||
|
||||
void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
|
||||
{
|
||||
if (DEC_IS_MULTI(s->s.dict.mode))
|
||||
vfree(s->s.dict.buf);
|
||||
|
||||
kfree(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -15,8 +15,15 @@ EXPORT_SYMBOL(xz_dec_reset);
|
||||
EXPORT_SYMBOL(xz_dec_run);
|
||||
EXPORT_SYMBOL(xz_dec_end);
|
||||
|
||||
#ifdef CONFIG_XZ_DEC_MICROLZMA
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_alloc);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_reset);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_run);
|
||||
EXPORT_SYMBOL(xz_dec_microlzma_end);
|
||||
#endif
|
||||
|
||||
MODULE_DESCRIPTION("XZ decompressor");
|
||||
MODULE_VERSION("1.0");
|
||||
MODULE_VERSION("1.1");
|
||||
MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
|
||||
|
||||
/*
|
||||
|
||||
@@ -37,6 +37,9 @@
|
||||
# ifdef CONFIG_XZ_DEC_SPARC
|
||||
# define XZ_DEC_SPARC
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_MICROLZMA
|
||||
# define XZ_DEC_MICROLZMA
|
||||
# endif
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
# endif
|
||||
|
||||
Reference in New Issue
Block a user