Merge 526942b813 ("Merge tag 'ata-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata") into android-mainline

Steps on the way to 6.0-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ia5d8d07975a1d38e7cc872e45bd7c0e84d54cfa3
This commit is contained in:
Greg Kroah-Hartman
2022-08-13 10:30:09 +02:00
258 changed files with 6611 additions and 8034 deletions

View File

@@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
=============
Page Cache is charged at
- add_to_page_cache_locked().
- filemap_add_folio().
The logic is very clear. (About migration, see below)

View File

@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
ignored on non-init namespace mounts. Please refer to the
Delegation section for details.
favordynmods
Reduce the latencies of dynamic cgroup modifications such as
task migrations and controller on/offs at the cost of making
hot path operations such as forks and exits more expensive.
The static usage pattern of creating a cgroup, enabling
controllers, and then seeding it with CLONE_INTO_CGROUP is
not affected by this option.
memory_localevents
Only populate memory.events with data for the current cgroup,
and not any subtrees. This is legacy behaviour, the default

View File

@@ -17,6 +17,9 @@ solution to the problem to avoid everybody inventing their own. The IDR
provides the ability to map an ID to a pointer, while the IDA provides
only ID allocation, and as a result is much more memory-efficient.
The IDR interface is deprecated; please use the :doc:`XArray <xarray>`
instead.
IDR usage
=========

View File

@@ -59,8 +59,6 @@ acl Enable POSIX Access Control Lists support
(requires CONFIG_EXT2_FS_POSIX_ACL).
noacl Don't support POSIX ACLs.
nobh Do not attach buffer_heads to file pagecache.
quota, usrquota Enable user disk quota support
(requires CONFIG_QUOTA).

View File

@@ -252,9 +252,8 @@ prototypes::
bool (*release_folio)(struct folio *, gfp_t);
void (*free_folio)(struct folio *);
int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
bool (*isolate_page) (struct page *, isolate_mode_t);
int (*migratepage)(struct address_space *, struct page *, struct page *);
void (*putback_page) (struct page *);
int (*migrate_folio)(struct address_space *, struct folio *dst,
struct folio *src, enum migrate_mode);
int (*launder_folio)(struct folio *);
bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
int (*error_remove_page)(struct address_space *, struct page *);
@@ -280,9 +279,7 @@ invalidate_folio: yes exclusive
release_folio: yes
free_folio: yes
direct_IO:
isolate_page: yes
migratepage: yes (both)
putback_page: yes
migrate_folio: yes (both)
launder_folio: yes
is_partially_uptodate: yes
error_remove_page: yes

View File

@@ -914,3 +914,11 @@ Calling conventions for file_open_root() changed; now it takes struct path *
instead of passing mount and dentry separately. For callers that used to
pass <mnt, mnt->mnt_root> pair (i.e. the root of given mount), a new helper
is provided - file_open_root_mnt(). In-tree users adjusted.
---
**mandatory**
no_llseek is gone; don't set .llseek to that - just leave it NULL instead.
Checks for "does that file have llseek(2), or should it fail with ESPIPE"
should be done by looking at FMODE_LSEEK in file->f_mode.

View File

@@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined:
bool (*release_folio)(struct folio *, gfp_t);
void (*free_folio)(struct folio *);
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
/* isolate a page for migration */
bool (*isolate_page) (struct page *, isolate_mode_t);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
/* put migration-failed page back to right list */
void (*putback_page) (struct page *);
int (*migrate_folio)(struct mapping *, struct folio *dst,
struct folio *src, enum migrate_mode);
int (*launder_folio) (struct folio *);
bool (*is_partially_uptodate) (struct folio *, size_t from,
@@ -774,13 +770,38 @@ cache in your filesystem. The following members are defined:
See the file "Locking" for more details.
``read_folio``
called by the VM to read a folio from backing store. The folio
will be locked when read_folio is called, and should be unlocked
and marked uptodate once the read completes. If ->read_folio
discovers that it cannot perform the I/O at this time, it can
unlock the folio and return AOP_TRUNCATED_PAGE. In this case,
the folio will be looked up again, relocked and if that all succeeds,
->read_folio will be called again.
Called by the page cache to read a folio from the backing store.
The 'file' argument supplies authentication information to network
filesystems, and is generally not used by block based filesystems.
It may be NULL if the caller does not have an open file (eg if
the kernel is performing a read for itself rather than on behalf
of a userspace process with an open file).
If the mapping does not support large folios, the folio will
contain a single page. The folio will be locked when read_folio
is called. If the read completes successfully, the folio should
be marked uptodate. The filesystem should unlock the folio
once the read has completed, whether it was successful or not.
The filesystem does not need to modify the refcount on the folio;
the page cache holds a reference count and that will not be
released until the folio is unlocked.
Filesystems may implement ->read_folio() synchronously.
In normal operation, folios are read through the ->readahead()
method. Only if this fails, or if the caller needs to wait for
the read to complete will the page cache call ->read_folio().
Filesystems should not attempt to perform their own readahead
in the ->read_folio() operation.
If the filesystem cannot perform the read at this time, it can
unlock the folio, do whatever action it needs to ensure that the
read will succeed in the future and return AOP_TRUNCATED_PAGE.
In this case, the caller should look up the folio, lock it,
and call ->read_folio again.
Callers may invoke the ->read_folio() method directly, but using
read_mapping_folio() will take care of locking, waiting for the
read to complete and handle cases such as AOP_TRUNCATED_PAGE.
``writepages``
called by the VM to write out pages associated with the
@@ -905,20 +926,12 @@ cache in your filesystem. The following members are defined:
data directly between the storage and the application's address
space.
``isolate_page``
Called by the VM when isolating a movable non-lru page. If page
is successfully isolated, VM marks the page as PG_isolated via
__SetPageIsolated.
``migrate_page``
``migrate_folio``
This is used to compact the physical memory usage. If the VM
wants to relocate a page (maybe off a memory card that is
signalling imminent failure) it will pass a new page and an old
page to this function. migrate_page should transfer any private
data across and update any references that it has to the page.
``putback_page``
Called by the VM when isolated page's migration fails.
wants to relocate a folio (maybe from a memory device that is
signalling imminent failure) it will pass a new folio and an old
folio to this function. migrate_folio should transfer any private
data across and update any references that it has to the folio.
``launder_folio``
Called before freeing a folio - it writes back the dirty folio.

View File

@@ -152,110 +152,15 @@ Steps:
Non-LRU page migration
======================
Although migration originally aimed for reducing the latency of memory accesses
for NUMA, compaction also uses migration to create high-order pages.
Although migration originally aimed for reducing the latency of memory
accesses for NUMA, compaction also uses migration to create high-order
pages. For compaction purposes, it is also useful to be able to move
non-LRU pages, such as zsmalloc and virtio-balloon pages.
Current problem of the implementation is that it is designed to migrate only
*LRU* pages. However, there are potential non-LRU pages which can be migrated
in drivers, for example, zsmalloc, virtio-balloon pages.
For virtio-balloon pages, some parts of migration code path have been hooked
up and added virtio-balloon specific functions to intercept migration logics.
It's too specific to a driver so other drivers who want to make their pages
movable would have to add their own specific hooks in the migration path.
To overcome the problem, VM supports non-LRU page migration which provides
generic functions for non-LRU movable pages without driver specific hooks
in the migration path.
If a driver wants to make its pages movable, it should define three functions
which are function pointers of struct address_space_operations.
1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
What VM expects from isolate_page() function of driver is to return *true*
if driver isolates the page successfully. On returning true, VM marks the page
as PG_isolated so concurrent isolation in several CPUs skip the page
for isolation. If a driver cannot isolate the page, it should return *false*.
Once page is successfully isolated, VM uses page.lru fields so driver
shouldn't expect to preserve values in those fields.
2. ``int (*migratepage) (struct address_space *mapping,``
| ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
After isolation, VM calls migratepage() of driver with the isolated page.
The function of migratepage() is to move the contents of the old page to the
new page
and set up fields of struct page newpage. Keep in mind that you should
indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
under page_lock if you migrated the oldpage successfully and returned
MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
because VM interprets -EAGAIN as "temporary migration failure". On returning
any error except -EAGAIN, VM will give up the page migration without
retrying.
Driver shouldn't touch the page.lru field while in the migratepage() function.
3. ``void (*putback_page)(struct page *);``
If migration fails on the isolated page, VM should return the isolated page
to the driver so VM calls the driver's putback_page() with the isolated page.
In this function, the driver should put the isolated page back into its own data
structure.
Non-LRU movable page flags
There are two page flags for supporting non-LRU movable page.
* PG_movable
Driver should use the function below to make page movable under page_lock::
void __SetPageMovable(struct page *page, struct address_space *mapping)
It needs argument of address_space for registering migration
family functions which will be called by VM. Exactly speaking,
PG_movable is not a real flag of struct page. Rather, VM
reuses the page->mapping's lower bits to represent it::
#define PAGE_MAPPING_MOVABLE 0x2
page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
so driver shouldn't access page->mapping directly. Instead, driver should
use page_mapping() which masks off the low two bits of page->mapping under
page lock so it can get the right struct address_space.
For testing of non-LRU movable pages, VM supports __PageMovable() function.
However, it doesn't guarantee to identify non-LRU movable pages because
the page->mapping field is unified with other variables in struct page.
If the driver releases the page after isolation by VM, page->mapping
doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set
(look at __ClearPageMovable). But __PageMovable() is cheap to call whether
page is LRU or non-LRU movable once the page has been isolated because LRU
pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also
good for just peeking to test non-LRU movable pages before more expensive
checking with lock_page() in pfn scanning to select a victim.
For guaranteeing non-LRU movable page, VM provides PageMovable() function.
Unlike __PageMovable(), PageMovable() validates page->mapping and
mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents
sudden destroying of page->mapping.
Drivers using __SetPageMovable() should clear the flag via
__ClearMovablePage() under page_lock() before the releasing the page.
* PG_isolated
To prevent concurrent isolation among several CPUs, VM marks isolated page
as PG_isolated under lock_page(). So if a CPU encounters PG_isolated
non-LRU movable page, it can skip it. Driver doesn't need to manipulate the
flag because VM will set/clear it automatically. Keep in mind that if the
driver sees a PG_isolated page, it means the page has been isolated by the
VM so it shouldn't touch the page.lru field.
The PG_isolated flag is aliased with the PG_reclaim flag so drivers
shouldn't use PG_isolated for its own purposes.
If a driver wants to make its pages movable, it should define a struct
movable_operations. It then needs to call __SetPageMovable() on each
page that it may be able to move. This uses the ``page->mapping`` field,
so this field is not available for the driver to use for other purposes.
Monitoring Migration
=====================
@@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase.
Christoph Lameter, May 8, 2006.
Minchan Kim, Mar 28, 2016.
.. kernel-doc:: include/linux/migrate.h

View File

@@ -29,7 +29,7 @@ Mechanics
be selected::
CONFIG_EFI=y
CONFIG_EFI_VARS=y or m # optional
CONFIG_EFIVAR_FS=y or m # optional
- Create a VFAT partition on the disk
- Copy the following to the VFAT partition:

View File

@@ -44,7 +44,6 @@ CONFIG_ARM_CPUIDLE=y
CONFIG_VFP=y
CONFIG_NEON=y
CONFIG_KERNEL_MODE_NEON=y
CONFIG_EFI_VARS=m
CONFIG_ARM_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM_NEON=m
CONFIG_CRYPTO_SHA1_ARM_CE=m

View File

@@ -24,13 +24,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
#define arch_efi_call_virt(p, f, args...) \
({ \
efi_##f##_t *__f; \
__f = p->f; \
__f(args); \
})
#define ARCH_EFI_IRQ_FLAGS_MASK \
(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
PSR_T_BIT | MODE_MASK)

View File

@@ -27,12 +27,9 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
__efi_fpsimd_begin(); \
})
#undef arch_efi_call_virt
#define arch_efi_call_virt(p, f, args...) \
({ \
efi_##f##_t *__f; \
__f = p->f; \
__efi_rt_asm_wrapper(__f, #f, args); \
})
__efi_rt_asm_wrapper((p)->f, #f, args)
#define arch_efi_call_virt_teardown() \
({ \

View File

@@ -350,8 +350,8 @@ void __init arm64_memblock_init(void)
"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
phys_initrd_size = 0;
} else {
memblock_remove(base, size); /* clear MEMBLOCK_ flags */
memblock_add(base, size);
memblock_clear_nomap(base, size);
memblock_reserve(base, size);
}
}

View File

@@ -10,7 +10,6 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_PREEMPT=y
CONFIG_IA64_PALINFO=y
CONFIG_EFI_VARS=y
CONFIG_BINFMT_MISC=m
CONFIG_ACPI_BUTTON=m
CONFIG_ACPI_FAN=m

View File

@@ -21,7 +21,6 @@ CONFIG_IA64_MCA_RECOVERY=y
CONFIG_IA64_PALINFO=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_EFI_VARS=y
CONFIG_BINFMT_MISC=m
CONFIG_ACPI_BUTTON=m
CONFIG_ACPI_FAN=m

View File

@@ -18,7 +18,6 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_IA64_MCA_RECOVERY=y
CONFIG_IA64_PALINFO=y
CONFIG_EFI_VARS=y
CONFIG_BINFMT_MISC=m
CONFIG_ACPI_BUTTON=m
CONFIG_ACPI_FAN=m

View File

@@ -23,7 +23,6 @@ CONFIG_FORCE_CPEI_RETARGET=y
CONFIG_IA64_MCA_RECOVERY=y
CONFIG_IA64_PALINFO=y
CONFIG_KEXEC=y
CONFIG_EFI_VARS=y
CONFIG_BINFMT_MISC=m
CONFIG_ACPI_BUTTON=m
CONFIG_ACPI_FAN=m

View File

@@ -12,7 +12,6 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_IA64_MCA_RECOVERY=y
CONFIG_IA64_PALINFO=y
CONFIG_CRASH_DUMP=y
CONFIG_EFI_VARS=y
CONFIG_BINFMT_MISC=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_ACPI=y

View File

@@ -13,20 +13,8 @@ void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */
#define arch_efi_call_virt_setup() \
({ \
})
#define arch_efi_call_virt(p, f, args...) \
({ \
efi_##f##_t * __f; \
__f = p->f; \
__f(args); \
})
#define arch_efi_call_virt_teardown() \
({ \
})
#define arch_efi_call_virt_setup()
#define arch_efi_call_virt_teardown()
#define EFI_ALLOC_ALIGN SZ_64K

View File

@@ -22,7 +22,7 @@ void flush_kernel_icache_range_asm(unsigned long, unsigned long);
void flush_user_dcache_range_asm(unsigned long, unsigned long);
void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
void purge_kernel_dcache_range_asm(unsigned long, unsigned long);
void flush_kernel_dcache_page_asm(void *);
void flush_kernel_dcache_page_asm(const void *addr);
void flush_kernel_icache_page(void *);
/* Cache flush operations */
@@ -31,7 +31,7 @@ void flush_cache_all_local(void);
void flush_cache_all(void);
void flush_cache_mm(struct mm_struct *mm);
void flush_kernel_dcache_page_addr(void *addr);
void flush_kernel_dcache_page_addr(const void *addr);
#define flush_kernel_dcache_range(start,size) \
flush_kernel_dcache_range_asm((start), (start)+(size));
@@ -75,7 +75,7 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
#define ARCH_HAS_FLUSH_ON_KUNMAP
static inline void kunmap_flush_on_unmap(void *addr)
static inline void kunmap_flush_on_unmap(const void *addr)
{
flush_kernel_dcache_page_addr(addr);
}

View File

@@ -549,7 +549,7 @@ extern void purge_kernel_dcache_page_asm(unsigned long);
extern void clear_user_page_asm(void *, unsigned long);
extern void copy_user_page_asm(void *, void *, unsigned long);
void flush_kernel_dcache_page_addr(void *addr)
void flush_kernel_dcache_page_addr(const void *addr)
{
unsigned long flags;

View File

@@ -348,7 +348,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned long size)
static inline unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (check_copy_size(from, n, true)) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = copy_mc_generic((void *)to, from, n);

View File

@@ -19,9 +19,6 @@
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
@@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = {
};
#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
static int cmm_init_fs_context(struct fs_context *fc)
{
return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type balloon_fs = {
.name = "ppc-cmm",
.init_fs_context = cmm_init_fs_context,
.kill_sb = kill_anon_super,
};
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
struct page *newpage, struct page *page,
enum migrate_mode mode)
@@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
return MIGRATEPAGE_SUCCESS;
}
static int cmm_balloon_compaction_init(void)
static void cmm_balloon_compaction_init(void)
{
int rc;
balloon_devinfo_init(&b_dev_info);
b_dev_info.migratepage = cmm_migratepage;
balloon_mnt = kern_mount(&balloon_fs);
if (IS_ERR(balloon_mnt)) {
rc = PTR_ERR(balloon_mnt);
balloon_mnt = NULL;
return rc;
}
b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
if (IS_ERR(b_dev_info.inode)) {
rc = PTR_ERR(b_dev_info.inode);
b_dev_info.inode = NULL;
kern_unmount(balloon_mnt);
balloon_mnt = NULL;
return rc;
}
b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
return 0;
}
static void cmm_balloon_compaction_deinit(void)
{
if (b_dev_info.inode)
iput(b_dev_info.inode);
b_dev_info.inode = NULL;
kern_unmount(balloon_mnt);
balloon_mnt = NULL;
}
#else /* CONFIG_BALLOON_COMPACTION */
static int cmm_balloon_compaction_init(void)
{
return 0;
}
static void cmm_balloon_compaction_deinit(void)
static void cmm_balloon_compaction_init(void)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */
@@ -622,9 +572,7 @@ static int cmm_init(void)
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
rc = cmm_balloon_compaction_init();
if (rc)
return rc;
cmm_balloon_compaction_init();
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
@@ -658,7 +606,6 @@ out_reboot_notifier:
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
out_balloon_compaction:
cmm_balloon_compaction_deinit();
return rc;
}
@@ -677,7 +624,6 @@ static void cmm_exit(void)
unregister_memory_notifier(&cmm_mem_nb);
cmm_free_pages(atomic_long_read(&loaned_pages));
cmm_unregister_sysfs(&cmm_dev);
cmm_balloon_compaction_deinit();
}
/**

View File

@@ -23,8 +23,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
#define arch_efi_call_virt(p, f, args...) p->f(args)
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
/* Load initrd anywhere in system RAM */

View File

@@ -39,7 +39,7 @@ _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned
static __always_inline unsigned long __must_check
copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key)
{
if (likely(check_copy_size(to, n, false)))
if (check_copy_size(to, n, false))
n = _copy_from_user_key(to, from, n, key);
return n;
}
@@ -50,7 +50,7 @@ _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned l
static __always_inline unsigned long __must_check
copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key)
{
if (likely(check_copy_size(from, n, true)))
if (check_copy_size(from, n, true))
n = _copy_to_user_key(to, from, n, key);
return n;
}

View File

@@ -135,7 +135,6 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_EFI_VARS=y
CONFIG_EFI_CAPSULE_LOADER=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y

View File

@@ -134,7 +134,6 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_EFI_VARS=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_SD=y

View File

@@ -100,8 +100,6 @@ static inline void efi_fpu_end(void)
efi_fpu_end(); \
})
#define arch_efi_call_virt(p, f, args...) p->f(args)
#else /* !CONFIG_X86_32 */
#define EFI_LOADER_SIGNATURE "EL64"
@@ -121,6 +119,7 @@ extern asmlinkage u64 __efi_call(void *fp, ...);
efi_enter_mm(); \
})
#undef arch_efi_call_virt
#define arch_efi_call_virt(p, f, args...) ({ \
u64 ret, ibt = ibt_save(); \
ret = efi_call((void *)p->f, args); \
@@ -383,7 +382,6 @@ static inline bool efi_is_64bit(void)
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
extern void efi_find_mirror(void);
extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -395,9 +393,6 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
{
return false;
}
static inline void efi_find_mirror(void)
{
}
static inline void efi_reserve_boot_services(void)
{
}

View File

@@ -108,29 +108,6 @@ static int __init setup_add_efi_memmap(char *arg)
}
early_param("add_efi_memmap", setup_add_efi_memmap);
void __init efi_find_mirror(void)
{
efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
if (!efi_enabled(EFI_MEMMAP))
return;
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
total_size += size;
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
memblock_mark_mirror(start, size);
mirror_size += size;
}
}
if (mirror_size)
pr_info("Memory: %lldM/%lldM mirrored memory\n",
mirror_size>>20, total_size>>20);
}
/*
* Tell the kernel about the EFI memory map. This might include
* more than the max 128 entries that can fit in the passed in e820

View File

@@ -37,7 +37,7 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
/* avoid the need for a I/O completion work item */
if (iocb->ki_flags & IOCB_DSYNC)
if (iocb_is_dsync(iocb))
opf |= REQ_FUA;
return opf;
}
@@ -421,7 +421,7 @@ const struct address_space_operations def_blk_aops = {
.write_end = blkdev_write_end,
.writepages = blkdev_writepages,
.direct_IO = blkdev_direct_IO,
.migratepage = buffer_migrate_page_norefs,
.migrate_folio = buffer_migrate_folio_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};

View File

@@ -24,13 +24,13 @@ struct parsed_partitions {
};
typedef struct {
struct page *v;
struct folio *v;
} Sector;
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
static inline void put_dev_sector(Sector p)
{
put_page(p.v);
folio_put(p.v);
}
static inline void

View File

@@ -704,25 +704,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
struct address_space *mapping = state->disk->part0->bd_inode->i_mapping;
struct page *page;
struct folio *folio;
if (n >= get_capacity(state->disk)) {
state->access_beyond_eod = true;
return NULL;
goto out;
}
page = read_mapping_page(mapping,
(pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
if (IS_ERR(page))
folio = read_mapping_folio(mapping, n >> PAGE_SECTORS_SHIFT, NULL);
if (IS_ERR(folio))
goto out;
if (PageError(page))
goto out_put_page;
p->v = page;
return (unsigned char *)page_address(page) +
((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
out_put_page:
put_page(page);
p->v = folio;
return folio_address(folio) + offset_in_folio(folio, n * SECTOR_SIZE);
out:
p->v = NULL;
return NULL;

View File

@@ -572,6 +572,21 @@ source "drivers/acpi/pmic/Kconfig"
config ACPI_VIOT
bool
config ACPI_PRMT
bool "Platform Runtime Mechanism Support"
depends on EFI && (X86_64 || ARM64)
default y
help
Platform Runtime Mechanism (PRM) is a firmware interface exposing a
set of binary executables that can be called from the AML interpreter
or directly from device drivers.
Say Y to enable the AML interpreter to execute the PRM code.
While this feature is optional in principle, leaving it out may
substantially increase computational overhead related to the
initialization of some server systems.
endif # ACPI
config X86_PM_TIMER
@@ -589,18 +604,3 @@ config X86_PM_TIMER
You should nearly always say Y here because many modern
systems require this timer.
config ACPI_PRMT
bool "Platform Runtime Mechanism Support"
depends on EFI && X86_64
default y
help
Platform Runtime Mechanism (PRM) is a firmware interface exposing a
set of binary executables that can be called from the AML interpreter
or directly from device drivers.
Say Y to enable the AML interpreter to execute the PRM code.
While this feature is optional in principle, leaving it out may
substantially increase computational overhead related to the
initialization of some server systems.

View File

@@ -53,7 +53,7 @@ static LIST_HEAD(prm_module_list);
struct prm_handler_info {
guid_t guid;
u64 handler_addr;
void *handler_addr;
u64 static_data_buffer_addr;
u64 acpi_param_buffer_addr;
@@ -148,7 +148,7 @@ acpi_parse_prmt(union acpi_subtable_headers *header, const unsigned long end)
th = &tm->handlers[cur_handler];
guid_copy(&th->guid, (guid_t *)handler_info->handler_guid);
th->handler_addr = efi_pa_va_lookup(handler_info->handler_address);
th->handler_addr = (void *)efi_pa_va_lookup(handler_info->handler_address);
th->static_data_buffer_addr = efi_pa_va_lookup(handler_info->static_data_buffer_address);
th->acpi_param_buffer_addr = efi_pa_va_lookup(handler_info->acpi_param_buffer_address);
} while (++cur_handler < tm->handler_count && (handler_info = get_next_handler(handler_info)));

View File

@@ -480,10 +480,10 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
* RETURNS:
* Determined xfermask.
*/
unsigned long ata_acpi_gtm_xfermask(struct ata_device *dev,
const struct ata_acpi_gtm *gtm)
unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
const struct ata_acpi_gtm *gtm)
{
unsigned long xfer_mask = 0;
unsigned int xfer_mask = 0;
unsigned int type;
int unit;
u8 mode;
@@ -525,7 +525,7 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
struct ata_device *dev;
ata_for_each_dev(dev, &ap->link, ENABLED) {
unsigned long xfer_mask, udma_mask;
unsigned int xfer_mask, udma_mask;
xfer_mask = ata_acpi_gtm_xfermask(dev, gtm);
ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask);

View File

@@ -93,7 +93,7 @@ struct ata_force_param {
const char *name;
u8 cbl;
u8 spd_limit;
unsigned long xfer_mask;
unsigned int xfer_mask;
unsigned int horkage_on;
unsigned int horkage_off;
u16 lflags_on;
@@ -425,7 +425,7 @@ static void ata_force_xfermask(struct ata_device *dev)
for (i = ata_force_tbl_size - 1; i >= 0; i--) {
const struct ata_force_ent *fe = &ata_force_tbl[i];
unsigned long pio_mask, mwdma_mask, udma_mask;
unsigned int pio_mask, mwdma_mask, udma_mask;
if (fe->port != -1 && fe->port != dev->link->ap->print_id)
continue;
@@ -803,11 +803,11 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
* RETURNS:
* Packed xfer_mask.
*/
unsigned long ata_pack_xfermask(unsigned long pio_mask,
unsigned long mwdma_mask,
unsigned long udma_mask)
unsigned int ata_pack_xfermask(unsigned int pio_mask,
unsigned int mwdma_mask,
unsigned int udma_mask)
{
return ((pio_mask << ATA_SHIFT_PIO) & ATA_MASK_PIO) |
return ((pio_mask << ATA_SHIFT_PIO) & ATA_MASK_PIO) |
((mwdma_mask << ATA_SHIFT_MWDMA) & ATA_MASK_MWDMA) |
((udma_mask << ATA_SHIFT_UDMA) & ATA_MASK_UDMA);
}
@@ -823,8 +823,8 @@ EXPORT_SYMBOL_GPL(ata_pack_xfermask);
* Unpack @xfer_mask into @pio_mask, @mwdma_mask and @udma_mask.
* Any NULL destination masks will be ignored.
*/
void ata_unpack_xfermask(unsigned long xfer_mask, unsigned long *pio_mask,
unsigned long *mwdma_mask, unsigned long *udma_mask)
void ata_unpack_xfermask(unsigned int xfer_mask, unsigned int *pio_mask,
unsigned int *mwdma_mask, unsigned int *udma_mask)
{
if (pio_mask)
*pio_mask = (xfer_mask & ATA_MASK_PIO) >> ATA_SHIFT_PIO;
@@ -857,7 +857,7 @@ static const struct ata_xfer_ent {
* RETURNS:
* Matching XFER_* value, 0xff if no match found.
*/
u8 ata_xfer_mask2mode(unsigned long xfer_mask)
u8 ata_xfer_mask2mode(unsigned int xfer_mask)
{
int highbit = fls(xfer_mask) - 1;
const struct ata_xfer_ent *ent;
@@ -881,7 +881,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
* RETURNS:
* Matching xfer_mask, 0 if no match found.
*/
unsigned long ata_xfer_mode2mask(u8 xfer_mode)
unsigned int ata_xfer_mode2mask(u8 xfer_mode)
{
const struct ata_xfer_ent *ent;
@@ -930,7 +930,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
* Constant C string representing highest speed listed in
* @mode_mask, or the constant C string "<n/a>".
*/
const char *ata_mode_string(unsigned long xfer_mask)
const char *ata_mode_string(unsigned int xfer_mask)
{
static const char * const xfer_mode_str[] = {
"PIO0",
@@ -1103,16 +1103,16 @@ static u64 ata_id_n_sectors(const u16 *id)
if (ata_id_has_lba(id)) {
if (ata_id_has_lba48(id))
return ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
else
return ata_id_u32(id, ATA_ID_LBA_CAPACITY);
} else {
if (ata_id_current_chs_valid(id))
return id[ATA_ID_CUR_CYLS] * id[ATA_ID_CUR_HEADS] *
id[ATA_ID_CUR_SECTORS];
else
return id[ATA_ID_CYLS] * id[ATA_ID_HEADS] *
id[ATA_ID_SECTORS];
return ata_id_u32(id, ATA_ID_LBA_CAPACITY);
}
if (ata_id_current_chs_valid(id))
return (u32)id[ATA_ID_CUR_CYLS] * (u32)id[ATA_ID_CUR_HEADS] *
(u32)id[ATA_ID_CUR_SECTORS];
return (u32)id[ATA_ID_CYLS] * (u32)id[ATA_ID_HEADS] *
(u32)id[ATA_ID_SECTORS];
}
u64 ata_tf_to_lba48(const struct ata_taskfile *tf)
@@ -1383,9 +1383,9 @@ static inline void ata_dump_id(struct ata_device *dev, const u16 *id)
* RETURNS:
* Computed xfermask
*/
unsigned long ata_id_xfermask(const u16 *id)
unsigned int ata_id_xfermask(const u16 *id)
{
unsigned long pio_mask, mwdma_mask, udma_mask;
unsigned int pio_mask, mwdma_mask, udma_mask;
/* Usual case. Word 53 indicates word 64 is valid */
if (id[ATA_ID_FIELD_VALID] & (1 << 1)) {
@@ -1467,10 +1467,10 @@ static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
* RETURNS:
* Zero on success, AC_ERR_* mask on failure
*/
unsigned ata_exec_internal_sg(struct ata_device *dev,
struct ata_taskfile *tf, const u8 *cdb,
int dma_dir, struct scatterlist *sgl,
unsigned int n_elem, unsigned long timeout)
static unsigned ata_exec_internal_sg(struct ata_device *dev,
struct ata_taskfile *tf, const u8 *cdb,
int dma_dir, struct scatterlist *sgl,
unsigned int n_elem, unsigned int timeout)
{
struct ata_link *link = dev->link;
struct ata_port *ap = link->ap;
@@ -1645,7 +1645,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
unsigned ata_exec_internal(struct ata_device *dev,
struct ata_taskfile *tf, const u8 *cdb,
int dma_dir, void *buf, unsigned int buflen,
unsigned long timeout)
unsigned int timeout)
{
struct scatterlist *psg = NULL, sg;
unsigned int n_elem = 0;
@@ -2534,7 +2534,7 @@ int ata_dev_configure(struct ata_device *dev)
struct ata_port *ap = dev->link->ap;
bool print_info = ata_dev_print_info(dev);
const u16 *id = dev->id;
unsigned long xfer_mask;
unsigned int xfer_mask;
unsigned int err_mask;
char revbuf[7]; /* XYZ-99\0 */
char fwrevbuf[ATA_ID_FW_REV_LEN+1];
@@ -3202,8 +3202,8 @@ u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle)
int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel)
{
char buf[32];
unsigned long orig_mask, xfer_mask;
unsigned long pio_mask, mwdma_mask, udma_mask;
unsigned int orig_mask, xfer_mask;
unsigned int pio_mask, mwdma_mask, udma_mask;
int quiet, highbit;
quiet = !!(sel & ATA_DNXFER_QUIET);
@@ -3381,7 +3381,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
/* step 1: calculate xfer_mask */
ata_for_each_dev(dev, link, ENABLED) {
unsigned long pio_mask, dma_mask;
unsigned int pio_mask, dma_mask;
unsigned int mode_mask;
mode_mask = ATA_DMA_MASK_ATA;
@@ -4217,7 +4217,7 @@ static void ata_dev_xfermask(struct ata_device *dev)
struct ata_link *link = dev->link;
struct ata_port *ap = link->ap;
struct ata_host *host = ap->host;
unsigned long xfer_mask;
unsigned int xfer_mask;
/* controller modes available */
xfer_mask = ata_pack_xfermask(ap->pio_mask,
@@ -4342,7 +4342,7 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature)
{
struct ata_taskfile tf;
unsigned int err_mask;
unsigned long timeout = 0;
unsigned int timeout = 0;
/* set up set-features taskfile */
ata_dev_dbg(dev, "set features - SATA features\n");
@@ -5776,7 +5776,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
/* set cable, sata_spd_limit and report */
for (i = 0; i < host->n_ports; i++) {
struct ata_port *ap = host->ports[i];
unsigned long xfer_mask;
unsigned int xfer_mask;
/* set SATA cable type if still unset */
if (ap->cbl == ATA_CBL_NONE && (ap->flags & ATA_FLAG_SATA))

View File

@@ -86,36 +86,36 @@ static const unsigned long ata_eh_reset_timeouts[] = {
ULONG_MAX, /* > 1 min has elapsed, give up */
};
static const unsigned long ata_eh_identify_timeouts[] = {
static const unsigned int ata_eh_identify_timeouts[] = {
5000, /* covers > 99% of successes and not too boring on failures */
10000, /* combined time till here is enough even for media access */
30000, /* for true idiots */
ULONG_MAX,
UINT_MAX,
};
static const unsigned long ata_eh_revalidate_timeouts[] = {
static const unsigned int ata_eh_revalidate_timeouts[] = {
15000, /* Some drives are slow to read log pages when waking-up */
15000, /* combined time till here is enough even for media access */
ULONG_MAX,
UINT_MAX,
};
static const unsigned long ata_eh_flush_timeouts[] = {
static const unsigned int ata_eh_flush_timeouts[] = {
15000, /* be generous with flush */
15000, /* ditto */
30000, /* and even more generous */
ULONG_MAX,
UINT_MAX,
};
static const unsigned long ata_eh_other_timeouts[] = {
static const unsigned int ata_eh_other_timeouts[] = {
5000, /* same rationale as identify timeout */
10000, /* ditto */
/* but no merciful 30sec for other commands, it just isn't worth it */
ULONG_MAX,
UINT_MAX,
};
struct ata_eh_cmd_timeout_ent {
const u8 *commands;
const unsigned long *timeouts;
const unsigned int *timeouts;
};
/* The following table determines timeouts to use for EH internal
@@ -326,7 +326,7 @@ static int ata_lookup_timeout_table(u8 cmd)
* RETURNS:
* Determined timeout.
*/
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
struct ata_eh_context *ehc = &dev->link->eh_context;
int ent = ata_lookup_timeout_table(cmd);
@@ -361,7 +361,7 @@ void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
return;
idx = ehc->cmd_timeout_idx[dev->devno][ent];
if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX)
ehc->cmd_timeout_idx[dev->devno][ent]++;
}
@@ -802,11 +802,11 @@ void ata_port_wait_eh(struct ata_port *ap)
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
static int ata_eh_nr_in_flight(struct ata_port *ap)
static unsigned int ata_eh_nr_in_flight(struct ata_port *ap)
{
struct ata_queued_cmd *qc;
unsigned int tag;
int nr = 0;
unsigned int nr = 0;
/* count only non-internal commands */
ata_qc_for_each(ap, qc, tag) {
@@ -821,7 +821,7 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
unsigned long flags;
int cnt;
unsigned int cnt;
spin_lock_irqsave(ap->lock, flags);
@@ -870,7 +870,7 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t)
*/
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
int cnt;
unsigned int cnt;
/* already scheduled? */
if (ap->pflags & ATA_PFLAG_EH_PENDING)

View File

@@ -539,13 +539,13 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg)
return rc;
}
static int ata_ioc32(struct ata_port *ap)
static bool ata_ioc32(struct ata_port *ap)
{
if (ap->flags & ATA_FLAG_PIO_DMA)
return 1;
return true;
if (ap->pflags & ATA_PFLAG_PIO32)
return 1;
return 0;
return true;
return false;
}
/*

View File

@@ -9,7 +9,7 @@
* and various sysfs attributes to expose these topologies and management
* interfaces to user-space.
*
* There are 3 objects defined in in this class:
* There are 3 objects defined in this class:
* - ata_port
* - ata_link
* - ata_device

View File

@@ -52,11 +52,7 @@ extern u64 ata_tf_read_block(const struct ata_taskfile *tf,
extern unsigned ata_exec_internal(struct ata_device *dev,
struct ata_taskfile *tf, const u8 *cdb,
int dma_dir, void *buf, unsigned int buflen,
unsigned long timeout);
extern unsigned ata_exec_internal_sg(struct ata_device *dev,
struct ata_taskfile *tf, const u8 *cdb,
int dma_dir, struct scatterlist *sg,
unsigned int n_elem, unsigned long timeout);
unsigned int timeout);
extern int ata_wait_ready(struct ata_link *link, unsigned long deadline,
int (*check_ready)(struct ata_link *link));
extern int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class,
@@ -136,7 +132,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev);
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
/* libata-eh.c */
extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
extern unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
extern void ata_eh_acquire(struct ata_port *ap);
extern void ata_eh_release(struct ata_port *ap);

View File

@@ -97,7 +97,7 @@ static unsigned long pacpi_discover_modes(struct ata_port *ap, struct ata_device
* this case the list of discovered valid modes obtained by ACPI probing
*/
static unsigned long pacpi_mode_filter(struct ata_device *adev, unsigned long mask)
static unsigned int pacpi_mode_filter(struct ata_device *adev, unsigned int mask)
{
struct pata_acpi *acpi = adev->link->ap->private_data;
return mask & acpi->mask[adev->devno];

View File

@@ -115,7 +115,7 @@ static int ali_c2_cable_detect(struct ata_port *ap)
* fix that later on. Also ensure we do not do UDMA on WDC drives
*/
static unsigned long ali_20_filter(struct ata_device *adev, unsigned long mask)
static unsigned int ali_20_filter(struct ata_device *adev, unsigned int mask)
{
char model_num[ATA_ID_PROD_LEN + 1];
/* No DMA on anything but a disk for now */

View File

@@ -264,8 +264,8 @@ static void amd133_set_dmamode(struct ata_port *ap, struct ata_device *adev)
* cached during driver attach and are consulted to select transfer
* mode.
*/
static unsigned long nv_mode_filter(struct ata_device *dev,
unsigned long xfer_mask)
static unsigned int nv_mode_filter(struct ata_device *dev,
unsigned int xfer_mask)
{
static const unsigned int udma_mask_map[] =
{ ATA_UDMA2, ATA_UDMA1, ATA_UDMA0, 0,
@@ -274,7 +274,7 @@ static unsigned long nv_mode_filter(struct ata_device *dev,
char acpi_str[32] = "";
u32 saved_udma, udma;
const struct ata_acpi_gtm *gtm;
unsigned long bios_limit = 0, acpi_limit = 0, limit;
unsigned int bios_limit = 0, acpi_limit = 0, limit;
/* find out what BIOS configured */
udma = saved_udma = (unsigned long)ap->host->private_data;
@@ -310,10 +310,10 @@ static unsigned long nv_mode_filter(struct ata_device *dev,
cable detection result */
limit |= ata_pack_xfermask(ATA_PIO4, ATA_MWDMA2, ATA_UDMA2);
ata_port_dbg(ap, "nv_mode_filter: 0x%lx&0x%lx->0x%lx, "
"BIOS=0x%lx (0x%x) ACPI=0x%lx%s\n",
xfer_mask, limit, xfer_mask & limit, bios_limit,
saved_udma, acpi_limit, acpi_str);
ata_port_dbg(ap,
"nv_mode_filter: 0x%x&0x%x->0x%x, BIOS=0x%x (0x%x) ACPI=0x%x%s\n",
xfer_mask, limit, xfer_mask & limit, bios_limit,
saved_udma, acpi_limit, acpi_str);
return xfer_mask & limit;
}

View File

@@ -194,7 +194,7 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
* Block UDMA on devices that cause trouble with this controller.
*/
static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask)
static unsigned int hpt366_filter(struct ata_device *adev, unsigned int mask)
{
if (adev->class == ATA_DEV_ATA) {
if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))

View File

@@ -23,7 +23,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt37x"
#define DRV_VERSION "0.6.25"
#define DRV_VERSION "0.6.30"
struct hpt_clock {
u8 xfer_speed;
@@ -278,7 +278,7 @@ static const char * const bad_ata100_5[] = {
* Block UDMA on devices that cause trouble with this controller.
*/
static unsigned long hpt370_filter(struct ata_device *adev, unsigned long mask)
static unsigned int hpt370_filter(struct ata_device *adev, unsigned int mask)
{
if (adev->class == ATA_DEV_ATA) {
if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
@@ -297,7 +297,7 @@ static unsigned long hpt370_filter(struct ata_device *adev, unsigned long mask)
* Block UDMA on devices that cause trouble with this controller.
*/
static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
static unsigned int hpt370a_filter(struct ata_device *adev, unsigned int mask)
{
if (adev->class == ATA_DEV_ATA) {
if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
@@ -314,7 +314,7 @@ static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
* The Marvell bridge chips used on the HighPoint SATA cards do not seem
* to support the UltraDMA modes 1, 2, and 3 as well as any MWDMA modes...
*/
static unsigned long hpt372_filter(struct ata_device *adev, unsigned long mask)
static unsigned int hpt372_filter(struct ata_device *adev, unsigned int mask)
{
if (ata_id_is_sata(adev->id))
mask &= ~((0xE << ATA_SHIFT_UDMA) | ATA_MASK_MWDMA);
@@ -592,21 +592,19 @@ static struct ata_port_operations hpt374_fn1_port_ops = {
/**
* hpt37x_clock_slot - Turn timing to PC clock entry
* @freq: Reported frequency timing
* @base: Base timing
* @freq: Reported frequency in MHz
*
* Turn the timing data intoa clock slot (0 for 33, 1 for 40, 2 for 50
* Turn the timing data into a clock slot (0 for 33, 1 for 40, 2 for 50
* and 3 for 66Mhz)
*/
static int hpt37x_clock_slot(unsigned int freq, unsigned int base)
static int hpt37x_clock_slot(unsigned int freq)
{
unsigned int f = (base * freq) / 192; /* Mhz */
if (f < 40)
if (freq < 40)
return 0; /* 33Mhz slot */
if (f < 45)
if (freq < 45)
return 1; /* 40Mhz slot */
if (f < 55)
if (freq < 55)
return 2; /* 50Mhz slot */
return 3; /* 60Mhz slot */
}
@@ -646,24 +644,57 @@ static int hpt37x_calibrate_dpll(struct pci_dev *dev)
return 0;
}
static u32 hpt374_read_freq(struct pci_dev *pdev)
static int hpt37x_pci_clock(struct pci_dev *pdev, unsigned int base)
{
u32 freq;
unsigned long io_base = pci_resource_start(pdev, 4);
unsigned int freq;
u32 fcnt;
if (PCI_FUNC(pdev->devfn) & 1) {
struct pci_dev *pdev_0;
/*
* Some devices do not let this value be accessed via PCI space
* according to the old driver. In addition we must use the value
* from FN 0 on the HPT374.
*/
if (pdev->device == PCI_DEVICE_ID_TTI_HPT374 &&
(PCI_FUNC(pdev->devfn) & 1)) {
struct pci_dev *pdev_fn0;
pdev_0 = pci_get_slot(pdev->bus, pdev->devfn - 1);
/* Someone hot plugged the controller on us ? */
if (pdev_0 == NULL)
pdev_fn0 = pci_get_slot(pdev->bus, pdev->devfn - 1);
/* Someone hot plugged the controller on us? */
if (!pdev_fn0)
return 0;
io_base = pci_resource_start(pdev_0, 4);
freq = inl(io_base + 0x90);
pci_dev_put(pdev_0);
} else
freq = inl(io_base + 0x90);
return freq;
fcnt = inl(pci_resource_start(pdev_fn0, 4) + 0x90);
pci_dev_put(pdev_fn0);
} else {
fcnt = inl(pci_resource_start(pdev, 4) + 0x90);
}
if ((fcnt >> 12) != 0xABCDE) {
u32 total = 0;
int i;
u16 sr;
dev_warn(&pdev->dev, "BIOS clock data not set\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
pci_read_config_word(pdev, 0x78, &sr);
total += sr & 0x1FF;
udelay(15);
}
fcnt = total / 128;
}
fcnt &= 0x1FF;
freq = (fcnt * base) / 192; /* in MHz */
/* Clamp to bands */
if (freq < 40)
return 33;
if (freq < 45)
return 40;
if (freq < 55)
return 50;
return 66;
}
/**
@@ -770,7 +801,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
u8 rev = dev->revision;
u8 irqmask;
u8 mcr1;
u32 freq;
unsigned int freq; /* MHz */
int prefer_dpll = 1;
unsigned long iobase = pci_resource_start(dev, 4);
@@ -896,42 +927,16 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (chip_table == &hpt372a)
outb(0x0e, iobase + 0x9c);
/*
* Some devices do not let this value be accessed via PCI space
* according to the old driver. In addition we must use the value
* from FN 0 on the HPT374.
*/
if (chip_table == &hpt374) {
freq = hpt374_read_freq(dev);
if (freq == 0)
return -ENODEV;
} else
freq = inl(iobase + 0x90);
if ((freq >> 12) != 0xABCDE) {
int i;
u16 sr;
u32 total = 0;
dev_warn(&dev->dev, "BIOS has not set timing clocks\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
pci_read_config_word(dev, 0x78, &sr);
total += sr & 0x1FF;
udelay(15);
}
freq = total / 128;
}
freq &= 0x1FF;
freq = hpt37x_pci_clock(dev, chip_table->base);
if (!freq)
return -ENODEV;
/*
* Turn the frequency check into a band and then find a timing
* table to match it.
*/
clock_slot = hpt37x_clock_slot(freq, chip_table->base);
clock_slot = hpt37x_clock_slot(freq);
if (chip_table->clocks[clock_slot] == NULL || prefer_dpll) {
/*
* We need to try PLL mode instead

View File

@@ -24,7 +24,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt3x2n"
#define DRV_VERSION "0.3.18"
#define DRV_VERSION "0.3.19"
enum {
PCI66 = (1 << 1),
@@ -113,7 +113,7 @@ static u32 hpt3x2n_find_mode(struct ata_port *ap, int speed)
* The Marvell bridge chips used on the HighPoint SATA cards do not seem
* to support the UltraDMA modes 1, 2, and 3 as well as any MWDMA modes...
*/
static unsigned long hpt372n_filter(struct ata_device *adev, unsigned long mask)
static unsigned int hpt372n_filter(struct ata_device *adev, unsigned int mask)
{
if (ata_id_is_sata(adev->id))
mask &= ~((0xE << ATA_SHIFT_UDMA) | ATA_MASK_MWDMA);
@@ -403,17 +403,20 @@ static int hpt3xn_calibrate_dpll(struct pci_dev *dev)
return 0;
}
static int hpt3x2n_pci_clock(struct pci_dev *pdev)
static int hpt3x2n_pci_clock(struct pci_dev *pdev, unsigned int base)
{
unsigned long freq;
unsigned int freq;
u32 fcnt;
unsigned long iobase = pci_resource_start(pdev, 4);
fcnt = inl(iobase + 0x90); /* Not PCI readable for some chips */
/*
* Some devices do not let this value be accessed via PCI space
* according to the old driver.
*/
fcnt = inl(pci_resource_start(pdev, 4) + 0x90);
if ((fcnt >> 12) != 0xABCDE) {
u32 total = 0;
int i;
u16 sr;
u32 total = 0;
dev_warn(&pdev->dev, "BIOS clock data not set\n");
@@ -427,7 +430,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev)
}
fcnt &= 0x1FF;
freq = (fcnt * 77) / 192;
freq = (fcnt * base) / 192; /* in MHz */
/* Clamp to bands */
if (freq < 40)
@@ -559,7 +562,7 @@ hpt372n:
* 50 for UDMA100. Right now we always use 66
*/
pci_mhz = hpt3x2n_pci_clock(dev);
pci_mhz = hpt3x2n_pci_clock(dev, 77);
f_low = (pci_mhz * 48) / 66; /* PCI Mhz for 66Mhz DPLL */
f_high = f_low + 2; /* Tolerance */

View File

@@ -1028,7 +1028,7 @@ static void pmac_macio_calc_timing_masks(struct pata_macio_priv *priv,
}
i++;
}
dev_dbg(priv->dev, "Supported masks: PIO=%lx, MWDMA=%lx, UDMA=%lx\n",
dev_dbg(priv->dev, "Supported masks: PIO=%x, MWDMA=%x, UDMA=%x\n",
pinfo->pio_mask, pinfo->mwdma_mask, pinfo->udma_mask);
}

View File

@@ -57,7 +57,7 @@ static int pdc2027x_prereset(struct ata_link *link, unsigned long deadline);
static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev);
static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev);
static int pdc2027x_check_atapi_dma(struct ata_queued_cmd *qc);
static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long mask);
static unsigned int pdc2027x_mode_filter(struct ata_device *adev, unsigned int mask);
static int pdc2027x_cable_detect(struct ata_port *ap);
static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed);
@@ -251,7 +251,7 @@ static int pdc2027x_prereset(struct ata_link *link, unsigned long deadline)
* Block UDMA on devices that cause trouble with this controller.
*/
static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long mask)
static unsigned int pdc2027x_mode_filter(struct ata_device *adev, unsigned int mask)
{
unsigned char model_num[ATA_ID_PROD_LEN + 1];
struct ata_device *pair = ata_dev_pair(adev);

View File

@@ -150,7 +150,7 @@ static u8 serverworks_is_csb(struct pci_dev *pdev)
* bug we hit.
*/
static unsigned long serverworks_osb4_filter(struct ata_device *adev, unsigned long mask)
static unsigned int serverworks_osb4_filter(struct ata_device *adev, unsigned int mask)
{
if (adev->class == ATA_DEV_ATA)
mask &= ~ATA_MASK_UDMA;
@@ -166,7 +166,7 @@ static unsigned long serverworks_osb4_filter(struct ata_device *adev, unsigned l
* Check the blacklist and disable UDMA5 if matched
*/
static unsigned long serverworks_csb_filter(struct ata_device *adev, unsigned long mask)
static unsigned int serverworks_csb_filter(struct ata_device *adev, unsigned int mask)
{
const char *p;
char model_num[ATA_ID_PROD_LEN + 1];

View File

@@ -525,7 +525,7 @@ static void sis_133_set_dmamode (struct ata_port *ap, struct ata_device *adev)
* Block UDMA6 on devices that do not support it.
*/
static unsigned long sis_133_mode_filter(struct ata_device *adev, unsigned long mask)
static unsigned int sis_133_mode_filter(struct ata_device *adev, unsigned int mask)
{
struct ata_port *ap = adev->link->ap;
struct pci_dev *pdev = to_pci_dev(ap->host->dev);

View File

@@ -352,7 +352,7 @@ static void via_set_dmamode(struct ata_port *ap, struct ata_device *adev)
* one breed of Transcend SSD. Return the updated mask.
*/
static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask)
{
struct ata_host *host = dev->link->ap->host;
const struct via_isa_bridge *config = host->private_data;

View File

@@ -4057,7 +4057,7 @@ static int mv_platform_probe(struct platform_device *pdev)
/*
* Simple resource validation ..
*/
if (unlikely(pdev->num_resources != 2)) {
if (unlikely(pdev->num_resources != 1)) {
dev_err(&pdev->dev, "invalid number of resources\n");
return -EINVAL;
}

View File

@@ -549,7 +549,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
goto err_dmabuf;
}
file->f_mode |= FMODE_LSEEK;
dmabuf->file = file;
mutex_init(&dmabuf->lock);

View File

@@ -2,18 +2,6 @@
menu "EFI (Extensible Firmware Interface) Support"
depends on EFI
config EFI_VARS
tristate "EFI Variable Support via sysfs"
depends on EFI && (X86 || IA64)
default n
help
If you say Y here, you are able to get EFI (Extensible Firmware
Interface) variable information via sysfs. You may read,
write, create, and destroy EFI variables through this interface.
Note that this driver is only retained for compatibility with
legacy users: new users should use the efivarfs filesystem
instead.
config EFI_ESRT
bool
depends on EFI && !IA64
@@ -22,6 +10,7 @@ config EFI_ESRT
config EFI_VARS_PSTORE
tristate "Register efivars backend for pstore"
depends on PSTORE
select UCS2_STRING
default y
help
Say Y here to enable use efivars as a backend to pstore. This
@@ -145,6 +134,7 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
config EFI_BOOTLOADER_CONTROL
tristate "EFI Bootloader Control"
select UCS2_STRING
default n
help
This module installs a reboot hook, such that if reboot() is

View File

@@ -17,7 +17,6 @@ ifneq ($(CONFIG_EFI_CAPSULE_LOADER),)
obj-$(CONFIG_EFI) += capsule.o
endif
obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o
obj-$(CONFIG_EFI_VARS) += efivars.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
obj-$(CONFIG_UEFI_CPER) += cper.o

View File

@@ -240,6 +240,7 @@ void __init efi_init(void)
* And now, memblock is fully populated, it is time to do capping.
*/
early_init_dt_check_for_usable_mem_range();
efi_find_mirror();
efi_esrt_init();
efi_mokvar_table_init();

View File

@@ -6,6 +6,8 @@
#include <linux/slab.h>
#include <linux/ucs2_string.h>
MODULE_IMPORT_NS(EFIVAR);
#define DUMP_NAME_LEN 66
#define EFIVARS_DATA_SIZE_MAX 1024
@@ -20,18 +22,25 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
EFI_VARIABLE_BOOTSERVICE_ACCESS | \
EFI_VARIABLE_RUNTIME_ACCESS)
static LIST_HEAD(efi_pstore_list);
static DECLARE_WORK(efivar_work, NULL);
static int efi_pstore_open(struct pstore_info *psi)
{
psi->data = NULL;
int err;
err = efivar_lock();
if (err)
return err;
psi->data = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
if (!psi->data)
return -ENOMEM;
return 0;
}
static int efi_pstore_close(struct pstore_info *psi)
{
psi->data = NULL;
efivar_unlock();
kfree(psi->data);
return 0;
}
@@ -40,22 +49,17 @@ static inline u64 generic_id(u64 timestamp, unsigned int part, int count)
return (timestamp * 100 + part) * 1000 + count;
}
static int efi_pstore_read_func(struct efivar_entry *entry,
struct pstore_record *record)
static int efi_pstore_read_func(struct pstore_record *record,
efi_char16_t *varname)
{
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
unsigned long wlen, size = EFIVARS_DATA_SIZE_MAX;
char name[DUMP_NAME_LEN], data_type;
int i;
efi_status_t status;
int cnt;
unsigned int part;
unsigned long size;
u64 time;
if (efi_guidcmp(entry->var.VendorGuid, vendor))
return 0;
for (i = 0; i < DUMP_NAME_LEN; i++)
name[i] = entry->var.VariableName[i];
ucs2_as_utf8(name, varname, DUMP_NAME_LEN);
if (sscanf(name, "dump-type%u-%u-%d-%llu-%c",
&record->type, &part, &cnt, &time, &data_type) == 5) {
@@ -95,161 +99,75 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
} else
return 0;
entry->var.DataSize = 1024;
__efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data);
size = entry->var.DataSize;
memcpy(record->buf, entry->var.Data,
(size_t)min_t(unsigned long, EFIVARS_DATA_SIZE_MAX, size));
return size;
}
/**
* efi_pstore_scan_sysfs_enter
* @pos: scanning entry
* @next: next entry
* @head: list head
*/
static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
struct efivar_entry *next,
struct list_head *head)
{
pos->scanning = true;
if (&next->list != head)
next->scanning = true;
}
/**
* __efi_pstore_scan_sysfs_exit
* @entry: deleting entry
* @turn_off_scanning: Check if a scanning flag should be turned off
*/
static inline int __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
bool turn_off_scanning)
{
if (entry->deleting) {
list_del(&entry->list);
efivar_entry_iter_end();
kfree(entry);
if (efivar_entry_iter_begin())
return -EINTR;
} else if (turn_off_scanning)
entry->scanning = false;
return 0;
}
/**
* efi_pstore_scan_sysfs_exit
* @pos: scanning entry
* @next: next entry
* @head: list head
* @stop: a flag checking if scanning will stop
*/
static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
struct efivar_entry *next,
struct list_head *head, bool stop)
{
int ret = __efi_pstore_scan_sysfs_exit(pos, true);
if (ret)
return ret;
if (stop)
ret = __efi_pstore_scan_sysfs_exit(next, &next->list != head);
return ret;
}
/**
* efi_pstore_sysfs_entry_iter
*
* @record: pstore record to pass to callback
*
* You MUST call efivar_entry_iter_begin() before this function, and
* efivar_entry_iter_end() afterwards.
*
*/
static int efi_pstore_sysfs_entry_iter(struct pstore_record *record)
{
struct efivar_entry **pos = (struct efivar_entry **)&record->psi->data;
struct efivar_entry *entry, *n;
struct list_head *head = &efi_pstore_list;
int size = 0;
int ret;
if (!*pos) {
list_for_each_entry_safe(entry, n, head, list) {
efi_pstore_scan_sysfs_enter(entry, n, head);
size = efi_pstore_read_func(entry, record);
ret = efi_pstore_scan_sysfs_exit(entry, n, head,
size < 0);
if (ret)
return ret;
if (size)
break;
}
*pos = n;
return size;
}
list_for_each_entry_safe_from((*pos), n, head, list) {
efi_pstore_scan_sysfs_enter((*pos), n, head);
size = efi_pstore_read_func((*pos), record);
ret = efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
if (ret)
return ret;
if (size)
break;
}
*pos = n;
return size;
}
/**
* efi_pstore_read
*
* This function returns a size of NVRAM entry logged via efi_pstore_write().
* The meaning and behavior of efi_pstore/pstore are as below.
*
* size > 0: Got data of an entry logged via efi_pstore_write() successfully,
* and pstore filesystem will continue reading subsequent entries.
* size == 0: Entry was not logged via efi_pstore_write(),
* and efi_pstore driver will continue reading subsequent entries.
* size < 0: Failed to get data of entry logging via efi_pstore_write(),
* and pstore will stop reading entry.
*/
static ssize_t efi_pstore_read(struct pstore_record *record)
{
ssize_t size;
record->buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
record->buf = kmalloc(size, GFP_KERNEL);
if (!record->buf)
return -ENOMEM;
if (efivar_entry_iter_begin()) {
size = -EINTR;
goto out;
}
size = efi_pstore_sysfs_entry_iter(record);
efivar_entry_iter_end();
out:
if (size <= 0) {
status = efivar_get_variable(varname, &LINUX_EFI_CRASH_GUID, NULL,
&size, record->buf);
if (status != EFI_SUCCESS) {
kfree(record->buf);
record->buf = NULL;
return -EIO;
}
/*
* Store the name of the variable in the pstore_record priv field, so
* we can reuse it later if we need to delete the EFI variable from the
* variable store.
*/
wlen = (ucs2_strnlen(varname, DUMP_NAME_LEN) + 1) * sizeof(efi_char16_t);
record->priv = kmemdup(varname, wlen, GFP_KERNEL);
if (!record->priv) {
kfree(record->buf);
return -ENOMEM;
}
return size;
}
static ssize_t efi_pstore_read(struct pstore_record *record)
{
efi_char16_t *varname = record->psi->data;
efi_guid_t guid = LINUX_EFI_CRASH_GUID;
unsigned long varname_size;
efi_status_t status;
for (;;) {
varname_size = EFIVARS_DATA_SIZE_MAX;
/*
* If this is the first read() call in the pstore enumeration,
* varname will be the empty string, and the GetNextVariable()
* runtime service call will return the first EFI variable in
* its own enumeration order, ignoring the guid argument.
*
* Subsequent calls to GetNextVariable() must pass the name and
* guid values returned by the previous call, which is why we
* store varname in record->psi->data. Given that we only
* enumerate variables with the efi-pstore GUID, there is no
* need to record the guid return value.
*/
status = efivar_get_next_variable(&varname_size, varname, &guid);
if (status == EFI_NOT_FOUND)
return 0;
if (status != EFI_SUCCESS)
return -EIO;
/* skip variables that don't concern us */
if (efi_guidcmp(guid, LINUX_EFI_CRASH_GUID))
continue;
return efi_pstore_read_func(record, varname);
}
}
static int efi_pstore_write(struct pstore_record *record)
{
char name[DUMP_NAME_LEN];
efi_char16_t efi_name[DUMP_NAME_LEN];
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
int i, ret = 0;
efi_status_t status;
int i;
record->id = generic_id(record->time.tv_sec, record->part,
record->count);
@@ -265,88 +183,26 @@ static int efi_pstore_write(struct pstore_record *record)
for (i = 0; i < DUMP_NAME_LEN; i++)
efi_name[i] = name[i];
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
false, record->size, record->psi->buf);
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
if (!schedule_work(&efivar_work))
module_put(THIS_MODULE);
return ret;
if (efivar_trylock())
return -EBUSY;
status = efivar_set_variable_locked(efi_name, &LINUX_EFI_CRASH_GUID,
PSTORE_EFI_ATTRIBUTES,
record->size, record->psi->buf,
true);
efivar_unlock();
return status == EFI_SUCCESS ? 0 : -EIO;
};
/*
* Clean up an entry with the same name
*/
static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
{
efi_char16_t *efi_name = data;
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
unsigned long ucs2_len = ucs2_strlen(efi_name);
if (efi_guidcmp(entry->var.VendorGuid, vendor))
return 0;
if (ucs2_strncmp(entry->var.VariableName, efi_name, (size_t)ucs2_len))
return 0;
if (entry->scanning) {
/*
* Skip deletion because this entry will be deleted
* after scanning is completed.
*/
entry->deleting = true;
} else
list_del(&entry->list);
/* found */
__efivar_entry_delete(entry);
return 1;
}
static int efi_pstore_erase_name(const char *name)
{
struct efivar_entry *entry = NULL;
efi_char16_t efi_name[DUMP_NAME_LEN];
int found, i;
for (i = 0; i < DUMP_NAME_LEN; i++) {
efi_name[i] = name[i];
if (name[i] == '\0')
break;
}
if (efivar_entry_iter_begin())
return -EINTR;
found = __efivar_entry_iter(efi_pstore_erase_func, &efi_pstore_list,
efi_name, &entry);
efivar_entry_iter_end();
if (found && !entry->scanning)
kfree(entry);
return found ? 0 : -ENOENT;
}
static int efi_pstore_erase(struct pstore_record *record)
{
char name[DUMP_NAME_LEN];
int ret;
efi_status_t status;
snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lld",
record->type, record->part, record->count,
(long long)record->time.tv_sec);
ret = efi_pstore_erase_name(name);
if (ret != -ENOENT)
return ret;
status = efivar_set_variable(record->priv, &LINUX_EFI_CRASH_GUID,
PSTORE_EFI_ATTRIBUTES, 0, NULL);
snprintf(name, sizeof(name), "dump-type%u-%u-%lld",
record->type, record->part, (long long)record->time.tv_sec);
ret = efi_pstore_erase_name(name);
return ret;
if (status != EFI_SUCCESS && status != EFI_NOT_FOUND)
return -EIO;
return 0;
}
static struct pstore_info efi_pstore_info = {
@@ -360,77 +216,14 @@ static struct pstore_info efi_pstore_info = {
.erase = efi_pstore_erase,
};
static int efi_pstore_callback(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry;
int ret;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
memcpy(entry->var.VariableName, name, name_size);
entry->var.VendorGuid = vendor;
ret = efivar_entry_add(entry, &efi_pstore_list);
if (ret)
kfree(entry);
return ret;
}
static int efi_pstore_update_entry(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry = data;
if (efivar_entry_find(name, vendor, &efi_pstore_list, false))
return 0;
memcpy(entry->var.VariableName, name, name_size);
memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
return 1;
}
static void efi_pstore_update_entries(struct work_struct *work)
{
struct efivar_entry *entry;
int err;
/* Add new sysfs entries */
while (1) {
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
err = efivar_init(efi_pstore_update_entry, entry,
false, &efi_pstore_list);
if (!err)
break;
efivar_entry_add(entry, &efi_pstore_list);
}
kfree(entry);
module_put(THIS_MODULE);
}
static __init int efivars_pstore_init(void)
{
int ret;
if (!efivars_kobject() || !efivar_supports_writes())
if (!efivar_supports_writes())
return 0;
if (efivars_pstore_disable)
return 0;
ret = efivar_init(efi_pstore_callback, NULL, true, &efi_pstore_list);
if (ret)
return ret;
efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL);
if (!efi_pstore_info.buf)
return -ENOMEM;
@@ -443,8 +236,6 @@ static __init int efivars_pstore_init(void)
efi_pstore_info.bufsize = 0;
}
INIT_WORK(&efivar_work, efi_pstore_update_entries);
return 0;
}

View File

@@ -202,7 +202,7 @@ static void generic_ops_unregister(void)
}
#ifdef CONFIG_EFI_CUSTOM_SSDT_OVERLAYS
#define EFIVAR_SSDT_NAME_MAX 16
#define EFIVAR_SSDT_NAME_MAX 16UL
static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
static int __init efivar_ssdt_setup(char *str)
{
@@ -219,83 +219,62 @@ static int __init efivar_ssdt_setup(char *str)
}
__setup("efivar_ssdt=", efivar_ssdt_setup);
static __init int efivar_ssdt_iter(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry;
struct list_head *list = data;
char utf8_name[EFIVAR_SSDT_NAME_MAX];
int limit = min_t(unsigned long, EFIVAR_SSDT_NAME_MAX, name_size);
ucs2_as_utf8(utf8_name, name, limit - 1);
if (strncmp(utf8_name, efivar_ssdt, limit) != 0)
return 0;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return 0;
memcpy(entry->var.VariableName, name, name_size);
memcpy(&entry->var.VendorGuid, &vendor, sizeof(efi_guid_t));
efivar_entry_add(entry, list);
return 0;
}
static __init int efivar_ssdt_load(void)
{
LIST_HEAD(entries);
struct efivar_entry *entry, *aux;
unsigned long size;
void *data;
int ret;
unsigned long name_size = 256;
efi_char16_t *name = NULL;
efi_status_t status;
efi_guid_t guid;
if (!efivar_ssdt[0])
return 0;
ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
name = kzalloc(name_size, GFP_KERNEL);
if (!name)
return -ENOMEM;
list_for_each_entry_safe(entry, aux, &entries, list) {
pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt,
&entry->var.VendorGuid);
for (;;) {
char utf8_name[EFIVAR_SSDT_NAME_MAX];
unsigned long data_size = 0;
void *data;
int limit;
list_del(&entry->list);
ret = efivar_entry_size(entry, &size);
if (ret) {
pr_err("failed to get var size\n");
goto free_entry;
status = efi.get_next_variable(&name_size, name, &guid);
if (status == EFI_NOT_FOUND) {
break;
} else if (status == EFI_BUFFER_TOO_SMALL) {
name = krealloc(name, name_size, GFP_KERNEL);
if (!name)
return -ENOMEM;
continue;
}
data = kmalloc(size, GFP_KERNEL);
if (!data) {
ret = -ENOMEM;
goto free_entry;
limit = min(EFIVAR_SSDT_NAME_MAX, name_size);
ucs2_as_utf8(utf8_name, name, limit - 1);
if (strncmp(utf8_name, efivar_ssdt, limit) != 0)
continue;
pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt, &guid);
status = efi.get_variable(name, &guid, NULL, &data_size, NULL);
if (status != EFI_BUFFER_TOO_SMALL || !data_size)
return -EIO;
data = kmalloc(data_size, GFP_KERNEL);
if (!data)
return -ENOMEM;
status = efi.get_variable(name, &guid, NULL, &data_size, data);
if (status == EFI_SUCCESS) {
acpi_status ret = acpi_load_table(data, NULL);
if (ret)
pr_err("failed to load table: %u\n", ret);
} else {
pr_err("failed to get var data: 0x%lx\n", status);
}
ret = efivar_entry_get(entry, NULL, &size, data);
if (ret) {
pr_err("failed to get var data\n");
goto free_data;
}
ret = acpi_load_table(data, NULL);
if (ret) {
pr_err("failed to load table: %d\n", ret);
goto free_data;
}
goto free_entry;
free_data:
kfree(data);
free_entry:
kfree(entry);
}
return ret;
return 0;
}
#else
static inline int efivar_ssdt_load(void) { return 0; }
@@ -446,6 +425,29 @@ err_put:
subsys_initcall(efisubsys_init);
void __init efi_find_mirror(void)
{
efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
if (!efi_enabled(EFI_MEMMAP))
return;
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
total_size += size;
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
memblock_mark_mirror(start, size);
mirror_size += size;
}
}
if (mirror_size)
pr_info("Memory: %lldM/%lldM mirrored memory\n",
mirror_size>>20, total_size>>20);
}
/*
* Find the efi memory descriptor for a given physical address. Given a
* physical address, determine if it exists within an EFI Memory Map entry,
@@ -897,6 +899,7 @@ int efi_status_to_err(efi_status_t status)
return err;
}
EXPORT_SYMBOL_GPL(efi_status_to_err);
static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;

View File

@@ -10,69 +10,51 @@
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
static void efibc_str_to_str16(const char *str, efi_char16_t *str16)
#define MAX_DATA_LEN 512
static int efibc_set_variable(efi_char16_t *name, efi_char16_t *value,
unsigned long len)
{
size_t i;
efi_status_t status;
for (i = 0; i < strlen(str); i++)
str16[i] = str[i];
status = efi.set_variable(name, &LINUX_EFI_LOADER_ENTRY_GUID,
EFI_VARIABLE_NON_VOLATILE
| EFI_VARIABLE_BOOTSERVICE_ACCESS
| EFI_VARIABLE_RUNTIME_ACCESS,
len * sizeof(efi_char16_t), value);
str16[i] = '\0';
}
static int efibc_set_variable(const char *name, const char *value)
{
int ret;
efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID;
struct efivar_entry *entry;
size_t size = (strlen(value) + 1) * sizeof(efi_char16_t);
if (size > sizeof(entry->var.Data)) {
pr_err("value is too large (%zu bytes) for '%s' EFI variable\n", size, name);
return -EINVAL;
if (status != EFI_SUCCESS) {
pr_err("failed to set EFI variable: 0x%lx\n", status);
return -EIO;
}
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
pr_err("failed to allocate efivar entry for '%s' EFI variable\n", name);
return -ENOMEM;
}
efibc_str_to_str16(name, entry->var.VariableName);
efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
ret = efivar_entry_set_safe(entry->var.VariableName,
entry->var.VendorGuid,
EFI_VARIABLE_NON_VOLATILE
| EFI_VARIABLE_BOOTSERVICE_ACCESS
| EFI_VARIABLE_RUNTIME_ACCESS,
false, size, entry->var.Data);
if (ret)
pr_err("failed to set %s EFI variable: 0x%x\n",
name, ret);
kfree(entry);
return ret;
return 0;
}
static int efibc_reboot_notifier_call(struct notifier_block *notifier,
unsigned long event, void *data)
{
const char *reason = "shutdown";
efi_char16_t *reason = event == SYS_RESTART ? L"reboot"
: L"shutdown";
const u8 *str = data;
efi_char16_t *wdata;
unsigned long l;
int ret;
if (event == SYS_RESTART)
reason = "reboot";
ret = efibc_set_variable("LoaderEntryRebootReason", reason);
ret = efibc_set_variable(L"LoaderEntryRebootReason", reason,
ucs2_strlen(reason));
if (ret || !data)
return NOTIFY_DONE;
efibc_set_variable("LoaderEntryOneShot", (char *)data);
wdata = kmalloc(MAX_DATA_LEN * sizeof(efi_char16_t), GFP_KERNEL);
for (l = 0; l < MAX_DATA_LEN - 1 && str[l] != '\0'; l++)
wdata[l] = str[l];
wdata[l] = L'\0';
efibc_set_variable(L"LoaderEntryOneShot", wdata, l);
kfree(wdata);
return NOTIFY_DONE;
}
@@ -84,7 +66,7 @@ static int __init efibc_init(void)
{
int ret;
if (!efivars_kobject() || !efivar_supports_writes())
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
return -ENODEV;
ret = register_reboot_notifier(&efibc_reboot_notifier);

View File

@@ -1,671 +0,0 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Originally from efivars.c,
*
* Copyright (C) 2001,2003,2004 Dell <Matt_Domsch@dell.com>
* Copyright (C) 2004 Intel Corporation <matthew.e.tolentino@intel.com>
*
* This code takes all variables accessible from EFI runtime and
* exports them via sysfs
*/
#include <linux/efi.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
#include <linux/compat.h>
#define EFIVARS_VERSION "0.08"
#define EFIVARS_DATE "2004-May-17"
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@Dell.com>");
MODULE_DESCRIPTION("sysfs interface to EFI Variables");
MODULE_LICENSE("GPL");
MODULE_VERSION(EFIVARS_VERSION);
static LIST_HEAD(efivar_sysfs_list);
static struct kset *efivars_kset;
static struct bin_attribute *efivars_new_var;
static struct bin_attribute *efivars_del_var;
struct compat_efi_variable {
efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
efi_guid_t VendorGuid;
__u32 DataSize;
__u8 Data[1024];
__u32 Status;
__u32 Attributes;
} __packed;
struct efivar_attribute {
struct attribute attr;
ssize_t (*show) (struct efivar_entry *entry, char *buf);
ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
};
#define EFIVAR_ATTR(_name, _mode, _show, _store) \
struct efivar_attribute efivar_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode}, \
.show = _show, \
.store = _store, \
};
#define to_efivar_attr(_attr) container_of(_attr, struct efivar_attribute, attr)
#define to_efivar_entry(obj) container_of(obj, struct efivar_entry, kobj)
/*
* Prototype for sysfs creation function
*/
static int
efivar_create_sysfs_entry(struct efivar_entry *new_var);
static ssize_t
efivar_guid_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
char *str = buf;
if (!entry || !buf)
return 0;
efi_guid_to_str(&var->VendorGuid, str);
str += strlen(str);
str += sprintf(str, "\n");
return str - buf;
}
static ssize_t
efivar_attr_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
unsigned long size = sizeof(var->Data);
char *str = buf;
int ret;
if (!entry || !buf)
return -EINVAL;
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
var->DataSize = size;
if (ret)
return -EIO;
if (var->Attributes & EFI_VARIABLE_NON_VOLATILE)
str += sprintf(str, "EFI_VARIABLE_NON_VOLATILE\n");
if (var->Attributes & EFI_VARIABLE_BOOTSERVICE_ACCESS)
str += sprintf(str, "EFI_VARIABLE_BOOTSERVICE_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_RUNTIME_ACCESS)
str += sprintf(str, "EFI_VARIABLE_RUNTIME_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_HARDWARE_ERROR_RECORD)
str += sprintf(str, "EFI_VARIABLE_HARDWARE_ERROR_RECORD\n");
if (var->Attributes & EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS)
str += sprintf(str,
"EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS\n");
if (var->Attributes &
EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS)
str += sprintf(str,
"EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS\n");
if (var->Attributes & EFI_VARIABLE_APPEND_WRITE)
str += sprintf(str, "EFI_VARIABLE_APPEND_WRITE\n");
return str - buf;
}
static ssize_t
efivar_size_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
unsigned long size = sizeof(var->Data);
char *str = buf;
int ret;
if (!entry || !buf)
return -EINVAL;
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
var->DataSize = size;
if (ret)
return -EIO;
str += sprintf(str, "0x%lx\n", var->DataSize);
return str - buf;
}
static ssize_t
efivar_data_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
unsigned long size = sizeof(var->Data);
int ret;
if (!entry || !buf)
return -EINVAL;
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
var->DataSize = size;
if (ret)
return -EIO;
memcpy(buf, var->Data, var->DataSize);
return var->DataSize;
}
static inline int
sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
unsigned long size, u32 attributes, u8 *data)
{
/*
* If only updating the variable data, then the name
* and guid should remain the same
*/
if (memcmp(name, var->VariableName, sizeof(var->VariableName)) ||
efi_guidcmp(vendor, var->VendorGuid)) {
printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n");
return -EINVAL;
}
if ((size <= 0) || (attributes == 0)){
printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n");
return -EINVAL;
}
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
efivar_validate(vendor, name, data, size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
return 0;
}
static void
copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
{
memcpy(dst->VariableName, src->VariableName, EFI_VAR_NAME_LEN);
memcpy(dst->Data, src->Data, sizeof(src->Data));
dst->VendorGuid = src->VendorGuid;
dst->DataSize = src->DataSize;
dst->Attributes = src->Attributes;
}
/*
* We allow each variable to be edited via rewriting the
* entire efi variable structure.
*/
static ssize_t
efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
{
struct efi_variable *new_var, *var = &entry->var;
efi_char16_t *name;
unsigned long size;
efi_guid_t vendor;
u32 attributes;
u8 *data;
int err;
if (!entry || !buf)
return -EINVAL;
if (in_compat_syscall()) {
struct compat_efi_variable *compat;
if (count != sizeof(*compat))
return -EINVAL;
compat = (struct compat_efi_variable *)buf;
attributes = compat->Attributes;
vendor = compat->VendorGuid;
name = compat->VariableName;
size = compat->DataSize;
data = compat->Data;
err = sanity_check(var, name, vendor, size, attributes, data);
if (err)
return err;
copy_out_compat(&entry->var, compat);
} else {
if (count != sizeof(struct efi_variable))
return -EINVAL;
new_var = (struct efi_variable *)buf;
attributes = new_var->Attributes;
vendor = new_var->VendorGuid;
name = new_var->VariableName;
size = new_var->DataSize;
data = new_var->Data;
err = sanity_check(var, name, vendor, size, attributes, data);
if (err)
return err;
memcpy(&entry->var, new_var, count);
}
err = efivar_entry_set(entry, attributes, size, data, NULL);
if (err) {
printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err);
return -EIO;
}
return count;
}
static ssize_t
efivar_show_raw(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
struct compat_efi_variable *compat;
unsigned long datasize = sizeof(var->Data);
size_t size;
int ret;
if (!entry || !buf)
return 0;
ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data);
var->DataSize = datasize;
if (ret)
return -EIO;
if (in_compat_syscall()) {
compat = (struct compat_efi_variable *)buf;
size = sizeof(*compat);
memcpy(compat->VariableName, var->VariableName,
EFI_VAR_NAME_LEN);
memcpy(compat->Data, var->Data, sizeof(compat->Data));
compat->VendorGuid = var->VendorGuid;
compat->DataSize = var->DataSize;
compat->Attributes = var->Attributes;
} else {
size = sizeof(*var);
memcpy(buf, var, size);
}
return size;
}
/*
* Generic read/write functions that call the specific functions of
* the attributes...
*/
static ssize_t efivar_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct efivar_entry *var = to_efivar_entry(kobj);
struct efivar_attribute *efivar_attr = to_efivar_attr(attr);
ssize_t ret = -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (efivar_attr->show) {
ret = efivar_attr->show(var, buf);
}
return ret;
}
static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
struct efivar_entry *var = to_efivar_entry(kobj);
struct efivar_attribute *efivar_attr = to_efivar_attr(attr);
ssize_t ret = -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (efivar_attr->store)
ret = efivar_attr->store(var, buf, count);
return ret;
}
static const struct sysfs_ops efivar_attr_ops = {
.show = efivar_attr_show,
.store = efivar_attr_store,
};
static void efivar_release(struct kobject *kobj)
{
struct efivar_entry *var = to_efivar_entry(kobj);
kfree(var);
}
static EFIVAR_ATTR(guid, 0400, efivar_guid_read, NULL);
static EFIVAR_ATTR(attributes, 0400, efivar_attr_read, NULL);
static EFIVAR_ATTR(size, 0400, efivar_size_read, NULL);
static EFIVAR_ATTR(data, 0400, efivar_data_read, NULL);
static EFIVAR_ATTR(raw_var, 0600, efivar_show_raw, efivar_store_raw);
static struct attribute *def_attrs[] = {
&efivar_attr_guid.attr,
&efivar_attr_size.attr,
&efivar_attr_attributes.attr,
&efivar_attr_data.attr,
&efivar_attr_raw_var.attr,
NULL,
};
ATTRIBUTE_GROUPS(def);
static struct kobj_type efivar_ktype = {
.release = efivar_release,
.sysfs_ops = &efivar_attr_ops,
.default_groups = def_groups,
};
static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
struct efi_variable *new_var = (struct efi_variable *)buf;
struct efivar_entry *new_entry;
bool need_compat = in_compat_syscall();
efi_char16_t *name;
unsigned long size;
u32 attributes;
u8 *data;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (need_compat) {
if (count != sizeof(*compat))
return -EINVAL;
attributes = compat->Attributes;
name = compat->VariableName;
size = compat->DataSize;
data = compat->Data;
} else {
if (count != sizeof(*new_var))
return -EINVAL;
attributes = new_var->Attributes;
name = new_var->VariableName;
size = new_var->DataSize;
data = new_var->Data;
}
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
efivar_validate(new_var->VendorGuid, name, data,
size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
if (need_compat)
copy_out_compat(&new_entry->var, compat);
else
memcpy(&new_entry->var, new_var, sizeof(*new_var));
err = efivar_entry_set(new_entry, attributes, size,
data, &efivar_sysfs_list);
if (err) {
if (err == -EEXIST)
err = -EINVAL;
goto out;
}
if (efivar_create_sysfs_entry(new_entry)) {
printk(KERN_WARNING "efivars: failed to create sysfs entry.\n");
kfree(new_entry);
}
return count;
out:
kfree(new_entry);
return err;
}
static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct efi_variable *del_var = (struct efi_variable *)buf;
struct compat_efi_variable *compat;
struct efivar_entry *entry;
efi_char16_t *name;
efi_guid_t vendor;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (in_compat_syscall()) {
if (count != sizeof(*compat))
return -EINVAL;
compat = (struct compat_efi_variable *)buf;
name = compat->VariableName;
vendor = compat->VendorGuid;
} else {
if (count != sizeof(*del_var))
return -EINVAL;
name = del_var->VariableName;
vendor = del_var->VendorGuid;
}
if (efivar_entry_iter_begin())
return -EINTR;
entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
if (!entry)
err = -EINVAL;
else if (__efivar_entry_delete(entry))
err = -EIO;
if (err) {
efivar_entry_iter_end();
return err;
}
if (!entry->scanning) {
efivar_entry_iter_end();
efivar_unregister(entry);
} else
efivar_entry_iter_end();
/* It's dead Jim.... */
return count;
}
/**
* efivar_create_sysfs_entry - create a new entry in sysfs
* @new_var: efivar entry to create
*
* Returns 0 on success, negative error code on failure
*/
static int
efivar_create_sysfs_entry(struct efivar_entry *new_var)
{
int short_name_size;
char *short_name;
unsigned long utf8_name_size;
efi_char16_t *variable_name = new_var->var.VariableName;
int ret;
/*
* Length of the variable bytes in UTF8, plus the '-' separator,
* plus the GUID, plus trailing NUL
*/
utf8_name_size = ucs2_utf8size(variable_name);
short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1;
short_name = kmalloc(short_name_size, GFP_KERNEL);
if (!short_name)
return -ENOMEM;
ucs2_as_utf8(short_name, variable_name, short_name_size);
/* This is ugly, but necessary to separate one vendor's
private variables from another's. */
short_name[utf8_name_size] = '-';
efi_guid_to_str(&new_var->var.VendorGuid,
short_name + utf8_name_size + 1);
new_var->kobj.kset = efivars_kset;
ret = kobject_init_and_add(&new_var->kobj, &efivar_ktype,
NULL, "%s", short_name);
kfree(short_name);
if (ret) {
kobject_put(&new_var->kobj);
return ret;
}
kobject_uevent(&new_var->kobj, KOBJ_ADD);
if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
efivar_unregister(new_var);
return -EINTR;
}
return 0;
}
static int
create_efivars_bin_attributes(void)
{
struct bin_attribute *attr;
int error;
/* new_var */
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
attr->attr.name = "new_var";
attr->attr.mode = 0200;
attr->write = efivar_create;
efivars_new_var = attr;
/* del_var */
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr) {
error = -ENOMEM;
goto out_free;
}
attr->attr.name = "del_var";
attr->attr.mode = 0200;
attr->write = efivar_delete;
efivars_del_var = attr;
sysfs_bin_attr_init(efivars_new_var);
sysfs_bin_attr_init(efivars_del_var);
/* Register */
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_new_var);
if (error) {
printk(KERN_ERR "efivars: unable to create new_var sysfs file"
" due to error %d\n", error);
goto out_free;
}
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_del_var);
if (error) {
printk(KERN_ERR "efivars: unable to create del_var sysfs file"
" due to error %d\n", error);
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
goto out_free;
}
return 0;
out_free:
kfree(efivars_del_var);
efivars_del_var = NULL;
kfree(efivars_new_var);
efivars_new_var = NULL;
return error;
}
static int efivars_sysfs_callback(efi_char16_t *name, efi_guid_t vendor,
unsigned long name_size, void *data)
{
struct efivar_entry *entry;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
memcpy(entry->var.VariableName, name, name_size);
memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
efivar_create_sysfs_entry(entry);
return 0;
}
static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
{
int err = efivar_entry_remove(entry);
if (err)
return err;
efivar_unregister(entry);
return 0;
}
static void efivars_sysfs_exit(void)
{
/* Remove all entries and destroy */
int err;
err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list,
NULL, NULL);
if (err) {
pr_err("efivars: Failed to destroy sysfs entries\n");
return;
}
if (efivars_new_var)
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
if (efivars_del_var)
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_del_var);
kfree(efivars_new_var);
kfree(efivars_del_var);
kset_unregister(efivars_kset);
}
static int efivars_sysfs_init(void)
{
struct kobject *parent_kobj = efivars_kobject();
int error = 0;
/* No efivars has been registered yet */
if (!parent_kobj || !efivar_supports_writes())
return 0;
printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION,
EFIVARS_DATE);
efivars_kset = kset_create_and_add("vars", NULL, parent_kobj);
if (!efivars_kset) {
printk(KERN_ERR "efivars: Subsystem registration failed.\n");
return -ENOMEM;
}
efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
error = create_efivars_bin_attributes();
if (error) {
efivars_sysfs_exit();
return error;
}
return 0;
}
module_init(efivars_sysfs_init);
module_exit(efivars_sysfs_exit);

View File

@@ -59,8 +59,7 @@ static void __init efi_memmap_free(void)
* Depending on whether mm_init() has already been invoked or not,
* either memblock or "normal" page allocation is used.
*
* Returns the physical address of the allocated memory map on
* success, zero on failure.
* Returns zero on success, a negative error code on failure.
*/
int __init efi_memmap_alloc(unsigned int num_entries,
struct efi_memory_map_data *data)
@@ -245,7 +244,7 @@ int __init efi_memmap_install(struct efi_memory_map_data *data)
* @range: Address range (start, end) to split around
*
* Returns the number of additional EFI memmap entries required to
* accomodate @range.
* accommodate @range.
*/
int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
{

File diff suppressed because it is too large Load Diff

View File

@@ -552,8 +552,7 @@ EXPORT_SYMBOL(drm_release_noglobal);
* Since events are used by the KMS API for vblank and page flip completion this
* means all modern display drivers must use it.
*
* @offset is ignored, DRM events are read like a pipe. Therefore drivers also
* must set the &file_operation.llseek to no_llseek(). Polling support is
* @offset is ignored, DRM events are read like a pipe. Polling support is
* provided by drm_poll().
*
* This function will only ever read a full event. Therefore userspace must

View File

@@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
* However...!
*
* The mmu-notifier can be invalidated for a
* migrate_page, that is alreadying holding the lock
* on the page. Such a try_to_unmap() will result
* migrate_folio, that is alreadying holding the lock
* on the folio. Such a try_to_unmap() will result
* in us calling put_pages() and so recursively try
* to lock the page. We avoid that deadlock with
* a trylock_page() and in exchange we risk missing

View File

@@ -1597,52 +1597,38 @@ static u32 applespi_notify(acpi_handle gpe_device, u32 gpe, void *context)
static int applespi_get_saved_bl_level(struct applespi_data *applespi)
{
struct efivar_entry *efivar_entry;
efi_status_t sts = EFI_NOT_FOUND;
u16 efi_data = 0;
unsigned long efi_data_len;
int sts;
unsigned long efi_data_len = sizeof(efi_data);
efivar_entry = kmalloc(sizeof(*efivar_entry), GFP_KERNEL);
if (!efivar_entry)
return -ENOMEM;
memcpy(efivar_entry->var.VariableName, EFI_BL_LEVEL_NAME,
sizeof(EFI_BL_LEVEL_NAME));
efivar_entry->var.VendorGuid = EFI_BL_LEVEL_GUID;
efi_data_len = sizeof(efi_data);
sts = efivar_entry_get(efivar_entry, NULL, &efi_data_len, &efi_data);
if (sts && sts != -ENOENT)
if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
sts = efi.get_variable(EFI_BL_LEVEL_NAME, &EFI_BL_LEVEL_GUID,
NULL, &efi_data_len, &efi_data);
if (sts != EFI_SUCCESS && sts != EFI_NOT_FOUND)
dev_warn(&applespi->spi->dev,
"Error getting backlight level from EFI vars: %d\n",
"Error getting backlight level from EFI vars: 0x%lx\n",
sts);
kfree(efivar_entry);
return sts ? sts : efi_data;
return sts != EFI_SUCCESS ? -ENODEV : efi_data;
}
static void applespi_save_bl_level(struct applespi_data *applespi,
unsigned int level)
{
efi_guid_t efi_guid;
efi_status_t sts = EFI_UNSUPPORTED;
u32 efi_attr;
unsigned long efi_data_len;
u16 efi_data;
int sts;
/* Save keyboard backlight level */
efi_guid = EFI_BL_LEVEL_GUID;
efi_data = (u16)level;
efi_data_len = sizeof(efi_data);
efi_attr = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
EFI_VARIABLE_RUNTIME_ACCESS;
sts = efivar_entry_set_safe((efi_char16_t *)EFI_BL_LEVEL_NAME, efi_guid,
efi_attr, true, efi_data_len, &efi_data);
if (sts)
if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
sts = efi.set_variable(EFI_BL_LEVEL_NAME, &EFI_BL_LEVEL_GUID,
efi_attr, sizeof(efi_data), &efi_data);
if (sts != EFI_SUCCESS)
dev_warn(&applespi->spi->dev,
"Error saving backlight level to EFI vars: %d\n", sts);
"Error saving backlight level to EFI vars: 0x%lx\n", sts);
}
static int applespi_probe(struct spi_device *spi)

View File

@@ -29,8 +29,6 @@
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
@@ -1730,20 +1728,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
#ifdef CONFIG_BALLOON_COMPACTION
static int vmballoon_init_fs_context(struct fs_context *fc)
{
return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type vmballoon_fs = {
.name = "balloon-vmware",
.init_fs_context = vmballoon_init_fs_context,
.kill_sb = kill_anon_super,
};
static struct vfsmount *vmballoon_mnt;
/**
* vmballoon_migratepage() - migrates a balloon page.
* @b_dev_info: balloon device information descriptor.
@@ -1862,21 +1846,6 @@ out_unlock:
return ret;
}
/**
* vmballoon_compaction_deinit() - removes compaction related data.
*
* @b: pointer to the balloon.
*/
static void vmballoon_compaction_deinit(struct vmballoon *b)
{
if (!IS_ERR(b->b_dev_info.inode))
iput(b->b_dev_info.inode);
b->b_dev_info.inode = NULL;
kern_unmount(vmballoon_mnt);
vmballoon_mnt = NULL;
}
/**
* vmballoon_compaction_init() - initialized compaction for the balloon.
*
@@ -1888,33 +1857,15 @@ static void vmballoon_compaction_deinit(struct vmballoon *b)
*
* Return: zero on success or error code on failure.
*/
static __init int vmballoon_compaction_init(struct vmballoon *b)
static __init void vmballoon_compaction_init(struct vmballoon *b)
{
vmballoon_mnt = kern_mount(&vmballoon_fs);
if (IS_ERR(vmballoon_mnt))
return PTR_ERR(vmballoon_mnt);
b->b_dev_info.migratepage = vmballoon_migratepage;
b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
if (IS_ERR(b->b_dev_info.inode))
return PTR_ERR(b->b_dev_info.inode);
b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
return 0;
}
#else /* CONFIG_BALLOON_COMPACTION */
static void vmballoon_compaction_deinit(struct vmballoon *b)
static inline void vmballoon_compaction_init(struct vmballoon *b)
{
}
static int vmballoon_compaction_init(struct vmballoon *b)
{
return 0;
}
#endif /* CONFIG_BALLOON_COMPACTION */
static int __init vmballoon_init(void)
@@ -1939,9 +1890,7 @@ static int __init vmballoon_init(void)
* balloon_devinfo_init() .
*/
balloon_devinfo_init(&balloon.b_dev_info);
error = vmballoon_compaction_init(&balloon);
if (error)
goto fail;
vmballoon_compaction_init(&balloon);
INIT_LIST_HEAD(&balloon.huge_pages);
spin_lock_init(&balloon.comm_lock);
@@ -1958,7 +1907,6 @@ static int __init vmballoon_init(void)
return 0;
fail:
vmballoon_unregister_shrinker(&balloon);
vmballoon_compaction_deinit(&balloon);
return error;
}
@@ -1985,8 +1933,5 @@ static void __exit vmballoon_exit(void)
*/
vmballoon_send_start(&balloon, 0);
vmballoon_pop(&balloon);
/* Only once we popped the balloon, compaction can be deinit */
vmballoon_compaction_deinit(&balloon);
}
module_exit(vmballoon_exit);

View File

@@ -459,43 +459,34 @@ static void brcmf_fw_fix_efi_nvram_ccode(char *data, unsigned long data_len)
static u8 *brcmf_fw_nvram_from_efi(size_t *data_len_ret)
{
const u16 name[] = { 'n', 'v', 'r', 'a', 'm', 0 };
struct efivar_entry *nvram_efivar;
efi_guid_t guid = EFI_GUID(0x74b00bd9, 0x805a, 0x4d61, 0xb5, 0x1f,
0x43, 0x26, 0x81, 0x23, 0xd1, 0x13);
unsigned long data_len = 0;
efi_status_t status;
u8 *data = NULL;
int err;
nvram_efivar = kzalloc(sizeof(*nvram_efivar), GFP_KERNEL);
if (!nvram_efivar)
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return NULL;
memcpy(&nvram_efivar->var.VariableName, name, sizeof(name));
nvram_efivar->var.VendorGuid = EFI_GUID(0x74b00bd9, 0x805a, 0x4d61,
0xb5, 0x1f, 0x43, 0x26,
0x81, 0x23, 0xd1, 0x13);
err = efivar_entry_size(nvram_efivar, &data_len);
if (err)
status = efi.get_variable(L"nvram", &guid, NULL, &data_len, NULL);
if (status != EFI_BUFFER_TOO_SMALL)
goto fail;
data = kmalloc(data_len, GFP_KERNEL);
if (!data)
goto fail;
err = efivar_entry_get(nvram_efivar, NULL, &data_len, data);
if (err)
status = efi.get_variable(L"nvram", &guid, NULL, &data_len, data);
if (status != EFI_SUCCESS)
goto fail;
brcmf_fw_fix_efi_nvram_ccode(data, data_len);
brcmf_info("Using nvram EFI variable\n");
kfree(nvram_efivar);
*data_len_ret = data_len;
return data;
fail:
kfree(data);
kfree(nvram_efivar);
return NULL;
}
#else

View File

@@ -19,20 +19,14 @@
void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
{
struct efivar_entry *pnvm_efivar;
void *data;
unsigned long package_size;
int err;
efi_status_t status;
*len = 0;
pnvm_efivar = kzalloc(sizeof(*pnvm_efivar), GFP_KERNEL);
if (!pnvm_efivar)
return ERR_PTR(-ENOMEM);
memcpy(&pnvm_efivar->var.VariableName, IWL_UEFI_OEM_PNVM_NAME,
sizeof(IWL_UEFI_OEM_PNVM_NAME));
pnvm_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return ERR_PTR(-ENODEV);
/*
* TODO: we hardcode a maximum length here, because reading
@@ -42,27 +36,22 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
package_size = IWL_HARDCODED_PNVM_SIZE;
data = kmalloc(package_size, GFP_KERNEL);
if (!data) {
data = ERR_PTR(-ENOMEM);
goto out;
}
if (!data)
return ERR_PTR(-ENOMEM);
err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
if (err) {
status = efi.get_variable(IWL_UEFI_OEM_PNVM_NAME, &IWL_EFI_VAR_GUID,
NULL, &package_size, data);
if (status != EFI_SUCCESS) {
IWL_DEBUG_FW(trans,
"PNVM UEFI variable not found %d (len %lu)\n",
err, package_size);
"PNVM UEFI variable not found 0x%lx (len %lu)\n",
status, package_size);
kfree(data);
data = ERR_PTR(err);
goto out;
return ERR_PTR(-ENOENT);
}
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %lu\n", package_size);
*len = package_size;
out:
kfree(pnvm_efivar);
return data;
}
@@ -211,21 +200,15 @@ static void *iwl_uefi_reduce_power_parse(struct iwl_trans *trans,
void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
{
struct efivar_entry *reduce_power_efivar;
struct pnvm_sku_package *package;
void *data = NULL;
unsigned long package_size;
int err;
efi_status_t status;
*len = 0;
reduce_power_efivar = kzalloc(sizeof(*reduce_power_efivar), GFP_KERNEL);
if (!reduce_power_efivar)
return ERR_PTR(-ENOMEM);
memcpy(&reduce_power_efivar->var.VariableName, IWL_UEFI_REDUCED_POWER_NAME,
sizeof(IWL_UEFI_REDUCED_POWER_NAME));
reduce_power_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return ERR_PTR(-ENODEV);
/*
* TODO: we hardcode a maximum length here, because reading
@@ -235,19 +218,17 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
package_size = IWL_HARDCODED_REDUCE_POWER_SIZE;
package = kmalloc(package_size, GFP_KERNEL);
if (!package) {
package = ERR_PTR(-ENOMEM);
goto out;
}
if (!package)
return ERR_PTR(-ENOMEM);
err = efivar_entry_get(reduce_power_efivar, NULL, &package_size, package);
if (err) {
status = efi.get_variable(IWL_UEFI_REDUCED_POWER_NAME, &IWL_EFI_VAR_GUID,
NULL, &package_size, data);
if (status != EFI_SUCCESS) {
IWL_DEBUG_FW(trans,
"Reduced Power UEFI variable not found %d (len %lu)\n",
err, package_size);
"Reduced Power UEFI variable not found 0x%lx (len %lu)\n",
status, package_size);
kfree(package);
data = ERR_PTR(err);
goto out;
return ERR_PTR(-ENOENT);
}
IWL_DEBUG_FW(trans, "Read reduced power from UEFI with size %lu\n",
@@ -262,9 +243,6 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
kfree(package);
out:
kfree(reduce_power_efivar);
return data;
}
@@ -304,22 +282,15 @@ static int iwl_uefi_sgom_parse(struct uefi_cnv_wlan_sgom_data *sgom_data,
void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
struct iwl_fw_runtime *fwrt)
{
struct efivar_entry *sgom_efivar;
struct uefi_cnv_wlan_sgom_data *data;
unsigned long package_size;
int err, ret;
efi_status_t status;
int ret;
if (!fwrt->geo_enabled)
if (!fwrt->geo_enabled ||
!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return;
sgom_efivar = kzalloc(sizeof(*sgom_efivar), GFP_KERNEL);
if (!sgom_efivar)
return;
memcpy(&sgom_efivar->var.VariableName, IWL_UEFI_SGOM_NAME,
sizeof(IWL_UEFI_SGOM_NAME));
sgom_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
/* TODO: we hardcode a maximum length here, because reading
* from the UEFI is not working. To implement this properly,
* we have to call efivar_entry_size().
@@ -327,15 +298,14 @@ void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
package_size = IWL_HARDCODED_SGOM_SIZE;
data = kmalloc(package_size, GFP_KERNEL);
if (!data) {
data = ERR_PTR(-ENOMEM);
goto out;
}
if (!data)
return;
err = efivar_entry_get(sgom_efivar, NULL, &package_size, data);
if (err) {
status = efi.get_variable(IWL_UEFI_SGOM_NAME, &IWL_EFI_VAR_GUID,
NULL, &package_size, data);
if (status != EFI_SUCCESS) {
IWL_DEBUG_FW(trans,
"SGOM UEFI variable not found %d\n", err);
"SGOM UEFI variable not found 0x%lx\n", status);
goto out_free;
}
@@ -349,8 +319,6 @@ void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
out_free:
kfree(data);
out:
kfree(sgom_efivar);
}
IWL_EXPORT_SYMBOL(iwl_uefi_get_sgom_table);
#endif /* CONFIG_ACPI */

View File

@@ -112,7 +112,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
iocb->ki_pos = pos;
iocb->ki_filp = req->ns->file;
iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags;
return call_iter(iocb, &iter);
}

View File

@@ -1284,7 +1284,7 @@ static int gmin_get_config_var(struct device *maindev,
const struct dmi_system_id *id;
struct device *dev = maindev;
char var8[CFG_VAR_NAME_MAX];
struct efivar_entry *ev;
efi_status_t status;
int i, ret;
/* For sensors, try first to use the _DSM table */
@@ -1326,24 +1326,11 @@ static int gmin_get_config_var(struct device *maindev,
for (i = 0; i < sizeof(var8) && var8[i]; i++)
var16[i] = var8[i];
/* Not sure this API usage is kosher; efivar_entry_get()'s
* implementation simply uses VariableName and VendorGuid from
* the struct and ignores the rest, but it seems like there
* ought to be an "official" efivar_entry registered
* somewhere?
*/
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev)
return -ENOMEM;
memcpy(&ev->var.VariableName, var16, sizeof(var16));
ev->var.VendorGuid = GMIN_CFG_VAR_EFI_GUID;
ev->var.DataSize = *out_len;
ret = efivar_entry_get(ev, &ev->var.Attributes,
&ev->var.DataSize, ev->var.Data);
if (ret == 0) {
memcpy(out, ev->var.Data, ev->var.DataSize);
*out_len = ev->var.DataSize;
status = EFI_UNSUPPORTED;
if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
status = efi.get_variable(var16, &GMIN_CFG_VAR_EFI_GUID, NULL,
(unsigned long *)out_len, out);
if (status == EFI_SUCCESS) {
dev_info(maindev, "found EFI entry for '%s'\n", var8);
} else if (is_gmin) {
dev_info(maindev, "Failed to find EFI gmin variable %s\n", var8);
@@ -1351,8 +1338,6 @@ static int gmin_get_config_var(struct device *maindev,
dev_info(maindev, "Failed to find EFI variable %s\n", var8);
}
kfree(ev);
return ret;
}

View File

@@ -1132,7 +1132,7 @@ static struct file *vfio_device_open(struct vfio_device *device)
* Appears to be missing by lack of need rather than
* explicitly prevented. Now there's need.
*/
filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
if (device->group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user "

View File

@@ -17,9 +17,6 @@
#include <linux/oom.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/pseudo_fs.h>
#include <linux/page_reporting.h>
/*
@@ -42,10 +39,6 @@
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
#endif
enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_INFLATE,
VIRTIO_BALLOON_VQ_DEFLATE,
@@ -805,18 +798,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
return MIGRATEPAGE_SUCCESS;
}
static int balloon_init_fs_context(struct fs_context *fc)
{
return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type balloon_fs = {
.name = "balloon-kvm",
.init_fs_context = balloon_init_fs_context,
.kill_sb = kill_anon_super,
};
#endif /* CONFIG_BALLOON_COMPACTION */
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
@@ -909,19 +890,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_free_vb;
#ifdef CONFIG_BALLOON_COMPACTION
balloon_mnt = kern_mount(&balloon_fs);
if (IS_ERR(balloon_mnt)) {
err = PTR_ERR(balloon_mnt);
goto out_del_vqs;
}
vb->vb_dev_info.migratepage = virtballoon_migratepage;
vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
goto out_kern_unmount;
}
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
/*
@@ -930,13 +899,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
*/
if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
err = -ENOSPC;
goto out_iput;
goto out_del_vqs;
}
vb->balloon_wq = alloc_workqueue("balloon-wq",
WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
if (!vb->balloon_wq) {
err = -ENOMEM;
goto out_iput;
goto out_del_vqs;
}
INIT_WORK(&vb->report_free_page_work, report_free_page_func);
vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
@@ -1030,13 +999,7 @@ out_unregister_shrinker:
out_del_balloon_wq:
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
destroy_workqueue(vb->balloon_wq);
out_iput:
#ifdef CONFIG_BALLOON_COMPACTION
iput(vb->vb_dev_info.inode);
out_kern_unmount:
kern_unmount(balloon_mnt);
out_del_vqs:
#endif
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
@@ -1083,12 +1046,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
}
remove_common(vb);
#ifdef CONFIG_BALLOON_COMPACTION
if (vb->vb_dev_info.inode)
iput(vb->vb_dev_info.inode);
kern_unmount(balloon_mnt);
#endif
kfree(vb);
}

View File

@@ -526,7 +526,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to, int create)
struct inode *inode = page->mapping->host;
struct super_block *sb = inode->i_sb;
struct buffer_head *bh;
char *data;
unsigned pos = 0;
u32 bidx, boff, bsize;
u32 tmp;
@@ -545,15 +544,12 @@ affs_do_readpage_ofs(struct page *page, unsigned to, int create)
return PTR_ERR(bh);
tmp = min(bsize - boff, to - pos);
BUG_ON(pos + tmp > to || tmp > bsize);
data = kmap_atomic(page);
memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
kunmap_atomic(data);
memcpy_to_page(page, pos, AFFS_DATA(bh) + boff, tmp);
affs_brelse(bh);
bidx++;
pos += tmp;
boff = 0;
}
flush_dcache_page(page);
return 0;
}

View File

@@ -132,12 +132,6 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (IS_ERR(page))
return PTR_ERR(page);
if (PageError(page)) {
ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
put_page(page);
return ret;
}
buf = kmap(page);
ret = -EINVAL;
if (buf[size - 1] == '.')

View File

@@ -400,8 +400,8 @@ static const struct file_operations aio_ring_fops = {
};
#if IS_ENABLED(CONFIG_MIGRATION)
static int aio_migratepage(struct address_space *mapping, struct page *new,
struct page *old, enum migrate_mode mode)
static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode)
{
struct kioctx *ctx;
unsigned long flags;
@@ -435,10 +435,10 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
goto out;
}
idx = old->index;
idx = src->index;
if (idx < (pgoff_t)ctx->nr_pages) {
/* Make sure the old page hasn't already been changed */
if (ctx->ring_pages[idx] != old)
/* Make sure the old folio hasn't already been changed */
if (ctx->ring_pages[idx] != &src->page)
rc = -EAGAIN;
} else
rc = -EINVAL;
@@ -447,27 +447,27 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
goto out_unlock;
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
get_page(new);
BUG_ON(folio_test_writeback(src));
folio_get(dst);
rc = migrate_page_move_mapping(mapping, new, old, 1);
rc = folio_migrate_mapping(mapping, dst, src, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
put_page(new);
folio_put(dst);
goto out_unlock;
}
/* Take completion_lock to prevent other writes to the ring buffer
* while the old page is copied to the new. This prevents new
* while the old folio is copied to the new. This prevents new
* events from being lost.
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
migrate_page_copy(new, old);
BUG_ON(ctx->ring_pages[idx] != old);
ctx->ring_pages[idx] = new;
folio_migrate_copy(dst, src);
BUG_ON(ctx->ring_pages[idx] != &src->page);
ctx->ring_pages[idx] = &dst->page;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
/* The old page is no longer accessible. */
put_page(old);
/* The old folio is no longer accessible. */
folio_put(src);
out_unlock:
mutex_unlock(&ctx->ring_lock);
@@ -475,13 +475,13 @@ out:
spin_unlock(&mapping->private_lock);
return rc;
}
#else
#define aio_migrate_folio NULL
#endif
static const struct address_space_operations aio_ctx_aops = {
.dirty_folio = noop_dirty_folio,
#if IS_ENABLED(CONFIG_MIGRATION)
.migratepage = aio_migratepage,
#endif
.migrate_folio = aio_migrate_folio,
};
static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
@@ -1475,7 +1475,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
req->ki_flags = iocb_flags(req->ki_filp);
req->ki_flags = req->ki_filp->f_iocb_flags;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {

View File

@@ -108,8 +108,7 @@ static const struct export_operations befs_export_operations = {
* passes it the address of befs_get_block, for mapping file
* positions to disk blocks.
*/
static int
befs_read_folio(struct file *file, struct folio *folio)
static int befs_read_folio(struct file *file, struct folio *folio)
{
return block_read_full_folio(folio, befs_get_block);
}
@@ -470,13 +469,12 @@ befs_destroy_inodecache(void)
*/
static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
{
struct page *page = &folio->page;
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct super_block *sb = inode->i_sb;
struct befs_inode_info *befs_ino = BEFS_I(inode);
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
char *link = page_address(page);
char *link = folio_address(folio);
if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
@@ -489,12 +487,12 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
goto fail;
}
link[len - 1] = '\0';
SetPageUptodate(page);
unlock_page(page);
folio_mark_uptodate(folio);
folio_unlock(folio);
return 0;
fail:
SetPageError(page);
unlock_page(page);
folio_set_error(folio);
folio_unlock(folio);
return -EIO;
}

View File

@@ -13,7 +13,6 @@ struct btrfs_fs_info;
struct btrfs_workqueue;
struct btrfs_work;
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
typedef void (*btrfs_work_func_t)(struct work_struct *arg);
struct btrfs_work {
btrfs_func_t func;

View File

@@ -2028,10 +2028,29 @@ out:
return ret;
}
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
{
struct btrfs_data_container *inodes = ctx;
const size_t c = 3 * sizeof(u64);
if (inodes->bytes_left >= c) {
inodes->bytes_left -= c;
inodes->val[inodes->elem_cnt] = inum;
inodes->val[inodes->elem_cnt + 1] = offset;
inodes->val[inodes->elem_cnt + 2] = root;
inodes->elem_cnt += 3;
} else {
inodes->bytes_missing += c - inodes->bytes_left;
inodes->bytes_left = 0;
inodes->elem_missed += 3;
}
return 0;
}
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset)
void *ctx, bool ignore_offset)
{
int ret;
u64 extent_item_pos;
@@ -2049,17 +2068,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, search_commit_root,
iterate, ctx, ignore_offset);
build_ino_list, ctx, ignore_offset);
return ret;
}
typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
struct extent_buffer *eb, void *ctx);
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
struct extent_buffer *eb, struct inode_fs_paths *ipath);
static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
{
int ret = 0;
int slot;
@@ -2068,6 +2085,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
u32 name_len;
u64 parent = 0;
int found = 0;
struct btrfs_root *fs_root = ipath->fs_root;
struct btrfs_path *path = ipath->btrfs_path;
struct extent_buffer *eb;
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
@@ -2103,8 +2122,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
"following ref at offset %u for inode %llu in tree %llu",
cur, found_key.objectid,
fs_root->root_key.objectid);
ret = iterate(parent, name_len,
(unsigned long)(iref + 1), eb, ctx);
ret = inode_to_path(parent, name_len,
(unsigned long)(iref + 1), eb, ipath);
if (ret)
break;
len = sizeof(*iref) + name_len;
@@ -2118,15 +2137,15 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
return ret;
}
static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath)
{
int ret;
int slot;
u64 offset = 0;
u64 parent;
int found = 0;
struct btrfs_root *fs_root = ipath->fs_root;
struct btrfs_path *path = ipath->btrfs_path;
struct extent_buffer *eb;
struct btrfs_inode_extref *extref;
u32 item_size;
@@ -2162,8 +2181,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
parent = btrfs_inode_extref_parent(eb, extref);
name_len = btrfs_inode_extref_name_len(eb, extref);
ret = iterate(parent, name_len,
(unsigned long)&extref->name, eb, ctx);
ret = inode_to_path(parent, name_len,
(unsigned long)&extref->name, eb, ipath);
if (ret)
break;
@@ -2180,34 +2199,13 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
return ret;
}
static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path, iterate_irefs_t *iterate,
void *ctx)
{
int ret;
int found_refs = 0;
ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
if (!ret)
++found_refs;
else if (ret != -ENOENT)
return ret;
ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
if (ret == -ENOENT && found_refs)
return 0;
return ret;
}
/*
* returns 0 if the path could be dumped (probably truncated)
* returns <0 in case of an error
*/
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
struct extent_buffer *eb, void *ctx)
struct extent_buffer *eb, struct inode_fs_paths *ipath)
{
struct inode_fs_paths *ipath = ctx;
char *fspath;
char *fspath_min;
int i = ipath->fspath->elem_cnt;
@@ -2248,8 +2246,20 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
*/
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
{
return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
inode_to_path, ipath);
int ret;
int found_refs = 0;
ret = iterate_inode_refs(inum, ipath);
if (!ret)
++found_refs;
else if (ret != -ENOENT)
return ret;
ret = iterate_inode_extrefs(inum, ipath);
if (ret == -ENOENT && found_refs)
return 0;
return ret;
}
struct btrfs_data_container *init_data_container(u32 total_bytes)

View File

@@ -35,8 +35,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
bool ignore_offset);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx,
struct btrfs_path *path, void *ctx,
bool ignore_offset);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);

View File

@@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
< block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total
< block_group->length * factor);
WARN_ON(block_group->zone_is_active &&
block_group->space_info->active_total_bytes
< block_group->length);
}
block_group->space_info->total_bytes -= block_group->length;
if (block_group->zone_is_active)
block_group->space_info->active_total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
block_group->space_info->bytes_zone_unusable -=
@@ -1816,11 +1821,10 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
stripe_nr = physical - map->stripes[i].physical;
stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10)) {
stripe_nr = stripe_nr * map->num_stripes + i;
stripe_nr = div_u64(stripe_nr, map->sub_stripes);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = stripe_nr * map->num_stripes + i;
}
/*
* The remaining case would be for RAID56, multiply by
@@ -2108,7 +2112,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super,
cache->zone_unusable, &space_info);
cache->zone_unusable, cache->zone_is_active,
&space_info);
cache->space_info = space_info;
@@ -2178,7 +2183,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
}
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, &space_info);
0, 0, false, &space_info);
bg->space_info = space_info;
link_block_group(bg);
@@ -2559,7 +2564,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, cache->zone_unusable,
&cache->space_info);
cache->zone_is_active, &cache->space_info);
btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -2659,6 +2664,14 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
/*
* We have allocated a new chunk. We also need to activate that chunk to
* grant metadata tickets for zoned filesystem.
*/
ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
if (ret == -ETXTBSY)
goto unlock_out;
@@ -3761,6 +3774,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
* attempt.
*/
wait_for_alloc = true;
force = CHUNK_ALLOC_NO_FORCE;
spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
@@ -3883,6 +3897,14 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
} else {
/*
* We have a new chunk. We also need to activate it for
* zoned filesystem.
*/
ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
if (ret < 0)
return;
/*
* If we fail to add the chunk item here, we end up
* trying again at phase 2 of chunk allocation, at

View File

@@ -118,7 +118,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
block_rsv->full = true;
} else {
num_bytes = 0;
}
@@ -142,7 +142,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
bytes_to_add = min(num_bytes, bytes_to_add);
dest->reserved += bytes_to_add;
if (dest->reserved >= dest->size)
dest->full = 1;
dest->full = true;
num_bytes -= bytes_to_add;
}
spin_unlock(&dest->lock);
@@ -171,7 +171,7 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
return 0;
}
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type)
{
memset(rsv, 0, sizeof(*rsv));
spin_lock_init(&rsv->lock);
@@ -180,7 +180,7 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
unsigned short type)
enum btrfs_rsv_type type)
{
btrfs_init_block_rsv(rsv, type);
rsv->space_info = btrfs_find_space_info(fs_info,
@@ -188,7 +188,7 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
}
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type)
enum btrfs_rsv_type type)
{
struct btrfs_block_rsv *block_rsv;
@@ -304,7 +304,7 @@ int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
if (block_rsv->reserved >= num_bytes) {
block_rsv->reserved -= num_bytes;
if (block_rsv->reserved < block_rsv->size)
block_rsv->full = 0;
block_rsv->full = false;
ret = 0;
}
spin_unlock(&block_rsv->lock);
@@ -319,7 +319,7 @@ void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
if (update_size)
block_rsv->size += num_bytes;
else if (block_rsv->reserved >= block_rsv->size)
block_rsv->full = 1;
block_rsv->full = true;
spin_unlock(&block_rsv->lock);
}
@@ -341,7 +341,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
}
global_rsv->reserved -= num_bytes;
if (global_rsv->reserved < global_rsv->size)
global_rsv->full = 0;
global_rsv->full = false;
spin_unlock(&global_rsv->lock);
btrfs_block_rsv_add_bytes(dest, num_bytes, true);
@@ -408,10 +408,7 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
btrfs_try_granting_tickets(fs_info, sinfo);
}
if (block_rsv->reserved == block_rsv->size)
block_rsv->full = 1;
else
block_rsv->full = 0;
block_rsv->full = (block_rsv->reserved == block_rsv->size);
if (block_rsv->size >= sinfo->total_bytes)
sinfo->force_alloc = CHUNK_ALLOC_FORCE;

View File

@@ -9,7 +9,7 @@ enum btrfs_reserve_flush_enum;
/*
* Types of block reserves
*/
enum {
enum btrfs_rsv_type {
BTRFS_BLOCK_RSV_GLOBAL,
BTRFS_BLOCK_RSV_DELALLOC,
BTRFS_BLOCK_RSV_TRANS,
@@ -25,9 +25,10 @@ struct btrfs_block_rsv {
u64 reserved;
struct btrfs_space_info *space_info;
spinlock_t lock;
unsigned short full;
unsigned short type;
unsigned short failfast;
bool full;
bool failfast;
/* Block reserve type, one of BTRFS_BLOCK_RSV_* */
enum btrfs_rsv_type type:8;
/*
* Qgroup equivalent for @size @reserved
@@ -49,13 +50,13 @@ struct btrfs_block_rsv {
u64 qgroup_rsv_reserved;
};
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type);
void btrfs_init_root_block_rsv(struct btrfs_root *root);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
enum btrfs_rsv_type type);
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
unsigned short type);
enum btrfs_rsv_type type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,

View File

@@ -279,19 +279,31 @@ static inline void btrfs_insert_inode_hash(struct inode *inode)
__insert_inode_hash(inode, h);
}
#if BITS_PER_LONG == 32
/*
* On 32 bit systems the i_ino of struct inode is 32 bits (unsigned long), so
* we use the inode's location objectid which is a u64 to avoid truncation.
*/
static inline u64 btrfs_ino(const struct btrfs_inode *inode)
{
u64 ino = inode->location.objectid;
/*
* !ino: btree_inode
* type == BTRFS_ROOT_ITEM_KEY: subvol dir
*/
if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
/* type == BTRFS_ROOT_ITEM_KEY: subvol dir */
if (inode->location.type == BTRFS_ROOT_ITEM_KEY)
ino = inode->vfs_inode.i_ino;
return ino;
}
#else
static inline u64 btrfs_ino(const struct btrfs_inode *inode)
{
return inode->vfs_inode.i_ino;
}
#endif
static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
{
i_size_write(&inode->vfs_inode, size);
@@ -305,8 +317,7 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
if (root == root->fs_info->tree_root &&
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}

View File

@@ -136,109 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
unsigned long disk_size)
{
return sizeof(struct compressed_bio) +
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
}
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
u64 disk_start)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u32 csum_size = fs_info->csum_size;
const u32 sectorsize = fs_info->sectorsize;
struct page *page;
unsigned int i;
char *kaddr;
u8 csum[BTRFS_CSUM_SIZE];
struct compressed_bio *cb = bio->bi_private;
u8 *cb_sum = cb->sums;
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
return 0;
shash->tfm = fs_info->csum_shash;
for (i = 0; i < cb->nr_pages; i++) {
u32 pg_offset;
u32 bytes_left = PAGE_SIZE;
page = cb->compressed_pages[i];
/* Determine the remaining bytes inside the page first */
if (i == cb->nr_pages - 1)
bytes_left = cb->compressed_len - i * PAGE_SIZE;
/* Hash through the page sector by sector */
for (pg_offset = 0; pg_offset < bytes_left;
pg_offset += sectorsize) {
kaddr = kmap_atomic(page);
crypto_shash_digest(shash, kaddr + pg_offset,
sectorsize, csum);
kunmap_atomic(kaddr);
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
if (btrfs_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
cb_sum += csum_size;
disk_start += sectorsize;
}
}
return 0;
}
/*
* Reduce bio and io accounting for a compressed_bio with its corresponding bio.
*
* Return true if there is no pending bio nor io.
* Return false otherwise.
*/
static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
unsigned int bi_size = 0;
bool last_io = false;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
/*
* At endio time, bi_iter.bi_size doesn't represent the real bio size.
* Thus here we have to iterate through all segments to grab correct
* bio size.
*/
bio_for_each_segment_all(bvec, bio, iter_all)
bi_size += bvec->bv_len;
if (bio->bi_status)
cb->status = bio->bi_status;
ASSERT(bi_size && bi_size <= cb->compressed_len);
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
&cb->pending_sectors);
/*
* Here we must wake up the possible error handler after all other
* operations on @cb finished, or we can race with
* finish_compressed_bio_*() which may free @cb.
*/
wake_up_var(cb);
return last_io;
}
static void finish_compressed_bio_read(struct compressed_bio *cb)
{
unsigned int index;
struct page *page;
if (cb->status == BLK_STS_OK)
cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
/* Release the compressed pages */
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
@@ -247,85 +152,63 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
}
/* Do io completion on the original bio */
if (cb->status != BLK_STS_OK) {
if (cb->status != BLK_STS_OK)
cb->orig_bio->bi_status = cb->status;
bio_endio(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
/*
* We have verified the checksum already, set page checked so
* the end_io handlers know about it
*/
ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
u64 bvec_start = page_offset(bvec->bv_page) +
bvec->bv_offset;
btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
bvec->bv_page, bvec_start,
bvec->bv_len);
}
bio_endio(cb->orig_bio);
}
bio_endio(cb->orig_bio);
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
}
/* when we finish reading compressed pages from the disk, we
* decompress them and then run the bio end_io routines on the
* decompressed pages (in the inode address space).
*
* This allows the checksumming and other IO error handling routines
* to work normally
*
* The compressed pages are freed here, and it must be run
* in process context
/*
* Verify the checksums and kick off repair if needed on the uncompressed data
* before decompressing it into the original bio and freeing the uncompressed
* pages.
*/
static void end_compressed_bio_read(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
unsigned int mirror = btrfs_bio(bio)->mirror_num;
int ret = 0;
struct inode *inode = cb->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *bi = BTRFS_I(inode);
bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
blk_status_t status = bio->bi_status;
struct btrfs_bio *bbio = btrfs_bio(bio);
struct bvec_iter iter;
struct bio_vec bv;
u32 offset;
if (!dec_and_test_compressed_bio(cb, bio))
goto out;
btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
u64 start = bbio->file_offset + offset;
/*
* Record the correct mirror_num in cb->orig_bio so that
* read-repair can work properly.
*/
btrfs_bio(cb->orig_bio)->mirror_num = mirror;
cb->mirror_num = mirror;
if (!status &&
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
bv.bv_page, bv.bv_offset))) {
clean_io_failure(fs_info, &bi->io_failure_tree,
&bi->io_tree, start, bv.bv_page,
btrfs_ino(bi), bv.bv_offset);
} else {
int ret;
/*
* Some IO in this cb have failed, just skip checksum as there
* is no way it could be correct.
*/
if (cb->status != BLK_STS_OK)
goto csum_failed;
refcount_inc(&cb->pending_ios);
ret = btrfs_repair_one_sector(inode, bbio, offset,
bv.bv_page, bv.bv_offset,
btrfs_submit_data_read_bio);
if (ret) {
refcount_dec(&cb->pending_ios);
status = errno_to_blk_status(ret);
}
}
}
inode = cb->inode;
ret = check_compressed_csum(BTRFS_I(inode), bio,
bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
if (status)
cb->status = status;
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
ret = btrfs_decompress_bio(cb);
csum_failed:
if (ret)
cb->status = errno_to_blk_status(ret);
finish_compressed_bio_read(cb);
out:
if (refcount_dec_and_test(&cb->pending_ios))
finish_compressed_bio_read(cb);
btrfs_bio_free_csum(bbio);
bio_put(bio);
}
@@ -403,6 +286,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
kfree(cb);
}
static void btrfs_finish_compressed_write_work(struct work_struct *work)
{
struct compressed_bio *cb =
container_of(work, struct compressed_bio, write_end_work);
finish_compressed_bio_write(cb);
}
/*
* Do the cleanup once all the compressed pages hit the disk. This will clear
* writeback on the file pages and free the compressed pages.
@@ -414,29 +305,18 @@ static void end_compressed_bio_write(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
if (!dec_and_test_compressed_bio(cb, bio))
goto out;
if (bio->bi_status)
cb->status = bio->bi_status;
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
if (refcount_dec_and_test(&cb->pending_ios)) {
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
finish_compressed_bio_write(cb);
out:
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
}
bio_put(bio);
}
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
struct bio *bio, int mirror_num)
{
blk_status_t ret;
ASSERT(bio->bi_iter.bi_size);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret)
return ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num);
return ret;
}
/*
* Allocate a compressed_bio, which will be used to read/write on-disk
* (aka, compressed) * data.
@@ -487,7 +367,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
return ERR_PTR(ret);
}
*next_stripe_start = disk_bytenr + geom.len;
refcount_inc(&cb->pending_ios);
return bio;
}
@@ -514,26 +394,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct compressed_bio *cb;
u64 cur_disk_bytenr = disk_start;
u64 next_stripe_start;
blk_status_t ret;
blk_status_t ret = BLK_STS_OK;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
const bool use_append = btrfs_use_zone_append(inode, disk_start);
const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->writeback = writeback;
cb->orig_bio = NULL;
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
if (blkcg_css)
@@ -554,8 +433,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
&next_stripe_start);
if (IS_ERR(bio)) {
ret = errno_to_blk_status(PTR_ERR(bio));
bio = NULL;
goto finish_cb;
break;
}
if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT;
@@ -599,44 +477,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, true);
if (ret)
goto finish_cb;
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
break;
}
}
ret = submit_compressed_bio(fs_info, bio, 0);
if (ret)
goto finish_cb;
ASSERT(bio->bi_iter.bi_size);
btrfs_submit_bio(fs_info, bio, 0);
bio = NULL;
}
cond_resched();
}
if (blkcg_css)
kthread_associate_blkcg(NULL);
return 0;
finish_cb:
if (blkcg_css)
kthread_associate_blkcg(NULL);
if (bio) {
bio->bi_status = ret;
bio_endio(bio);
}
/* Last byte of @cb is submitted, endio will free @cb */
if (cur_disk_bytenr == disk_start + compressed_len)
return ret;
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
(disk_start + compressed_len - cur_disk_bytenr) >>
fs_info->sectorsize_bits);
/*
* Even with previous bio ended, we should still have io not yet
* submitted, thus need to finish manually.
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_write(cb);
if (refcount_dec_and_test(&cb->pending_ios))
finish_compressed_bio_write(cb);
return ret;
}
@@ -765,7 +624,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
int zeros;
zeros = PAGE_SIZE - zero_offset;
memzero_page(page, zero_offset, zeros);
flush_dcache_page(page);
}
}
@@ -819,7 +677,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
blk_status_t ret;
int ret2;
int i;
u8 *sums;
em_tree = &BTRFS_I(inode)->extent_tree;
@@ -837,17 +694,15 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb) {
ret = BLK_STS_RESOURCE;
goto out;
}
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = inode;
cb->mirror_num = mirror_num;
sums = cb->sums;
cb->start = em->orig_start;
em_len = em->len;
@@ -893,9 +748,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
REQ_OP_READ, end_compressed_bio_read,
&next_stripe_start);
if (IS_ERR(comp_bio)) {
ret = errno_to_blk_status(PTR_ERR(comp_bio));
comp_bio = NULL;
goto finish_cb;
cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
break;
}
}
/*
@@ -931,22 +785,33 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
submit = true;
if (submit) {
unsigned int nr_sectors;
/* Save the original iter for read repair */
if (bio_op(comp_bio) == REQ_OP_READ)
btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
if (ret)
goto finish_cb;
/*
* Save the initial offset of this chunk, as there
* is no direct correlation between compressed pages and
* the original file offset. The field is only used for
* priting error messages.
*/
btrfs_bio(comp_bio)->file_offset = file_offset;
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
fs_info->sectorsize);
sums += fs_info->csum_size * nr_sectors;
ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
break;
}
ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
if (ret)
goto finish_cb;
ASSERT(comp_bio->bi_iter.bi_size);
btrfs_submit_bio(fs_info, comp_bio, mirror_num);
comp_bio = NULL;
}
}
if (refcount_dec_and_test(&cb->pending_ios))
finish_compressed_bio_read(cb);
return;
fail:
@@ -964,25 +829,6 @@ out:
bio->bi_status = ret;
bio_endio(bio);
return;
finish_cb:
if (comp_bio) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
/* All bytes of @cb is submitted, endio will free @cb */
if (cur_disk_byte == disk_bytenr + compressed_len)
return;
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
(disk_bytenr + compressed_len - cur_disk_byte) >>
fs_info->sectorsize_bits);
/*
* Even with previous bio ended, we should still have io not yet
* submitted, thus need to finish @cb manually.
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_read(cb);
}
/*
@@ -1481,7 +1327,6 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
ASSERT(copy_start - decompressed < buf_len);
memcpy_to_page(bvec.bv_page, bvec.bv_offset,
buf + copy_start - decompressed, copy_len);
flush_dcache_page(bvec.bv_page);
cur_offset += copy_len;
bio_advance(orig_bio, copy_len);

View File

@@ -30,8 +30,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
struct compressed_bio {
/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
refcount_t pending_sectors;
/* Number of outstanding bios */
refcount_t pending_ios;
/* Number of compressed pages in the array */
unsigned int nr_pages;
@@ -59,16 +59,12 @@ struct compressed_bio {
/* IO errors */
blk_status_t status;
int mirror_num;
/* for reads, this is the bio we are copying the data into */
struct bio *orig_bio;
/*
* the start of a variable length array of checksums only
* used by reads
*/
u8 sums[];
union {
/* For reads, this is the bio we are copying the data into */
struct bio *orig_bio;
struct work_struct write_end_work;
};
};
static inline unsigned int btrfs_compress_type(unsigned int type_level)

View File

@@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args;
#define BTRFS_STAT_CURR 0
#define BTRFS_STAT_PREV 1
/*
* Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
*/
static inline u32 count_max_extents(u64 size)
{
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
}
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@@ -229,6 +221,13 @@ struct btrfs_root_backup {
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
#define BTRFS_SUPER_INFO_SIZE 4096
/*
* The reserved space at the beginning of each device.
* It covers the primary super block and leaves space for potential use by other
* tools like bootloaders or to lower potential damage of accidental overwrite.
*/
#define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M)
/*
* the super block basically lists the main trees of the FS
* it currently lacks any block count etc etc
@@ -248,8 +247,12 @@ struct btrfs_super_block {
__le64 chunk_root;
__le64 log_root;
/* this will help find the new super based on the log root */
__le64 log_root_transid;
/*
* This member has never been utilized since the very beginning, thus
* it's always 0 regardless of kernel version. We always use
* generation + 1 to read log tree root. So here we mark it deprecated.
*/
__le64 __unused_log_root_transid;
__le64 total_bytes;
__le64 bytes_used;
__le64 root_dir_objectid;
@@ -635,6 +638,9 @@ enum {
/* Indicate we have half completed snapshot deletions pending. */
BTRFS_FS_UNFINISHED_DROPS,
/* Indicate we have to finish a zone to do next allocation. */
BTRFS_FS_NEED_ZONE_FINISH,
#if BITS_PER_LONG == 32
/* Indicate if we have error/warn message printed on 32bit systems */
BTRFS_FS_32BIT_ERROR,
@@ -656,6 +662,18 @@ enum btrfs_exclusive_operation {
BTRFS_EXCLOP_SWAP_ACTIVATE,
};
/* Store data about transaction commits, exported via sysfs. */
struct btrfs_commit_stats {
/* Total number of commits */
u64 commit_count;
/* The maximum commit duration so far in ns */
u64 max_commit_dur;
/* The last commit duration in ns */
u64 last_commit_dur;
/* The total commit duration in ns */
u64 total_commit_dur;
};
struct btrfs_fs_info {
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
unsigned long flags;
@@ -850,11 +868,11 @@ struct btrfs_fs_info {
struct btrfs_workqueue *hipri_workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct btrfs_workqueue *endio_workers;
struct btrfs_workqueue *endio_meta_workers;
struct btrfs_workqueue *endio_raid56_workers;
struct workqueue_struct *endio_workers;
struct workqueue_struct *endio_meta_workers;
struct workqueue_struct *endio_raid56_workers;
struct workqueue_struct *rmw_workers;
struct btrfs_workqueue *endio_meta_write_workers;
struct workqueue_struct *compressed_write_workers;
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
struct btrfs_workqueue *caching_workers;
@@ -1032,6 +1050,12 @@ struct btrfs_fs_info {
u32 csums_per_leaf;
u32 stripesize;
/*
* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
* filesystem, on zoned it depends on the device constraints.
*/
u64 max_extent_size;
/* Block groups and devices containing active swapfiles. */
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;
@@ -1047,6 +1071,8 @@ struct btrfs_fs_info {
*/
u64 zone_size;
/* Max size to emit ZONE_APPEND write command */
u64 max_zone_append_size;
struct mutex zoned_meta_io_lock;
spinlock_t treelog_bg_lock;
u64 treelog_bg;
@@ -1063,6 +1089,11 @@ struct btrfs_fs_info {
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
/* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
wait_queue_head_t zone_finish_wait;
/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
@@ -2475,8 +2506,6 @@ BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
chunk_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
log_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
log_root_transid, 64);
BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
log_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
@@ -2733,8 +2762,16 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
static inline u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums,
u64 offset)
{
u64 offset_in_sectors = offset >> fs_info->sectorsize_bits;
return csums + offset_in_sectors * fs_info->csum_size;
}
/*
* Take the number of bytes to be checksummmed and figure out how many leaves
* Take the number of bytes to be checksummed and figure out how many leaves
* it would require to store the csums for that many bytes.
*/
static inline u64 btrfs_csum_bytes_to_leaves(
@@ -3251,11 +3288,18 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 btrfs_file_extent_end(const struct btrfs_path *path);
/* inode.c */
void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int mirror_num, enum btrfs_compression_type compress_type);
void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num);
void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
int mirror_num, enum btrfs_compression_type compress_type);
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
u32 pgoff, u8 *csum, const u8 * const csum_expected);
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
u32 bio_offset, struct page *page, u32 pgoff);
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
u32 bio_offset, struct page *page,
u64 start, u64 end);
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
u32 bio_offset, struct page *page, u32 pgoff);
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
@@ -3305,9 +3349,9 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
struct inode *dir);
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
unsigned *bits);
u32 bits);
void btrfs_clear_delalloc_extent(struct inode *inode,
struct extent_state *state, unsigned *bits);
struct extent_state *state, u32 bits);
void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
struct extent_state *other);
void btrfs_split_delalloc_extent(struct inode *inode,
@@ -3353,6 +3397,12 @@ int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate);
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
int compress_type);
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size,
struct page **pages);
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
struct btrfs_ioctl_encoded_io_args *encoded);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
@@ -4009,6 +4059,19 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
return fs_info->zone_size > 0;
}
/*
* Count how many fs_info->max_extent_size cover the @size
*/
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (!fs_info)
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
#endif
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
}
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
{
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;

View File

@@ -273,7 +273,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
u64 num_bytes, u64 disk_num_bytes,
u64 *meta_reserve, u64 *qgroup_reserve)
{
u64 nr_extents = count_max_extents(num_bytes);
u64 nr_extents = count_max_extents(fs_info, num_bytes);
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
@@ -350,7 +350,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
* needs to free the reservation we just made.
*/
spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
nr_extents = count_max_extents(fs_info, num_bytes);
btrfs_mod_outstanding_extents(inode, nr_extents);
inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
@@ -413,7 +413,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
unsigned num_extents;
spin_lock(&inode->lock);
num_extents = count_max_extents(num_bytes);
num_extents = count_max_extents(fs_info, num_bytes);
btrfs_mod_outstanding_extents(inode, -num_extents);
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);

View File

@@ -52,18 +52,6 @@ static inline void btrfs_init_delayed_node(
INIT_LIST_HEAD(&delayed_node->p_list);
}
static inline int btrfs_is_continuous_delayed_item(
struct btrfs_delayed_item *item1,
struct btrfs_delayed_item *item2)
{
if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
item1->key.objectid == item2->key.objectid &&
item1->key.type == item2->key.type &&
item1->key.offset + 1 == item2->key.offset)
return 1;
return 0;
}
static struct btrfs_delayed_node *btrfs_get_delayed_node(
struct btrfs_inode *btrfs_inode)
{
@@ -398,8 +386,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
}
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
struct btrfs_delayed_item *ins,
int action)
struct btrfs_delayed_item *ins)
{
struct rb_node **p, *node;
struct rb_node *parent_node = NULL;
@@ -408,9 +395,9 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
int cmp;
bool leftmost = true;
if (action == BTRFS_DELAYED_INSERTION_ITEM)
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_node->ins_root;
else if (action == BTRFS_DELAYED_DELETION_ITEM)
else if (ins->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
root = &delayed_node->del_root;
else
BUG();
@@ -436,32 +423,19 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
rb_link_node(node, parent_node, p);
rb_insert_color_cached(node, root, leftmost);
ins->delayed_node = delayed_node;
ins->ins_or_del = action;
if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
action == BTRFS_DELAYED_INSERTION_ITEM &&
/* Delayed items are always for dir index items. */
ASSERT(ins->key.type == BTRFS_DIR_INDEX_KEY);
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM &&
ins->key.offset >= delayed_node->index_cnt)
delayed_node->index_cnt = ins->key.offset + 1;
delayed_node->index_cnt = ins->key.offset + 1;
delayed_node->count++;
atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
return 0;
}
static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
struct btrfs_delayed_item *item)
{
return __btrfs_add_delayed_item(node, item,
BTRFS_DELAYED_INSERTION_ITEM);
}
static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
struct btrfs_delayed_item *item)
{
return __btrfs_add_delayed_item(node, item,
BTRFS_DELAYED_DELETION_ITEM);
}
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
int seq = atomic_inc_return(&delayed_root->items_seq);
@@ -573,7 +547,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid,
num_bytes, 1);
item->bytes_reserved = num_bytes;
/*
* For insertions we track reserved metadata space by accounting
* for the number of leaves that will be used, based on the delayed
* node's index_items_size field.
*/
if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
item->bytes_reserved = num_bytes;
}
return ret;
@@ -599,6 +579,21 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
}
static void btrfs_delayed_item_release_leaves(struct btrfs_delayed_node *node,
unsigned int num_leaves)
{
struct btrfs_fs_info *fs_info = node->root->fs_info;
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, num_leaves);
/* There are no space reservations during log replay, bail out. */
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return;
trace_btrfs_space_reservation(fs_info, "delayed_item", node->inode_id,
bytes, 0);
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv, bytes, NULL);
}
static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -672,22 +667,53 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
}
/*
* Insert a single delayed item or a batch of delayed items that have consecutive
* keys if they exist.
* Insert a single delayed item or a batch of delayed items, as many as possible
* that fit in a leaf. The delayed items (dir index keys) are sorted by their key
* in the rbtree, and if there's a gap between two consecutive dir index items,
* then it means at some point we had delayed dir indexes to add but they got
* removed (by btrfs_delete_delayed_dir_index()) before we attempted to flush them
* into the subvolume tree. Dir index keys also have their offsets coming from a
* monotonically increasing counter, so we can't get new keys with an offset that
* fits within a gap between delayed dir index items.
*/
static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_item *first_item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_node *node = first_item->delayed_node;
LIST_HEAD(item_list);
struct btrfs_delayed_item *curr;
struct btrfs_delayed_item *next;
const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
struct btrfs_item_batch batch;
int total_size;
char *ins_data = NULL;
int ret;
bool continuous_keys_only = false;
lockdep_assert_held(&node->mutex);
/*
* During normal operation the delayed index offset is continuously
* increasing, so we can batch insert all items as there will not be any
* overlapping keys in the tree.
*
* The exception to this is log replay, where we may have interleaved
* offsets in the tree, so our batch needs to be continuous keys only in
* order to ensure we do not end up with out of order items in our leaf.
*/
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
continuous_keys_only = true;
/*
* For delayed items to insert, we track reserved metadata bytes based
* on the number of leaves that we will use.
* See btrfs_insert_delayed_dir_index() and
* btrfs_delayed_item_reserve_metadata()).
*/
ASSERT(first_item->bytes_reserved == 0);
list_add_tail(&first_item->tree_list, &item_list);
batch.total_data_size = first_item->data_len;
@@ -699,9 +725,19 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
int next_size;
next = __btrfs_next_delayed_item(curr);
if (!next || !btrfs_is_continuous_delayed_item(curr, next))
if (!next)
break;
/*
* We cannot allow gaps in the key space if we're doing log
* replay.
*/
if (continuous_keys_only &&
(next->key.offset != curr->key.offset + 1))
break;
ASSERT(next->bytes_reserved == 0);
next_size = next->data_len + sizeof(struct btrfs_item);
if (total_size + next_size > max_size)
break;
@@ -758,9 +794,41 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
*/
btrfs_release_path(path);
ASSERT(node->index_item_leaves > 0);
/*
* For normal operations we will batch an entire leaf's worth of delayed
* items, so if there are more items to process we can decrement
* index_item_leaves by 1 as we inserted 1 leaf's worth of items.
*
* However for log replay we may not have inserted an entire leaf's
* worth of items, we may have not had continuous items, so decrementing
* here would mess up the index_item_leaves accounting. For this case
* only clean up the accounting when there are no items left.
*/
if (next && !continuous_keys_only) {
/*
* We inserted one batch of items into a leaf a there are more
* items to flush in a future batch, now release one unit of
* metadata space from the delayed block reserve, corresponding
* the leaf we just flushed to.
*/
btrfs_delayed_item_release_leaves(node, 1);
node->index_item_leaves--;
} else if (!next) {
/*
* There are no more items to insert. We can have a number of
* reserved leaves > 1 here - this happens when many dir index
* items are added and then removed before they are flushed (file
* names with a very short life, never span a transaction). So
* release all remaining leaves.
*/
btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
node->index_item_leaves = 0;
}
list_for_each_entry_safe(curr, next, &item_list, tree_list) {
list_del(&curr->tree_list);
btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);
}
out:
@@ -796,62 +864,75 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
struct extent_buffer *leaf;
struct btrfs_key key;
struct list_head head;
int nitems, i, last_item;
int ret = 0;
struct extent_buffer *leaf = path->nodes[0];
LIST_HEAD(batch_list);
int nitems, slot, last_slot;
int ret;
u64 total_reserved_size = item->bytes_reserved;
BUG_ON(!path->nodes[0]);
ASSERT(leaf != NULL);
leaf = path->nodes[0];
i = path->slots[0];
last_item = btrfs_header_nritems(leaf) - 1;
if (i > last_item)
return -ENOENT; /* FIXME: Is errno suitable? */
next = item;
INIT_LIST_HEAD(&head);
btrfs_item_key_to_cpu(leaf, &key, i);
nitems = 0;
slot = path->slots[0];
last_slot = btrfs_header_nritems(leaf) - 1;
/*
* count the number of the dir index items that we can delete in batch
* Our caller always gives us a path pointing to an existing item, so
* this can not happen.
*/
while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
list_add_tail(&next->tree_list, &head);
nitems++;
ASSERT(slot <= last_slot);
if (WARN_ON(slot > last_slot))
return -ENOENT;
nitems = 1;
curr = item;
list_add_tail(&curr->tree_list, &batch_list);
/*
* Keep checking if the next delayed item matches the next item in the
* leaf - if so, we can add it to the batch of items to delete from the
* leaf.
*/
while (slot < last_slot) {
struct btrfs_key key;
curr = next;
next = __btrfs_next_delayed_item(curr);
if (!next)
break;
if (!btrfs_is_continuous_delayed_item(curr, next))
slot++;
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_comp_cpu_keys(&next->key, &key) != 0)
break;
i++;
if (i > last_item)
break;
btrfs_item_key_to_cpu(leaf, &key, i);
nitems++;
curr = next;
list_add_tail(&curr->tree_list, &batch_list);
total_reserved_size += curr->bytes_reserved;
}
if (!nitems)
return 0;
ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
if (ret)
goto out;
return ret;
list_for_each_entry_safe(curr, next, &head, tree_list) {
btrfs_delayed_item_release_metadata(root, curr);
/* In case of BTRFS_FS_LOG_RECOVERING items won't have reserved space */
if (total_reserved_size > 0) {
/*
* Check btrfs_delayed_item_reserve_metadata() to see why we
* don't need to release/reserve qgroup space.
*/
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, total_reserved_size,
0);
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
total_reserved_size, NULL);
}
list_for_each_entry_safe(curr, next, &batch_list, tree_list) {
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
}
out:
return ret;
return 0;
}
static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
@@ -859,43 +940,52 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
int ret = 0;
do_again:
mutex_lock(&node->mutex);
curr = __btrfs_first_delayed_deletion_item(node);
if (!curr)
goto delete_fail;
while (ret == 0) {
struct btrfs_delayed_item *item;
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
/*
* can't find the item which the node points to, so this node
* is invalid, just drop it.
*/
prev = curr;
curr = __btrfs_next_delayed_item(prev);
btrfs_release_delayed_item(prev);
ret = 0;
btrfs_release_path(path);
if (curr) {
mutex_lock(&node->mutex);
item = __btrfs_first_delayed_deletion_item(node);
if (!item) {
mutex_unlock(&node->mutex);
goto do_again;
} else
goto delete_fail;
break;
}
ret = btrfs_search_slot(trans, root, &item->key, path, -1, 1);
if (ret > 0) {
/*
* There's no matching item in the leaf. This means we
* have already deleted this item in a past run of the
* delayed items. We ignore errors when running delayed
* items from an async context, through a work queue job
* running btrfs_async_run_delayed_root(), and don't
* release delayed items that failed to complete. This
* is because we will retry later, and at transaction
* commit time we always run delayed items and will
* then deal with errors if they fail to run again.
*
* So just release delayed items for which we can't find
* an item in the tree, and move to the next item.
*/
btrfs_release_path(path);
btrfs_release_delayed_item(item);
ret = 0;
} else if (ret == 0) {
ret = btrfs_batch_delete_items(trans, root, path, item);
btrfs_release_path(path);
}
/*
* We unlock and relock on each iteration, this is to prevent
* blocking other tasks for too long while we are being run from
* the async context (work queue job). Those tasks are typically
* running system calls like creat/mkdir/rename/unlink/etc which
* need to add delayed items to this delayed node.
*/
mutex_unlock(&node->mutex);
}
btrfs_batch_delete_items(trans, root, path, curr);
btrfs_release_path(path);
mutex_unlock(&node->mutex);
goto do_again;
delete_fail:
btrfs_release_path(path);
mutex_unlock(&node->mutex);
return ret;
}
@@ -1354,9 +1444,13 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_disk_key *disk_key, u8 type,
u64 index)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *delayed_item;
struct btrfs_dir_item *dir_item;
bool reserve_leaf_space;
u32 data_len;
int ret;
delayed_node = btrfs_get_or_create_delayed_node(dir);
@@ -1372,6 +1466,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
delayed_item->key.objectid = btrfs_ino(dir);
delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
delayed_item->key.offset = index;
delayed_item->ins_or_del = BTRFS_DELAYED_INSERTION_ITEM;
dir_item = (struct btrfs_dir_item *)delayed_item->data;
dir_item->location = *disk_key;
@@ -1381,15 +1476,52 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
btrfs_set_stack_dir_type(dir_item, type);
memcpy((char *)(dir_item + 1), name, name_len);
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible
*/
BUG_ON(ret);
data_len = delayed_item->data_len + sizeof(struct btrfs_item);
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len;
reserve_leaf_space = true;
} else {
delayed_node->curr_index_batch_size += data_len;
reserve_leaf_space = false;
}
if (reserve_leaf_space) {
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
delayed_item);
/*
* Space was reserved for a dir index item insertion when we
* started the transaction, so getting a failure here should be
* impossible.
*/
if (WARN_ON(ret)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item);
goto release_node;
}
delayed_node->index_item_leaves++;
} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
/*
* Adding the new dir index item does not require touching another
* leaf, so we can release 1 unit of metadata that was previously
* reserved when starting the transaction. This applies only to
* the case where we had a transaction start and excludes the
* transaction join case (when replaying log trees).
*/
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, bytes, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
ASSERT(trans->bytes_reserved >= bytes);
trans->bytes_reserved -= bytes;
}
ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
@@ -1417,8 +1549,37 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
return 1;
}
btrfs_delayed_item_release_metadata(node->root, item);
/*
* For delayed items to insert, we track reserved metadata bytes based
* on the number of leaves that we will use.
* See btrfs_insert_delayed_dir_index() and
* btrfs_delayed_item_reserve_metadata()).
*/
ASSERT(item->bytes_reserved == 0);
ASSERT(node->index_item_leaves > 0);
/*
* If there's only one leaf reserved, we can decrement this item from the
* current batch, otherwise we can not because we don't know which leaf
* it belongs to. With the current limit on delayed items, we rarely
* accumulate enough dir index items to fill more than one leaf (even
* when using a leaf size of 4K).
*/
if (node->index_item_leaves == 1) {
const u32 data_len = item->data_len + sizeof(struct btrfs_item);
ASSERT(node->curr_index_batch_size >= data_len);
node->curr_index_batch_size -= data_len;
}
btrfs_release_delayed_item(item);
/* If we now have no more dir index items, we can release all leaves. */
if (RB_EMPTY_ROOT(&node->ins_root.rb_root)) {
btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
node->index_item_leaves = 0;
}
mutex_unlock(&node->mutex);
return 0;
}
@@ -1451,6 +1612,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
}
item->key = item_key;
item->ins_or_del = BTRFS_DELAYED_DELETION_ITEM;
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
/*
@@ -1465,7 +1627,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
}
mutex_lock(&node->mutex);
ret = __btrfs_add_delayed_deletion_item(node, item);
ret = __btrfs_add_delayed_item(node, item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
@@ -1833,12 +1995,17 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
mutex_lock(&delayed_node->mutex);
curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
while (curr_item) {
btrfs_delayed_item_release_metadata(root, curr_item);
prev_item = curr_item;
curr_item = __btrfs_next_delayed_item(prev_item);
btrfs_release_delayed_item(prev_item);
}
if (delayed_node->index_item_leaves > 0) {
btrfs_delayed_item_release_leaves(delayed_node,
delayed_node->index_item_leaves);
delayed_node->index_item_leaves = 0;
}
curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
while (curr_item) {
btrfs_delayed_item_release_metadata(root, curr_item);

View File

@@ -58,6 +58,17 @@ struct btrfs_delayed_node {
u64 index_cnt;
unsigned long flags;
int count;
/*
* The size of the next batch of dir index items to insert (if this
* node is from a directory inode). Protected by @mutex.
*/
u32 curr_index_batch_size;
/*
* Number of leaves reserved for inserting dir index items (if this
* node belongs to a directory inode). This may be larger then the
* actual number of leaves we end up using. Protected by @mutex.
*/
u32 index_item_leaves;
};
struct btrfs_delayed_item {

View File

@@ -132,7 +132,7 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
spin_lock(&delayed_rsv->lock);
delayed_rsv->size += num_bytes;
delayed_rsv->full = 0;
delayed_rsv->full = false;
spin_unlock(&delayed_rsv->lock);
trans->delayed_ref_updates = 0;
}
@@ -175,7 +175,7 @@ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
if (num_bytes)
delayed_refs_rsv->reserved += num_bytes;
if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
delayed_refs_rsv->full = 1;
delayed_refs_rsv->full = true;
spin_unlock(&delayed_refs_rsv->lock);
if (num_bytes)

View File

@@ -587,7 +587,8 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
ASSERT(!IS_ERR(em));
map = em->map_lookup;
num_extents = cur_extent = 0;
num_extents = 0;
cur_extent = 0;
for (i = 0; i < map->num_stripes; i++) {
/* We have more device extent to copy */
if (srcdev != map->stripes[i].dev)

View File

@@ -51,7 +51,6 @@
BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
static void end_workqueue_fn(struct btrfs_work *work);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
/*
* btrfs_end_io_wq structs are used to do processing in task context when an IO
* is complete. This is used during reads to verify checksums, and it is used
* by writes to insert metadata for new file extents after IO is complete.
*/
struct btrfs_end_io_wq {
struct bio *bio;
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
blk_status_t status;
enum btrfs_wq_endio_type metadata;
struct btrfs_work work;
};
static struct kmem_cache *btrfs_end_io_wq_cache;
int __init btrfs_end_io_wq_init(void)
{
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
sizeof(struct btrfs_end_io_wq),
0,
SLAB_MEM_SPREAD,
NULL);
if (!btrfs_end_io_wq_cache)
return -ENOMEM;
return 0;
}
void __cold btrfs_end_io_wq_exit(void)
{
kmem_cache_destroy(btrfs_end_io_wq_cache);
}
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{
if (fs_info->csum_shash)
@@ -256,8 +221,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
goto out;
}
btrfs_err_rl(eb->fs_info,
"parent transid verify failed on %llu wanted %llu found %llu",
eb->start,
"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
eb->start, eb->read_mirror,
parent_transid, btrfs_header_generation(eb));
ret = 1;
clear_extent_buffer_uptodate(eb);
@@ -587,21 +552,23 @@ static int validate_extent_buffer(struct extent_buffer *eb)
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
eb->start, found_start);
btrfs_err_rl(fs_info,
"bad tree block start, mirror %u want %llu have %llu",
eb->read_mirror, eb->start, found_start);
ret = -EIO;
goto out;
}
if (check_tree_block_fsid(eb)) {
btrfs_err_rl(fs_info, "bad fsid on block %llu",
eb->start);
btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
eb->start, eb->read_mirror);
ret = -EIO;
goto out;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "bad tree block level %d on %llu",
(int)btrfs_header_level(eb), eb->start);
btrfs_err(fs_info,
"bad tree block level, mirror %u level %d on logical %llu",
eb->read_mirror, btrfs_header_level(eb), eb->start);
ret = -EIO;
goto out;
}
@@ -612,8 +579,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
if (memcmp(result, header_csum, csum_size) != 0) {
btrfs_warn_rl(fs_info,
"checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
eb->start,
"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
eb->start, eb->read_mirror,
CSUM_FMT_VALUE(csum_size, header_csum),
CSUM_FMT_VALUE(csum_size, result),
btrfs_header_level(eb));
@@ -638,8 +605,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
set_extent_buffer_uptodate(eb);
else
btrfs_err(fs_info,
"block=%llu read time tree block corruption detected",
eb->start);
"read time tree block corruption detected on logical %llu mirror %u",
eb->start, eb->read_mirror);
out:
return ret;
}
@@ -740,58 +707,6 @@ err:
return ret;
}
static void end_workqueue_bio(struct bio *bio)
{
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
struct btrfs_workqueue *wq;
fs_info = end_io_wq->info;
end_io_wq->status = bio->bi_status;
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
wq = fs_info->endio_meta_write_workers;
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
wq = fs_info->endio_freespace_worker;
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
else
wq = fs_info->endio_write_workers;
} else {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
else if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
else
wq = fs_info->endio_workers;
}
btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
btrfs_queue_work(wq, &end_io_wq->work);
}
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
bio->bi_private = end_io_wq;
bio->bi_end_io = end_workqueue_bio;
return 0;
}
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
@@ -816,7 +731,6 @@ static void run_one_async_done(struct btrfs_work *work)
{
struct async_submit_bio *async;
struct inode *inode;
blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
inode = async->inode;
@@ -834,11 +748,7 @@ static void run_one_async_done(struct btrfs_work *work)
* This changes nothing when cgroups aren't in use.
*/
async->bio->bi_opf |= REQ_CGROUP_PUNT;
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
if (ret) {
async->bio->bi_status = ret;
bio_endio(async->bio);
}
btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
}
static void run_one_async_free(struct btrfs_work *work)
@@ -849,16 +759,23 @@ static void run_one_async_free(struct btrfs_work *work)
kfree(async);
}
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
int mirror_num, u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start)
/*
* Submit bio to an async queue.
*
* Retrun:
* - true if the work has been succesfuly submitted
* - false in case of error
*/
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
return BLK_STS_RESOURCE;
return false;
async->inode = inode;
async->bio = bio;
@@ -876,7 +793,7 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
btrfs_queue_work(fs_info->hipri_workers, &async->work);
else
btrfs_queue_work(fs_info->workers, &async->work);
return 0;
return true;
}
static blk_status_t btree_csum_one_bio(struct bio *bio)
@@ -902,7 +819,7 @@ static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio,
{
/*
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
* submission context. Just jump into btrfs_submit_bio.
*/
return btree_csum_one_bio(bio);
}
@@ -924,57 +841,54 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
blk_status_t ret;
bio->bi_opf |= REQ_META;
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
*/
ret = btrfs_bio_wq_end_io(fs_info, bio,
BTRFS_WQ_ENDIO_METADATA);
if (!ret)
ret = btrfs_map_bio(fs_info, bio, mirror_num);
} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
ret = btree_csum_one_bio(bio);
if (!ret)
ret = btrfs_map_bio(fs_info, bio, mirror_num);
} else {
/*
* kthread helpers are used to submit writes so that
* checksumming can happen in parallel across all CPUs
*/
ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
btree_submit_bio_start);
btrfs_submit_bio(fs_info, bio, mirror_num);
return;
}
/*
* Kthread helpers are used to submit writes so that checksumming can
* happen in parallel across all CPUs.
*/
if (should_async_write(fs_info, BTRFS_I(inode)) &&
btrfs_wq_submit_bio(inode, bio, mirror_num, 0, btree_submit_bio_start))
return;
ret = btree_csum_one_bio(bio);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
return;
}
btrfs_submit_bio(fs_info, bio, mirror_num);
}
#ifdef CONFIG_MIGRATION
static int btree_migratepage(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode)
static int btree_migrate_folio(struct address_space *mapping,
struct folio *dst, struct folio *src, enum migrate_mode mode)
{
/*
* we can't safely write a btree page from here,
* we haven't done the locking hook
*/
if (PageDirty(page))
if (folio_test_dirty(src))
return -EAGAIN;
/*
* Buffers may be managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL))
if (folio_get_private(src) &&
!filemap_release_folio(src, GFP_KERNEL))
return -EAGAIN;
return migrate_page(mapping, newpage, page, mode);
return migrate_folio(mapping, dst, src, mode);
}
#else
#define btree_migrate_folio NULL
#endif
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1074,10 +988,8 @@ static const struct address_space_operations btree_aops = {
.writepages = btree_writepages,
.release_folio = btree_release_folio,
.invalidate_folio = btree_invalidate_folio,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,
#endif
.dirty_folio = btree_dirty_folio,
.migrate_folio = btree_migrate_folio,
.dirty_folio = btree_dirty_folio,
};
struct extent_buffer *btrfs_find_create_tree_block(
@@ -1872,7 +1784,7 @@ again:
fail:
/*
* If our caller provided us an anonymous device, then it's his
* responsability to free it in case we fail. So we have to set our
* responsibility to free it in case we fail. So we have to set our
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
* and once again by our caller.
*/
@@ -1955,25 +1867,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
return root;
}
/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens
*/
static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
bio_endio(bio);
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
}
static int cleaner_kthread(void *arg)
{
struct btrfs_fs_info *fs_info = arg;
@@ -2280,10 +2173,14 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->hipri_workers);
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
if (fs_info->endio_workers)
destroy_workqueue(fs_info->endio_workers);
if (fs_info->endio_raid56_workers)
destroy_workqueue(fs_info->endio_raid56_workers);
if (fs_info->rmw_workers)
destroy_workqueue(fs_info->rmw_workers);
if (fs_info->compressed_write_workers)
destroy_workqueue(fs_info->compressed_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->delayed_workers);
@@ -2297,8 +2194,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
* the queues used for metadata I/O, since tasks from those other work
* queues can do metadata I/O operations.
*/
btrfs_destroy_workqueue(fs_info->endio_meta_workers);
btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
if (fs_info->endio_meta_workers)
destroy_workqueue(fs_info->endio_meta_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2428,7 +2325,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
memset(&BTRFS_I(inode)->location, 0, sizeof(struct btrfs_key));
BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
BTRFS_I(inode)->location.type = 0;
BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
btrfs_insert_inode_hash(inode);
}
@@ -2477,25 +2376,18 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->fixup_workers =
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
/*
* endios are largely parallel and should have a very
* low idle thresh
*/
fs_info->endio_workers =
btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
alloc_workqueue("btrfs-endio", flags, max_active);
fs_info->endio_meta_workers =
btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
max_active, 4);
fs_info->endio_meta_write_workers =
btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
max_active, 2);
alloc_workqueue("btrfs-endio-meta", flags, max_active);
fs_info->endio_raid56_workers =
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
max_active, 4);
alloc_workqueue("btrfs-endio-raid56", flags, max_active);
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
fs_info->endio_write_workers =
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
max_active, 2);
fs_info->compressed_write_workers =
alloc_workqueue("btrfs-compressed-write", flags, max_active);
fs_info->endio_freespace_worker =
btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
max_active, 0);
@@ -2510,7 +2402,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
if (!(fs_info->workers && fs_info->hipri_workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
fs_info->compressed_write_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->fixup_workers &&
@@ -2537,6 +2429,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
fs_info->csum_shash = csum_shash;
btrfs_info(fs_info, "using %s (%s) checksum algorithm",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(csum_shash));
return 0;
}
@@ -3255,6 +3150,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
init_waitqueue_head(&fs_info->zone_finish_wait);
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
@@ -3262,6 +3158,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
fs_info->sectorsize_bits = ilog2(4096);
fs_info->stripesize = 4096;
fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
spin_lock_init(&fs_info->swapfile_pins_lock);
fs_info->swapfile_pins = RB_ROOT;
@@ -3593,16 +3491,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
/*
* Flag our filesystem as having big metadata blocks if they are bigger
* than the page size.
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
btrfs_info(fs_info,
"flagging fs with big metadata feature");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
/* Set up fs_info before parsing mount options */
nodesize = btrfs_super_nodesize(disk_super);
@@ -3640,8 +3528,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
btrfs_info(fs_info, "has skinny extents");
/*
* Flag our filesystem as having big metadata blocks if they are bigger
* than the page size.
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
/*
* mixed block groups end up with duplicate but slightly offset
@@ -3670,6 +3562,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
/*
* We have unsupported RO compat features, although RO mounted, we
* should not cause any metadata write, including log replay.
* Or we could screw up whatever the new feature requires.
*/
if (unlikely(features && btrfs_super_log_root(disk_super) &&
!btrfs_test_opt(fs_info, NOLOGREPLAY))) {
btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
features);
err = -EINVAL;
goto fail_alloc;
}
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;

View File

@@ -17,13 +17,6 @@
*/
#define BTRFS_BDEV_BLOCKSIZE (4096)
enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_DATA,
BTRFS_WQ_ENDIO_METADATA,
BTRFS_WQ_ENDIO_FREE_SPACE,
BTRFS_WQ_ENDIO_RAID56,
};
static inline u64 btrfs_sb_offset(int mirror)
{
u64 start = SZ_16K;
@@ -121,11 +114,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
int level, struct btrfs_key *first_key);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
int mirror_num, u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start);
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start);
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num);
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
@@ -145,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_init_root_free_objectid(struct btrfs_root *root);
int __init btrfs_end_io_wq_init(void);
void __cold btrfs_end_io_wq_exit(void);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(u64 objectid,

View File

@@ -1269,7 +1269,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
return ret;
}
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
{
struct btrfs_device *dev = stripe->dev;
struct btrfs_fs_info *fs_info = dev->fs_info;
@@ -1316,76 +1316,60 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 discarded_bytes = 0;
u64 end = bytenr + num_bytes;
u64 cur = bytenr;
struct btrfs_io_context *bioc = NULL;
/*
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are discarding.
* Avoid races with device replace and make sure the devices in the
* stripes don't go away while we are discarding.
*/
btrfs_bio_counter_inc_blocked(fs_info);
while (cur < end) {
struct btrfs_io_stripe *stripe;
struct btrfs_discard_stripe *stripes;
unsigned int num_stripes;
int i;
num_bytes = end - cur;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
&num_bytes, &bioc, 0);
/*
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
* thus we can't continue anyway.
*/
if (ret < 0)
goto out;
stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
if (IS_ERR(stripes)) {
ret = PTR_ERR(stripes);
if (ret == -EOPNOTSUPP)
ret = 0;
break;
}
stripe = bioc->stripes;
for (i = 0; i < bioc->num_stripes; i++, stripe++) {
for (i = 0; i < num_stripes; i++) {
struct btrfs_discard_stripe *stripe = stripes + i;
u64 bytes;
struct btrfs_device *device = stripe->dev;
if (!device->bdev) {
if (!stripe->dev->bdev) {
ASSERT(btrfs_test_opt(fs_info, DEGRADED));
continue;
}
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
&stripe->dev->dev_state))
continue;
ret = do_discard_extent(stripe, &bytes);
if (!ret) {
discarded_bytes += bytes;
} else if (ret != -EOPNOTSUPP) {
if (ret) {
/*
* Logic errors or -ENOMEM, or -EIO, but
* unlikely to happen.
*
* And since there are two loops, explicitly
* go to out to avoid confusion.
* Keep going if discard is not supported by the
* device.
*/
btrfs_put_bioc(bioc);
goto out;
if (ret != -EOPNOTSUPP)
break;
ret = 0;
} else {
discarded_bytes += bytes;
}
/*
* Just in case we get back EOPNOTSUPP for some reason,
* just ignore the return value so we don't screw up
* people calling discard_extent.
*/
ret = 0;
}
btrfs_put_bioc(bioc);
kfree(stripes);
if (ret)
break;
cur += num_bytes;
}
out:
btrfs_bio_counter_dec(fs_info);
if (actual_bytes)
*actual_bytes = discarded_bytes;
if (ret == -EOPNOTSUPP)
ret = 0;
return ret;
}
@@ -3981,23 +3965,63 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
}
}
static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
/* If we can activate new zone, just allocate a chunk and use it */
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return 0;
/*
* We already reached the max active zones. Try to finish one block
* group to make a room for a new block group. This is only possible
* for a data block group because btrfs_zone_finish() may need to wait
* for a running transaction which can cause a deadlock for metadata
* allocation.
*/
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
int ret = btrfs_zone_finish_one_bg(fs_info);
if (ret == 1)
return 0;
else if (ret < 0)
return ret;
}
/*
* If we have enough free space left in an already active block group
* and we can't activate any other zone now, do not allow allocating a
* new chunk and let find_free_extent() retry with a smaller size.
*/
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
return -ENOSPC;
/*
* Even min_alloc_size is not left in any block groups. Since we cannot
* activate a new block group, allocating it may not help. Let's tell a
* caller to try again and hope it progress something by writing some
* parts of the region. That is only possible for data block groups,
* where a part of the region can be written.
*/
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
return -EAGAIN;
/*
* We cannot activate a new block group and no enough space left in any
* block groups. So, allocating a new block group may not help. But,
* there is nothing to do anyway, so let's go with it.
*/
return 0;
}
static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
switch (ffe_ctl->policy) {
case BTRFS_EXTENT_ALLOC_CLUSTERED:
return true;
return 0;
case BTRFS_EXTENT_ALLOC_ZONED:
/*
* If we have enough free space left in an already
* active block group and we can't activate any other
* zone now, do not allow allocating a new chunk and
* let find_free_extent() retry with a smaller size.
*/
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return false;
return true;
return can_allocate_chunk_zoned(fs_info, ffe_ctl);
default:
BUG();
}
@@ -4079,8 +4103,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
int exist = 0;
/*Check if allocation policy allows to create a new chunk */
if (!can_allocate_chunk(fs_info, ffe_ctl))
return -ENOSPC;
ret = can_allocate_chunk(fs_info, ffe_ctl);
if (ret)
return ret;
trans = current->journal_info;
if (trans)
@@ -5992,7 +6017,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
*/
static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
{
u64 start = SZ_1M, len = 0, end = 0;
u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
int ret;
*trimmed = 0;
@@ -6036,8 +6061,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
break;
}
/* Ensure we skip the reserved area in the first 1M */
start = max_t(u64, start, SZ_1M);
/* Ensure we skip the reserved space on each device. */
start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
/*
* If find_first_clear_extent_bit find a range that spans the

File diff suppressed because it is too large Load Diff

View File

@@ -57,6 +57,7 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
struct btrfs_bio;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
@@ -142,15 +143,10 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
struct extent_map_tree;
typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 len);
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
@@ -247,7 +243,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
@@ -266,15 +261,13 @@ struct io_failure_record {
u64 start;
u64 len;
u64 logical;
enum btrfs_compression_type compress_type;
int this_mirror;
int failed_mirror;
int num_copies;
};
int btrfs_repair_one_sector(struct inode *inode,
struct bio *failed_bio, u32 bio_offset,
struct page *page, unsigned int pgoff,
u64 start, int failed_mirror,
int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
u32 bio_offset, struct page *page, unsigned int pgoff,
submit_bio_hook_t *submit_bio_hook);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS

View File

@@ -1848,7 +1848,6 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC);
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -1901,15 +1900,6 @@ relock:
goto buffered;
}
/*
* We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw()
* calls generic_write_sync() (through iomap_dio_complete()), because
* that results in calling fsync (btrfs_sync_file()) which will try to
* lock the inode in exclusive/write mode.
*/
if (is_sync_write)
iocb->ki_flags &= ~IOCB_DSYNC;
/*
* The iov_iter can be mapped to the same file range we are writing to.
* If that's the case, then we will deadlock in the iomap code, because
@@ -1965,17 +1955,24 @@ again:
btrfs_inode_unlock(inode, ilock_flags);
/*
* Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do
* the fsync (call generic_write_sync()).
* If 'err' is -ENOTBLK or we have not written all data, then it means
* we must fallback to buffered IO.
*/
if (is_sync_write)
iocb->ki_flags |= IOCB_DSYNC;
/* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */
if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from))
goto out;
buffered:
/*
* If we are in a NOWAIT context, then return -EAGAIN to signal the caller
* it must retry the operation in a context where blocking is acceptable,
* since we currently don't have NOWAIT semantics support for buffered IO
* and may block there for many reasons (reserving space for example).
*/
if (iocb->ki_flags & IOCB_NOWAIT) {
err = -EAGAIN;
goto out;
}
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
@@ -2038,7 +2035,7 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written, num_sync;
const bool sync = iocb->ki_flags & IOCB_DSYNC;
const bool sync = iocb_is_dsync(iocb);
/*
* If the fs flips readonly due to some impossible error, although we
@@ -2058,9 +2055,11 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
num_written = btrfs_encoded_write(iocb, from, encoded);
num_sync = encoded->len;
} else if (iocb->ki_flags & IOCB_DIRECT) {
num_written = num_sync = btrfs_direct_write(iocb, from);
num_written = btrfs_direct_write(iocb, from);
num_sync = num_written;
} else {
num_written = num_sync = btrfs_buffered_write(iocb, from);
num_written = btrfs_buffered_write(iocb, from);
num_sync = num_written;
}
btrfs_set_inode_last_sub_trans(inode);
@@ -2308,7 +2307,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
ret = BTRFS_LOG_FORCE_COMMIT;
}
/* we've logged all the items and now have a consistent
@@ -2734,7 +2733,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
goto out;
}
rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1);
rsv->failfast = 1;
rsv->failfast = true;
/*
* 1 - update the inode
@@ -3100,7 +3099,8 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
ASSERT(trans != NULL);
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_mtime = current_time(inode);
inode->i_ctime = inode->i_mtime;
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
updated_inode = true;
btrfs_end_transaction(trans);

View File

@@ -3536,7 +3536,8 @@ int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
* data, keep it dense.
*/
if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
cont1_bytes = min_bytes = bytes + empty_size;
cont1_bytes = bytes + empty_size;
min_bytes = cont1_bytes;
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
cont1_bytes = bytes;
min_bytes = fs_info->sectorsize;

File diff suppressed because it is too large Load Diff

View File

@@ -1230,16 +1230,18 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
return em;
}
static u32 get_extent_max_capacity(const struct extent_map *em)
static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
const struct extent_map *em)
{
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
return BTRFS_MAX_COMPRESSED;
return BTRFS_MAX_EXTENT_SIZE;
return fs_info->max_extent_size;
}
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
u32 extent_thresh, u64 newer_than, bool locked)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *next;
bool ret = false;
@@ -1263,7 +1265,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
* If the next extent is at its max capacity, defragging current extent
* makes no sense, as the total number of extents won't change.
*/
if (next->len >= get_extent_max_capacity(em))
if (next->len >= get_extent_max_capacity(fs_info, em))
goto out;
/* Skip older extent */
if (next->generation < newer_than)
@@ -1400,6 +1402,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
bool locked, struct list_head *target_list,
u64 *last_scanned_ret)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
bool last_is_target = false;
u64 cur = start;
int ret = 0;
@@ -1484,7 +1487,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
* Skip extents already at its max capacity, this is mostly for
* compressed extents, which max cap is only 128K.
*/
if (em->len >= get_extent_max_capacity(em))
if (em->len >= get_extent_max_capacity(fs_info, em))
goto next;
/*
@@ -4243,26 +4246,6 @@ out:
return ret;
}
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
{
struct btrfs_data_container *inodes = ctx;
const size_t c = 3 * sizeof(u64);
if (inodes->bytes_left >= c) {
inodes->bytes_left -= c;
inodes->val[inodes->elem_cnt] = inum;
inodes->val[inodes->elem_cnt + 1] = offset;
inodes->val[inodes->elem_cnt + 2] = root;
inodes->elem_cnt += 3;
} else {
inodes->bytes_missing += c - inodes->bytes_left;
inodes->bytes_left = 0;
inodes->elem_missed += 3;
}
return 0;
}
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
void __user *arg, int version)
{
@@ -4312,7 +4295,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
}
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes, ignore_offset);
inodes, ignore_offset);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4355,13 +4338,79 @@ void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->balance_lock);
}
/**
* Try to acquire fs_info::balance_mutex as well as set BTRFS_EXLCOP_BALANCE as
* required.
*
* @fs_info: the filesystem
* @excl_acquired: ptr to boolean value which is set to false in case balance
* is being resumed
*
* Return 0 on success in which case both fs_info::balance is acquired as well
* as exclusive ops are blocked. In case of failure return an error code.
*/
static int btrfs_try_lock_balance(struct btrfs_fs_info *fs_info, bool *excl_acquired)
{
int ret;
/*
* Exclusive operation is locked. Three possibilities:
* (1) some other op is running
* (2) balance is running
* (3) balance is paused -- special case (think resume)
*/
while (1) {
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
*excl_acquired = true;
mutex_lock(&fs_info->balance_mutex);
return 0;
}
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* This is either (2) or (3) */
if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* This is (2) */
ret = -EINPROGRESS;
goto out_failure;
} else {
mutex_unlock(&fs_info->balance_mutex);
/*
* Lock released to allow other waiters to
* continue, we'll reexamine the status again.
*/
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* This is (3) */
*excl_acquired = false;
return 0;
}
}
} else {
/* This is (1) */
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_failure;
}
mutex_unlock(&fs_info->balance_mutex);
}
out_failure:
mutex_unlock(&fs_info->balance_mutex);
*excl_acquired = false;
return ret;
}
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
bool need_unlock; /* for mut. excl. ops lock */
bool need_unlock = true;
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -4378,53 +4427,12 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
goto out;
}
again:
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
}
/*
* mut. excl. ops lock is locked. Three possibilities:
* (1) some other op is running
* (2) balance is running
* (3) balance is paused -- special case (think resume)
*/
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
/*
* Lock released to allow other waiters to continue,
* we'll reexamine the status again.
*/
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* this is (3) */
need_unlock = false;
goto locked;
}
mutex_unlock(&fs_info->balance_mutex);
goto again;
} else {
/* this is (2) */
mutex_unlock(&fs_info->balance_mutex);
ret = -EINPROGRESS;
goto out;
}
} else {
/* this is (1) */
mutex_unlock(&fs_info->balance_mutex);
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
ret = btrfs_try_lock_balance(fs_info, &need_unlock);
if (ret)
goto out;
}
locked:
lockdep_assert_held(&fs_info->balance_mutex);
if (bargs->flags & BTRFS_BALANCE_RESUME) {
if (!fs_info->balance_ctl) {
ret = -ENOTCONN;

Some files were not shown because too many files have changed in this diff Show More