mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 18:41:58 +09:00
Merge 526942b813 ("Merge tag 'ata-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata") into android-mainline
Steps on the way to 6.0-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ia5d8d07975a1d38e7cc872e45bd7c0e84d54cfa3
This commit is contained in:
@@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
|
||||
=============
|
||||
|
||||
Page Cache is charged at
|
||||
- add_to_page_cache_locked().
|
||||
- filemap_add_folio().
|
||||
|
||||
The logic is very clear. (About migration, see below)
|
||||
|
||||
|
||||
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
|
||||
ignored on non-init namespace mounts. Please refer to the
|
||||
Delegation section for details.
|
||||
|
||||
favordynmods
|
||||
Reduce the latencies of dynamic cgroup modifications such as
|
||||
task migrations and controller on/offs at the cost of making
|
||||
hot path operations such as forks and exits more expensive.
|
||||
The static usage pattern of creating a cgroup, enabling
|
||||
controllers, and then seeding it with CLONE_INTO_CGROUP is
|
||||
not affected by this option.
|
||||
|
||||
memory_localevents
|
||||
Only populate memory.events with data for the current cgroup,
|
||||
and not any subtrees. This is legacy behaviour, the default
|
||||
|
||||
@@ -17,6 +17,9 @@ solution to the problem to avoid everybody inventing their own. The IDR
|
||||
provides the ability to map an ID to a pointer, while the IDA provides
|
||||
only ID allocation, and as a result is much more memory-efficient.
|
||||
|
||||
The IDR interface is deprecated; please use the :doc:`XArray <xarray>`
|
||||
instead.
|
||||
|
||||
IDR usage
|
||||
=========
|
||||
|
||||
|
||||
@@ -59,8 +59,6 @@ acl Enable POSIX Access Control Lists support
|
||||
(requires CONFIG_EXT2_FS_POSIX_ACL).
|
||||
noacl Don't support POSIX ACLs.
|
||||
|
||||
nobh Do not attach buffer_heads to file pagecache.
|
||||
|
||||
quota, usrquota Enable user disk quota support
|
||||
(requires CONFIG_QUOTA).
|
||||
|
||||
|
||||
@@ -252,9 +252,8 @@ prototypes::
|
||||
bool (*release_folio)(struct folio *, gfp_t);
|
||||
void (*free_folio)(struct folio *);
|
||||
int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
|
||||
bool (*isolate_page) (struct page *, isolate_mode_t);
|
||||
int (*migratepage)(struct address_space *, struct page *, struct page *);
|
||||
void (*putback_page) (struct page *);
|
||||
int (*migrate_folio)(struct address_space *, struct folio *dst,
|
||||
struct folio *src, enum migrate_mode);
|
||||
int (*launder_folio)(struct folio *);
|
||||
bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
|
||||
int (*error_remove_page)(struct address_space *, struct page *);
|
||||
@@ -280,9 +279,7 @@ invalidate_folio: yes exclusive
|
||||
release_folio: yes
|
||||
free_folio: yes
|
||||
direct_IO:
|
||||
isolate_page: yes
|
||||
migratepage: yes (both)
|
||||
putback_page: yes
|
||||
migrate_folio: yes (both)
|
||||
launder_folio: yes
|
||||
is_partially_uptodate: yes
|
||||
error_remove_page: yes
|
||||
|
||||
@@ -914,3 +914,11 @@ Calling conventions for file_open_root() changed; now it takes struct path *
|
||||
instead of passing mount and dentry separately. For callers that used to
|
||||
pass <mnt, mnt->mnt_root> pair (i.e. the root of given mount), a new helper
|
||||
is provided - file_open_root_mnt(). In-tree users adjusted.
|
||||
|
||||
---
|
||||
|
||||
**mandatory**
|
||||
|
||||
no_llseek is gone; don't set .llseek to that - just leave it NULL instead.
|
||||
Checks for "does that file have llseek(2), or should it fail with ESPIPE"
|
||||
should be done by looking at FMODE_LSEEK in file->f_mode.
|
||||
|
||||
@@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined:
|
||||
bool (*release_folio)(struct folio *, gfp_t);
|
||||
void (*free_folio)(struct folio *);
|
||||
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
|
||||
/* isolate a page for migration */
|
||||
bool (*isolate_page) (struct page *, isolate_mode_t);
|
||||
/* migrate the contents of a page to the specified target */
|
||||
int (*migratepage) (struct page *, struct page *);
|
||||
/* put migration-failed page back to right list */
|
||||
void (*putback_page) (struct page *);
|
||||
int (*migrate_folio)(struct mapping *, struct folio *dst,
|
||||
struct folio *src, enum migrate_mode);
|
||||
int (*launder_folio) (struct folio *);
|
||||
|
||||
bool (*is_partially_uptodate) (struct folio *, size_t from,
|
||||
@@ -774,13 +770,38 @@ cache in your filesystem. The following members are defined:
|
||||
See the file "Locking" for more details.
|
||||
|
||||
``read_folio``
|
||||
called by the VM to read a folio from backing store. The folio
|
||||
will be locked when read_folio is called, and should be unlocked
|
||||
and marked uptodate once the read completes. If ->read_folio
|
||||
discovers that it cannot perform the I/O at this time, it can
|
||||
unlock the folio and return AOP_TRUNCATED_PAGE. In this case,
|
||||
the folio will be looked up again, relocked and if that all succeeds,
|
||||
->read_folio will be called again.
|
||||
Called by the page cache to read a folio from the backing store.
|
||||
The 'file' argument supplies authentication information to network
|
||||
filesystems, and is generally not used by block based filesystems.
|
||||
It may be NULL if the caller does not have an open file (e.g. if
|
||||
the kernel is performing a read for itself rather than on behalf
|
||||
of a userspace process with an open file).
|
||||
|
||||
If the mapping does not support large folios, the folio will
|
||||
contain a single page. The folio will be locked when read_folio
|
||||
is called. If the read completes successfully, the folio should
|
||||
be marked uptodate. The filesystem should unlock the folio
|
||||
once the read has completed, whether it was successful or not.
|
||||
The filesystem does not need to modify the refcount on the folio;
|
||||
the page cache holds a reference count and that will not be
|
||||
released until the folio is unlocked.
|
||||
|
||||
Filesystems may implement ->read_folio() synchronously.
|
||||
In normal operation, folios are read through the ->readahead()
|
||||
method. Only if this fails, or if the caller needs to wait for
|
||||
the read to complete will the page cache call ->read_folio().
|
||||
Filesystems should not attempt to perform their own readahead
|
||||
in the ->read_folio() operation.
|
||||
|
||||
If the filesystem cannot perform the read at this time, it can
|
||||
unlock the folio, do whatever action it needs to ensure that the
|
||||
read will succeed in the future and return AOP_TRUNCATED_PAGE.
|
||||
In this case, the caller should look up the folio, lock it,
|
||||
and call ->read_folio again.
|
||||
|
||||
Callers may invoke the ->read_folio() method directly, but using
|
||||
read_mapping_folio() will take care of locking, waiting for the
|
||||
read to complete and handle cases such as AOP_TRUNCATED_PAGE.
|
||||
|
||||
``writepages``
|
||||
called by the VM to write out pages associated with the
|
||||
@@ -905,20 +926,12 @@ cache in your filesystem. The following members are defined:
|
||||
data directly between the storage and the application's address
|
||||
space.
|
||||
|
||||
``isolate_page``
|
||||
Called by the VM when isolating a movable non-lru page. If page
|
||||
is successfully isolated, VM marks the page as PG_isolated via
|
||||
__SetPageIsolated.
|
||||
|
||||
``migrate_page``
|
||||
``migrate_folio``
|
||||
This is used to compact the physical memory usage. If the VM
|
||||
wants to relocate a page (maybe off a memory card that is
|
||||
signalling imminent failure) it will pass a new page and an old
|
||||
page to this function. migrate_page should transfer any private
|
||||
data across and update any references that it has to the page.
|
||||
|
||||
``putback_page``
|
||||
Called by the VM when isolated page's migration fails.
|
||||
wants to relocate a folio (maybe from a memory device that is
|
||||
signalling imminent failure) it will pass a new folio and an old
|
||||
folio to this function. migrate_folio should transfer any private
|
||||
data across and update any references that it has to the folio.
|
||||
|
||||
``launder_folio``
|
||||
Called before freeing a folio - it writes back the dirty folio.
|
||||
|
||||
@@ -152,110 +152,15 @@ Steps:
|
||||
Non-LRU page migration
|
||||
======================
|
||||
|
||||
Although migration originally aimed for reducing the latency of memory accesses
|
||||
for NUMA, compaction also uses migration to create high-order pages.
|
||||
Although migration was originally aimed at reducing the latency of memory
|
||||
accesses for NUMA, compaction also uses migration to create high-order
|
||||
pages. For compaction purposes, it is also useful to be able to move
|
||||
non-LRU pages, such as zsmalloc and virtio-balloon pages.
|
||||
|
||||
Current problem of the implementation is that it is designed to migrate only
|
||||
*LRU* pages. However, there are potential non-LRU pages which can be migrated
|
||||
in drivers, for example, zsmalloc, virtio-balloon pages.
|
||||
|
||||
For virtio-balloon pages, some parts of migration code path have been hooked
|
||||
up and added virtio-balloon specific functions to intercept migration logics.
|
||||
It's too specific to a driver so other drivers who want to make their pages
|
||||
movable would have to add their own specific hooks in the migration path.
|
||||
|
||||
To overcome the problem, VM supports non-LRU page migration which provides
|
||||
generic functions for non-LRU movable pages without driver specific hooks
|
||||
in the migration path.
|
||||
|
||||
If a driver wants to make its pages movable, it should define three functions
|
||||
which are function pointers of struct address_space_operations.
|
||||
|
||||
1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
|
||||
|
||||
What VM expects from isolate_page() function of driver is to return *true*
|
||||
if driver isolates the page successfully. On returning true, VM marks the page
|
||||
as PG_isolated so concurrent isolation in several CPUs skip the page
|
||||
for isolation. If a driver cannot isolate the page, it should return *false*.
|
||||
|
||||
Once page is successfully isolated, VM uses page.lru fields so driver
|
||||
shouldn't expect to preserve values in those fields.
|
||||
|
||||
2. ``int (*migratepage) (struct address_space *mapping,``
|
||||
| ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
|
||||
|
||||
After isolation, VM calls migratepage() of driver with the isolated page.
|
||||
The function of migratepage() is to move the contents of the old page to the
|
||||
new page
|
||||
and set up fields of struct page newpage. Keep in mind that you should
|
||||
indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
|
||||
under page_lock if you migrated the oldpage successfully and returned
|
||||
MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
|
||||
can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
|
||||
because VM interprets -EAGAIN as "temporary migration failure". On returning
|
||||
any error except -EAGAIN, VM will give up the page migration without
|
||||
retrying.
|
||||
|
||||
Driver shouldn't touch the page.lru field while in the migratepage() function.
|
||||
|
||||
3. ``void (*putback_page)(struct page *);``
|
||||
|
||||
If migration fails on the isolated page, VM should return the isolated page
|
||||
to the driver so VM calls the driver's putback_page() with the isolated page.
|
||||
In this function, the driver should put the isolated page back into its own data
|
||||
structure.
|
||||
|
||||
Non-LRU movable page flags
|
||||
|
||||
There are two page flags for supporting non-LRU movable page.
|
||||
|
||||
* PG_movable
|
||||
|
||||
Driver should use the function below to make page movable under page_lock::
|
||||
|
||||
void __SetPageMovable(struct page *page, struct address_space *mapping)
|
||||
|
||||
It needs argument of address_space for registering migration
|
||||
family functions which will be called by VM. Exactly speaking,
|
||||
PG_movable is not a real flag of struct page. Rather, VM
|
||||
reuses the page->mapping's lower bits to represent it::
|
||||
|
||||
#define PAGE_MAPPING_MOVABLE 0x2
|
||||
page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
|
||||
|
||||
so driver shouldn't access page->mapping directly. Instead, driver should
|
||||
use page_mapping() which masks off the low two bits of page->mapping under
|
||||
page lock so it can get the right struct address_space.
|
||||
|
||||
For testing of non-LRU movable pages, VM supports __PageMovable() function.
|
||||
However, it doesn't guarantee to identify non-LRU movable pages because
|
||||
the page->mapping field is unified with other variables in struct page.
|
||||
If the driver releases the page after isolation by VM, page->mapping
|
||||
doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set
|
||||
(look at __ClearPageMovable). But __PageMovable() is cheap to call whether
|
||||
page is LRU or non-LRU movable once the page has been isolated because LRU
|
||||
pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also
|
||||
good for just peeking to test non-LRU movable pages before more expensive
|
||||
checking with lock_page() in pfn scanning to select a victim.
|
||||
|
||||
For guaranteeing non-LRU movable page, VM provides PageMovable() function.
|
||||
Unlike __PageMovable(), PageMovable() validates page->mapping and
|
||||
mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents
|
||||
sudden destroying of page->mapping.
|
||||
|
||||
Drivers using __SetPageMovable() should clear the flag via
|
||||
__ClearMovablePage() under page_lock() before the releasing the page.
|
||||
|
||||
* PG_isolated
|
||||
|
||||
To prevent concurrent isolation among several CPUs, VM marks isolated page
|
||||
as PG_isolated under lock_page(). So if a CPU encounters PG_isolated
|
||||
non-LRU movable page, it can skip it. Driver doesn't need to manipulate the
|
||||
flag because VM will set/clear it automatically. Keep in mind that if the
|
||||
driver sees a PG_isolated page, it means the page has been isolated by the
|
||||
VM so it shouldn't touch the page.lru field.
|
||||
The PG_isolated flag is aliased with the PG_reclaim flag so drivers
|
||||
shouldn't use PG_isolated for its own purposes.
|
||||
If a driver wants to make its pages movable, it should define a struct
|
||||
movable_operations. It then needs to call __SetPageMovable() on each
|
||||
page that it may be able to move. This uses the ``page->mapping`` field,
|
||||
so this field is not available for the driver to use for other purposes.
|
||||
|
||||
Monitoring Migration
|
||||
=====================
|
||||
@@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase.
|
||||
|
||||
Christoph Lameter, May 8, 2006.
|
||||
Minchan Kim, Mar 28, 2016.
|
||||
|
||||
.. kernel-doc:: include/linux/migrate.h
|
||||
|
||||
@@ -29,7 +29,7 @@ Mechanics
|
||||
be selected::
|
||||
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_VARS=y or m # optional
|
||||
CONFIG_EFIVAR_FS=y or m # optional
|
||||
|
||||
- Create a VFAT partition on the disk
|
||||
- Copy the following to the VFAT partition:
|
||||
|
||||
@@ -44,7 +44,6 @@ CONFIG_ARM_CPUIDLE=y
|
||||
CONFIG_VFP=y
|
||||
CONFIG_NEON=y
|
||||
CONFIG_KERNEL_MODE_NEON=y
|
||||
CONFIG_EFI_VARS=m
|
||||
CONFIG_ARM_CRYPTO=y
|
||||
CONFIG_CRYPTO_SHA1_ARM_NEON=m
|
||||
CONFIG_CRYPTO_SHA1_ARM_CE=m
|
||||
|
||||
@@ -24,13 +24,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
|
||||
#define arch_efi_call_virt_setup() efi_virtmap_load()
|
||||
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
|
||||
|
||||
#define arch_efi_call_virt(p, f, args...) \
|
||||
({ \
|
||||
efi_##f##_t *__f; \
|
||||
__f = p->f; \
|
||||
__f(args); \
|
||||
})
|
||||
|
||||
#define ARCH_EFI_IRQ_FLAGS_MASK \
|
||||
(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
|
||||
PSR_T_BIT | MODE_MASK)
|
||||
|
||||
@@ -27,12 +27,9 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
|
||||
__efi_fpsimd_begin(); \
|
||||
})
|
||||
|
||||
#undef arch_efi_call_virt
|
||||
#define arch_efi_call_virt(p, f, args...) \
|
||||
({ \
|
||||
efi_##f##_t *__f; \
|
||||
__f = p->f; \
|
||||
__efi_rt_asm_wrapper(__f, #f, args); \
|
||||
})
|
||||
__efi_rt_asm_wrapper((p)->f, #f, args)
|
||||
|
||||
#define arch_efi_call_virt_teardown() \
|
||||
({ \
|
||||
|
||||
@@ -350,8 +350,8 @@ void __init arm64_memblock_init(void)
|
||||
"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
|
||||
phys_initrd_size = 0;
|
||||
} else {
|
||||
memblock_remove(base, size); /* clear MEMBLOCK_ flags */
|
||||
memblock_add(base, size);
|
||||
memblock_clear_nomap(base, size);
|
||||
memblock_reserve(base, size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ CONFIG_SMP=y
|
||||
CONFIG_NR_CPUS=2
|
||||
CONFIG_PREEMPT=y
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
CONFIG_ACPI_BUTTON=m
|
||||
CONFIG_ACPI_FAN=m
|
||||
|
||||
@@ -21,7 +21,6 @@ CONFIG_IA64_MCA_RECOVERY=y
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_KEXEC=y
|
||||
CONFIG_CRASH_DUMP=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
CONFIG_ACPI_BUTTON=m
|
||||
CONFIG_ACPI_FAN=m
|
||||
|
||||
@@ -18,7 +18,6 @@ CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_SPARSEMEM_MANUAL=y
|
||||
CONFIG_IA64_MCA_RECOVERY=y
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
CONFIG_ACPI_BUTTON=m
|
||||
CONFIG_ACPI_FAN=m
|
||||
|
||||
@@ -23,7 +23,6 @@ CONFIG_FORCE_CPEI_RETARGET=y
|
||||
CONFIG_IA64_MCA_RECOVERY=y
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_KEXEC=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
CONFIG_ACPI_BUTTON=m
|
||||
CONFIG_ACPI_FAN=m
|
||||
|
||||
@@ -12,7 +12,6 @@ CONFIG_FLATMEM_MANUAL=y
|
||||
CONFIG_IA64_MCA_RECOVERY=y
|
||||
CONFIG_IA64_PALINFO=y
|
||||
CONFIG_CRASH_DUMP=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BINFMT_MISC=y
|
||||
CONFIG_HOTPLUG_PCI=y
|
||||
CONFIG_HOTPLUG_PCI_ACPI=y
|
||||
|
||||
@@ -13,20 +13,8 @@ void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
|
||||
|
||||
#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */
|
||||
|
||||
#define arch_efi_call_virt_setup() \
|
||||
({ \
|
||||
})
|
||||
|
||||
#define arch_efi_call_virt(p, f, args...) \
|
||||
({ \
|
||||
efi_##f##_t * __f; \
|
||||
__f = p->f; \
|
||||
__f(args); \
|
||||
})
|
||||
|
||||
#define arch_efi_call_virt_teardown() \
|
||||
({ \
|
||||
})
|
||||
#define arch_efi_call_virt_setup()
|
||||
#define arch_efi_call_virt_teardown()
|
||||
|
||||
#define EFI_ALLOC_ALIGN SZ_64K
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ void flush_kernel_icache_range_asm(unsigned long, unsigned long);
|
||||
void flush_user_dcache_range_asm(unsigned long, unsigned long);
|
||||
void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
|
||||
void purge_kernel_dcache_range_asm(unsigned long, unsigned long);
|
||||
void flush_kernel_dcache_page_asm(void *);
|
||||
void flush_kernel_dcache_page_asm(const void *addr);
|
||||
void flush_kernel_icache_page(void *);
|
||||
|
||||
/* Cache flush operations */
|
||||
@@ -31,7 +31,7 @@ void flush_cache_all_local(void);
|
||||
void flush_cache_all(void);
|
||||
void flush_cache_mm(struct mm_struct *mm);
|
||||
|
||||
void flush_kernel_dcache_page_addr(void *addr);
|
||||
void flush_kernel_dcache_page_addr(const void *addr);
|
||||
|
||||
#define flush_kernel_dcache_range(start,size) \
|
||||
flush_kernel_dcache_range_asm((start), (start)+(size));
|
||||
@@ -75,7 +75,7 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
|
||||
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
|
||||
|
||||
#define ARCH_HAS_FLUSH_ON_KUNMAP
|
||||
static inline void kunmap_flush_on_unmap(void *addr)
|
||||
static inline void kunmap_flush_on_unmap(const void *addr)
|
||||
{
|
||||
flush_kernel_dcache_page_addr(addr);
|
||||
}
|
||||
|
||||
@@ -549,7 +549,7 @@ extern void purge_kernel_dcache_page_asm(unsigned long);
|
||||
extern void clear_user_page_asm(void *, unsigned long);
|
||||
extern void copy_user_page_asm(void *, void *, unsigned long);
|
||||
|
||||
void flush_kernel_dcache_page_addr(void *addr)
|
||||
void flush_kernel_dcache_page_addr(const void *addr)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
||||
@@ -348,7 +348,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned long size)
|
||||
static inline unsigned long __must_check
|
||||
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
if (likely(check_copy_size(from, n, true))) {
|
||||
if (check_copy_size(from, n, true)) {
|
||||
if (access_ok(to, n)) {
|
||||
allow_write_to_user(to, n);
|
||||
n = copy_mc_generic((void *)to, from, n);
|
||||
|
||||
@@ -19,9 +19,6 @@
|
||||
#include <linux/stringify.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/balloon_compaction.h>
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/hvcall.h>
|
||||
@@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
static struct vfsmount *balloon_mnt;
|
||||
|
||||
static int cmm_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_system_type balloon_fs = {
|
||||
.name = "ppc-cmm",
|
||||
.init_fs_context = cmm_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
|
||||
struct page *newpage, struct page *page,
|
||||
enum migrate_mode mode)
|
||||
@@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
|
||||
return MIGRATEPAGE_SUCCESS;
|
||||
}
|
||||
|
||||
static int cmm_balloon_compaction_init(void)
|
||||
static void cmm_balloon_compaction_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
balloon_devinfo_init(&b_dev_info);
|
||||
b_dev_info.migratepage = cmm_migratepage;
|
||||
|
||||
balloon_mnt = kern_mount(&balloon_fs);
|
||||
if (IS_ERR(balloon_mnt)) {
|
||||
rc = PTR_ERR(balloon_mnt);
|
||||
balloon_mnt = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
|
||||
if (IS_ERR(b_dev_info.inode)) {
|
||||
rc = PTR_ERR(b_dev_info.inode);
|
||||
b_dev_info.inode = NULL;
|
||||
kern_unmount(balloon_mnt);
|
||||
balloon_mnt = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
|
||||
return 0;
|
||||
}
|
||||
static void cmm_balloon_compaction_deinit(void)
|
||||
{
|
||||
if (b_dev_info.inode)
|
||||
iput(b_dev_info.inode);
|
||||
b_dev_info.inode = NULL;
|
||||
kern_unmount(balloon_mnt);
|
||||
balloon_mnt = NULL;
|
||||
}
|
||||
#else /* CONFIG_BALLOON_COMPACTION */
|
||||
static int cmm_balloon_compaction_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cmm_balloon_compaction_deinit(void)
|
||||
static void cmm_balloon_compaction_init(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BALLOON_COMPACTION */
|
||||
@@ -622,9 +572,7 @@ static int cmm_init(void)
|
||||
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
rc = cmm_balloon_compaction_init();
|
||||
if (rc)
|
||||
return rc;
|
||||
cmm_balloon_compaction_init();
|
||||
|
||||
rc = register_oom_notifier(&cmm_oom_nb);
|
||||
if (rc < 0)
|
||||
@@ -658,7 +606,6 @@ out_reboot_notifier:
|
||||
out_oom_notifier:
|
||||
unregister_oom_notifier(&cmm_oom_nb);
|
||||
out_balloon_compaction:
|
||||
cmm_balloon_compaction_deinit();
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -677,7 +624,6 @@ static void cmm_exit(void)
|
||||
unregister_memory_notifier(&cmm_mem_nb);
|
||||
cmm_free_pages(atomic_long_read(&loaned_pages));
|
||||
cmm_unregister_sysfs(&cmm_dev);
|
||||
cmm_balloon_compaction_deinit();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -23,8 +23,6 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
|
||||
#define arch_efi_call_virt_setup() efi_virtmap_load()
|
||||
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
|
||||
|
||||
#define arch_efi_call_virt(p, f, args...) p->f(args)
|
||||
|
||||
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
|
||||
|
||||
/* Load initrd anywhere in system RAM */
|
||||
|
||||
@@ -39,7 +39,7 @@ _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned
|
||||
static __always_inline unsigned long __must_check
|
||||
copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key)
|
||||
{
|
||||
if (likely(check_copy_size(to, n, false)))
|
||||
if (check_copy_size(to, n, false))
|
||||
n = _copy_from_user_key(to, from, n, key);
|
||||
return n;
|
||||
}
|
||||
@@ -50,7 +50,7 @@ _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned l
|
||||
static __always_inline unsigned long __must_check
|
||||
copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key)
|
||||
{
|
||||
if (likely(check_copy_size(from, n, true)))
|
||||
if (check_copy_size(from, n, true))
|
||||
n = _copy_to_user_key(to, from, n, key);
|
||||
return n;
|
||||
}
|
||||
|
||||
@@ -135,7 +135,6 @@ CONFIG_DEVTMPFS=y
|
||||
CONFIG_DEVTMPFS_MOUNT=y
|
||||
CONFIG_DEBUG_DEVRES=y
|
||||
CONFIG_CONNECTOR=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_EFI_CAPSULE_LOADER=y
|
||||
CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_VIRTIO_BLK=y
|
||||
|
||||
@@ -134,7 +134,6 @@ CONFIG_DEVTMPFS=y
|
||||
CONFIG_DEVTMPFS_MOUNT=y
|
||||
CONFIG_DEBUG_DEVRES=y
|
||||
CONFIG_CONNECTOR=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_VIRTIO_BLK=y
|
||||
CONFIG_BLK_DEV_SD=y
|
||||
|
||||
@@ -100,8 +100,6 @@ static inline void efi_fpu_end(void)
|
||||
efi_fpu_end(); \
|
||||
})
|
||||
|
||||
#define arch_efi_call_virt(p, f, args...) p->f(args)
|
||||
|
||||
#else /* !CONFIG_X86_32 */
|
||||
|
||||
#define EFI_LOADER_SIGNATURE "EL64"
|
||||
@@ -121,6 +119,7 @@ extern asmlinkage u64 __efi_call(void *fp, ...);
|
||||
efi_enter_mm(); \
|
||||
})
|
||||
|
||||
#undef arch_efi_call_virt
|
||||
#define arch_efi_call_virt(p, f, args...) ({ \
|
||||
u64 ret, ibt = ibt_save(); \
|
||||
ret = efi_call((void *)p->f, args); \
|
||||
@@ -383,7 +382,6 @@ static inline bool efi_is_64bit(void)
|
||||
extern bool efi_reboot_required(void);
|
||||
extern bool efi_is_table_address(unsigned long phys_addr);
|
||||
|
||||
extern void efi_find_mirror(void);
|
||||
extern void efi_reserve_boot_services(void);
|
||||
#else
|
||||
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
|
||||
@@ -395,9 +393,6 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void efi_find_mirror(void)
|
||||
{
|
||||
}
|
||||
static inline void efi_reserve_boot_services(void)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -108,29 +108,6 @@ static int __init setup_add_efi_memmap(char *arg)
|
||||
}
|
||||
early_param("add_efi_memmap", setup_add_efi_memmap);
|
||||
|
||||
void __init efi_find_mirror(void)
|
||||
{
|
||||
efi_memory_desc_t *md;
|
||||
u64 mirror_size = 0, total_size = 0;
|
||||
|
||||
if (!efi_enabled(EFI_MEMMAP))
|
||||
return;
|
||||
|
||||
for_each_efi_memory_desc(md) {
|
||||
unsigned long long start = md->phys_addr;
|
||||
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
|
||||
total_size += size;
|
||||
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
|
||||
memblock_mark_mirror(start, size);
|
||||
mirror_size += size;
|
||||
}
|
||||
}
|
||||
if (mirror_size)
|
||||
pr_info("Memory: %lldM/%lldM mirrored memory\n",
|
||||
mirror_size>>20, total_size>>20);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell the kernel about the EFI memory map. This might include
|
||||
* more than the max 128 entries that can fit in the passed in e820
|
||||
|
||||
@@ -37,7 +37,7 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
|
||||
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
|
||||
|
||||
/* avoid the need for an I/O completion work item */
|
||||
if (iocb->ki_flags & IOCB_DSYNC)
|
||||
if (iocb_is_dsync(iocb))
|
||||
opf |= REQ_FUA;
|
||||
return opf;
|
||||
}
|
||||
@@ -421,7 +421,7 @@ const struct address_space_operations def_blk_aops = {
|
||||
.write_end = blkdev_write_end,
|
||||
.writepages = blkdev_writepages,
|
||||
.direct_IO = blkdev_direct_IO,
|
||||
.migratepage = buffer_migrate_page_norefs,
|
||||
.migrate_folio = buffer_migrate_folio_norefs,
|
||||
.is_dirty_writeback = buffer_check_dirty_writeback,
|
||||
};
|
||||
|
||||
|
||||
@@ -24,13 +24,13 @@ struct parsed_partitions {
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
struct page *v;
|
||||
struct folio *v;
|
||||
} Sector;
|
||||
|
||||
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
|
||||
static inline void put_dev_sector(Sector p)
|
||||
{
|
||||
put_page(p.v);
|
||||
folio_put(p.v);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
||||
@@ -704,25 +704,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
|
||||
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
|
||||
{
|
||||
struct address_space *mapping = state->disk->part0->bd_inode->i_mapping;
|
||||
struct page *page;
|
||||
struct folio *folio;
|
||||
|
||||
if (n >= get_capacity(state->disk)) {
|
||||
state->access_beyond_eod = true;
|
||||
return NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = read_mapping_page(mapping,
|
||||
(pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
|
||||
if (IS_ERR(page))
|
||||
folio = read_mapping_folio(mapping, n >> PAGE_SECTORS_SHIFT, NULL);
|
||||
if (IS_ERR(folio))
|
||||
goto out;
|
||||
if (PageError(page))
|
||||
goto out_put_page;
|
||||
|
||||
p->v = page;
|
||||
return (unsigned char *)page_address(page) +
|
||||
((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
|
||||
out_put_page:
|
||||
put_page(page);
|
||||
p->v = folio;
|
||||
return folio_address(folio) + offset_in_folio(folio, n * SECTOR_SIZE);
|
||||
out:
|
||||
p->v = NULL;
|
||||
return NULL;
|
||||
|
||||
@@ -572,6 +572,21 @@ source "drivers/acpi/pmic/Kconfig"
|
||||
config ACPI_VIOT
|
||||
bool
|
||||
|
||||
config ACPI_PRMT
|
||||
bool "Platform Runtime Mechanism Support"
|
||||
depends on EFI && (X86_64 || ARM64)
|
||||
default y
|
||||
help
|
||||
Platform Runtime Mechanism (PRM) is a firmware interface exposing a
|
||||
set of binary executables that can be called from the AML interpreter
|
||||
or directly from device drivers.
|
||||
|
||||
Say Y to enable the AML interpreter to execute the PRM code.
|
||||
|
||||
While this feature is optional in principle, leaving it out may
|
||||
substantially increase computational overhead related to the
|
||||
initialization of some server systems.
|
||||
|
||||
endif # ACPI
|
||||
|
||||
config X86_PM_TIMER
|
||||
@@ -589,18 +604,3 @@ config X86_PM_TIMER
|
||||
|
||||
You should nearly always say Y here because many modern
|
||||
systems require this timer.
|
||||
|
||||
config ACPI_PRMT
|
||||
bool "Platform Runtime Mechanism Support"
|
||||
depends on EFI && X86_64
|
||||
default y
|
||||
help
|
||||
Platform Runtime Mechanism (PRM) is a firmware interface exposing a
|
||||
set of binary executables that can be called from the AML interpreter
|
||||
or directly from device drivers.
|
||||
|
||||
Say Y to enable the AML interpreter to execute the PRM code.
|
||||
|
||||
While this feature is optional in principle, leaving it out may
|
||||
substantially increase computational overhead related to the
|
||||
initialization of some server systems.
|
||||
|
||||
@@ -53,7 +53,7 @@ static LIST_HEAD(prm_module_list);
|
||||
|
||||
struct prm_handler_info {
|
||||
guid_t guid;
|
||||
u64 handler_addr;
|
||||
void *handler_addr;
|
||||
u64 static_data_buffer_addr;
|
||||
u64 acpi_param_buffer_addr;
|
||||
|
||||
@@ -148,7 +148,7 @@ acpi_parse_prmt(union acpi_subtable_headers *header, const unsigned long end)
|
||||
th = &tm->handlers[cur_handler];
|
||||
|
||||
guid_copy(&th->guid, (guid_t *)handler_info->handler_guid);
|
||||
th->handler_addr = efi_pa_va_lookup(handler_info->handler_address);
|
||||
th->handler_addr = (void *)efi_pa_va_lookup(handler_info->handler_address);
|
||||
th->static_data_buffer_addr = efi_pa_va_lookup(handler_info->static_data_buffer_address);
|
||||
th->acpi_param_buffer_addr = efi_pa_va_lookup(handler_info->acpi_param_buffer_address);
|
||||
} while (++cur_handler < tm->handler_count && (handler_info = get_next_handler(handler_info)));
|
||||
|
||||
@@ -480,10 +480,10 @@ static int ata_dev_get_GTF(struct ata_device *dev, struct ata_acpi_gtf **gtf)
|
||||
* RETURNS:
|
||||
* Determined xfermask.
|
||||
*/
|
||||
unsigned long ata_acpi_gtm_xfermask(struct ata_device *dev,
|
||||
const struct ata_acpi_gtm *gtm)
|
||||
unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
|
||||
const struct ata_acpi_gtm *gtm)
|
||||
{
|
||||
unsigned long xfer_mask = 0;
|
||||
unsigned int xfer_mask = 0;
|
||||
unsigned int type;
|
||||
int unit;
|
||||
u8 mode;
|
||||
@@ -525,7 +525,7 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
|
||||
struct ata_device *dev;
|
||||
|
||||
ata_for_each_dev(dev, &ap->link, ENABLED) {
|
||||
unsigned long xfer_mask, udma_mask;
|
||||
unsigned int xfer_mask, udma_mask;
|
||||
|
||||
xfer_mask = ata_acpi_gtm_xfermask(dev, gtm);
|
||||
ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask);
|
||||
|
||||
@@ -93,7 +93,7 @@ struct ata_force_param {
|
||||
const char *name;
|
||||
u8 cbl;
|
||||
u8 spd_limit;
|
||||
unsigned long xfer_mask;
|
||||
unsigned int xfer_mask;
|
||||
unsigned int horkage_on;
|
||||
unsigned int horkage_off;
|
||||
u16 lflags_on;
|
||||
@@ -425,7 +425,7 @@ static void ata_force_xfermask(struct ata_device *dev)
|
||||
|
||||
for (i = ata_force_tbl_size - 1; i >= 0; i--) {
|
||||
const struct ata_force_ent *fe = &ata_force_tbl[i];
|
||||
unsigned long pio_mask, mwdma_mask, udma_mask;
|
||||
unsigned int pio_mask, mwdma_mask, udma_mask;
|
||||
|
||||
if (fe->port != -1 && fe->port != dev->link->ap->print_id)
|
||||
continue;
|
||||
@@ -803,11 +803,11 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
|
||||
* RETURNS:
|
||||
* Packed xfer_mask.
|
||||
*/
|
||||
unsigned long ata_pack_xfermask(unsigned long pio_mask,
|
||||
unsigned long mwdma_mask,
|
||||
unsigned long udma_mask)
|
||||
unsigned int ata_pack_xfermask(unsigned int pio_mask,
|
||||
unsigned int mwdma_mask,
|
||||
unsigned int udma_mask)
|
||||
{
|
||||
return ((pio_mask << ATA_SHIFT_PIO) & ATA_MASK_PIO) |
|
||||
return ((pio_mask << ATA_SHIFT_PIO) & ATA_MASK_PIO) |
|
||||
((mwdma_mask << ATA_SHIFT_MWDMA) & ATA_MASK_MWDMA) |
|
||||
((udma_mask << ATA_SHIFT_UDMA) & ATA_MASK_UDMA);
|
||||
}
|
||||
@@ -823,8 +823,8 @@ EXPORT_SYMBOL_GPL(ata_pack_xfermask);
|
||||
* Unpack @xfer_mask into @pio_mask, @mwdma_mask and @udma_mask.
|
||||
* Any NULL destination masks will be ignored.
|
||||
*/
|
||||
void ata_unpack_xfermask(unsigned long xfer_mask, unsigned long *pio_mask,
|
||||
unsigned long *mwdma_mask, unsigned long *udma_mask)
|
||||
void ata_unpack_xfermask(unsigned int xfer_mask, unsigned int *pio_mask,
|
||||
unsigned int *mwdma_mask, unsigned int *udma_mask)
|
||||
{
|
||||
if (pio_mask)
|
||||
*pio_mask = (xfer_mask & ATA_MASK_PIO) >> ATA_SHIFT_PIO;
|
||||
@@ -857,7 +857,7 @@ static const struct ata_xfer_ent {
|
||||
* RETURNS:
|
||||
* Matching XFER_* value, 0xff if no match found.
|
||||
*/
|
||||
u8 ata_xfer_mask2mode(unsigned long xfer_mask)
|
||||
u8 ata_xfer_mask2mode(unsigned int xfer_mask)
|
||||
{
|
||||
int highbit = fls(xfer_mask) - 1;
|
||||
const struct ata_xfer_ent *ent;
|
||||
@@ -881,7 +881,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
|
||||
* RETURNS:
|
||||
* Matching xfer_mask, 0 if no match found.
|
||||
*/
|
||||
unsigned long ata_xfer_mode2mask(u8 xfer_mode)
|
||||
unsigned int ata_xfer_mode2mask(u8 xfer_mode)
|
||||
{
|
||||
const struct ata_xfer_ent *ent;
|
||||
|
||||
@@ -930,7 +930,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
|
||||
* Constant C string representing highest speed listed in
|
||||
* @mode_mask, or the constant C string "<n/a>".
|
||||
*/
|
||||
const char *ata_mode_string(unsigned long xfer_mask)
|
||||
const char *ata_mode_string(unsigned int xfer_mask)
|
||||
{
|
||||
static const char * const xfer_mode_str[] = {
|
||||
"PIO0",
|
||||
@@ -1103,16 +1103,16 @@ static u64 ata_id_n_sectors(const u16 *id)
|
||||
if (ata_id_has_lba(id)) {
|
||||
if (ata_id_has_lba48(id))
|
||||
return ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
|
||||
else
|
||||
return ata_id_u32(id, ATA_ID_LBA_CAPACITY);
|
||||
} else {
|
||||
if (ata_id_current_chs_valid(id))
|
||||
return id[ATA_ID_CUR_CYLS] * id[ATA_ID_CUR_HEADS] *
|
||||
id[ATA_ID_CUR_SECTORS];
|
||||
else
|
||||
return id[ATA_ID_CYLS] * id[ATA_ID_HEADS] *
|
||||
id[ATA_ID_SECTORS];
|
||||
|
||||
return ata_id_u32(id, ATA_ID_LBA_CAPACITY);
|
||||
}
|
||||
|
||||
if (ata_id_current_chs_valid(id))
|
||||
return (u32)id[ATA_ID_CUR_CYLS] * (u32)id[ATA_ID_CUR_HEADS] *
|
||||
(u32)id[ATA_ID_CUR_SECTORS];
|
||||
|
||||
return (u32)id[ATA_ID_CYLS] * (u32)id[ATA_ID_HEADS] *
|
||||
(u32)id[ATA_ID_SECTORS];
|
||||
}
|
||||
|
||||
u64 ata_tf_to_lba48(const struct ata_taskfile *tf)
|
||||
@@ -1383,9 +1383,9 @@ static inline void ata_dump_id(struct ata_device *dev, const u16 *id)
|
||||
* RETURNS:
|
||||
* Computed xfermask
|
||||
*/
|
||||
unsigned long ata_id_xfermask(const u16 *id)
|
||||
unsigned int ata_id_xfermask(const u16 *id)
|
||||
{
|
||||
unsigned long pio_mask, mwdma_mask, udma_mask;
|
||||
unsigned int pio_mask, mwdma_mask, udma_mask;
|
||||
|
||||
/* Usual case. Word 53 indicates word 64 is valid */
|
||||
if (id[ATA_ID_FIELD_VALID] & (1 << 1)) {
|
||||
@@ -1467,10 +1467,10 @@ static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
|
||||
* RETURNS:
|
||||
* Zero on success, AC_ERR_* mask on failure
|
||||
*/
|
||||
unsigned ata_exec_internal_sg(struct ata_device *dev,
|
||||
struct ata_taskfile *tf, const u8 *cdb,
|
||||
int dma_dir, struct scatterlist *sgl,
|
||||
unsigned int n_elem, unsigned long timeout)
|
||||
static unsigned ata_exec_internal_sg(struct ata_device *dev,
|
||||
struct ata_taskfile *tf, const u8 *cdb,
|
||||
int dma_dir, struct scatterlist *sgl,
|
||||
unsigned int n_elem, unsigned int timeout)
|
||||
{
|
||||
struct ata_link *link = dev->link;
|
||||
struct ata_port *ap = link->ap;
|
||||
@@ -1645,7 +1645,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
|
||||
unsigned ata_exec_internal(struct ata_device *dev,
|
||||
struct ata_taskfile *tf, const u8 *cdb,
|
||||
int dma_dir, void *buf, unsigned int buflen,
|
||||
unsigned long timeout)
|
||||
unsigned int timeout)
|
||||
{
|
||||
struct scatterlist *psg = NULL, sg;
|
||||
unsigned int n_elem = 0;
|
||||
@@ -2534,7 +2534,7 @@ int ata_dev_configure(struct ata_device *dev)
|
||||
struct ata_port *ap = dev->link->ap;
|
||||
bool print_info = ata_dev_print_info(dev);
|
||||
const u16 *id = dev->id;
|
||||
unsigned long xfer_mask;
|
||||
unsigned int xfer_mask;
|
||||
unsigned int err_mask;
|
||||
char revbuf[7]; /* XYZ-99\0 */
|
||||
char fwrevbuf[ATA_ID_FW_REV_LEN+1];
|
||||
@@ -3202,8 +3202,8 @@ u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle)
|
||||
int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel)
|
||||
{
|
||||
char buf[32];
|
||||
unsigned long orig_mask, xfer_mask;
|
||||
unsigned long pio_mask, mwdma_mask, udma_mask;
|
||||
unsigned int orig_mask, xfer_mask;
|
||||
unsigned int pio_mask, mwdma_mask, udma_mask;
|
||||
int quiet, highbit;
|
||||
|
||||
quiet = !!(sel & ATA_DNXFER_QUIET);
|
||||
@@ -3381,7 +3381,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
|
||||
|
||||
/* step 1: calculate xfer_mask */
|
||||
ata_for_each_dev(dev, link, ENABLED) {
|
||||
unsigned long pio_mask, dma_mask;
|
||||
unsigned int pio_mask, dma_mask;
|
||||
unsigned int mode_mask;
|
||||
|
||||
mode_mask = ATA_DMA_MASK_ATA;
|
||||
@@ -4217,7 +4217,7 @@ static void ata_dev_xfermask(struct ata_device *dev)
|
||||
struct ata_link *link = dev->link;
|
||||
struct ata_port *ap = link->ap;
|
||||
struct ata_host *host = ap->host;
|
||||
unsigned long xfer_mask;
|
||||
unsigned int xfer_mask;
|
||||
|
||||
/* controller modes available */
|
||||
xfer_mask = ata_pack_xfermask(ap->pio_mask,
|
||||
@@ -4342,7 +4342,7 @@ unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature)
|
||||
{
|
||||
struct ata_taskfile tf;
|
||||
unsigned int err_mask;
|
||||
unsigned long timeout = 0;
|
||||
unsigned int timeout = 0;
|
||||
|
||||
/* set up set-features taskfile */
|
||||
ata_dev_dbg(dev, "set features - SATA features\n");
|
||||
@@ -5776,7 +5776,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
|
||||
/* set cable, sata_spd_limit and report */
|
||||
for (i = 0; i < host->n_ports; i++) {
|
||||
struct ata_port *ap = host->ports[i];
|
||||
unsigned long xfer_mask;
|
||||
unsigned int xfer_mask;
|
||||
|
||||
/* set SATA cable type if still unset */
|
||||
if (ap->cbl == ATA_CBL_NONE && (ap->flags & ATA_FLAG_SATA))
|
||||
|
||||
@@ -86,36 +86,36 @@ static const unsigned long ata_eh_reset_timeouts[] = {
|
||||
ULONG_MAX, /* > 1 min has elapsed, give up */
|
||||
};
|
||||
|
||||
static const unsigned long ata_eh_identify_timeouts[] = {
|
||||
static const unsigned int ata_eh_identify_timeouts[] = {
|
||||
5000, /* covers > 99% of successes and not too boring on failures */
|
||||
10000, /* combined time till here is enough even for media access */
|
||||
30000, /* for true idiots */
|
||||
ULONG_MAX,
|
||||
UINT_MAX,
|
||||
};
|
||||
|
||||
static const unsigned long ata_eh_revalidate_timeouts[] = {
|
||||
static const unsigned int ata_eh_revalidate_timeouts[] = {
|
||||
15000, /* Some drives are slow to read log pages when waking-up */
|
||||
15000, /* combined time till here is enough even for media access */
|
||||
ULONG_MAX,
|
||||
UINT_MAX,
|
||||
};
|
||||
|
||||
static const unsigned long ata_eh_flush_timeouts[] = {
|
||||
static const unsigned int ata_eh_flush_timeouts[] = {
|
||||
15000, /* be generous with flush */
|
||||
15000, /* ditto */
|
||||
30000, /* and even more generous */
|
||||
ULONG_MAX,
|
||||
UINT_MAX,
|
||||
};
|
||||
|
||||
static const unsigned long ata_eh_other_timeouts[] = {
|
||||
static const unsigned int ata_eh_other_timeouts[] = {
|
||||
5000, /* same rationale as identify timeout */
|
||||
10000, /* ditto */
|
||||
/* but no merciful 30sec for other commands, it just isn't worth it */
|
||||
ULONG_MAX,
|
||||
UINT_MAX,
|
||||
};
|
||||
|
||||
struct ata_eh_cmd_timeout_ent {
|
||||
const u8 *commands;
|
||||
const unsigned long *timeouts;
|
||||
const unsigned int *timeouts;
|
||||
};
|
||||
|
||||
/* The following table determines timeouts to use for EH internal
|
||||
@@ -326,7 +326,7 @@ static int ata_lookup_timeout_table(u8 cmd)
|
||||
* RETURNS:
|
||||
* Determined timeout.
|
||||
*/
|
||||
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
|
||||
unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
|
||||
{
|
||||
struct ata_eh_context *ehc = &dev->link->eh_context;
|
||||
int ent = ata_lookup_timeout_table(cmd);
|
||||
@@ -361,7 +361,7 @@ void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
|
||||
return;
|
||||
|
||||
idx = ehc->cmd_timeout_idx[dev->devno][ent];
|
||||
if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
|
||||
if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX)
|
||||
ehc->cmd_timeout_idx[dev->devno][ent]++;
|
||||
}
|
||||
|
||||
@@ -802,11 +802,11 @@ void ata_port_wait_eh(struct ata_port *ap)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
|
||||
|
||||
static int ata_eh_nr_in_flight(struct ata_port *ap)
|
||||
static unsigned int ata_eh_nr_in_flight(struct ata_port *ap)
|
||||
{
|
||||
struct ata_queued_cmd *qc;
|
||||
unsigned int tag;
|
||||
int nr = 0;
|
||||
unsigned int nr = 0;
|
||||
|
||||
/* count only non-internal commands */
|
||||
ata_qc_for_each(ap, qc, tag) {
|
||||
@@ -821,7 +821,7 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t)
|
||||
{
|
||||
struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
|
||||
unsigned long flags;
|
||||
int cnt;
|
||||
unsigned int cnt;
|
||||
|
||||
spin_lock_irqsave(ap->lock, flags);
|
||||
|
||||
@@ -870,7 +870,7 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t)
|
||||
*/
|
||||
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
|
||||
{
|
||||
int cnt;
|
||||
unsigned int cnt;
|
||||
|
||||
/* already scheduled? */
|
||||
if (ap->pflags & ATA_PFLAG_EH_PENDING)
|
||||
|
||||
@@ -539,13 +539,13 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int ata_ioc32(struct ata_port *ap)
|
||||
static bool ata_ioc32(struct ata_port *ap)
|
||||
{
|
||||
if (ap->flags & ATA_FLAG_PIO_DMA)
|
||||
return 1;
|
||||
return true;
|
||||
if (ap->pflags & ATA_PFLAG_PIO32)
|
||||
return 1;
|
||||
return 0;
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
* and various sysfs attributes to expose these topologies and management
|
||||
* interfaces to user-space.
|
||||
*
|
||||
* There are 3 objects defined in in this class:
|
||||
* There are 3 objects defined in this class:
|
||||
* - ata_port
|
||||
* - ata_link
|
||||
* - ata_device
|
||||
|
||||
@@ -52,11 +52,7 @@ extern u64 ata_tf_read_block(const struct ata_taskfile *tf,
|
||||
extern unsigned ata_exec_internal(struct ata_device *dev,
|
||||
struct ata_taskfile *tf, const u8 *cdb,
|
||||
int dma_dir, void *buf, unsigned int buflen,
|
||||
unsigned long timeout);
|
||||
extern unsigned ata_exec_internal_sg(struct ata_device *dev,
|
||||
struct ata_taskfile *tf, const u8 *cdb,
|
||||
int dma_dir, struct scatterlist *sg,
|
||||
unsigned int n_elem, unsigned long timeout);
|
||||
unsigned int timeout);
|
||||
extern int ata_wait_ready(struct ata_link *link, unsigned long deadline,
|
||||
int (*check_ready)(struct ata_link *link));
|
||||
extern int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class,
|
||||
@@ -136,7 +132,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev);
|
||||
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
|
||||
|
||||
/* libata-eh.c */
|
||||
extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
|
||||
extern unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
|
||||
extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
|
||||
extern void ata_eh_acquire(struct ata_port *ap);
|
||||
extern void ata_eh_release(struct ata_port *ap);
|
||||
|
||||
@@ -97,7 +97,7 @@ static unsigned long pacpi_discover_modes(struct ata_port *ap, struct ata_device
|
||||
* this case the list of discovered valid modes obtained by ACPI probing
|
||||
*/
|
||||
|
||||
static unsigned long pacpi_mode_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int pacpi_mode_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
struct pata_acpi *acpi = adev->link->ap->private_data;
|
||||
return mask & acpi->mask[adev->devno];
|
||||
|
||||
@@ -115,7 +115,7 @@ static int ali_c2_cable_detect(struct ata_port *ap)
|
||||
* fix that later on. Also ensure we do not do UDMA on WDC drives
|
||||
*/
|
||||
|
||||
static unsigned long ali_20_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int ali_20_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
char model_num[ATA_ID_PROD_LEN + 1];
|
||||
/* No DMA on anything but a disk for now */
|
||||
|
||||
@@ -264,8 +264,8 @@ static void amd133_set_dmamode(struct ata_port *ap, struct ata_device *adev)
|
||||
* cached during driver attach and are consulted to select transfer
|
||||
* mode.
|
||||
*/
|
||||
static unsigned long nv_mode_filter(struct ata_device *dev,
|
||||
unsigned long xfer_mask)
|
||||
static unsigned int nv_mode_filter(struct ata_device *dev,
|
||||
unsigned int xfer_mask)
|
||||
{
|
||||
static const unsigned int udma_mask_map[] =
|
||||
{ ATA_UDMA2, ATA_UDMA1, ATA_UDMA0, 0,
|
||||
@@ -274,7 +274,7 @@ static unsigned long nv_mode_filter(struct ata_device *dev,
|
||||
char acpi_str[32] = "";
|
||||
u32 saved_udma, udma;
|
||||
const struct ata_acpi_gtm *gtm;
|
||||
unsigned long bios_limit = 0, acpi_limit = 0, limit;
|
||||
unsigned int bios_limit = 0, acpi_limit = 0, limit;
|
||||
|
||||
/* find out what BIOS configured */
|
||||
udma = saved_udma = (unsigned long)ap->host->private_data;
|
||||
@@ -310,10 +310,10 @@ static unsigned long nv_mode_filter(struct ata_device *dev,
|
||||
cable detection result */
|
||||
limit |= ata_pack_xfermask(ATA_PIO4, ATA_MWDMA2, ATA_UDMA2);
|
||||
|
||||
ata_port_dbg(ap, "nv_mode_filter: 0x%lx&0x%lx->0x%lx, "
|
||||
"BIOS=0x%lx (0x%x) ACPI=0x%lx%s\n",
|
||||
xfer_mask, limit, xfer_mask & limit, bios_limit,
|
||||
saved_udma, acpi_limit, acpi_str);
|
||||
ata_port_dbg(ap,
|
||||
"nv_mode_filter: 0x%x&0x%x->0x%x, BIOS=0x%x (0x%x) ACPI=0x%x%s\n",
|
||||
xfer_mask, limit, xfer_mask & limit, bios_limit,
|
||||
saved_udma, acpi_limit, acpi_str);
|
||||
|
||||
return xfer_mask & limit;
|
||||
}
|
||||
|
||||
@@ -194,7 +194,7 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
|
||||
* Block UDMA on devices that cause trouble with this controller.
|
||||
*/
|
||||
|
||||
static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int hpt366_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (adev->class == ATA_DEV_ATA) {
|
||||
if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include <linux/libata.h>
|
||||
|
||||
#define DRV_NAME "pata_hpt37x"
|
||||
#define DRV_VERSION "0.6.25"
|
||||
#define DRV_VERSION "0.6.30"
|
||||
|
||||
struct hpt_clock {
|
||||
u8 xfer_speed;
|
||||
@@ -278,7 +278,7 @@ static const char * const bad_ata100_5[] = {
|
||||
* Block UDMA on devices that cause trouble with this controller.
|
||||
*/
|
||||
|
||||
static unsigned long hpt370_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int hpt370_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (adev->class == ATA_DEV_ATA) {
|
||||
if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
|
||||
@@ -297,7 +297,7 @@ static unsigned long hpt370_filter(struct ata_device *adev, unsigned long mask)
|
||||
* Block UDMA on devices that cause trouble with this controller.
|
||||
*/
|
||||
|
||||
static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int hpt370a_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (adev->class == ATA_DEV_ATA) {
|
||||
if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
|
||||
@@ -314,7 +314,7 @@ static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
|
||||
* The Marvell bridge chips used on the HighPoint SATA cards do not seem
|
||||
* to support the UltraDMA modes 1, 2, and 3 as well as any MWDMA modes...
|
||||
*/
|
||||
static unsigned long hpt372_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int hpt372_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (ata_id_is_sata(adev->id))
|
||||
mask &= ~((0xE << ATA_SHIFT_UDMA) | ATA_MASK_MWDMA);
|
||||
@@ -592,21 +592,19 @@ static struct ata_port_operations hpt374_fn1_port_ops = {
|
||||
|
||||
/**
|
||||
* hpt37x_clock_slot - Turn timing to PC clock entry
|
||||
* @freq: Reported frequency timing
|
||||
* @base: Base timing
|
||||
* @freq: Reported frequency in MHz
|
||||
*
|
||||
* Turn the timing data intoa clock slot (0 for 33, 1 for 40, 2 for 50
|
||||
* Turn the timing data into a clock slot (0 for 33, 1 for 40, 2 for 50
|
||||
* and 3 for 66Mhz)
|
||||
*/
|
||||
|
||||
static int hpt37x_clock_slot(unsigned int freq, unsigned int base)
|
||||
static int hpt37x_clock_slot(unsigned int freq)
|
||||
{
|
||||
unsigned int f = (base * freq) / 192; /* Mhz */
|
||||
if (f < 40)
|
||||
if (freq < 40)
|
||||
return 0; /* 33Mhz slot */
|
||||
if (f < 45)
|
||||
if (freq < 45)
|
||||
return 1; /* 40Mhz slot */
|
||||
if (f < 55)
|
||||
if (freq < 55)
|
||||
return 2; /* 50Mhz slot */
|
||||
return 3; /* 60Mhz slot */
|
||||
}
|
||||
@@ -646,24 +644,57 @@ static int hpt37x_calibrate_dpll(struct pci_dev *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 hpt374_read_freq(struct pci_dev *pdev)
|
||||
static int hpt37x_pci_clock(struct pci_dev *pdev, unsigned int base)
|
||||
{
|
||||
u32 freq;
|
||||
unsigned long io_base = pci_resource_start(pdev, 4);
|
||||
unsigned int freq;
|
||||
u32 fcnt;
|
||||
|
||||
if (PCI_FUNC(pdev->devfn) & 1) {
|
||||
struct pci_dev *pdev_0;
|
||||
/*
|
||||
* Some devices do not let this value be accessed via PCI space
|
||||
* according to the old driver. In addition we must use the value
|
||||
* from FN 0 on the HPT374.
|
||||
*/
|
||||
if (pdev->device == PCI_DEVICE_ID_TTI_HPT374 &&
|
||||
(PCI_FUNC(pdev->devfn) & 1)) {
|
||||
struct pci_dev *pdev_fn0;
|
||||
|
||||
pdev_0 = pci_get_slot(pdev->bus, pdev->devfn - 1);
|
||||
/* Someone hot plugged the controller on us ? */
|
||||
if (pdev_0 == NULL)
|
||||
pdev_fn0 = pci_get_slot(pdev->bus, pdev->devfn - 1);
|
||||
/* Someone hot plugged the controller on us? */
|
||||
if (!pdev_fn0)
|
||||
return 0;
|
||||
io_base = pci_resource_start(pdev_0, 4);
|
||||
freq = inl(io_base + 0x90);
|
||||
pci_dev_put(pdev_0);
|
||||
} else
|
||||
freq = inl(io_base + 0x90);
|
||||
return freq;
|
||||
fcnt = inl(pci_resource_start(pdev_fn0, 4) + 0x90);
|
||||
pci_dev_put(pdev_fn0);
|
||||
} else {
|
||||
fcnt = inl(pci_resource_start(pdev, 4) + 0x90);
|
||||
}
|
||||
|
||||
if ((fcnt >> 12) != 0xABCDE) {
|
||||
u32 total = 0;
|
||||
int i;
|
||||
u16 sr;
|
||||
|
||||
dev_warn(&pdev->dev, "BIOS clock data not set\n");
|
||||
|
||||
/* This is the process the HPT371 BIOS is reported to use */
|
||||
for (i = 0; i < 128; i++) {
|
||||
pci_read_config_word(pdev, 0x78, &sr);
|
||||
total += sr & 0x1FF;
|
||||
udelay(15);
|
||||
}
|
||||
fcnt = total / 128;
|
||||
}
|
||||
fcnt &= 0x1FF;
|
||||
|
||||
freq = (fcnt * base) / 192; /* in MHz */
|
||||
|
||||
/* Clamp to bands */
|
||||
if (freq < 40)
|
||||
return 33;
|
||||
if (freq < 45)
|
||||
return 40;
|
||||
if (freq < 55)
|
||||
return 50;
|
||||
return 66;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -770,7 +801,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
u8 rev = dev->revision;
|
||||
u8 irqmask;
|
||||
u8 mcr1;
|
||||
u32 freq;
|
||||
unsigned int freq; /* MHz */
|
||||
int prefer_dpll = 1;
|
||||
|
||||
unsigned long iobase = pci_resource_start(dev, 4);
|
||||
@@ -896,42 +927,16 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
if (chip_table == &hpt372a)
|
||||
outb(0x0e, iobase + 0x9c);
|
||||
|
||||
/*
|
||||
* Some devices do not let this value be accessed via PCI space
|
||||
* according to the old driver. In addition we must use the value
|
||||
* from FN 0 on the HPT374.
|
||||
*/
|
||||
|
||||
if (chip_table == &hpt374) {
|
||||
freq = hpt374_read_freq(dev);
|
||||
if (freq == 0)
|
||||
return -ENODEV;
|
||||
} else
|
||||
freq = inl(iobase + 0x90);
|
||||
|
||||
if ((freq >> 12) != 0xABCDE) {
|
||||
int i;
|
||||
u16 sr;
|
||||
u32 total = 0;
|
||||
|
||||
dev_warn(&dev->dev, "BIOS has not set timing clocks\n");
|
||||
|
||||
/* This is the process the HPT371 BIOS is reported to use */
|
||||
for (i = 0; i < 128; i++) {
|
||||
pci_read_config_word(dev, 0x78, &sr);
|
||||
total += sr & 0x1FF;
|
||||
udelay(15);
|
||||
}
|
||||
freq = total / 128;
|
||||
}
|
||||
freq &= 0x1FF;
|
||||
freq = hpt37x_pci_clock(dev, chip_table->base);
|
||||
if (!freq)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* Turn the frequency check into a band and then find a timing
|
||||
* table to match it.
|
||||
*/
|
||||
|
||||
clock_slot = hpt37x_clock_slot(freq, chip_table->base);
|
||||
clock_slot = hpt37x_clock_slot(freq);
|
||||
if (chip_table->clocks[clock_slot] == NULL || prefer_dpll) {
|
||||
/*
|
||||
* We need to try PLL mode instead
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#include <linux/libata.h>
|
||||
|
||||
#define DRV_NAME "pata_hpt3x2n"
|
||||
#define DRV_VERSION "0.3.18"
|
||||
#define DRV_VERSION "0.3.19"
|
||||
|
||||
enum {
|
||||
PCI66 = (1 << 1),
|
||||
@@ -113,7 +113,7 @@ static u32 hpt3x2n_find_mode(struct ata_port *ap, int speed)
|
||||
* The Marvell bridge chips used on the HighPoint SATA cards do not seem
|
||||
* to support the UltraDMA modes 1, 2, and 3 as well as any MWDMA modes...
|
||||
*/
|
||||
static unsigned long hpt372n_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int hpt372n_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (ata_id_is_sata(adev->id))
|
||||
mask &= ~((0xE << ATA_SHIFT_UDMA) | ATA_MASK_MWDMA);
|
||||
@@ -403,17 +403,20 @@ static int hpt3xn_calibrate_dpll(struct pci_dev *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hpt3x2n_pci_clock(struct pci_dev *pdev)
|
||||
static int hpt3x2n_pci_clock(struct pci_dev *pdev, unsigned int base)
|
||||
{
|
||||
unsigned long freq;
|
||||
unsigned int freq;
|
||||
u32 fcnt;
|
||||
unsigned long iobase = pci_resource_start(pdev, 4);
|
||||
|
||||
fcnt = inl(iobase + 0x90); /* Not PCI readable for some chips */
|
||||
/*
|
||||
* Some devices do not let this value be accessed via PCI space
|
||||
* according to the old driver.
|
||||
*/
|
||||
fcnt = inl(pci_resource_start(pdev, 4) + 0x90);
|
||||
if ((fcnt >> 12) != 0xABCDE) {
|
||||
u32 total = 0;
|
||||
int i;
|
||||
u16 sr;
|
||||
u32 total = 0;
|
||||
|
||||
dev_warn(&pdev->dev, "BIOS clock data not set\n");
|
||||
|
||||
@@ -427,7 +430,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev)
|
||||
}
|
||||
fcnt &= 0x1FF;
|
||||
|
||||
freq = (fcnt * 77) / 192;
|
||||
freq = (fcnt * base) / 192; /* in MHz */
|
||||
|
||||
/* Clamp to bands */
|
||||
if (freq < 40)
|
||||
@@ -559,7 +562,7 @@ hpt372n:
|
||||
* 50 for UDMA100. Right now we always use 66
|
||||
*/
|
||||
|
||||
pci_mhz = hpt3x2n_pci_clock(dev);
|
||||
pci_mhz = hpt3x2n_pci_clock(dev, 77);
|
||||
|
||||
f_low = (pci_mhz * 48) / 66; /* PCI Mhz for 66Mhz DPLL */
|
||||
f_high = f_low + 2; /* Tolerance */
|
||||
|
||||
@@ -1028,7 +1028,7 @@ static void pmac_macio_calc_timing_masks(struct pata_macio_priv *priv,
|
||||
}
|
||||
i++;
|
||||
}
|
||||
dev_dbg(priv->dev, "Supported masks: PIO=%lx, MWDMA=%lx, UDMA=%lx\n",
|
||||
dev_dbg(priv->dev, "Supported masks: PIO=%x, MWDMA=%x, UDMA=%x\n",
|
||||
pinfo->pio_mask, pinfo->mwdma_mask, pinfo->udma_mask);
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ static int pdc2027x_prereset(struct ata_link *link, unsigned long deadline);
|
||||
static void pdc2027x_set_piomode(struct ata_port *ap, struct ata_device *adev);
|
||||
static void pdc2027x_set_dmamode(struct ata_port *ap, struct ata_device *adev);
|
||||
static int pdc2027x_check_atapi_dma(struct ata_queued_cmd *qc);
|
||||
static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long mask);
|
||||
static unsigned int pdc2027x_mode_filter(struct ata_device *adev, unsigned int mask);
|
||||
static int pdc2027x_cable_detect(struct ata_port *ap);
|
||||
static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed);
|
||||
|
||||
@@ -251,7 +251,7 @@ static int pdc2027x_prereset(struct ata_link *link, unsigned long deadline)
|
||||
* Block UDMA on devices that cause trouble with this controller.
|
||||
*/
|
||||
|
||||
static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int pdc2027x_mode_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
unsigned char model_num[ATA_ID_PROD_LEN + 1];
|
||||
struct ata_device *pair = ata_dev_pair(adev);
|
||||
|
||||
@@ -150,7 +150,7 @@ static u8 serverworks_is_csb(struct pci_dev *pdev)
|
||||
* bug we hit.
|
||||
*/
|
||||
|
||||
static unsigned long serverworks_osb4_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int serverworks_osb4_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
if (adev->class == ATA_DEV_ATA)
|
||||
mask &= ~ATA_MASK_UDMA;
|
||||
@@ -166,7 +166,7 @@ static unsigned long serverworks_osb4_filter(struct ata_device *adev, unsigned l
|
||||
* Check the blacklist and disable UDMA5 if matched
|
||||
*/
|
||||
|
||||
static unsigned long serverworks_csb_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int serverworks_csb_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
const char *p;
|
||||
char model_num[ATA_ID_PROD_LEN + 1];
|
||||
|
||||
@@ -525,7 +525,7 @@ static void sis_133_set_dmamode (struct ata_port *ap, struct ata_device *adev)
|
||||
* Block UDMA6 on devices that do not support it.
|
||||
*/
|
||||
|
||||
static unsigned long sis_133_mode_filter(struct ata_device *adev, unsigned long mask)
|
||||
static unsigned int sis_133_mode_filter(struct ata_device *adev, unsigned int mask)
|
||||
{
|
||||
struct ata_port *ap = adev->link->ap;
|
||||
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
|
||||
|
||||
@@ -352,7 +352,7 @@ static void via_set_dmamode(struct ata_port *ap, struct ata_device *adev)
|
||||
* one breed of Transcend SSD. Return the updated mask.
|
||||
*/
|
||||
|
||||
static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
|
||||
static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask)
|
||||
{
|
||||
struct ata_host *host = dev->link->ap->host;
|
||||
const struct via_isa_bridge *config = host->private_data;
|
||||
|
||||
@@ -4057,7 +4057,7 @@ static int mv_platform_probe(struct platform_device *pdev)
|
||||
/*
|
||||
* Simple resource validation ..
|
||||
*/
|
||||
if (unlikely(pdev->num_resources != 2)) {
|
||||
if (unlikely(pdev->num_resources != 1)) {
|
||||
dev_err(&pdev->dev, "invalid number of resources\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -549,7 +549,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
|
||||
goto err_dmabuf;
|
||||
}
|
||||
|
||||
file->f_mode |= FMODE_LSEEK;
|
||||
dmabuf->file = file;
|
||||
|
||||
mutex_init(&dmabuf->lock);
|
||||
|
||||
@@ -2,18 +2,6 @@
|
||||
menu "EFI (Extensible Firmware Interface) Support"
|
||||
depends on EFI
|
||||
|
||||
config EFI_VARS
|
||||
tristate "EFI Variable Support via sysfs"
|
||||
depends on EFI && (X86 || IA64)
|
||||
default n
|
||||
help
|
||||
If you say Y here, you are able to get EFI (Extensible Firmware
|
||||
Interface) variable information via sysfs. You may read,
|
||||
write, create, and destroy EFI variables through this interface.
|
||||
Note that this driver is only retained for compatibility with
|
||||
legacy users: new users should use the efivarfs filesystem
|
||||
instead.
|
||||
|
||||
config EFI_ESRT
|
||||
bool
|
||||
depends on EFI && !IA64
|
||||
@@ -22,6 +10,7 @@ config EFI_ESRT
|
||||
config EFI_VARS_PSTORE
|
||||
tristate "Register efivars backend for pstore"
|
||||
depends on PSTORE
|
||||
select UCS2_STRING
|
||||
default y
|
||||
help
|
||||
Say Y here to enable use efivars as a backend to pstore. This
|
||||
@@ -145,6 +134,7 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
|
||||
|
||||
config EFI_BOOTLOADER_CONTROL
|
||||
tristate "EFI Bootloader Control"
|
||||
select UCS2_STRING
|
||||
default n
|
||||
help
|
||||
This module installs a reboot hook, such that if reboot() is
|
||||
|
||||
@@ -17,7 +17,6 @@ ifneq ($(CONFIG_EFI_CAPSULE_LOADER),)
|
||||
obj-$(CONFIG_EFI) += capsule.o
|
||||
endif
|
||||
obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o
|
||||
obj-$(CONFIG_EFI_VARS) += efivars.o
|
||||
obj-$(CONFIG_EFI_ESRT) += esrt.o
|
||||
obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
|
||||
obj-$(CONFIG_UEFI_CPER) += cper.o
|
||||
|
||||
@@ -240,6 +240,7 @@ void __init efi_init(void)
|
||||
* And now, memblock is fully populated, it is time to do capping.
|
||||
*/
|
||||
early_init_dt_check_for_usable_mem_range();
|
||||
efi_find_mirror();
|
||||
efi_esrt_init();
|
||||
efi_mokvar_table_init();
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/ucs2_string.h>
|
||||
|
||||
MODULE_IMPORT_NS(EFIVAR);
|
||||
|
||||
#define DUMP_NAME_LEN 66
|
||||
|
||||
#define EFIVARS_DATA_SIZE_MAX 1024
|
||||
@@ -20,18 +22,25 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
|
||||
EFI_VARIABLE_BOOTSERVICE_ACCESS | \
|
||||
EFI_VARIABLE_RUNTIME_ACCESS)
|
||||
|
||||
static LIST_HEAD(efi_pstore_list);
|
||||
static DECLARE_WORK(efivar_work, NULL);
|
||||
|
||||
static int efi_pstore_open(struct pstore_info *psi)
|
||||
{
|
||||
psi->data = NULL;
|
||||
int err;
|
||||
|
||||
err = efivar_lock();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
psi->data = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
|
||||
if (!psi->data)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int efi_pstore_close(struct pstore_info *psi)
|
||||
{
|
||||
psi->data = NULL;
|
||||
efivar_unlock();
|
||||
kfree(psi->data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -40,22 +49,17 @@ static inline u64 generic_id(u64 timestamp, unsigned int part, int count)
|
||||
return (timestamp * 100 + part) * 1000 + count;
|
||||
}
|
||||
|
||||
static int efi_pstore_read_func(struct efivar_entry *entry,
|
||||
struct pstore_record *record)
|
||||
static int efi_pstore_read_func(struct pstore_record *record,
|
||||
efi_char16_t *varname)
|
||||
{
|
||||
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
|
||||
unsigned long wlen, size = EFIVARS_DATA_SIZE_MAX;
|
||||
char name[DUMP_NAME_LEN], data_type;
|
||||
int i;
|
||||
efi_status_t status;
|
||||
int cnt;
|
||||
unsigned int part;
|
||||
unsigned long size;
|
||||
u64 time;
|
||||
|
||||
if (efi_guidcmp(entry->var.VendorGuid, vendor))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < DUMP_NAME_LEN; i++)
|
||||
name[i] = entry->var.VariableName[i];
|
||||
ucs2_as_utf8(name, varname, DUMP_NAME_LEN);
|
||||
|
||||
if (sscanf(name, "dump-type%u-%u-%d-%llu-%c",
|
||||
&record->type, &part, &cnt, &time, &data_type) == 5) {
|
||||
@@ -95,161 +99,75 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
|
||||
} else
|
||||
return 0;
|
||||
|
||||
entry->var.DataSize = 1024;
|
||||
__efivar_entry_get(entry, &entry->var.Attributes,
|
||||
&entry->var.DataSize, entry->var.Data);
|
||||
size = entry->var.DataSize;
|
||||
memcpy(record->buf, entry->var.Data,
|
||||
(size_t)min_t(unsigned long, EFIVARS_DATA_SIZE_MAX, size));
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* efi_pstore_scan_sysfs_enter
|
||||
* @pos: scanning entry
|
||||
* @next: next entry
|
||||
* @head: list head
|
||||
*/
|
||||
static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
|
||||
struct efivar_entry *next,
|
||||
struct list_head *head)
|
||||
{
|
||||
pos->scanning = true;
|
||||
if (&next->list != head)
|
||||
next->scanning = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* __efi_pstore_scan_sysfs_exit
|
||||
* @entry: deleting entry
|
||||
* @turn_off_scanning: Check if a scanning flag should be turned off
|
||||
*/
|
||||
static inline int __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
|
||||
bool turn_off_scanning)
|
||||
{
|
||||
if (entry->deleting) {
|
||||
list_del(&entry->list);
|
||||
efivar_entry_iter_end();
|
||||
kfree(entry);
|
||||
if (efivar_entry_iter_begin())
|
||||
return -EINTR;
|
||||
} else if (turn_off_scanning)
|
||||
entry->scanning = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* efi_pstore_scan_sysfs_exit
|
||||
* @pos: scanning entry
|
||||
* @next: next entry
|
||||
* @head: list head
|
||||
* @stop: a flag checking if scanning will stop
|
||||
*/
|
||||
static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
|
||||
struct efivar_entry *next,
|
||||
struct list_head *head, bool stop)
|
||||
{
|
||||
int ret = __efi_pstore_scan_sysfs_exit(pos, true);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (stop)
|
||||
ret = __efi_pstore_scan_sysfs_exit(next, &next->list != head);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* efi_pstore_sysfs_entry_iter
|
||||
*
|
||||
* @record: pstore record to pass to callback
|
||||
*
|
||||
* You MUST call efivar_entry_iter_begin() before this function, and
|
||||
* efivar_entry_iter_end() afterwards.
|
||||
*
|
||||
*/
|
||||
static int efi_pstore_sysfs_entry_iter(struct pstore_record *record)
|
||||
{
|
||||
struct efivar_entry **pos = (struct efivar_entry **)&record->psi->data;
|
||||
struct efivar_entry *entry, *n;
|
||||
struct list_head *head = &efi_pstore_list;
|
||||
int size = 0;
|
||||
int ret;
|
||||
|
||||
if (!*pos) {
|
||||
list_for_each_entry_safe(entry, n, head, list) {
|
||||
efi_pstore_scan_sysfs_enter(entry, n, head);
|
||||
|
||||
size = efi_pstore_read_func(entry, record);
|
||||
ret = efi_pstore_scan_sysfs_exit(entry, n, head,
|
||||
size < 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (size)
|
||||
break;
|
||||
}
|
||||
*pos = n;
|
||||
return size;
|
||||
}
|
||||
|
||||
list_for_each_entry_safe_from((*pos), n, head, list) {
|
||||
efi_pstore_scan_sysfs_enter((*pos), n, head);
|
||||
|
||||
size = efi_pstore_read_func((*pos), record);
|
||||
ret = efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (size)
|
||||
break;
|
||||
}
|
||||
*pos = n;
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* efi_pstore_read
|
||||
*
|
||||
* This function returns a size of NVRAM entry logged via efi_pstore_write().
|
||||
* The meaning and behavior of efi_pstore/pstore are as below.
|
||||
*
|
||||
* size > 0: Got data of an entry logged via efi_pstore_write() successfully,
|
||||
* and pstore filesystem will continue reading subsequent entries.
|
||||
* size == 0: Entry was not logged via efi_pstore_write(),
|
||||
* and efi_pstore driver will continue reading subsequent entries.
|
||||
* size < 0: Failed to get data of entry logging via efi_pstore_write(),
|
||||
* and pstore will stop reading entry.
|
||||
*/
|
||||
static ssize_t efi_pstore_read(struct pstore_record *record)
|
||||
{
|
||||
ssize_t size;
|
||||
|
||||
record->buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
|
||||
record->buf = kmalloc(size, GFP_KERNEL);
|
||||
if (!record->buf)
|
||||
return -ENOMEM;
|
||||
|
||||
if (efivar_entry_iter_begin()) {
|
||||
size = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
size = efi_pstore_sysfs_entry_iter(record);
|
||||
efivar_entry_iter_end();
|
||||
|
||||
out:
|
||||
if (size <= 0) {
|
||||
status = efivar_get_variable(varname, &LINUX_EFI_CRASH_GUID, NULL,
|
||||
&size, record->buf);
|
||||
if (status != EFI_SUCCESS) {
|
||||
kfree(record->buf);
|
||||
record->buf = NULL;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Store the name of the variable in the pstore_record priv field, so
|
||||
* we can reuse it later if we need to delete the EFI variable from the
|
||||
* variable store.
|
||||
*/
|
||||
wlen = (ucs2_strnlen(varname, DUMP_NAME_LEN) + 1) * sizeof(efi_char16_t);
|
||||
record->priv = kmemdup(varname, wlen, GFP_KERNEL);
|
||||
if (!record->priv) {
|
||||
kfree(record->buf);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t efi_pstore_read(struct pstore_record *record)
|
||||
{
|
||||
efi_char16_t *varname = record->psi->data;
|
||||
efi_guid_t guid = LINUX_EFI_CRASH_GUID;
|
||||
unsigned long varname_size;
|
||||
efi_status_t status;
|
||||
|
||||
for (;;) {
|
||||
varname_size = EFIVARS_DATA_SIZE_MAX;
|
||||
|
||||
/*
|
||||
* If this is the first read() call in the pstore enumeration,
|
||||
* varname will be the empty string, and the GetNextVariable()
|
||||
* runtime service call will return the first EFI variable in
|
||||
* its own enumeration order, ignoring the guid argument.
|
||||
*
|
||||
* Subsequent calls to GetNextVariable() must pass the name and
|
||||
* guid values returned by the previous call, which is why we
|
||||
* store varname in record->psi->data. Given that we only
|
||||
* enumerate variables with the efi-pstore GUID, there is no
|
||||
* need to record the guid return value.
|
||||
*/
|
||||
status = efivar_get_next_variable(&varname_size, varname, &guid);
|
||||
if (status == EFI_NOT_FOUND)
|
||||
return 0;
|
||||
|
||||
if (status != EFI_SUCCESS)
|
||||
return -EIO;
|
||||
|
||||
/* skip variables that don't concern us */
|
||||
if (efi_guidcmp(guid, LINUX_EFI_CRASH_GUID))
|
||||
continue;
|
||||
|
||||
return efi_pstore_read_func(record, varname);
|
||||
}
|
||||
}
|
||||
|
||||
static int efi_pstore_write(struct pstore_record *record)
|
||||
{
|
||||
char name[DUMP_NAME_LEN];
|
||||
efi_char16_t efi_name[DUMP_NAME_LEN];
|
||||
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
|
||||
int i, ret = 0;
|
||||
efi_status_t status;
|
||||
int i;
|
||||
|
||||
record->id = generic_id(record->time.tv_sec, record->part,
|
||||
record->count);
|
||||
@@ -265,88 +183,26 @@ static int efi_pstore_write(struct pstore_record *record)
|
||||
for (i = 0; i < DUMP_NAME_LEN; i++)
|
||||
efi_name[i] = name[i];
|
||||
|
||||
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
|
||||
false, record->size, record->psi->buf);
|
||||
|
||||
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
|
||||
if (!schedule_work(&efivar_work))
|
||||
module_put(THIS_MODULE);
|
||||
|
||||
return ret;
|
||||
if (efivar_trylock())
|
||||
return -EBUSY;
|
||||
status = efivar_set_variable_locked(efi_name, &LINUX_EFI_CRASH_GUID,
|
||||
PSTORE_EFI_ATTRIBUTES,
|
||||
record->size, record->psi->buf,
|
||||
true);
|
||||
efivar_unlock();
|
||||
return status == EFI_SUCCESS ? 0 : -EIO;
|
||||
};
|
||||
|
||||
/*
|
||||
* Clean up an entry with the same name
|
||||
*/
|
||||
static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
|
||||
{
|
||||
efi_char16_t *efi_name = data;
|
||||
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
|
||||
unsigned long ucs2_len = ucs2_strlen(efi_name);
|
||||
|
||||
if (efi_guidcmp(entry->var.VendorGuid, vendor))
|
||||
return 0;
|
||||
|
||||
if (ucs2_strncmp(entry->var.VariableName, efi_name, (size_t)ucs2_len))
|
||||
return 0;
|
||||
|
||||
if (entry->scanning) {
|
||||
/*
|
||||
* Skip deletion because this entry will be deleted
|
||||
* after scanning is completed.
|
||||
*/
|
||||
entry->deleting = true;
|
||||
} else
|
||||
list_del(&entry->list);
|
||||
|
||||
/* found */
|
||||
__efivar_entry_delete(entry);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int efi_pstore_erase_name(const char *name)
|
||||
{
|
||||
struct efivar_entry *entry = NULL;
|
||||
efi_char16_t efi_name[DUMP_NAME_LEN];
|
||||
int found, i;
|
||||
|
||||
for (i = 0; i < DUMP_NAME_LEN; i++) {
|
||||
efi_name[i] = name[i];
|
||||
if (name[i] == '\0')
|
||||
break;
|
||||
}
|
||||
|
||||
if (efivar_entry_iter_begin())
|
||||
return -EINTR;
|
||||
|
||||
found = __efivar_entry_iter(efi_pstore_erase_func, &efi_pstore_list,
|
||||
efi_name, &entry);
|
||||
efivar_entry_iter_end();
|
||||
|
||||
if (found && !entry->scanning)
|
||||
kfree(entry);
|
||||
|
||||
return found ? 0 : -ENOENT;
|
||||
}
|
||||
|
||||
static int efi_pstore_erase(struct pstore_record *record)
|
||||
{
|
||||
char name[DUMP_NAME_LEN];
|
||||
int ret;
|
||||
efi_status_t status;
|
||||
|
||||
snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lld",
|
||||
record->type, record->part, record->count,
|
||||
(long long)record->time.tv_sec);
|
||||
ret = efi_pstore_erase_name(name);
|
||||
if (ret != -ENOENT)
|
||||
return ret;
|
||||
status = efivar_set_variable(record->priv, &LINUX_EFI_CRASH_GUID,
|
||||
PSTORE_EFI_ATTRIBUTES, 0, NULL);
|
||||
|
||||
snprintf(name, sizeof(name), "dump-type%u-%u-%lld",
|
||||
record->type, record->part, (long long)record->time.tv_sec);
|
||||
ret = efi_pstore_erase_name(name);
|
||||
|
||||
return ret;
|
||||
if (status != EFI_SUCCESS && status != EFI_NOT_FOUND)
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pstore_info efi_pstore_info = {
|
||||
@@ -360,77 +216,14 @@ static struct pstore_info efi_pstore_info = {
|
||||
.erase = efi_pstore_erase,
|
||||
};
|
||||
|
||||
static int efi_pstore_callback(efi_char16_t *name, efi_guid_t vendor,
|
||||
unsigned long name_size, void *data)
|
||||
{
|
||||
struct efivar_entry *entry;
|
||||
int ret;
|
||||
|
||||
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(entry->var.VariableName, name, name_size);
|
||||
entry->var.VendorGuid = vendor;
|
||||
|
||||
ret = efivar_entry_add(entry, &efi_pstore_list);
|
||||
if (ret)
|
||||
kfree(entry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int efi_pstore_update_entry(efi_char16_t *name, efi_guid_t vendor,
|
||||
unsigned long name_size, void *data)
|
||||
{
|
||||
struct efivar_entry *entry = data;
|
||||
|
||||
if (efivar_entry_find(name, vendor, &efi_pstore_list, false))
|
||||
return 0;
|
||||
|
||||
memcpy(entry->var.VariableName, name, name_size);
|
||||
memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void efi_pstore_update_entries(struct work_struct *work)
|
||||
{
|
||||
struct efivar_entry *entry;
|
||||
int err;
|
||||
|
||||
/* Add new sysfs entries */
|
||||
while (1) {
|
||||
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
err = efivar_init(efi_pstore_update_entry, entry,
|
||||
false, &efi_pstore_list);
|
||||
if (!err)
|
||||
break;
|
||||
|
||||
efivar_entry_add(entry, &efi_pstore_list);
|
||||
}
|
||||
|
||||
kfree(entry);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static __init int efivars_pstore_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!efivars_kobject() || !efivar_supports_writes())
|
||||
if (!efivar_supports_writes())
|
||||
return 0;
|
||||
|
||||
if (efivars_pstore_disable)
|
||||
return 0;
|
||||
|
||||
ret = efivar_init(efi_pstore_callback, NULL, true, &efi_pstore_list);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL);
|
||||
if (!efi_pstore_info.buf)
|
||||
return -ENOMEM;
|
||||
@@ -443,8 +236,6 @@ static __init int efivars_pstore_init(void)
|
||||
efi_pstore_info.bufsize = 0;
|
||||
}
|
||||
|
||||
INIT_WORK(&efivar_work, efi_pstore_update_entries);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -202,7 +202,7 @@ static void generic_ops_unregister(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_EFI_CUSTOM_SSDT_OVERLAYS
|
||||
#define EFIVAR_SSDT_NAME_MAX 16
|
||||
#define EFIVAR_SSDT_NAME_MAX 16UL
|
||||
static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
|
||||
static int __init efivar_ssdt_setup(char *str)
|
||||
{
|
||||
@@ -219,83 +219,62 @@ static int __init efivar_ssdt_setup(char *str)
|
||||
}
|
||||
__setup("efivar_ssdt=", efivar_ssdt_setup);
|
||||
|
||||
static __init int efivar_ssdt_iter(efi_char16_t *name, efi_guid_t vendor,
|
||||
unsigned long name_size, void *data)
|
||||
{
|
||||
struct efivar_entry *entry;
|
||||
struct list_head *list = data;
|
||||
char utf8_name[EFIVAR_SSDT_NAME_MAX];
|
||||
int limit = min_t(unsigned long, EFIVAR_SSDT_NAME_MAX, name_size);
|
||||
|
||||
ucs2_as_utf8(utf8_name, name, limit - 1);
|
||||
if (strncmp(utf8_name, efivar_ssdt, limit) != 0)
|
||||
return 0;
|
||||
|
||||
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
|
||||
if (!entry)
|
||||
return 0;
|
||||
|
||||
memcpy(entry->var.VariableName, name, name_size);
|
||||
memcpy(&entry->var.VendorGuid, &vendor, sizeof(efi_guid_t));
|
||||
|
||||
efivar_entry_add(entry, list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init int efivar_ssdt_load(void)
|
||||
{
|
||||
LIST_HEAD(entries);
|
||||
struct efivar_entry *entry, *aux;
|
||||
unsigned long size;
|
||||
void *data;
|
||||
int ret;
|
||||
unsigned long name_size = 256;
|
||||
efi_char16_t *name = NULL;
|
||||
efi_status_t status;
|
||||
efi_guid_t guid;
|
||||
|
||||
if (!efivar_ssdt[0])
|
||||
return 0;
|
||||
|
||||
ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
|
||||
name = kzalloc(name_size, GFP_KERNEL);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
list_for_each_entry_safe(entry, aux, &entries, list) {
|
||||
pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt,
|
||||
&entry->var.VendorGuid);
|
||||
for (;;) {
|
||||
char utf8_name[EFIVAR_SSDT_NAME_MAX];
|
||||
unsigned long data_size = 0;
|
||||
void *data;
|
||||
int limit;
|
||||
|
||||
list_del(&entry->list);
|
||||
|
||||
ret = efivar_entry_size(entry, &size);
|
||||
if (ret) {
|
||||
pr_err("failed to get var size\n");
|
||||
goto free_entry;
|
||||
status = efi.get_next_variable(&name_size, name, &guid);
|
||||
if (status == EFI_NOT_FOUND) {
|
||||
break;
|
||||
} else if (status == EFI_BUFFER_TOO_SMALL) {
|
||||
name = krealloc(name, name_size, GFP_KERNEL);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
data = kmalloc(size, GFP_KERNEL);
|
||||
if (!data) {
|
||||
ret = -ENOMEM;
|
||||
goto free_entry;
|
||||
limit = min(EFIVAR_SSDT_NAME_MAX, name_size);
|
||||
ucs2_as_utf8(utf8_name, name, limit - 1);
|
||||
if (strncmp(utf8_name, efivar_ssdt, limit) != 0)
|
||||
continue;
|
||||
|
||||
pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt, &guid);
|
||||
|
||||
status = efi.get_variable(name, &guid, NULL, &data_size, NULL);
|
||||
if (status != EFI_BUFFER_TOO_SMALL || !data_size)
|
||||
return -EIO;
|
||||
|
||||
data = kmalloc(data_size, GFP_KERNEL);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
status = efi.get_variable(name, &guid, NULL, &data_size, data);
|
||||
if (status == EFI_SUCCESS) {
|
||||
acpi_status ret = acpi_load_table(data, NULL);
|
||||
if (ret)
|
||||
pr_err("failed to load table: %u\n", ret);
|
||||
} else {
|
||||
pr_err("failed to get var data: 0x%lx\n", status);
|
||||
}
|
||||
|
||||
ret = efivar_entry_get(entry, NULL, &size, data);
|
||||
if (ret) {
|
||||
pr_err("failed to get var data\n");
|
||||
goto free_data;
|
||||
}
|
||||
|
||||
ret = acpi_load_table(data, NULL);
|
||||
if (ret) {
|
||||
pr_err("failed to load table: %d\n", ret);
|
||||
goto free_data;
|
||||
}
|
||||
|
||||
goto free_entry;
|
||||
|
||||
free_data:
|
||||
kfree(data);
|
||||
|
||||
free_entry:
|
||||
kfree(entry);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static inline int efivar_ssdt_load(void) { return 0; }
|
||||
@@ -446,6 +425,29 @@ err_put:
|
||||
|
||||
subsys_initcall(efisubsys_init);
|
||||
|
||||
void __init efi_find_mirror(void)
|
||||
{
|
||||
efi_memory_desc_t *md;
|
||||
u64 mirror_size = 0, total_size = 0;
|
||||
|
||||
if (!efi_enabled(EFI_MEMMAP))
|
||||
return;
|
||||
|
||||
for_each_efi_memory_desc(md) {
|
||||
unsigned long long start = md->phys_addr;
|
||||
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
|
||||
total_size += size;
|
||||
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
|
||||
memblock_mark_mirror(start, size);
|
||||
mirror_size += size;
|
||||
}
|
||||
}
|
||||
if (mirror_size)
|
||||
pr_info("Memory: %lldM/%lldM mirrored memory\n",
|
||||
mirror_size>>20, total_size>>20);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the efi memory descriptor for a given physical address. Given a
|
||||
* physical address, determine if it exists within an EFI Memory Map entry,
|
||||
@@ -897,6 +899,7 @@ int efi_status_to_err(efi_status_t status)
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(efi_status_to_err);
|
||||
|
||||
static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
|
||||
static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
|
||||
|
||||
@@ -10,69 +10,51 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/ucs2_string.h>
|
||||
|
||||
static void efibc_str_to_str16(const char *str, efi_char16_t *str16)
|
||||
#define MAX_DATA_LEN 512
|
||||
|
||||
static int efibc_set_variable(efi_char16_t *name, efi_char16_t *value,
|
||||
unsigned long len)
|
||||
{
|
||||
size_t i;
|
||||
efi_status_t status;
|
||||
|
||||
for (i = 0; i < strlen(str); i++)
|
||||
str16[i] = str[i];
|
||||
status = efi.set_variable(name, &LINUX_EFI_LOADER_ENTRY_GUID,
|
||||
EFI_VARIABLE_NON_VOLATILE
|
||||
| EFI_VARIABLE_BOOTSERVICE_ACCESS
|
||||
| EFI_VARIABLE_RUNTIME_ACCESS,
|
||||
len * sizeof(efi_char16_t), value);
|
||||
|
||||
str16[i] = '\0';
|
||||
}
|
||||
|
||||
static int efibc_set_variable(const char *name, const char *value)
|
||||
{
|
||||
int ret;
|
||||
efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID;
|
||||
struct efivar_entry *entry;
|
||||
size_t size = (strlen(value) + 1) * sizeof(efi_char16_t);
|
||||
|
||||
if (size > sizeof(entry->var.Data)) {
|
||||
pr_err("value is too large (%zu bytes) for '%s' EFI variable\n", size, name);
|
||||
return -EINVAL;
|
||||
if (status != EFI_SUCCESS) {
|
||||
pr_err("failed to set EFI variable: 0x%lx\n", status);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
|
||||
if (!entry) {
|
||||
pr_err("failed to allocate efivar entry for '%s' EFI variable\n", name);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
efibc_str_to_str16(name, entry->var.VariableName);
|
||||
efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
|
||||
memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
|
||||
|
||||
ret = efivar_entry_set_safe(entry->var.VariableName,
|
||||
entry->var.VendorGuid,
|
||||
EFI_VARIABLE_NON_VOLATILE
|
||||
| EFI_VARIABLE_BOOTSERVICE_ACCESS
|
||||
| EFI_VARIABLE_RUNTIME_ACCESS,
|
||||
false, size, entry->var.Data);
|
||||
|
||||
if (ret)
|
||||
pr_err("failed to set %s EFI variable: 0x%x\n",
|
||||
name, ret);
|
||||
|
||||
kfree(entry);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int efibc_reboot_notifier_call(struct notifier_block *notifier,
|
||||
unsigned long event, void *data)
|
||||
{
|
||||
const char *reason = "shutdown";
|
||||
efi_char16_t *reason = event == SYS_RESTART ? L"reboot"
|
||||
: L"shutdown";
|
||||
const u8 *str = data;
|
||||
efi_char16_t *wdata;
|
||||
unsigned long l;
|
||||
int ret;
|
||||
|
||||
if (event == SYS_RESTART)
|
||||
reason = "reboot";
|
||||
|
||||
ret = efibc_set_variable("LoaderEntryRebootReason", reason);
|
||||
ret = efibc_set_variable(L"LoaderEntryRebootReason", reason,
|
||||
ucs2_strlen(reason));
|
||||
if (ret || !data)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
efibc_set_variable("LoaderEntryOneShot", (char *)data);
|
||||
wdata = kmalloc(MAX_DATA_LEN * sizeof(efi_char16_t), GFP_KERNEL);
|
||||
for (l = 0; l < MAX_DATA_LEN - 1 && str[l] != '\0'; l++)
|
||||
wdata[l] = str[l];
|
||||
wdata[l] = L'\0';
|
||||
|
||||
efibc_set_variable(L"LoaderEntryOneShot", wdata, l);
|
||||
|
||||
kfree(wdata);
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
@@ -84,7 +66,7 @@ static int __init efibc_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!efivars_kobject() || !efivar_supports_writes())
|
||||
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
|
||||
return -ENODEV;
|
||||
|
||||
ret = register_reboot_notifier(&efibc_reboot_notifier);
|
||||
|
||||
@@ -1,671 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0+
|
||||
/*
|
||||
* Originally from efivars.c,
|
||||
*
|
||||
* Copyright (C) 2001,2003,2004 Dell <Matt_Domsch@dell.com>
|
||||
* Copyright (C) 2004 Intel Corporation <matthew.e.tolentino@intel.com>
|
||||
*
|
||||
* This code takes all variables accessible from EFI runtime and
|
||||
* exports them via sysfs
|
||||
*/
|
||||
|
||||
#include <linux/efi.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/ucs2_string.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#define EFIVARS_VERSION "0.08"
|
||||
#define EFIVARS_DATE "2004-May-17"
|
||||
|
||||
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@Dell.com>");
|
||||
MODULE_DESCRIPTION("sysfs interface to EFI Variables");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_VERSION(EFIVARS_VERSION);
|
||||
|
||||
static LIST_HEAD(efivar_sysfs_list);
|
||||
|
||||
static struct kset *efivars_kset;
|
||||
|
||||
static struct bin_attribute *efivars_new_var;
|
||||
static struct bin_attribute *efivars_del_var;
|
||||
|
||||
struct compat_efi_variable {
|
||||
efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
|
||||
efi_guid_t VendorGuid;
|
||||
__u32 DataSize;
|
||||
__u8 Data[1024];
|
||||
__u32 Status;
|
||||
__u32 Attributes;
|
||||
} __packed;
|
||||
|
||||
struct efivar_attribute {
|
||||
struct attribute attr;
|
||||
ssize_t (*show) (struct efivar_entry *entry, char *buf);
|
||||
ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
|
||||
};
|
||||
|
||||
#define EFIVAR_ATTR(_name, _mode, _show, _store) \
|
||||
struct efivar_attribute efivar_attr_##_name = { \
|
||||
.attr = {.name = __stringify(_name), .mode = _mode}, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
};
|
||||
|
||||
#define to_efivar_attr(_attr) container_of(_attr, struct efivar_attribute, attr)
|
||||
#define to_efivar_entry(obj) container_of(obj, struct efivar_entry, kobj)
|
||||
|
||||
/*
|
||||
* Prototype for sysfs creation function
|
||||
*/
|
||||
static int
|
||||
efivar_create_sysfs_entry(struct efivar_entry *new_var);
|
||||
|
||||
static ssize_t
|
||||
efivar_guid_read(struct efivar_entry *entry, char *buf)
|
||||
{
|
||||
struct efi_variable *var = &entry->var;
|
||||
char *str = buf;
|
||||
|
||||
if (!entry || !buf)
|
||||
return 0;
|
||||
|
||||
efi_guid_to_str(&var->VendorGuid, str);
|
||||
str += strlen(str);
|
||||
str += sprintf(str, "\n");
|
||||
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
efivar_attr_read(struct efivar_entry *entry, char *buf)
|
||||
{
|
||||
struct efi_variable *var = &entry->var;
|
||||
unsigned long size = sizeof(var->Data);
|
||||
char *str = buf;
|
||||
int ret;
|
||||
|
||||
if (!entry || !buf)
|
||||
return -EINVAL;
|
||||
|
||||
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
|
||||
var->DataSize = size;
|
||||
if (ret)
|
||||
return -EIO;
|
||||
|
||||
if (var->Attributes & EFI_VARIABLE_NON_VOLATILE)
|
||||
str += sprintf(str, "EFI_VARIABLE_NON_VOLATILE\n");
|
||||
if (var->Attributes & EFI_VARIABLE_BOOTSERVICE_ACCESS)
|
||||
str += sprintf(str, "EFI_VARIABLE_BOOTSERVICE_ACCESS\n");
|
||||
if (var->Attributes & EFI_VARIABLE_RUNTIME_ACCESS)
|
||||
str += sprintf(str, "EFI_VARIABLE_RUNTIME_ACCESS\n");
|
||||
if (var->Attributes & EFI_VARIABLE_HARDWARE_ERROR_RECORD)
|
||||
str += sprintf(str, "EFI_VARIABLE_HARDWARE_ERROR_RECORD\n");
|
||||
if (var->Attributes & EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS)
|
||||
str += sprintf(str,
|
||||
"EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS\n");
|
||||
if (var->Attributes &
|
||||
EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS)
|
||||
str += sprintf(str,
|
||||
"EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS\n");
|
||||
if (var->Attributes & EFI_VARIABLE_APPEND_WRITE)
|
||||
str += sprintf(str, "EFI_VARIABLE_APPEND_WRITE\n");
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
efivar_size_read(struct efivar_entry *entry, char *buf)
|
||||
{
|
||||
struct efi_variable *var = &entry->var;
|
||||
unsigned long size = sizeof(var->Data);
|
||||
char *str = buf;
|
||||
int ret;
|
||||
|
||||
if (!entry || !buf)
|
||||
return -EINVAL;
|
||||
|
||||
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
|
||||
var->DataSize = size;
|
||||
if (ret)
|
||||
return -EIO;
|
||||
|
||||
str += sprintf(str, "0x%lx\n", var->DataSize);
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
efivar_data_read(struct efivar_entry *entry, char *buf)
|
||||
{
|
||||
struct efi_variable *var = &entry->var;
|
||||
unsigned long size = sizeof(var->Data);
|
||||
int ret;
|
||||
|
||||
if (!entry || !buf)
|
||||
return -EINVAL;
|
||||
|
||||
ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
|
||||
var->DataSize = size;
|
||||
if (ret)
|
||||
return -EIO;
|
||||
|
||||
memcpy(buf, var->Data, var->DataSize);
|
||||
return var->DataSize;
|
||||
}
|
||||
|
||||
static inline int
|
||||
sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
|
||||
unsigned long size, u32 attributes, u8 *data)
|
||||
{
|
||||
/*
|
||||
* If only updating the variable data, then the name
|
||||
* and guid should remain the same
|
||||
*/
|
||||
if (memcmp(name, var->VariableName, sizeof(var->VariableName)) ||
|
||||
efi_guidcmp(vendor, var->VendorGuid)) {
|
||||
printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((size <= 0) || (attributes == 0)){
|
||||
printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
|
||||
efivar_validate(vendor, name, data, size) == false) {
|
||||
printk(KERN_ERR "efivars: Malformed variable content\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
|
||||
{
|
||||
memcpy(dst->VariableName, src->VariableName, EFI_VAR_NAME_LEN);
|
||||
memcpy(dst->Data, src->Data, sizeof(src->Data));
|
||||
|
||||
dst->VendorGuid = src->VendorGuid;
|
||||
dst->DataSize = src->DataSize;
|
||||
dst->Attributes = src->Attributes;
|
||||
}
|
||||
|
||||
/*
|
||||
* We allow each variable to be edited via rewriting the
|
||||
* entire efi variable structure.
|
||||
*/
|
||||
static ssize_t
|
||||
efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
|
||||
{
|
||||
struct efi_variable *new_var, *var = &entry->var;
|
||||
efi_char16_t *name;
|
||||
unsigned long size;
|
||||
efi_guid_t vendor;
|
||||
u32 attributes;
|
||||
u8 *data;
|
||||
int err;
|
||||
|
||||
if (!entry || !buf)
|
||||
return -EINVAL;
|
||||
|
||||
if (in_compat_syscall()) {
|
||||
struct compat_efi_variable *compat;
|
||||
|
||||
if (count != sizeof(*compat))
|
||||
return -EINVAL;
|
||||
|
||||
compat = (struct compat_efi_variable *)buf;
|
||||
attributes = compat->Attributes;
|
||||
vendor = compat->VendorGuid;
|
||||
name = compat->VariableName;
|
||||
size = compat->DataSize;
|
||||
data = compat->Data;
|
||||
|
||||
err = sanity_check(var, name, vendor, size, attributes, data);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
copy_out_compat(&entry->var, compat);
|
||||
} else {
|
||||
if (count != sizeof(struct efi_variable))
|
||||
return -EINVAL;
|
||||
|
||||
new_var = (struct efi_variable *)buf;
|
||||
|
||||
attributes = new_var->Attributes;
|
||||
vendor = new_var->VendorGuid;
|
||||
name = new_var->VariableName;
|
||||
size = new_var->DataSize;
|
||||
data = new_var->Data;
|
||||
|
||||
err = sanity_check(var, name, vendor, size, attributes, data);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(&entry->var, new_var, count);
|
||||
}
|
||||
|
||||
err = efivar_entry_set(entry, attributes, size, data, NULL);
|
||||
if (err) {
|
||||
printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
efivar_show_raw(struct efivar_entry *entry, char *buf)
|
||||
{
|
||||
struct efi_variable *var = &entry->var;
|
||||
struct compat_efi_variable *compat;
|
||||
unsigned long datasize = sizeof(var->Data);
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
if (!entry || !buf)
|
||||
return 0;
|
||||
|
||||
ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data);
|
||||
var->DataSize = datasize;
|
||||
if (ret)
|
||||
return -EIO;
|
||||
|
||||
if (in_compat_syscall()) {
|
||||
compat = (struct compat_efi_variable *)buf;
|
||||
|
||||
size = sizeof(*compat);
|
||||
memcpy(compat->VariableName, var->VariableName,
|
||||
EFI_VAR_NAME_LEN);
|
||||
memcpy(compat->Data, var->Data, sizeof(compat->Data));
|
||||
|
||||
compat->VendorGuid = var->VendorGuid;
|
||||
compat->DataSize = var->DataSize;
|
||||
compat->Attributes = var->Attributes;
|
||||
} else {
|
||||
size = sizeof(*var);
|
||||
memcpy(buf, var, size);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generic read/write functions that call the specific functions of
|
||||
* the attributes...
|
||||
*/
|
||||
static ssize_t efivar_attr_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct efivar_entry *var = to_efivar_entry(kobj);
|
||||
struct efivar_attribute *efivar_attr = to_efivar_attr(attr);
|
||||
ssize_t ret = -EIO;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
if (efivar_attr->show) {
|
||||
ret = efivar_attr->show(var, buf);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * sysfs_ops .store callback: forward the write to the store() handler of
 * the efivar attribute being accessed.
 *
 * Returns -EACCES when the caller lacks CAP_SYS_ADMIN, -EIO when the
 * attribute has no store() handler, otherwise whatever the handler returns.
 */
static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr,
				 const char *buf, size_t count)
{
	struct efivar_entry *entry = to_efivar_entry(kobj);
	struct efivar_attribute *eattr = to_efivar_attr(attr);

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!eattr->store)
		return -EIO;

	return eattr->store(entry, buf, count);
}
|
||||
|
||||
/*
 * sysfs operations shared by all per-variable attributes: reads and writes
 * are routed through efivar_attr_show() and efivar_attr_store().
 */
static const struct sysfs_ops efivar_attr_ops = {
|
||||
.show = efivar_attr_show,
|
||||
.store = efivar_attr_store,
|
||||
};
|
||||
|
||||
/*
 * kobject release callback: free the efivar_entry that embeds @kobj.
 */
static void efivar_release(struct kobject *kobj)
{
	kfree(to_efivar_entry(kobj));
}
|
||||
|
||||
/*
 * Per-variable attributes. guid, attributes, size and data are declared
 * with mode 0400 and a read handler only; raw_var is declared with mode
 * 0600 and is served by efivar_show_raw() / efivar_store_raw().
 * NOTE(review): EFIVAR_ATTR() is defined outside this view; it presumably
 * declares the efivar_attr_<name> objects referenced by def_attrs[].
 */
static EFIVAR_ATTR(guid, 0400, efivar_guid_read, NULL);
|
||||
static EFIVAR_ATTR(attributes, 0400, efivar_attr_read, NULL);
|
||||
static EFIVAR_ATTR(size, 0400, efivar_size_read, NULL);
|
||||
static EFIVAR_ATTR(data, 0400, efivar_data_read, NULL);
|
||||
static EFIVAR_ATTR(raw_var, 0600, efivar_show_raw, efivar_store_raw);
|
||||
|
||||
/*
 * NULL-terminated list of the attributes attached to every variable
 * kobject; ATTRIBUTE_GROUPS(def) wraps it into the def_groups array used
 * by efivar_ktype.
 */
static struct attribute *def_attrs[] = {
|
||||
&efivar_attr_guid.attr,
|
||||
&efivar_attr_size.attr,
|
||||
&efivar_attr_attributes.attr,
|
||||
&efivar_attr_data.attr,
|
||||
&efivar_attr_raw_var.attr,
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(def);
|
||||
|
||||
/*
 * kobject type of a variable entry: efivar_release() frees the entry when
 * the kobject is released, attribute I/O goes through efivar_attr_ops, and
 * def_groups supplies the default attribute files.
 */
static struct kobj_type efivar_ktype = {
|
||||
.release = efivar_release,
|
||||
|
||||
.sysfs_ops = &efivar_attr_ops,
|
||||
.default_groups = def_groups,
|
||||
};
|
||||
|
||||
static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
|
||||
struct bin_attribute *bin_attr,
|
||||
char *buf, loff_t pos, size_t count)
|
||||
{
|
||||
struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
|
||||
struct efi_variable *new_var = (struct efi_variable *)buf;
|
||||
struct efivar_entry *new_entry;
|
||||
bool need_compat = in_compat_syscall();
|
||||
efi_char16_t *name;
|
||||
unsigned long size;
|
||||
u32 attributes;
|
||||
u8 *data;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
if (need_compat) {
|
||||
if (count != sizeof(*compat))
|
||||
return -EINVAL;
|
||||
|
||||
attributes = compat->Attributes;
|
||||
name = compat->VariableName;
|
||||
size = compat->DataSize;
|
||||
data = compat->Data;
|
||||
} else {
|
||||
if (count != sizeof(*new_var))
|
||||
return -EINVAL;
|
||||
|
||||
attributes = new_var->Attributes;
|
||||
name = new_var->VariableName;
|
||||
size = new_var->DataSize;
|
||||
data = new_var->Data;
|
||||
}
|
||||
|
||||
if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
|
||||
efivar_validate(new_var->VendorGuid, name, data,
|
||||
size) == false) {
|
||||
printk(KERN_ERR "efivars: Malformed variable content\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
|
||||
if (!new_entry)
|
||||
return -ENOMEM;
|
||||
|
||||
if (need_compat)
|
||||
copy_out_compat(&new_entry->var, compat);
|
||||
else
|
||||
memcpy(&new_entry->var, new_var, sizeof(*new_var));
|
||||
|
||||
err = efivar_entry_set(new_entry, attributes, size,
|
||||
data, &efivar_sysfs_list);
|
||||
if (err) {
|
||||
if (err == -EEXIST)
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (efivar_create_sysfs_entry(new_entry)) {
|
||||
printk(KERN_WARNING "efivars: failed to create sysfs entry.\n");
|
||||
kfree(new_entry);
|
||||
}
|
||||
return count;
|
||||
|
||||
out:
|
||||
kfree(new_entry);
|
||||
return err;
|
||||
}
|
||||
|
||||
static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
|
||||
struct bin_attribute *bin_attr,
|
||||
char *buf, loff_t pos, size_t count)
|
||||
{
|
||||
struct efi_variable *del_var = (struct efi_variable *)buf;
|
||||
struct compat_efi_variable *compat;
|
||||
struct efivar_entry *entry;
|
||||
efi_char16_t *name;
|
||||
efi_guid_t vendor;
|
||||
int err = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
if (in_compat_syscall()) {
|
||||
if (count != sizeof(*compat))
|
||||
return -EINVAL;
|
||||
|
||||
compat = (struct compat_efi_variable *)buf;
|
||||
name = compat->VariableName;
|
||||
vendor = compat->VendorGuid;
|
||||
} else {
|
||||
if (count != sizeof(*del_var))
|
||||
return -EINVAL;
|
||||
|
||||
name = del_var->VariableName;
|
||||
vendor = del_var->VendorGuid;
|
||||
}
|
||||
|
||||
if (efivar_entry_iter_begin())
|
||||
return -EINTR;
|
||||
entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
|
||||
if (!entry)
|
||||
err = -EINVAL;
|
||||
else if (__efivar_entry_delete(entry))
|
||||
err = -EIO;
|
||||
|
||||
if (err) {
|
||||
efivar_entry_iter_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!entry->scanning) {
|
||||
efivar_entry_iter_end();
|
||||
efivar_unregister(entry);
|
||||
} else
|
||||
efivar_entry_iter_end();
|
||||
|
||||
/* It's dead Jim.... */
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* efivar_create_sysfs_entry - create a new entry in sysfs
|
||||
* @new_var: efivar entry to create
|
||||
*
|
||||
* Returns 0 on success, negative error code on failure
|
||||
*/
|
||||
static int
|
||||
efivar_create_sysfs_entry(struct efivar_entry *new_var)
|
||||
{
|
||||
int short_name_size;
|
||||
char *short_name;
|
||||
unsigned long utf8_name_size;
|
||||
efi_char16_t *variable_name = new_var->var.VariableName;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Length of the variable bytes in UTF8, plus the '-' separator,
|
||||
* plus the GUID, plus trailing NUL
|
||||
*/
|
||||
utf8_name_size = ucs2_utf8size(variable_name);
|
||||
short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1;
|
||||
|
||||
short_name = kmalloc(short_name_size, GFP_KERNEL);
|
||||
if (!short_name)
|
||||
return -ENOMEM;
|
||||
|
||||
ucs2_as_utf8(short_name, variable_name, short_name_size);
|
||||
|
||||
/* This is ugly, but necessary to separate one vendor's
|
||||
private variables from another's. */
|
||||
short_name[utf8_name_size] = '-';
|
||||
efi_guid_to_str(&new_var->var.VendorGuid,
|
||||
short_name + utf8_name_size + 1);
|
||||
|
||||
new_var->kobj.kset = efivars_kset;
|
||||
|
||||
ret = kobject_init_and_add(&new_var->kobj, &efivar_ktype,
|
||||
NULL, "%s", short_name);
|
||||
kfree(short_name);
|
||||
if (ret) {
|
||||
kobject_put(&new_var->kobj);
|
||||
return ret;
|
||||
}
|
||||
|
||||
kobject_uevent(&new_var->kobj, KOBJ_ADD);
|
||||
if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
|
||||
efivar_unregister(new_var);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
create_efivars_bin_attributes(void)
|
||||
{
|
||||
struct bin_attribute *attr;
|
||||
int error;
|
||||
|
||||
/* new_var */
|
||||
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
|
||||
if (!attr)
|
||||
return -ENOMEM;
|
||||
|
||||
attr->attr.name = "new_var";
|
||||
attr->attr.mode = 0200;
|
||||
attr->write = efivar_create;
|
||||
efivars_new_var = attr;
|
||||
|
||||
/* del_var */
|
||||
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
|
||||
if (!attr) {
|
||||
error = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
attr->attr.name = "del_var";
|
||||
attr->attr.mode = 0200;
|
||||
attr->write = efivar_delete;
|
||||
efivars_del_var = attr;
|
||||
|
||||
sysfs_bin_attr_init(efivars_new_var);
|
||||
sysfs_bin_attr_init(efivars_del_var);
|
||||
|
||||
/* Register */
|
||||
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_new_var);
|
||||
if (error) {
|
||||
printk(KERN_ERR "efivars: unable to create new_var sysfs file"
|
||||
" due to error %d\n", error);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
error = sysfs_create_bin_file(&efivars_kset->kobj, efivars_del_var);
|
||||
if (error) {
|
||||
printk(KERN_ERR "efivars: unable to create del_var sysfs file"
|
||||
" due to error %d\n", error);
|
||||
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
return 0;
|
||||
out_free:
|
||||
kfree(efivars_del_var);
|
||||
efivars_del_var = NULL;
|
||||
kfree(efivars_new_var);
|
||||
efivars_new_var = NULL;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Callback handed to efivar_init(): allocate an entry for the variable
 * identified by @name / @vendor and create its sysfs entry.
 *
 * Returns -ENOMEM when the entry cannot be allocated and 0 otherwise; the
 * result of efivar_create_sysfs_entry() is not checked.
 */
static int efivars_sysfs_callback(efi_char16_t *name, efi_guid_t vendor,
				  unsigned long name_size, void *data)
{
	struct efivar_entry *new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);

	if (!new_entry)
		return -ENOMEM;

	memcpy(new_entry->var.VariableName, name, name_size);
	memcpy(&new_entry->var.VendorGuid, &vendor, sizeof(efi_guid_t));

	efivar_create_sysfs_entry(new_entry);

	return 0;
}
|
||||
|
||||
/*
 * Iterator callback used on teardown: take @entry off its list and, if
 * that succeeded, unregister it.
 *
 * Returns 0 on success or the error from efivar_entry_remove().
 */
static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
{
	int err = efivar_entry_remove(entry);

	if (!err)
		efivar_unregister(entry);

	return err;
}
|
||||
|
||||
static void efivars_sysfs_exit(void)
|
||||
{
|
||||
/* Remove all entries and destroy */
|
||||
int err;
|
||||
|
||||
err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list,
|
||||
NULL, NULL);
|
||||
if (err) {
|
||||
pr_err("efivars: Failed to destroy sysfs entries\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (efivars_new_var)
|
||||
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
|
||||
if (efivars_del_var)
|
||||
sysfs_remove_bin_file(&efivars_kset->kobj, efivars_del_var);
|
||||
kfree(efivars_new_var);
|
||||
kfree(efivars_del_var);
|
||||
kset_unregister(efivars_kset);
|
||||
}
|
||||
|
||||
static int efivars_sysfs_init(void)
|
||||
{
|
||||
struct kobject *parent_kobj = efivars_kobject();
|
||||
int error = 0;
|
||||
|
||||
/* No efivars has been registered yet */
|
||||
if (!parent_kobj || !efivar_supports_writes())
|
||||
return 0;
|
||||
|
||||
printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION,
|
||||
EFIVARS_DATE);
|
||||
|
||||
efivars_kset = kset_create_and_add("vars", NULL, parent_kobj);
|
||||
if (!efivars_kset) {
|
||||
printk(KERN_ERR "efivars: Subsystem registration failed.\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
|
||||
|
||||
error = create_efivars_bin_attributes();
|
||||
if (error) {
|
||||
efivars_sysfs_exit();
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Register efivars_sysfs_init() / efivars_sysfs_exit() as the module's init and exit handlers. */
module_init(efivars_sysfs_init);
|
||||
module_exit(efivars_sysfs_exit);
|
||||
@@ -59,8 +59,7 @@ static void __init efi_memmap_free(void)
|
||||
* Depending on whether mm_init() has already been invoked or not,
|
||||
* either memblock or "normal" page allocation is used.
|
||||
*
|
||||
* Returns the physical address of the allocated memory map on
|
||||
* success, zero on failure.
|
||||
* Returns zero on success, a negative error code on failure.
|
||||
*/
|
||||
int __init efi_memmap_alloc(unsigned int num_entries,
|
||||
struct efi_memory_map_data *data)
|
||||
@@ -245,7 +244,7 @@ int __init efi_memmap_install(struct efi_memory_map_data *data)
|
||||
* @range: Address range (start, end) to split around
|
||||
*
|
||||
* Returns the number of additional EFI memmap entries required to
|
||||
* accomodate @range.
|
||||
* accommodate @range.
|
||||
*/
|
||||
int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -552,8 +552,7 @@ EXPORT_SYMBOL(drm_release_noglobal);
|
||||
* Since events are used by the KMS API for vblank and page flip completion this
|
||||
* means all modern display drivers must use it.
|
||||
*
|
||||
* @offset is ignored, DRM events are read like a pipe. Therefore drivers also
|
||||
* must set the &file_operation.llseek to no_llseek(). Polling support is
|
||||
* @offset is ignored, DRM events are read like a pipe. Polling support is
|
||||
* provided by drm_poll().
|
||||
*
|
||||
* This function will only ever read a full event. Therefore userspace must
|
||||
|
||||
@@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
|
||||
* However...!
|
||||
*
|
||||
* The mmu-notifier can be invalidated for a
|
||||
* migrate_page, that is alreadying holding the lock
|
||||
* on the page. Such a try_to_unmap() will result
|
||||
* migrate_folio, that is alreadying holding the lock
|
||||
* on the folio. Such a try_to_unmap() will result
|
||||
* in us calling put_pages() and so recursively try
|
||||
* to lock the page. We avoid that deadlock with
|
||||
* a trylock_page() and in exchange we risk missing
|
||||
|
||||
@@ -1597,52 +1597,38 @@ static u32 applespi_notify(acpi_handle gpe_device, u32 gpe, void *context)
|
||||
|
||||
static int applespi_get_saved_bl_level(struct applespi_data *applespi)
|
||||
{
|
||||
struct efivar_entry *efivar_entry;
|
||||
efi_status_t sts = EFI_NOT_FOUND;
|
||||
u16 efi_data = 0;
|
||||
unsigned long efi_data_len;
|
||||
int sts;
|
||||
unsigned long efi_data_len = sizeof(efi_data);
|
||||
|
||||
efivar_entry = kmalloc(sizeof(*efivar_entry), GFP_KERNEL);
|
||||
if (!efivar_entry)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(efivar_entry->var.VariableName, EFI_BL_LEVEL_NAME,
|
||||
sizeof(EFI_BL_LEVEL_NAME));
|
||||
efivar_entry->var.VendorGuid = EFI_BL_LEVEL_GUID;
|
||||
efi_data_len = sizeof(efi_data);
|
||||
|
||||
sts = efivar_entry_get(efivar_entry, NULL, &efi_data_len, &efi_data);
|
||||
if (sts && sts != -ENOENT)
|
||||
if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
sts = efi.get_variable(EFI_BL_LEVEL_NAME, &EFI_BL_LEVEL_GUID,
|
||||
NULL, &efi_data_len, &efi_data);
|
||||
if (sts != EFI_SUCCESS && sts != EFI_NOT_FOUND)
|
||||
dev_warn(&applespi->spi->dev,
|
||||
"Error getting backlight level from EFI vars: %d\n",
|
||||
"Error getting backlight level from EFI vars: 0x%lx\n",
|
||||
sts);
|
||||
|
||||
kfree(efivar_entry);
|
||||
|
||||
return sts ? sts : efi_data;
|
||||
return sts != EFI_SUCCESS ? -ENODEV : efi_data;
|
||||
}
|
||||
|
||||
static void applespi_save_bl_level(struct applespi_data *applespi,
|
||||
unsigned int level)
|
||||
{
|
||||
efi_guid_t efi_guid;
|
||||
efi_status_t sts = EFI_UNSUPPORTED;
|
||||
u32 efi_attr;
|
||||
unsigned long efi_data_len;
|
||||
u16 efi_data;
|
||||
int sts;
|
||||
|
||||
/* Save keyboard backlight level */
|
||||
efi_guid = EFI_BL_LEVEL_GUID;
|
||||
efi_data = (u16)level;
|
||||
efi_data_len = sizeof(efi_data);
|
||||
efi_attr = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
|
||||
EFI_VARIABLE_RUNTIME_ACCESS;
|
||||
|
||||
sts = efivar_entry_set_safe((efi_char16_t *)EFI_BL_LEVEL_NAME, efi_guid,
|
||||
efi_attr, true, efi_data_len, &efi_data);
|
||||
if (sts)
|
||||
if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
|
||||
sts = efi.set_variable(EFI_BL_LEVEL_NAME, &EFI_BL_LEVEL_GUID,
|
||||
efi_attr, sizeof(efi_data), &efi_data);
|
||||
if (sts != EFI_SUCCESS)
|
||||
dev_warn(&applespi->spi->dev,
|
||||
"Error saving backlight level to EFI vars: %d\n", sts);
|
||||
"Error saving backlight level to EFI vars: 0x%lx\n", sts);
|
||||
}
|
||||
|
||||
static int applespi_probe(struct spi_device *spi)
|
||||
|
||||
@@ -29,8 +29,6 @@
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include <linux/balloon_compaction.h>
|
||||
#include <linux/vmw_vmci_defs.h>
|
||||
#include <linux/vmw_vmci_api.h>
|
||||
@@ -1730,20 +1728,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
|
||||
|
||||
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
|
||||
static int vmballoon_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_system_type vmballoon_fs = {
|
||||
.name = "balloon-vmware",
|
||||
.init_fs_context = vmballoon_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
static struct vfsmount *vmballoon_mnt;
|
||||
|
||||
/**
|
||||
* vmballoon_migratepage() - migrates a balloon page.
|
||||
* @b_dev_info: balloon device information descriptor.
|
||||
@@ -1862,21 +1846,6 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* vmballoon_compaction_deinit() - removes compaction related data.
|
||||
*
|
||||
* @b: pointer to the balloon.
|
||||
*/
|
||||
static void vmballoon_compaction_deinit(struct vmballoon *b)
|
||||
{
|
||||
if (!IS_ERR(b->b_dev_info.inode))
|
||||
iput(b->b_dev_info.inode);
|
||||
|
||||
b->b_dev_info.inode = NULL;
|
||||
kern_unmount(vmballoon_mnt);
|
||||
vmballoon_mnt = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* vmballoon_compaction_init() - initialized compaction for the balloon.
|
||||
*
|
||||
@@ -1888,33 +1857,15 @@ static void vmballoon_compaction_deinit(struct vmballoon *b)
|
||||
*
|
||||
* Return: zero on success or error code on failure.
|
||||
*/
|
||||
static __init int vmballoon_compaction_init(struct vmballoon *b)
|
||||
static __init void vmballoon_compaction_init(struct vmballoon *b)
|
||||
{
|
||||
vmballoon_mnt = kern_mount(&vmballoon_fs);
|
||||
if (IS_ERR(vmballoon_mnt))
|
||||
return PTR_ERR(vmballoon_mnt);
|
||||
|
||||
b->b_dev_info.migratepage = vmballoon_migratepage;
|
||||
b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
|
||||
|
||||
if (IS_ERR(b->b_dev_info.inode))
|
||||
return PTR_ERR(b->b_dev_info.inode);
|
||||
|
||||
b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* CONFIG_BALLOON_COMPACTION */
|
||||
|
||||
static void vmballoon_compaction_deinit(struct vmballoon *b)
|
||||
static inline void vmballoon_compaction_init(struct vmballoon *b)
|
||||
{
|
||||
}
|
||||
|
||||
static int vmballoon_compaction_init(struct vmballoon *b)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BALLOON_COMPACTION */
|
||||
|
||||
static int __init vmballoon_init(void)
|
||||
@@ -1939,9 +1890,7 @@ static int __init vmballoon_init(void)
|
||||
* balloon_devinfo_init() .
|
||||
*/
|
||||
balloon_devinfo_init(&balloon.b_dev_info);
|
||||
error = vmballoon_compaction_init(&balloon);
|
||||
if (error)
|
||||
goto fail;
|
||||
vmballoon_compaction_init(&balloon);
|
||||
|
||||
INIT_LIST_HEAD(&balloon.huge_pages);
|
||||
spin_lock_init(&balloon.comm_lock);
|
||||
@@ -1958,7 +1907,6 @@ static int __init vmballoon_init(void)
|
||||
return 0;
|
||||
fail:
|
||||
vmballoon_unregister_shrinker(&balloon);
|
||||
vmballoon_compaction_deinit(&balloon);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1985,8 +1933,5 @@ static void __exit vmballoon_exit(void)
|
||||
*/
|
||||
vmballoon_send_start(&balloon, 0);
|
||||
vmballoon_pop(&balloon);
|
||||
|
||||
/* Only once we popped the balloon, compaction can be deinit */
|
||||
vmballoon_compaction_deinit(&balloon);
|
||||
}
|
||||
module_exit(vmballoon_exit);
|
||||
|
||||
@@ -459,43 +459,34 @@ static void brcmf_fw_fix_efi_nvram_ccode(char *data, unsigned long data_len)
|
||||
|
||||
static u8 *brcmf_fw_nvram_from_efi(size_t *data_len_ret)
|
||||
{
|
||||
const u16 name[] = { 'n', 'v', 'r', 'a', 'm', 0 };
|
||||
struct efivar_entry *nvram_efivar;
|
||||
efi_guid_t guid = EFI_GUID(0x74b00bd9, 0x805a, 0x4d61, 0xb5, 0x1f,
|
||||
0x43, 0x26, 0x81, 0x23, 0xd1, 0x13);
|
||||
unsigned long data_len = 0;
|
||||
efi_status_t status;
|
||||
u8 *data = NULL;
|
||||
int err;
|
||||
|
||||
nvram_efivar = kzalloc(sizeof(*nvram_efivar), GFP_KERNEL);
|
||||
if (!nvram_efivar)
|
||||
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
return NULL;
|
||||
|
||||
memcpy(&nvram_efivar->var.VariableName, name, sizeof(name));
|
||||
nvram_efivar->var.VendorGuid = EFI_GUID(0x74b00bd9, 0x805a, 0x4d61,
|
||||
0xb5, 0x1f, 0x43, 0x26,
|
||||
0x81, 0x23, 0xd1, 0x13);
|
||||
|
||||
err = efivar_entry_size(nvram_efivar, &data_len);
|
||||
if (err)
|
||||
status = efi.get_variable(L"nvram", &guid, NULL, &data_len, NULL);
|
||||
if (status != EFI_BUFFER_TOO_SMALL)
|
||||
goto fail;
|
||||
|
||||
data = kmalloc(data_len, GFP_KERNEL);
|
||||
if (!data)
|
||||
goto fail;
|
||||
|
||||
err = efivar_entry_get(nvram_efivar, NULL, &data_len, data);
|
||||
if (err)
|
||||
status = efi.get_variable(L"nvram", &guid, NULL, &data_len, data);
|
||||
if (status != EFI_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
brcmf_fw_fix_efi_nvram_ccode(data, data_len);
|
||||
brcmf_info("Using nvram EFI variable\n");
|
||||
|
||||
kfree(nvram_efivar);
|
||||
*data_len_ret = data_len;
|
||||
return data;
|
||||
|
||||
fail:
|
||||
kfree(data);
|
||||
kfree(nvram_efivar);
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -19,20 +19,14 @@
|
||||
|
||||
void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
|
||||
{
|
||||
struct efivar_entry *pnvm_efivar;
|
||||
void *data;
|
||||
unsigned long package_size;
|
||||
int err;
|
||||
efi_status_t status;
|
||||
|
||||
*len = 0;
|
||||
|
||||
pnvm_efivar = kzalloc(sizeof(*pnvm_efivar), GFP_KERNEL);
|
||||
if (!pnvm_efivar)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
memcpy(&pnvm_efivar->var.VariableName, IWL_UEFI_OEM_PNVM_NAME,
|
||||
sizeof(IWL_UEFI_OEM_PNVM_NAME));
|
||||
pnvm_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
|
||||
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/*
|
||||
* TODO: we hardcode a maximum length here, because reading
|
||||
@@ -42,27 +36,22 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
|
||||
package_size = IWL_HARDCODED_PNVM_SIZE;
|
||||
|
||||
data = kmalloc(package_size, GFP_KERNEL);
|
||||
if (!data) {
|
||||
data = ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
if (!data)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
|
||||
if (err) {
|
||||
status = efi.get_variable(IWL_UEFI_OEM_PNVM_NAME, &IWL_EFI_VAR_GUID,
|
||||
NULL, &package_size, data);
|
||||
if (status != EFI_SUCCESS) {
|
||||
IWL_DEBUG_FW(trans,
|
||||
"PNVM UEFI variable not found %d (len %lu)\n",
|
||||
err, package_size);
|
||||
"PNVM UEFI variable not found 0x%lx (len %lu)\n",
|
||||
status, package_size);
|
||||
kfree(data);
|
||||
data = ERR_PTR(err);
|
||||
goto out;
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %lu\n", package_size);
|
||||
*len = package_size;
|
||||
|
||||
out:
|
||||
kfree(pnvm_efivar);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -211,21 +200,15 @@ static void *iwl_uefi_reduce_power_parse(struct iwl_trans *trans,
|
||||
|
||||
void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
|
||||
{
|
||||
struct efivar_entry *reduce_power_efivar;
|
||||
struct pnvm_sku_package *package;
|
||||
void *data = NULL;
|
||||
unsigned long package_size;
|
||||
int err;
|
||||
efi_status_t status;
|
||||
|
||||
*len = 0;
|
||||
|
||||
reduce_power_efivar = kzalloc(sizeof(*reduce_power_efivar), GFP_KERNEL);
|
||||
if (!reduce_power_efivar)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
memcpy(&reduce_power_efivar->var.VariableName, IWL_UEFI_REDUCED_POWER_NAME,
|
||||
sizeof(IWL_UEFI_REDUCED_POWER_NAME));
|
||||
reduce_power_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
|
||||
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/*
|
||||
* TODO: we hardcode a maximum length here, because reading
|
||||
@@ -235,19 +218,17 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
|
||||
package_size = IWL_HARDCODED_REDUCE_POWER_SIZE;
|
||||
|
||||
package = kmalloc(package_size, GFP_KERNEL);
|
||||
if (!package) {
|
||||
package = ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
if (!package)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = efivar_entry_get(reduce_power_efivar, NULL, &package_size, package);
|
||||
if (err) {
|
||||
status = efi.get_variable(IWL_UEFI_REDUCED_POWER_NAME, &IWL_EFI_VAR_GUID,
|
||||
NULL, &package_size, data);
|
||||
if (status != EFI_SUCCESS) {
|
||||
IWL_DEBUG_FW(trans,
|
||||
"Reduced Power UEFI variable not found %d (len %lu)\n",
|
||||
err, package_size);
|
||||
"Reduced Power UEFI variable not found 0x%lx (len %lu)\n",
|
||||
status, package_size);
|
||||
kfree(package);
|
||||
data = ERR_PTR(err);
|
||||
goto out;
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
IWL_DEBUG_FW(trans, "Read reduced power from UEFI with size %lu\n",
|
||||
@@ -262,9 +243,6 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
|
||||
|
||||
kfree(package);
|
||||
|
||||
out:
|
||||
kfree(reduce_power_efivar);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -304,22 +282,15 @@ static int iwl_uefi_sgom_parse(struct uefi_cnv_wlan_sgom_data *sgom_data,
|
||||
void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
|
||||
struct iwl_fw_runtime *fwrt)
|
||||
{
|
||||
struct efivar_entry *sgom_efivar;
|
||||
struct uefi_cnv_wlan_sgom_data *data;
|
||||
unsigned long package_size;
|
||||
int err, ret;
|
||||
efi_status_t status;
|
||||
int ret;
|
||||
|
||||
if (!fwrt->geo_enabled)
|
||||
if (!fwrt->geo_enabled ||
|
||||
!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
return;
|
||||
|
||||
sgom_efivar = kzalloc(sizeof(*sgom_efivar), GFP_KERNEL);
|
||||
if (!sgom_efivar)
|
||||
return;
|
||||
|
||||
memcpy(&sgom_efivar->var.VariableName, IWL_UEFI_SGOM_NAME,
|
||||
sizeof(IWL_UEFI_SGOM_NAME));
|
||||
sgom_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
|
||||
|
||||
/* TODO: we hardcode a maximum length here, because reading
|
||||
* from the UEFI is not working. To implement this properly,
|
||||
* we have to call efivar_entry_size().
|
||||
@@ -327,15 +298,14 @@ void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
|
||||
package_size = IWL_HARDCODED_SGOM_SIZE;
|
||||
|
||||
data = kmalloc(package_size, GFP_KERNEL);
|
||||
if (!data) {
|
||||
data = ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
err = efivar_entry_get(sgom_efivar, NULL, &package_size, data);
|
||||
if (err) {
|
||||
status = efi.get_variable(IWL_UEFI_SGOM_NAME, &IWL_EFI_VAR_GUID,
|
||||
NULL, &package_size, data);
|
||||
if (status != EFI_SUCCESS) {
|
||||
IWL_DEBUG_FW(trans,
|
||||
"SGOM UEFI variable not found %d\n", err);
|
||||
"SGOM UEFI variable not found 0x%lx\n", status);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
@@ -349,8 +319,6 @@ void iwl_uefi_get_sgom_table(struct iwl_trans *trans,
|
||||
out_free:
|
||||
kfree(data);
|
||||
|
||||
out:
|
||||
kfree(sgom_efivar);
|
||||
}
|
||||
IWL_EXPORT_SYMBOL(iwl_uefi_get_sgom_table);
|
||||
#endif /* CONFIG_ACPI */
|
||||
|
||||
@@ -112,7 +112,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
|
||||
|
||||
iocb->ki_pos = pos;
|
||||
iocb->ki_filp = req->ns->file;
|
||||
iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
|
||||
iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags;
|
||||
|
||||
return call_iter(iocb, &iter);
|
||||
}
|
||||
|
||||
@@ -1284,7 +1284,7 @@ static int gmin_get_config_var(struct device *maindev,
|
||||
const struct dmi_system_id *id;
|
||||
struct device *dev = maindev;
|
||||
char var8[CFG_VAR_NAME_MAX];
|
||||
struct efivar_entry *ev;
|
||||
efi_status_t status;
|
||||
int i, ret;
|
||||
|
||||
/* For sensors, try first to use the _DSM table */
|
||||
@@ -1326,24 +1326,11 @@ static int gmin_get_config_var(struct device *maindev,
|
||||
for (i = 0; i < sizeof(var8) && var8[i]; i++)
|
||||
var16[i] = var8[i];
|
||||
|
||||
/* Not sure this API usage is kosher; efivar_entry_get()'s
|
||||
* implementation simply uses VariableName and VendorGuid from
|
||||
* the struct and ignores the rest, but it seems like there
|
||||
* ought to be an "official" efivar_entry registered
|
||||
* somewhere?
|
||||
*/
|
||||
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
|
||||
if (!ev)
|
||||
return -ENOMEM;
|
||||
memcpy(&ev->var.VariableName, var16, sizeof(var16));
|
||||
ev->var.VendorGuid = GMIN_CFG_VAR_EFI_GUID;
|
||||
ev->var.DataSize = *out_len;
|
||||
|
||||
ret = efivar_entry_get(ev, &ev->var.Attributes,
|
||||
&ev->var.DataSize, ev->var.Data);
|
||||
if (ret == 0) {
|
||||
memcpy(out, ev->var.Data, ev->var.DataSize);
|
||||
*out_len = ev->var.DataSize;
|
||||
status = EFI_UNSUPPORTED;
|
||||
if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
|
||||
status = efi.get_variable(var16, &GMIN_CFG_VAR_EFI_GUID, NULL,
|
||||
(unsigned long *)out_len, out);
|
||||
if (status == EFI_SUCCESS) {
|
||||
dev_info(maindev, "found EFI entry for '%s'\n", var8);
|
||||
} else if (is_gmin) {
|
||||
dev_info(maindev, "Failed to find EFI gmin variable %s\n", var8);
|
||||
@@ -1351,8 +1338,6 @@ static int gmin_get_config_var(struct device *maindev,
|
||||
dev_info(maindev, "Failed to find EFI variable %s\n", var8);
|
||||
}
|
||||
|
||||
kfree(ev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -1132,7 +1132,7 @@ static struct file *vfio_device_open(struct vfio_device *device)
|
||||
* Appears to be missing by lack of need rather than
|
||||
* explicitly prevented. Now there's need.
|
||||
*/
|
||||
filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
|
||||
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
|
||||
|
||||
if (device->group->type == VFIO_NO_IOMMU)
|
||||
dev_warn(device->dev, "vfio-noiommu device opened by user "
|
||||
|
||||
@@ -17,9 +17,6 @@
|
||||
#include <linux/oom.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include <linux/page_reporting.h>
|
||||
|
||||
/*
|
||||
@@ -42,10 +39,6 @@
|
||||
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
|
||||
#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
|
||||
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
static struct vfsmount *balloon_mnt;
|
||||
#endif
|
||||
|
||||
enum virtio_balloon_vq {
|
||||
VIRTIO_BALLOON_VQ_INFLATE,
|
||||
VIRTIO_BALLOON_VQ_DEFLATE,
|
||||
@@ -805,18 +798,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
|
||||
|
||||
return MIGRATEPAGE_SUCCESS;
|
||||
}
|
||||
|
||||
static int balloon_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_system_type balloon_fs = {
|
||||
.name = "balloon-kvm",
|
||||
.init_fs_context = balloon_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
#endif /* CONFIG_BALLOON_COMPACTION */
|
||||
|
||||
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
|
||||
@@ -909,19 +890,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
|
||||
goto out_free_vb;
|
||||
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
balloon_mnt = kern_mount(&balloon_fs);
|
||||
if (IS_ERR(balloon_mnt)) {
|
||||
err = PTR_ERR(balloon_mnt);
|
||||
goto out_del_vqs;
|
||||
}
|
||||
|
||||
vb->vb_dev_info.migratepage = virtballoon_migratepage;
|
||||
vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
|
||||
if (IS_ERR(vb->vb_dev_info.inode)) {
|
||||
err = PTR_ERR(vb->vb_dev_info.inode);
|
||||
goto out_kern_unmount;
|
||||
}
|
||||
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
|
||||
#endif
|
||||
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
|
||||
/*
|
||||
@@ -930,13 +899,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
|
||||
*/
|
||||
if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
|
||||
err = -ENOSPC;
|
||||
goto out_iput;
|
||||
goto out_del_vqs;
|
||||
}
|
||||
vb->balloon_wq = alloc_workqueue("balloon-wq",
|
||||
WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
|
||||
if (!vb->balloon_wq) {
|
||||
err = -ENOMEM;
|
||||
goto out_iput;
|
||||
goto out_del_vqs;
|
||||
}
|
||||
INIT_WORK(&vb->report_free_page_work, report_free_page_func);
|
||||
vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
|
||||
@@ -1030,13 +999,7 @@ out_unregister_shrinker:
|
||||
out_del_balloon_wq:
|
||||
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
|
||||
destroy_workqueue(vb->balloon_wq);
|
||||
out_iput:
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
iput(vb->vb_dev_info.inode);
|
||||
out_kern_unmount:
|
||||
kern_unmount(balloon_mnt);
|
||||
out_del_vqs:
|
||||
#endif
|
||||
vdev->config->del_vqs(vdev);
|
||||
out_free_vb:
|
||||
kfree(vb);
|
||||
@@ -1083,12 +1046,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
|
||||
}
|
||||
|
||||
remove_common(vb);
|
||||
#ifdef CONFIG_BALLOON_COMPACTION
|
||||
if (vb->vb_dev_info.inode)
|
||||
iput(vb->vb_dev_info.inode);
|
||||
|
||||
kern_unmount(balloon_mnt);
|
||||
#endif
|
||||
kfree(vb);
|
||||
}
|
||||
|
||||
|
||||
@@ -526,7 +526,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to, int create)
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct buffer_head *bh;
|
||||
char *data;
|
||||
unsigned pos = 0;
|
||||
u32 bidx, boff, bsize;
|
||||
u32 tmp;
|
||||
@@ -545,15 +544,12 @@ affs_do_readpage_ofs(struct page *page, unsigned to, int create)
|
||||
return PTR_ERR(bh);
|
||||
tmp = min(bsize - boff, to - pos);
|
||||
BUG_ON(pos + tmp > to || tmp > bsize);
|
||||
data = kmap_atomic(page);
|
||||
memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
|
||||
kunmap_atomic(data);
|
||||
memcpy_to_page(page, pos, AFFS_DATA(bh) + boff, tmp);
|
||||
affs_brelse(bh);
|
||||
bidx++;
|
||||
pos += tmp;
|
||||
boff = 0;
|
||||
}
|
||||
flush_dcache_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -132,12 +132,6 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
|
||||
if (PageError(page)) {
|
||||
ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
|
||||
put_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
buf = kmap(page);
|
||||
ret = -EINVAL;
|
||||
if (buf[size - 1] == '.')
|
||||
|
||||
38
fs/aio.c
38
fs/aio.c
@@ -400,8 +400,8 @@ static const struct file_operations aio_ring_fops = {
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_MIGRATION)
|
||||
static int aio_migratepage(struct address_space *mapping, struct page *new,
|
||||
struct page *old, enum migrate_mode mode)
|
||||
static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
|
||||
struct folio *src, enum migrate_mode mode)
|
||||
{
|
||||
struct kioctx *ctx;
|
||||
unsigned long flags;
|
||||
@@ -435,10 +435,10 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
|
||||
goto out;
|
||||
}
|
||||
|
||||
idx = old->index;
|
||||
idx = src->index;
|
||||
if (idx < (pgoff_t)ctx->nr_pages) {
|
||||
/* Make sure the old page hasn't already been changed */
|
||||
if (ctx->ring_pages[idx] != old)
|
||||
/* Make sure the old folio hasn't already been changed */
|
||||
if (ctx->ring_pages[idx] != &src->page)
|
||||
rc = -EAGAIN;
|
||||
} else
|
||||
rc = -EINVAL;
|
||||
@@ -447,27 +447,27 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
|
||||
goto out_unlock;
|
||||
|
||||
/* Writeback must be complete */
|
||||
BUG_ON(PageWriteback(old));
|
||||
get_page(new);
|
||||
BUG_ON(folio_test_writeback(src));
|
||||
folio_get(dst);
|
||||
|
||||
rc = migrate_page_move_mapping(mapping, new, old, 1);
|
||||
rc = folio_migrate_mapping(mapping, dst, src, 1);
|
||||
if (rc != MIGRATEPAGE_SUCCESS) {
|
||||
put_page(new);
|
||||
folio_put(dst);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Take completion_lock to prevent other writes to the ring buffer
|
||||
* while the old page is copied to the new. This prevents new
|
||||
* while the old folio is copied to the new. This prevents new
|
||||
* events from being lost.
|
||||
*/
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
migrate_page_copy(new, old);
|
||||
BUG_ON(ctx->ring_pages[idx] != old);
|
||||
ctx->ring_pages[idx] = new;
|
||||
folio_migrate_copy(dst, src);
|
||||
BUG_ON(ctx->ring_pages[idx] != &src->page);
|
||||
ctx->ring_pages[idx] = &dst->page;
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
|
||||
/* The old page is no longer accessible. */
|
||||
put_page(old);
|
||||
/* The old folio is no longer accessible. */
|
||||
folio_put(src);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ctx->ring_lock);
|
||||
@@ -475,13 +475,13 @@ out:
|
||||
spin_unlock(&mapping->private_lock);
|
||||
return rc;
|
||||
}
|
||||
#else
|
||||
#define aio_migrate_folio NULL
|
||||
#endif
|
||||
|
||||
static const struct address_space_operations aio_ctx_aops = {
|
||||
.dirty_folio = noop_dirty_folio,
|
||||
#if IS_ENABLED(CONFIG_MIGRATION)
|
||||
.migratepage = aio_migratepage,
|
||||
#endif
|
||||
.migrate_folio = aio_migrate_folio,
|
||||
};
|
||||
|
||||
static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
|
||||
@@ -1475,7 +1475,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
|
||||
req->ki_complete = aio_complete_rw;
|
||||
req->private = NULL;
|
||||
req->ki_pos = iocb->aio_offset;
|
||||
req->ki_flags = iocb_flags(req->ki_filp);
|
||||
req->ki_flags = req->ki_filp->f_iocb_flags;
|
||||
if (iocb->aio_flags & IOCB_FLAG_RESFD)
|
||||
req->ki_flags |= IOCB_EVENTFD;
|
||||
if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
|
||||
|
||||
@@ -108,8 +108,7 @@ static const struct export_operations befs_export_operations = {
|
||||
* passes it the address of befs_get_block, for mapping file
|
||||
* positions to disk blocks.
|
||||
*/
|
||||
static int
|
||||
befs_read_folio(struct file *file, struct folio *folio)
|
||||
static int befs_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
return block_read_full_folio(folio, befs_get_block);
|
||||
}
|
||||
@@ -470,13 +469,12 @@ befs_destroy_inodecache(void)
|
||||
*/
|
||||
static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
|
||||
{
|
||||
struct page *page = &folio->page;
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct inode *inode = folio->mapping->host;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct befs_inode_info *befs_ino = BEFS_I(inode);
|
||||
befs_data_stream *data = &befs_ino->i_data.ds;
|
||||
befs_off_t len = data->size;
|
||||
char *link = page_address(page);
|
||||
char *link = folio_address(folio);
|
||||
|
||||
if (len == 0 || len > PAGE_SIZE) {
|
||||
befs_error(sb, "Long symlink with illegal length");
|
||||
@@ -489,12 +487,12 @@ static int befs_symlink_read_folio(struct file *unused, struct folio *folio)
|
||||
goto fail;
|
||||
}
|
||||
link[len - 1] = '\0';
|
||||
SetPageUptodate(page);
|
||||
unlock_page(page);
|
||||
folio_mark_uptodate(folio);
|
||||
folio_unlock(folio);
|
||||
return 0;
|
||||
fail:
|
||||
SetPageError(page);
|
||||
unlock_page(page);
|
||||
folio_set_error(folio);
|
||||
folio_unlock(folio);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ struct btrfs_fs_info;
|
||||
struct btrfs_workqueue;
|
||||
struct btrfs_work;
|
||||
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
|
||||
typedef void (*btrfs_work_func_t)(struct work_struct *arg);
|
||||
|
||||
struct btrfs_work {
|
||||
btrfs_func_t func;
|
||||
|
||||
@@ -2028,10 +2028,29 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
|
||||
{
|
||||
struct btrfs_data_container *inodes = ctx;
|
||||
const size_t c = 3 * sizeof(u64);
|
||||
|
||||
if (inodes->bytes_left >= c) {
|
||||
inodes->bytes_left -= c;
|
||||
inodes->val[inodes->elem_cnt] = inum;
|
||||
inodes->val[inodes->elem_cnt + 1] = offset;
|
||||
inodes->val[inodes->elem_cnt + 2] = root;
|
||||
inodes->elem_cnt += 3;
|
||||
} else {
|
||||
inodes->bytes_missing += c - inodes->bytes_left;
|
||||
inodes->bytes_left = 0;
|
||||
inodes->elem_missed += 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset)
|
||||
void *ctx, bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
u64 extent_item_pos;
|
||||
@@ -2049,17 +2068,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
extent_item_pos = logical - found_key.objectid;
|
||||
ret = iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, search_commit_root,
|
||||
iterate, ctx, ignore_offset);
|
||||
build_ino_list, ctx, ignore_offset);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
|
||||
struct extent_buffer *eb, void *ctx);
|
||||
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
|
||||
struct extent_buffer *eb, struct inode_fs_paths *ipath);
|
||||
|
||||
static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
|
||||
struct btrfs_path *path,
|
||||
iterate_irefs_t *iterate, void *ctx)
|
||||
static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
|
||||
{
|
||||
int ret = 0;
|
||||
int slot;
|
||||
@@ -2068,6 +2085,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
|
||||
u32 name_len;
|
||||
u64 parent = 0;
|
||||
int found = 0;
|
||||
struct btrfs_root *fs_root = ipath->fs_root;
|
||||
struct btrfs_path *path = ipath->btrfs_path;
|
||||
struct extent_buffer *eb;
|
||||
struct btrfs_inode_ref *iref;
|
||||
struct btrfs_key found_key;
|
||||
@@ -2103,8 +2122,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
|
||||
"following ref at offset %u for inode %llu in tree %llu",
|
||||
cur, found_key.objectid,
|
||||
fs_root->root_key.objectid);
|
||||
ret = iterate(parent, name_len,
|
||||
(unsigned long)(iref + 1), eb, ctx);
|
||||
ret = inode_to_path(parent, name_len,
|
||||
(unsigned long)(iref + 1), eb, ipath);
|
||||
if (ret)
|
||||
break;
|
||||
len = sizeof(*iref) + name_len;
|
||||
@@ -2118,15 +2137,15 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
|
||||
struct btrfs_path *path,
|
||||
iterate_irefs_t *iterate, void *ctx)
|
||||
static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath)
|
||||
{
|
||||
int ret;
|
||||
int slot;
|
||||
u64 offset = 0;
|
||||
u64 parent;
|
||||
int found = 0;
|
||||
struct btrfs_root *fs_root = ipath->fs_root;
|
||||
struct btrfs_path *path = ipath->btrfs_path;
|
||||
struct extent_buffer *eb;
|
||||
struct btrfs_inode_extref *extref;
|
||||
u32 item_size;
|
||||
@@ -2162,8 +2181,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
|
||||
extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
|
||||
parent = btrfs_inode_extref_parent(eb, extref);
|
||||
name_len = btrfs_inode_extref_name_len(eb, extref);
|
||||
ret = iterate(parent, name_len,
|
||||
(unsigned long)&extref->name, eb, ctx);
|
||||
ret = inode_to_path(parent, name_len,
|
||||
(unsigned long)&extref->name, eb, ipath);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@@ -2180,34 +2199,13 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
|
||||
struct btrfs_path *path, iterate_irefs_t *iterate,
|
||||
void *ctx)
|
||||
{
|
||||
int ret;
|
||||
int found_refs = 0;
|
||||
|
||||
ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
|
||||
if (!ret)
|
||||
++found_refs;
|
||||
else if (ret != -ENOENT)
|
||||
return ret;
|
||||
|
||||
ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
|
||||
if (ret == -ENOENT && found_refs)
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns 0 if the path could be dumped (probably truncated)
|
||||
* returns <0 in case of an error
|
||||
*/
|
||||
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
|
||||
struct extent_buffer *eb, void *ctx)
|
||||
struct extent_buffer *eb, struct inode_fs_paths *ipath)
|
||||
{
|
||||
struct inode_fs_paths *ipath = ctx;
|
||||
char *fspath;
|
||||
char *fspath_min;
|
||||
int i = ipath->fspath->elem_cnt;
|
||||
@@ -2248,8 +2246,20 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
|
||||
*/
|
||||
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
|
||||
{
|
||||
return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
|
||||
inode_to_path, ipath);
|
||||
int ret;
|
||||
int found_refs = 0;
|
||||
|
||||
ret = iterate_inode_refs(inum, ipath);
|
||||
if (!ret)
|
||||
++found_refs;
|
||||
else if (ret != -ENOENT)
|
||||
return ret;
|
||||
|
||||
ret = iterate_inode_extrefs(inum, ipath);
|
||||
if (ret == -ENOENT && found_refs)
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_data_container *init_data_container(u32 total_bytes)
|
||||
|
||||
@@ -35,8 +35,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
||||
bool ignore_offset);
|
||||
|
||||
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
struct btrfs_path *path, void *ctx,
|
||||
bool ignore_offset);
|
||||
|
||||
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
|
||||
|
||||
@@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
< block_group->zone_unusable);
|
||||
WARN_ON(block_group->space_info->disk_total
|
||||
< block_group->length * factor);
|
||||
WARN_ON(block_group->zone_is_active &&
|
||||
block_group->space_info->active_total_bytes
|
||||
< block_group->length);
|
||||
}
|
||||
block_group->space_info->total_bytes -= block_group->length;
|
||||
if (block_group->zone_is_active)
|
||||
block_group->space_info->active_total_bytes -= block_group->length;
|
||||
block_group->space_info->bytes_readonly -=
|
||||
(block_group->length - block_group->zone_unusable);
|
||||
block_group->space_info->bytes_zone_unusable -=
|
||||
@@ -1816,11 +1821,10 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
stripe_nr = physical - map->stripes[i].physical;
|
||||
stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
|
||||
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
|
||||
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
|
||||
BTRFS_BLOCK_GROUP_RAID10)) {
|
||||
stripe_nr = stripe_nr * map->num_stripes + i;
|
||||
stripe_nr = div_u64(stripe_nr, map->sub_stripes);
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
|
||||
stripe_nr = stripe_nr * map->num_stripes + i;
|
||||
}
|
||||
/*
|
||||
* The remaining case would be for RAID56, multiply by
|
||||
@@ -2108,7 +2112,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
trace_btrfs_add_block_group(info, cache, 0);
|
||||
btrfs_update_space_info(info, cache->flags, cache->length,
|
||||
cache->used, cache->bytes_super,
|
||||
cache->zone_unusable, &space_info);
|
||||
cache->zone_unusable, cache->zone_is_active,
|
||||
&space_info);
|
||||
|
||||
cache->space_info = space_info;
|
||||
|
||||
@@ -2178,7 +2183,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
|
||||
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
|
||||
0, 0, &space_info);
|
||||
0, 0, false, &space_info);
|
||||
bg->space_info = space_info;
|
||||
link_block_group(bg);
|
||||
|
||||
@@ -2559,7 +2564,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
trace_btrfs_add_block_group(fs_info, cache, 1);
|
||||
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
|
||||
cache->bytes_super, cache->zone_unusable,
|
||||
&cache->space_info);
|
||||
cache->zone_is_active, &cache->space_info);
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
|
||||
link_block_group(cache);
|
||||
@@ -2659,6 +2664,14 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
|
||||
ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/*
|
||||
* We have allocated a new chunk. We also need to activate that chunk to
|
||||
* grant metadata tickets for zoned filesystem.
|
||||
*/
|
||||
ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = inc_block_group_ro(cache, 0);
|
||||
if (ret == -ETXTBSY)
|
||||
goto unlock_out;
|
||||
@@ -3761,6 +3774,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
|
||||
* attempt.
|
||||
*/
|
||||
wait_for_alloc = true;
|
||||
force = CHUNK_ALLOC_NO_FORCE;
|
||||
spin_unlock(&space_info->lock);
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
@@ -3883,6 +3897,14 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
|
||||
if (IS_ERR(bg)) {
|
||||
ret = PTR_ERR(bg);
|
||||
} else {
|
||||
/*
|
||||
* We have a new chunk. We also need to activate it for
|
||||
* zoned filesystem.
|
||||
*/
|
||||
ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
|
||||
if (ret < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we fail to add the chunk item here, we end up
|
||||
* trying again at phase 2 of chunk allocation, at
|
||||
|
||||
@@ -118,7 +118,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
||||
if (block_rsv->reserved >= block_rsv->size) {
|
||||
num_bytes = block_rsv->reserved - block_rsv->size;
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
block_rsv->full = 1;
|
||||
block_rsv->full = true;
|
||||
} else {
|
||||
num_bytes = 0;
|
||||
}
|
||||
@@ -142,7 +142,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
||||
bytes_to_add = min(num_bytes, bytes_to_add);
|
||||
dest->reserved += bytes_to_add;
|
||||
if (dest->reserved >= dest->size)
|
||||
dest->full = 1;
|
||||
dest->full = true;
|
||||
num_bytes -= bytes_to_add;
|
||||
}
|
||||
spin_unlock(&dest->lock);
|
||||
@@ -171,7 +171,7 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type)
|
||||
{
|
||||
memset(rsv, 0, sizeof(*rsv));
|
||||
spin_lock_init(&rsv->lock);
|
||||
@@ -180,7 +180,7 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
|
||||
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type)
|
||||
enum btrfs_rsv_type type)
|
||||
{
|
||||
btrfs_init_block_rsv(rsv, type);
|
||||
rsv->space_info = btrfs_find_space_info(fs_info,
|
||||
@@ -188,7 +188,7 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type)
|
||||
enum btrfs_rsv_type type)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv;
|
||||
|
||||
@@ -304,7 +304,7 @@ int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
|
||||
if (block_rsv->reserved >= num_bytes) {
|
||||
block_rsv->reserved -= num_bytes;
|
||||
if (block_rsv->reserved < block_rsv->size)
|
||||
block_rsv->full = 0;
|
||||
block_rsv->full = false;
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&block_rsv->lock);
|
||||
@@ -319,7 +319,7 @@ void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
||||
if (update_size)
|
||||
block_rsv->size += num_bytes;
|
||||
else if (block_rsv->reserved >= block_rsv->size)
|
||||
block_rsv->full = 1;
|
||||
block_rsv->full = true;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
}
|
||||
|
||||
@@ -341,7 +341,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
global_rsv->reserved -= num_bytes;
|
||||
if (global_rsv->reserved < global_rsv->size)
|
||||
global_rsv->full = 0;
|
||||
global_rsv->full = false;
|
||||
spin_unlock(&global_rsv->lock);
|
||||
|
||||
btrfs_block_rsv_add_bytes(dest, num_bytes, true);
|
||||
@@ -408,10 +408,7 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
btrfs_try_granting_tickets(fs_info, sinfo);
|
||||
}
|
||||
|
||||
if (block_rsv->reserved == block_rsv->size)
|
||||
block_rsv->full = 1;
|
||||
else
|
||||
block_rsv->full = 0;
|
||||
block_rsv->full = (block_rsv->reserved == block_rsv->size);
|
||||
|
||||
if (block_rsv->size >= sinfo->total_bytes)
|
||||
sinfo->force_alloc = CHUNK_ALLOC_FORCE;
|
||||
|
||||
@@ -9,7 +9,7 @@ enum btrfs_reserve_flush_enum;
|
||||
/*
|
||||
* Types of block reserves
|
||||
*/
|
||||
enum {
|
||||
enum btrfs_rsv_type {
|
||||
BTRFS_BLOCK_RSV_GLOBAL,
|
||||
BTRFS_BLOCK_RSV_DELALLOC,
|
||||
BTRFS_BLOCK_RSV_TRANS,
|
||||
@@ -25,9 +25,10 @@ struct btrfs_block_rsv {
|
||||
u64 reserved;
|
||||
struct btrfs_space_info *space_info;
|
||||
spinlock_t lock;
|
||||
unsigned short full;
|
||||
unsigned short type;
|
||||
unsigned short failfast;
|
||||
bool full;
|
||||
bool failfast;
|
||||
/* Block reserve type, one of BTRFS_BLOCK_RSV_* */
|
||||
enum btrfs_rsv_type type:8;
|
||||
|
||||
/*
|
||||
* Qgroup equivalent for @size @reserved
|
||||
@@ -49,13 +50,13 @@ struct btrfs_block_rsv {
|
||||
u64 qgroup_rsv_reserved;
|
||||
};
|
||||
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type);
|
||||
void btrfs_init_root_block_rsv(struct btrfs_root *root);
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type);
|
||||
enum btrfs_rsv_type type);
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type);
|
||||
enum btrfs_rsv_type type);
|
||||
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,
|
||||
|
||||
@@ -279,19 +279,31 @@ static inline void btrfs_insert_inode_hash(struct inode *inode)
|
||||
__insert_inode_hash(inode, h);
|
||||
}
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
|
||||
/*
|
||||
* On 32 bit systems the i_ino of struct inode is 32 bits (unsigned long), so
|
||||
* we use the inode's location objectid which is a u64 to avoid truncation.
|
||||
*/
|
||||
static inline u64 btrfs_ino(const struct btrfs_inode *inode)
|
||||
{
|
||||
u64 ino = inode->location.objectid;
|
||||
|
||||
/*
|
||||
* !ino: btree_inode
|
||||
* type == BTRFS_ROOT_ITEM_KEY: subvol dir
|
||||
*/
|
||||
if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
|
||||
/* type == BTRFS_ROOT_ITEM_KEY: subvol dir */
|
||||
if (inode->location.type == BTRFS_ROOT_ITEM_KEY)
|
||||
ino = inode->vfs_inode.i_ino;
|
||||
return ino;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline u64 btrfs_ino(const struct btrfs_inode *inode)
|
||||
{
|
||||
return inode->vfs_inode.i_ino;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
|
||||
{
|
||||
i_size_write(&inode->vfs_inode, size);
|
||||
@@ -305,8 +317,7 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
|
||||
if (root == root->fs_info->tree_root &&
|
||||
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
|
||||
return true;
|
||||
if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -136,109 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
|
||||
|
||||
static int btrfs_decompress_bio(struct compressed_bio *cb);
|
||||
|
||||
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned long disk_size)
|
||||
{
|
||||
return sizeof(struct compressed_bio) +
|
||||
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
|
||||
}
|
||||
|
||||
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
|
||||
u64 disk_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
const u32 csum_size = fs_info->csum_size;
|
||||
const u32 sectorsize = fs_info->sectorsize;
|
||||
struct page *page;
|
||||
unsigned int i;
|
||||
char *kaddr;
|
||||
u8 csum[BTRFS_CSUM_SIZE];
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
u8 *cb_sum = cb->sums;
|
||||
|
||||
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
|
||||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
|
||||
return 0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
for (i = 0; i < cb->nr_pages; i++) {
|
||||
u32 pg_offset;
|
||||
u32 bytes_left = PAGE_SIZE;
|
||||
page = cb->compressed_pages[i];
|
||||
|
||||
/* Determine the remaining bytes inside the page first */
|
||||
if (i == cb->nr_pages - 1)
|
||||
bytes_left = cb->compressed_len - i * PAGE_SIZE;
|
||||
|
||||
/* Hash through the page sector by sector */
|
||||
for (pg_offset = 0; pg_offset < bytes_left;
|
||||
pg_offset += sectorsize) {
|
||||
kaddr = kmap_atomic(page);
|
||||
crypto_shash_digest(shash, kaddr + pg_offset,
|
||||
sectorsize, csum);
|
||||
kunmap_atomic(kaddr);
|
||||
|
||||
if (memcmp(&csum, cb_sum, csum_size) != 0) {
|
||||
btrfs_print_data_csum_error(inode, disk_start,
|
||||
csum, cb_sum, cb->mirror_num);
|
||||
if (btrfs_bio(bio)->device)
|
||||
btrfs_dev_stat_inc_and_print(
|
||||
btrfs_bio(bio)->device,
|
||||
BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
||||
return -EIO;
|
||||
}
|
||||
cb_sum += csum_size;
|
||||
disk_start += sectorsize;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reduce bio and io accounting for a compressed_bio with its corresponding bio.
|
||||
*
|
||||
* Return true if there is no pending bio nor io.
|
||||
* Return false otherwise.
|
||||
*/
|
||||
static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
unsigned int bi_size = 0;
|
||||
bool last_io = false;
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
/*
|
||||
* At endio time, bi_iter.bi_size doesn't represent the real bio size.
|
||||
* Thus here we have to iterate through all segments to grab correct
|
||||
* bio size.
|
||||
*/
|
||||
bio_for_each_segment_all(bvec, bio, iter_all)
|
||||
bi_size += bvec->bv_len;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->status = bio->bi_status;
|
||||
|
||||
ASSERT(bi_size && bi_size <= cb->compressed_len);
|
||||
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
|
||||
&cb->pending_sectors);
|
||||
/*
|
||||
* Here we must wake up the possible error handler after all other
|
||||
* operations on @cb finished, or we can race with
|
||||
* finish_compressed_bio_*() which may free @cb.
|
||||
*/
|
||||
wake_up_var(cb);
|
||||
|
||||
return last_io;
|
||||
}
|
||||
|
||||
static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
{
|
||||
unsigned int index;
|
||||
struct page *page;
|
||||
|
||||
if (cb->status == BLK_STS_OK)
|
||||
cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
|
||||
|
||||
/* Release the compressed pages */
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
@@ -247,85 +152,63 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
}
|
||||
|
||||
/* Do io completion on the original bio */
|
||||
if (cb->status != BLK_STS_OK) {
|
||||
if (cb->status != BLK_STS_OK)
|
||||
cb->orig_bio->bi_status = cb->status;
|
||||
bio_endio(cb->orig_bio);
|
||||
} else {
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
/*
|
||||
* We have verified the checksum already, set page checked so
|
||||
* the end_io handlers know about it
|
||||
*/
|
||||
ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
|
||||
u64 bvec_start = page_offset(bvec->bv_page) +
|
||||
bvec->bv_offset;
|
||||
|
||||
btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
|
||||
bvec->bv_page, bvec_start,
|
||||
bvec->bv_len);
|
||||
}
|
||||
|
||||
bio_endio(cb->orig_bio);
|
||||
}
|
||||
bio_endio(cb->orig_bio);
|
||||
|
||||
/* Finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
}
|
||||
|
||||
/* when we finish reading compressed pages from the disk, we
|
||||
* decompress them and then run the bio end_io routines on the
|
||||
* decompressed pages (in the inode address space).
|
||||
*
|
||||
* This allows the checksumming and other IO error handling routines
|
||||
* to work normally
|
||||
*
|
||||
* The compressed pages are freed here, and it must be run
|
||||
* in process context
|
||||
/*
|
||||
* Verify the checksums and kick off repair if needed on the uncompressed data
|
||||
* before decompressing it into the original bio and freeing the uncompressed
|
||||
* pages.
|
||||
*/
|
||||
static void end_compressed_bio_read(struct bio *bio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct inode *inode;
|
||||
unsigned int mirror = btrfs_bio(bio)->mirror_num;
|
||||
int ret = 0;
|
||||
struct inode *inode = cb->inode;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_inode *bi = BTRFS_I(inode);
|
||||
bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
|
||||
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
blk_status_t status = bio->bi_status;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bv;
|
||||
u32 offset;
|
||||
|
||||
if (!dec_and_test_compressed_bio(cb, bio))
|
||||
goto out;
|
||||
btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
|
||||
u64 start = bbio->file_offset + offset;
|
||||
|
||||
/*
|
||||
* Record the correct mirror_num in cb->orig_bio so that
|
||||
* read-repair can work properly.
|
||||
*/
|
||||
btrfs_bio(cb->orig_bio)->mirror_num = mirror;
|
||||
cb->mirror_num = mirror;
|
||||
if (!status &&
|
||||
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
|
||||
bv.bv_page, bv.bv_offset))) {
|
||||
clean_io_failure(fs_info, &bi->io_failure_tree,
|
||||
&bi->io_tree, start, bv.bv_page,
|
||||
btrfs_ino(bi), bv.bv_offset);
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Some IO in this cb have failed, just skip checksum as there
|
||||
* is no way it could be correct.
|
||||
*/
|
||||
if (cb->status != BLK_STS_OK)
|
||||
goto csum_failed;
|
||||
refcount_inc(&cb->pending_ios);
|
||||
ret = btrfs_repair_one_sector(inode, bbio, offset,
|
||||
bv.bv_page, bv.bv_offset,
|
||||
btrfs_submit_data_read_bio);
|
||||
if (ret) {
|
||||
refcount_dec(&cb->pending_ios);
|
||||
status = errno_to_blk_status(ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inode = cb->inode;
|
||||
ret = check_compressed_csum(BTRFS_I(inode), bio,
|
||||
bio->bi_iter.bi_sector << 9);
|
||||
if (ret)
|
||||
goto csum_failed;
|
||||
if (status)
|
||||
cb->status = status;
|
||||
|
||||
/* ok, we're the last bio for this extent, lets start
|
||||
* the decompression.
|
||||
*/
|
||||
ret = btrfs_decompress_bio(cb);
|
||||
|
||||
csum_failed:
|
||||
if (ret)
|
||||
cb->status = errno_to_blk_status(ret);
|
||||
finish_compressed_bio_read(cb);
|
||||
out:
|
||||
if (refcount_dec_and_test(&cb->pending_ios))
|
||||
finish_compressed_bio_read(cb);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
@@ -403,6 +286,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
|
||||
kfree(cb);
|
||||
}
|
||||
|
||||
static void btrfs_finish_compressed_write_work(struct work_struct *work)
|
||||
{
|
||||
struct compressed_bio *cb =
|
||||
container_of(work, struct compressed_bio, write_end_work);
|
||||
|
||||
finish_compressed_bio_write(cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the cleanup once all the compressed pages hit the disk. This will clear
|
||||
* writeback on the file pages and free the compressed pages.
|
||||
@@ -414,29 +305,18 @@ static void end_compressed_bio_write(struct bio *bio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
|
||||
if (!dec_and_test_compressed_bio(cb, bio))
|
||||
goto out;
|
||||
if (bio->bi_status)
|
||||
cb->status = bio->bi_status;
|
||||
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||
if (refcount_dec_and_test(&cb->pending_ios)) {
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
|
||||
finish_compressed_bio_write(cb);
|
||||
out:
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
|
||||
}
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
|
||||
struct bio *bio, int mirror_num)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
ASSERT(bio->bi_iter.bi_size);
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a compressed_bio, which will be used to read/write on-disk
|
||||
* (aka, compressed) data.
|
||||
@@ -487,7 +367,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
*next_stripe_start = disk_bytenr + geom.len;
|
||||
|
||||
refcount_inc(&cb->pending_ios);
|
||||
return bio;
|
||||
}
|
||||
|
||||
@@ -514,26 +394,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
struct compressed_bio *cb;
|
||||
u64 cur_disk_bytenr = disk_start;
|
||||
u64 next_stripe_start;
|
||||
blk_status_t ret;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
|
||||
const bool use_append = btrfs_use_zone_append(inode, disk_start);
|
||||
const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
|
||||
|
||||
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||
IS_ALIGNED(len, fs_info->sectorsize));
|
||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
|
||||
if (!cb)
|
||||
return BLK_STS_RESOURCE;
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
refcount_set(&cb->pending_ios, 1);
|
||||
cb->status = BLK_STS_OK;
|
||||
cb->inode = &inode->vfs_inode;
|
||||
cb->start = start;
|
||||
cb->len = len;
|
||||
cb->mirror_num = 0;
|
||||
cb->compressed_pages = compressed_pages;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->writeback = writeback;
|
||||
cb->orig_bio = NULL;
|
||||
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
if (blkcg_css)
|
||||
@@ -554,8 +433,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
&next_stripe_start);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = errno_to_blk_status(PTR_ERR(bio));
|
||||
bio = NULL;
|
||||
goto finish_cb;
|
||||
break;
|
||||
}
|
||||
if (blkcg_css)
|
||||
bio->bi_opf |= REQ_CGROUP_PUNT;
|
||||
@@ -599,44 +477,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
if (submit) {
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, true);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = submit_compressed_bio(fs_info, bio, 0);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
ASSERT(bio->bi_iter.bi_size);
|
||||
btrfs_submit_bio(fs_info, bio, 0);
|
||||
bio = NULL;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (blkcg_css)
|
||||
kthread_associate_blkcg(NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
finish_cb:
|
||||
if (blkcg_css)
|
||||
kthread_associate_blkcg(NULL);
|
||||
|
||||
if (bio) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
}
|
||||
/* Last byte of @cb is submitted, endio will free @cb */
|
||||
if (cur_disk_bytenr == disk_start + compressed_len)
|
||||
return ret;
|
||||
|
||||
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||
(disk_start + compressed_len - cur_disk_bytenr) >>
|
||||
fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Even with previous bio ended, we should still have io not yet
|
||||
* submitted, thus need to finish manually.
|
||||
*/
|
||||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring @cb, can finish it safely. */
|
||||
finish_compressed_bio_write(cb);
|
||||
if (refcount_dec_and_test(&cb->pending_ios))
|
||||
finish_compressed_bio_write(cb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -765,7 +624,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
int zeros;
|
||||
zeros = PAGE_SIZE - zero_offset;
|
||||
memzero_page(page, zero_offset, zeros);
|
||||
flush_dcache_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -819,7 +677,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
blk_status_t ret;
|
||||
int ret2;
|
||||
int i;
|
||||
u8 *sums;
|
||||
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
||||
@@ -837,17 +694,15 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
|
||||
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
|
||||
compressed_len = em->block_len;
|
||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
|
||||
if (!cb) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
refcount_set(&cb->pending_ios, 1);
|
||||
cb->status = BLK_STS_OK;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
sums = cb->sums;
|
||||
|
||||
cb->start = em->orig_start;
|
||||
em_len = em->len;
|
||||
@@ -893,9 +748,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
REQ_OP_READ, end_compressed_bio_read,
|
||||
&next_stripe_start);
|
||||
if (IS_ERR(comp_bio)) {
|
||||
ret = errno_to_blk_status(PTR_ERR(comp_bio));
|
||||
comp_bio = NULL;
|
||||
goto finish_cb;
|
||||
cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
@@ -931,22 +785,33 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
submit = true;
|
||||
|
||||
if (submit) {
|
||||
unsigned int nr_sectors;
|
||||
/* Save the original iter for read repair */
|
||||
if (bio_op(comp_bio) == REQ_OP_READ)
|
||||
btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
|
||||
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
/*
|
||||
* Save the initial offset of this chunk, as there
|
||||
* is no direct correlation between compressed pages and
|
||||
* the original file offset. The field is only used for
|
||||
* printing error messages.
|
||||
*/
|
||||
btrfs_bio(comp_bio)->file_offset = file_offset;
|
||||
|
||||
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
||||
fs_info->sectorsize);
|
||||
sums += fs_info->csum_size * nr_sectors;
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
|
||||
if (ret) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
ASSERT(comp_bio->bi_iter.bi_size);
|
||||
btrfs_submit_bio(fs_info, comp_bio, mirror_num);
|
||||
comp_bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (refcount_dec_and_test(&cb->pending_ios))
|
||||
finish_compressed_bio_read(cb);
|
||||
return;
|
||||
|
||||
fail:
|
||||
@@ -964,25 +829,6 @@ out:
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
return;
|
||||
finish_cb:
|
||||
if (comp_bio) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
}
|
||||
/* All bytes of @cb are submitted, endio will free @cb */
|
||||
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||
return;
|
||||
|
||||
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||
(disk_bytenr + compressed_len - cur_disk_byte) >>
|
||||
fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Even with previous bio ended, we should still have io not yet
|
||||
* submitted, thus need to finish @cb manually.
|
||||
*/
|
||||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring @cb, can finish it safely. */
|
||||
finish_compressed_bio_read(cb);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1481,7 +1327,6 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
|
||||
ASSERT(copy_start - decompressed < buf_len);
|
||||
memcpy_to_page(bvec.bv_page, bvec.bv_offset,
|
||||
buf + copy_start - decompressed, copy_len);
|
||||
flush_dcache_page(bvec.bv_page);
|
||||
cur_offset += copy_len;
|
||||
|
||||
bio_advance(orig_bio, copy_len);
|
||||
|
||||
@@ -30,8 +30,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
|
||||
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
||||
|
||||
struct compressed_bio {
|
||||
/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
|
||||
refcount_t pending_sectors;
|
||||
/* Number of outstanding bios */
|
||||
refcount_t pending_ios;
|
||||
|
||||
/* Number of compressed pages in the array */
|
||||
unsigned int nr_pages;
|
||||
@@ -59,16 +59,12 @@ struct compressed_bio {
|
||||
|
||||
/* IO errors */
|
||||
blk_status_t status;
|
||||
int mirror_num;
|
||||
|
||||
/* for reads, this is the bio we are copying the data into */
|
||||
struct bio *orig_bio;
|
||||
|
||||
/*
|
||||
* the start of a variable length array of checksums only
|
||||
* used by reads
|
||||
*/
|
||||
u8 sums[];
|
||||
union {
|
||||
/* For reads, this is the bio we are copying the data into */
|
||||
struct bio *orig_bio;
|
||||
struct work_struct write_end_work;
|
||||
};
|
||||
};
|
||||
|
||||
static inline unsigned int btrfs_compress_type(unsigned int type_level)
|
||||
|
||||
105
fs/btrfs/ctree.h
105
fs/btrfs/ctree.h
@@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args;
|
||||
#define BTRFS_STAT_CURR 0
|
||||
#define BTRFS_STAT_PREV 1
|
||||
|
||||
/*
|
||||
* Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(u64 size)
|
||||
{
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
}
|
||||
|
||||
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
|
||||
{
|
||||
BUG_ON(num_stripes == 0);
|
||||
@@ -229,6 +221,13 @@ struct btrfs_root_backup {
|
||||
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
|
||||
#define BTRFS_SUPER_INFO_SIZE 4096
|
||||
|
||||
/*
|
||||
* The reserved space at the beginning of each device.
|
||||
* It covers the primary super block and leaves space for potential use by other
|
||||
* tools like bootloaders or to lower potential damage of accidental overwrite.
|
||||
*/
|
||||
#define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M)
|
||||
|
||||
/*
|
||||
* the super block basically lists the main trees of the FS
|
||||
* it currently lacks any block count etc etc
|
||||
@@ -248,8 +247,12 @@ struct btrfs_super_block {
|
||||
__le64 chunk_root;
|
||||
__le64 log_root;
|
||||
|
||||
/* this will help find the new super based on the log root */
|
||||
__le64 log_root_transid;
|
||||
/*
|
||||
* This member has never been utilized since the very beginning, thus
|
||||
* it's always 0 regardless of kernel version. We always use
|
||||
* generation + 1 to read log tree root. So here we mark it deprecated.
|
||||
*/
|
||||
__le64 __unused_log_root_transid;
|
||||
__le64 total_bytes;
|
||||
__le64 bytes_used;
|
||||
__le64 root_dir_objectid;
|
||||
@@ -635,6 +638,9 @@ enum {
|
||||
/* Indicate we have half completed snapshot deletions pending. */
|
||||
BTRFS_FS_UNFINISHED_DROPS,
|
||||
|
||||
/* Indicate we have to finish a zone to do next allocation. */
|
||||
BTRFS_FS_NEED_ZONE_FINISH,
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/* Indicate if we have error/warn message printed on 32bit systems */
|
||||
BTRFS_FS_32BIT_ERROR,
|
||||
@@ -656,6 +662,18 @@ enum btrfs_exclusive_operation {
|
||||
BTRFS_EXCLOP_SWAP_ACTIVATE,
|
||||
};
|
||||
|
||||
/* Store data about transaction commits, exported via sysfs. */
|
||||
struct btrfs_commit_stats {
|
||||
/* Total number of commits */
|
||||
u64 commit_count;
|
||||
/* The maximum commit duration so far in ns */
|
||||
u64 max_commit_dur;
|
||||
/* The last commit duration in ns */
|
||||
u64 last_commit_dur;
|
||||
/* The total commit duration in ns */
|
||||
u64 total_commit_dur;
|
||||
};
|
||||
|
||||
struct btrfs_fs_info {
|
||||
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
|
||||
unsigned long flags;
|
||||
@@ -850,11 +868,11 @@ struct btrfs_fs_info {
|
||||
struct btrfs_workqueue *hipri_workers;
|
||||
struct btrfs_workqueue *delalloc_workers;
|
||||
struct btrfs_workqueue *flush_workers;
|
||||
struct btrfs_workqueue *endio_workers;
|
||||
struct btrfs_workqueue *endio_meta_workers;
|
||||
struct btrfs_workqueue *endio_raid56_workers;
|
||||
struct workqueue_struct *endio_workers;
|
||||
struct workqueue_struct *endio_meta_workers;
|
||||
struct workqueue_struct *endio_raid56_workers;
|
||||
struct workqueue_struct *rmw_workers;
|
||||
struct btrfs_workqueue *endio_meta_write_workers;
|
||||
struct workqueue_struct *compressed_write_workers;
|
||||
struct btrfs_workqueue *endio_write_workers;
|
||||
struct btrfs_workqueue *endio_freespace_worker;
|
||||
struct btrfs_workqueue *caching_workers;
|
||||
@@ -1032,6 +1050,12 @@ struct btrfs_fs_info {
|
||||
u32 csums_per_leaf;
|
||||
u32 stripesize;
|
||||
|
||||
/*
|
||||
* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
|
||||
* filesystem, on zoned it depends on the device constraints.
|
||||
*/
|
||||
u64 max_extent_size;
|
||||
|
||||
/* Block groups and devices containing active swapfiles. */
|
||||
spinlock_t swapfile_pins_lock;
|
||||
struct rb_root swapfile_pins;
|
||||
@@ -1047,6 +1071,8 @@ struct btrfs_fs_info {
|
||||
*/
|
||||
u64 zone_size;
|
||||
|
||||
/* Max size to emit ZONE_APPEND write command */
|
||||
u64 max_zone_append_size;
|
||||
struct mutex zoned_meta_io_lock;
|
||||
spinlock_t treelog_bg_lock;
|
||||
u64 treelog_bg;
|
||||
@@ -1063,6 +1089,11 @@ struct btrfs_fs_info {
|
||||
|
||||
spinlock_t zone_active_bgs_lock;
|
||||
struct list_head zone_active_bgs;
|
||||
/* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
|
||||
wait_queue_head_t zone_finish_wait;
|
||||
|
||||
/* Updates are not protected by any lock */
|
||||
struct btrfs_commit_stats commit_stats;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
@@ -2475,8 +2506,6 @@ BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
|
||||
chunk_root_level, 8);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
|
||||
log_root, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
|
||||
log_root_transid, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
|
||||
log_root_level, 8);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
|
||||
@@ -2733,8 +2762,16 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
|
||||
enum btrfs_inline_ref_type is_data);
|
||||
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
static inline u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums,
|
||||
u64 offset)
|
||||
{
|
||||
u64 offset_in_sectors = offset >> fs_info->sectorsize_bits;
|
||||
|
||||
return csums + offset_in_sectors * fs_info->csum_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the number of bytes to be checksummmed and figure out how many leaves
|
||||
* Take the number of bytes to be checksummed and figure out how many leaves
|
||||
* it would require to store the csums for that many bytes.
|
||||
*/
|
||||
static inline u64 btrfs_csum_bytes_to_leaves(
|
||||
@@ -3251,11 +3288,18 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
|
||||
u64 btrfs_file_extent_end(const struct btrfs_path *path);
|
||||
|
||||
/* inode.c */
|
||||
void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, enum btrfs_compression_type compress_type);
|
||||
void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num);
|
||||
void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, enum btrfs_compression_type compress_type);
|
||||
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
|
||||
u32 pgoff, u8 *csum, const u8 * const csum_expected);
|
||||
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page, u32 pgoff);
|
||||
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page,
|
||||
u64 start, u64 end);
|
||||
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page, u32 pgoff);
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
u64 start, u64 len);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
@@ -3305,9 +3349,9 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
|
||||
struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
|
||||
struct inode *dir);
|
||||
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
|
||||
unsigned *bits);
|
||||
u32 bits);
|
||||
void btrfs_clear_delalloc_extent(struct inode *inode,
|
||||
struct extent_state *state, unsigned *bits);
|
||||
struct extent_state *state, u32 bits);
|
||||
void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
|
||||
struct extent_state *other);
|
||||
void btrfs_split_delalloc_extent(struct inode *inode,
|
||||
@@ -3353,6 +3397,12 @@ int btrfs_writepage_cow_fixup(struct page *page);
|
||||
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start,
|
||||
u64 end, bool uptodate);
|
||||
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
|
||||
int compress_type);
|
||||
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
|
||||
u64 file_offset, u64 disk_bytenr,
|
||||
u64 disk_io_size,
|
||||
struct page **pages);
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
@@ -4009,6 +4059,19 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
return fs_info->zone_size > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Count how many fs_info->max_extent_size cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (!fs_info)
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
#endif
|
||||
|
||||
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
|
||||
}
|
||||
|
||||
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
|
||||
{
|
||||
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
|
||||
|
||||
@@ -273,7 +273,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
|
||||
u64 num_bytes, u64 disk_num_bytes,
|
||||
u64 *meta_reserve, u64 *qgroup_reserve)
|
||||
{
|
||||
u64 nr_extents = count_max_extents(num_bytes);
|
||||
u64 nr_extents = count_max_extents(fs_info, num_bytes);
|
||||
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
|
||||
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
|
||||
|
||||
@@ -350,7 +350,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
* needs to free the reservation we just made.
|
||||
*/
|
||||
spin_lock(&inode->lock);
|
||||
nr_extents = count_max_extents(num_bytes);
|
||||
nr_extents = count_max_extents(fs_info, num_bytes);
|
||||
btrfs_mod_outstanding_extents(inode, nr_extents);
|
||||
inode->csum_bytes += disk_num_bytes;
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
@@ -413,7 +413,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
|
||||
unsigned num_extents;
|
||||
|
||||
spin_lock(&inode->lock);
|
||||
num_extents = count_max_extents(num_bytes);
|
||||
num_extents = count_max_extents(fs_info, num_bytes);
|
||||
btrfs_mod_outstanding_extents(inode, -num_extents);
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
@@ -52,18 +52,6 @@ static inline void btrfs_init_delayed_node(
|
||||
INIT_LIST_HEAD(&delayed_node->p_list);
|
||||
}
|
||||
|
||||
static inline int btrfs_is_continuous_delayed_item(
|
||||
struct btrfs_delayed_item *item1,
|
||||
struct btrfs_delayed_item *item2)
|
||||
{
|
||||
if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
|
||||
item1->key.objectid == item2->key.objectid &&
|
||||
item1->key.type == item2->key.type &&
|
||||
item1->key.offset + 1 == item2->key.offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
struct btrfs_inode *btrfs_inode)
|
||||
{
|
||||
@@ -398,8 +386,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
|
||||
}
|
||||
|
||||
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
struct btrfs_delayed_item *ins,
|
||||
int action)
|
||||
struct btrfs_delayed_item *ins)
|
||||
{
|
||||
struct rb_node **p, *node;
|
||||
struct rb_node *parent_node = NULL;
|
||||
@@ -408,9 +395,9 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
int cmp;
|
||||
bool leftmost = true;
|
||||
|
||||
if (action == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
root = &delayed_node->ins_root;
|
||||
else if (action == BTRFS_DELAYED_DELETION_ITEM)
|
||||
else if (ins->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
|
||||
root = &delayed_node->del_root;
|
||||
else
|
||||
BUG();
|
||||
@@ -436,32 +423,19 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color_cached(node, root, leftmost);
|
||||
ins->delayed_node = delayed_node;
|
||||
ins->ins_or_del = action;
|
||||
|
||||
if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
|
||||
action == BTRFS_DELAYED_INSERTION_ITEM &&
|
||||
/* Delayed items are always for dir index items. */
|
||||
ASSERT(ins->key.type == BTRFS_DIR_INDEX_KEY);
|
||||
|
||||
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM &&
|
||||
ins->key.offset >= delayed_node->index_cnt)
|
||||
delayed_node->index_cnt = ins->key.offset + 1;
|
||||
delayed_node->index_cnt = ins->key.offset + 1;
|
||||
|
||||
delayed_node->count++;
|
||||
atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
|
||||
struct btrfs_delayed_item *item)
|
||||
{
|
||||
return __btrfs_add_delayed_item(node, item,
|
||||
BTRFS_DELAYED_INSERTION_ITEM);
|
||||
}
|
||||
|
||||
static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
|
||||
struct btrfs_delayed_item *item)
|
||||
{
|
||||
return __btrfs_add_delayed_item(node, item,
|
||||
BTRFS_DELAYED_DELETION_ITEM);
|
||||
}
|
||||
|
||||
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
|
||||
{
|
||||
int seq = atomic_inc_return(&delayed_root->items_seq);
|
||||
@@ -573,7 +547,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid,
|
||||
num_bytes, 1);
|
||||
item->bytes_reserved = num_bytes;
|
||||
/*
|
||||
* For insertions we track reserved metadata space by accounting
|
||||
* for the number of leaves that will be used, based on the delayed
|
||||
* node's index_items_size field.
|
||||
*/
|
||||
if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
|
||||
item->bytes_reserved = num_bytes;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -599,6 +579,21 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
|
||||
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
|
||||
}
|
||||
|
||||
static void btrfs_delayed_item_release_leaves(struct btrfs_delayed_node *node,
|
||||
unsigned int num_leaves)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = node->root->fs_info;
|
||||
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, num_leaves);
|
||||
|
||||
/* There are no space reservations during log replay, bail out. */
|
||||
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
|
||||
return;
|
||||
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item", node->inode_id,
|
||||
bytes, 0);
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv, bytes, NULL);
|
||||
}
|
||||
|
||||
static int btrfs_delayed_inode_reserve_metadata(
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
@@ -672,22 +667,53 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a single delayed item or a batch of delayed items that have consecutive
|
||||
* keys if they exist.
|
||||
* Insert a single delayed item or a batch of delayed items, as many as possible
|
||||
* that fit in a leaf. The delayed items (dir index keys) are sorted by their key
|
||||
* in the rbtree, and if there's a gap between two consecutive dir index items,
|
||||
* then it means at some point we had delayed dir indexes to add but they got
|
||||
* removed (by btrfs_delete_delayed_dir_index()) before we attempted to flush them
|
||||
* into the subvolume tree. Dir index keys also have their offsets coming from a
|
||||
* monotonically increasing counter, so we can't get new keys with an offset that
|
||||
* fits within a gap between delayed dir index items.
|
||||
*/
|
||||
static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_delayed_item *first_item)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_delayed_node *node = first_item->delayed_node;
|
||||
LIST_HEAD(item_list);
|
||||
struct btrfs_delayed_item *curr;
|
||||
struct btrfs_delayed_item *next;
|
||||
const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
|
||||
const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
|
||||
struct btrfs_item_batch batch;
|
||||
int total_size;
|
||||
char *ins_data = NULL;
|
||||
int ret;
|
||||
bool continuous_keys_only = false;
|
||||
|
||||
lockdep_assert_held(&node->mutex);
|
||||
|
||||
/*
|
||||
* During normal operation the delayed index offset is continuously
|
||||
* increasing, so we can batch insert all items as there will not be any
|
||||
* overlapping keys in the tree.
|
||||
*
|
||||
* The exception to this is log replay, where we may have interleaved
|
||||
* offsets in the tree, so our batch needs to be continuous keys only in
|
||||
* order to ensure we do not end up with out of order items in our leaf.
|
||||
*/
|
||||
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
|
||||
continuous_keys_only = true;
|
||||
|
||||
/*
|
||||
* For delayed items to insert, we track reserved metadata bytes based
|
||||
* on the number of leaves that we will use.
|
||||
* See btrfs_insert_delayed_dir_index() and
|
||||
* btrfs_delayed_item_reserve_metadata().
|
||||
*/
|
||||
ASSERT(first_item->bytes_reserved == 0);
|
||||
|
||||
list_add_tail(&first_item->tree_list, &item_list);
|
||||
batch.total_data_size = first_item->data_len;
|
||||
@@ -699,9 +725,19 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
int next_size;
|
||||
|
||||
next = __btrfs_next_delayed_item(curr);
|
||||
if (!next || !btrfs_is_continuous_delayed_item(curr, next))
|
||||
if (!next)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We cannot allow gaps in the key space if we're doing log
|
||||
* replay.
|
||||
*/
|
||||
if (continuous_keys_only &&
|
||||
(next->key.offset != curr->key.offset + 1))
|
||||
break;
|
||||
|
||||
ASSERT(next->bytes_reserved == 0);
|
||||
|
||||
next_size = next->data_len + sizeof(struct btrfs_item);
|
||||
if (total_size + next_size > max_size)
|
||||
break;
|
||||
@@ -758,9 +794,41 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
|
||||
ASSERT(node->index_item_leaves > 0);
|
||||
|
||||
/*
|
||||
* For normal operations we will batch an entire leaf's worth of delayed
|
||||
* items, so if there are more items to process we can decrement
|
||||
* index_item_leaves by 1 as we inserted 1 leaf's worth of items.
|
||||
*
|
||||
* However for log replay we may not have inserted an entire leaf's
|
||||
* worth of items, we may have not had continuous items, so decrementing
|
||||
* here would mess up the index_item_leaves accounting. For this case
|
||||
* only clean up the accounting when there are no items left.
|
||||
*/
|
||||
if (next && !continuous_keys_only) {
|
||||
/*
|
||||
* We inserted one batch of items into a leaf and there are more
|
||||
* items to flush in a future batch, now release one unit of
|
||||
* metadata space from the delayed block reserve, corresponding
|
||||
* to the leaf we just flushed.
|
||||
*/
|
||||
btrfs_delayed_item_release_leaves(node, 1);
|
||||
node->index_item_leaves--;
|
||||
} else if (!next) {
|
||||
/*
|
||||
* There are no more items to insert. We can have a number of
|
||||
* reserved leaves > 1 here - this happens when many dir index
|
||||
* items are added and then removed before they are flushed (file
|
||||
* names with a very short life, never span a transaction). So
|
||||
* release all remaining leaves.
|
||||
*/
|
||||
btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
|
||||
node->index_item_leaves = 0;
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(curr, next, &item_list, tree_list) {
|
||||
list_del(&curr->tree_list);
|
||||
btrfs_delayed_item_release_metadata(root, curr);
|
||||
btrfs_release_delayed_item(curr);
|
||||
}
|
||||
out:
|
||||
@@ -796,62 +864,75 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_delayed_item *item)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_delayed_item *curr, *next;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_key key;
|
||||
struct list_head head;
|
||||
int nitems, i, last_item;
|
||||
int ret = 0;
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
LIST_HEAD(batch_list);
|
||||
int nitems, slot, last_slot;
|
||||
int ret;
|
||||
u64 total_reserved_size = item->bytes_reserved;
|
||||
|
||||
BUG_ON(!path->nodes[0]);
|
||||
ASSERT(leaf != NULL);
|
||||
|
||||
leaf = path->nodes[0];
|
||||
|
||||
i = path->slots[0];
|
||||
last_item = btrfs_header_nritems(leaf) - 1;
|
||||
if (i > last_item)
|
||||
return -ENOENT; /* FIXME: Is errno suitable? */
|
||||
|
||||
next = item;
|
||||
INIT_LIST_HEAD(&head);
|
||||
btrfs_item_key_to_cpu(leaf, &key, i);
|
||||
nitems = 0;
|
||||
slot = path->slots[0];
|
||||
last_slot = btrfs_header_nritems(leaf) - 1;
|
||||
/*
|
||||
* count the number of the dir index items that we can delete in batch
|
||||
* Our caller always gives us a path pointing to an existing item, so
|
||||
* this can not happen.
|
||||
*/
|
||||
while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
|
||||
list_add_tail(&next->tree_list, &head);
|
||||
nitems++;
|
||||
ASSERT(slot <= last_slot);
|
||||
if (WARN_ON(slot > last_slot))
|
||||
return -ENOENT;
|
||||
|
||||
nitems = 1;
|
||||
curr = item;
|
||||
list_add_tail(&curr->tree_list, &batch_list);
|
||||
|
||||
/*
|
||||
* Keep checking if the next delayed item matches the next item in the
|
||||
* leaf - if so, we can add it to the batch of items to delete from the
|
||||
* leaf.
|
||||
*/
|
||||
while (slot < last_slot) {
|
||||
struct btrfs_key key;
|
||||
|
||||
curr = next;
|
||||
next = __btrfs_next_delayed_item(curr);
|
||||
if (!next)
|
||||
break;
|
||||
|
||||
if (!btrfs_is_continuous_delayed_item(curr, next))
|
||||
slot++;
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
if (btrfs_comp_cpu_keys(&next->key, &key) != 0)
|
||||
break;
|
||||
|
||||
i++;
|
||||
if (i > last_item)
|
||||
break;
|
||||
btrfs_item_key_to_cpu(leaf, &key, i);
|
||||
nitems++;
|
||||
curr = next;
|
||||
list_add_tail(&curr->tree_list, &batch_list);
|
||||
total_reserved_size += curr->bytes_reserved;
|
||||
}
|
||||
|
||||
if (!nitems)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
list_for_each_entry_safe(curr, next, &head, tree_list) {
|
||||
btrfs_delayed_item_release_metadata(root, curr);
|
||||
/* In case of BTRFS_FS_LOG_RECOVERING items won't have reserved space */
|
||||
if (total_reserved_size > 0) {
|
||||
/*
|
||||
* Check btrfs_delayed_item_reserve_metadata() to see why we
|
||||
* don't need to release/reserve qgroup space.
|
||||
*/
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid, total_reserved_size,
|
||||
0);
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
|
||||
total_reserved_size, NULL);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(curr, next, &batch_list, tree_list) {
|
||||
list_del(&curr->tree_list);
|
||||
btrfs_release_delayed_item(curr);
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
|
||||
@@ -859,43 +940,52 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_delayed_node *node)
|
||||
{
|
||||
struct btrfs_delayed_item *curr, *prev;
|
||||
int ret = 0;
|
||||
|
||||
do_again:
|
||||
mutex_lock(&node->mutex);
|
||||
curr = __btrfs_first_delayed_deletion_item(node);
|
||||
if (!curr)
|
||||
goto delete_fail;
|
||||
while (ret == 0) {
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
goto delete_fail;
|
||||
else if (ret > 0) {
|
||||
/*
|
||||
* can't find the item which the node points to, so this node
|
||||
* is invalid, just drop it.
|
||||
*/
|
||||
prev = curr;
|
||||
curr = __btrfs_next_delayed_item(prev);
|
||||
btrfs_release_delayed_item(prev);
|
||||
ret = 0;
|
||||
btrfs_release_path(path);
|
||||
if (curr) {
|
||||
mutex_lock(&node->mutex);
|
||||
item = __btrfs_first_delayed_deletion_item(node);
|
||||
if (!item) {
|
||||
mutex_unlock(&node->mutex);
|
||||
goto do_again;
|
||||
} else
|
||||
goto delete_fail;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &item->key, path, -1, 1);
|
||||
if (ret > 0) {
|
||||
/*
|
||||
* There's no matching item in the leaf. This means we
|
||||
* have already deleted this item in a past run of the
|
||||
* delayed items. We ignore errors when running delayed
|
||||
* items from an async context, through a work queue job
|
||||
* running btrfs_async_run_delayed_root(), and don't
|
||||
* release delayed items that failed to complete. This
|
||||
* is because we will retry later, and at transaction
|
||||
* commit time we always run delayed items and will
|
||||
* then deal with errors if they fail to run again.
|
||||
*
|
||||
* So just release delayed items for which we can't find
|
||||
* an item in the tree, and move to the next item.
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
btrfs_release_delayed_item(item);
|
||||
ret = 0;
|
||||
} else if (ret == 0) {
|
||||
ret = btrfs_batch_delete_items(trans, root, path, item);
|
||||
btrfs_release_path(path);
|
||||
}
|
||||
|
||||
/*
|
||||
* We unlock and relock on each iteration, this is to prevent
|
||||
* blocking other tasks for too long while we are being run from
|
||||
* the async context (work queue job). Those tasks are typically
|
||||
* running system calls like creat/mkdir/rename/unlink/etc which
|
||||
* need to add delayed items to this delayed node.
|
||||
*/
|
||||
mutex_unlock(&node->mutex);
|
||||
}
|
||||
|
||||
btrfs_batch_delete_items(trans, root, path, curr);
|
||||
btrfs_release_path(path);
|
||||
mutex_unlock(&node->mutex);
|
||||
goto do_again;
|
||||
|
||||
delete_fail:
|
||||
btrfs_release_path(path);
|
||||
mutex_unlock(&node->mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1354,9 +1444,13 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_disk_key *disk_key, u8 type,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
struct btrfs_delayed_item *delayed_item;
|
||||
struct btrfs_dir_item *dir_item;
|
||||
bool reserve_leaf_space;
|
||||
u32 data_len;
|
||||
int ret;
|
||||
|
||||
delayed_node = btrfs_get_or_create_delayed_node(dir);
|
||||
@@ -1372,6 +1466,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
delayed_item->key.objectid = btrfs_ino(dir);
|
||||
delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
|
||||
delayed_item->key.offset = index;
|
||||
delayed_item->ins_or_del = BTRFS_DELAYED_INSERTION_ITEM;
|
||||
|
||||
dir_item = (struct btrfs_dir_item *)delayed_item->data;
|
||||
dir_item->location = *disk_key;
|
||||
@@ -1381,15 +1476,52 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
btrfs_set_stack_dir_type(dir_item, type);
|
||||
memcpy((char *)(dir_item + 1), name, name_len);
|
||||
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
|
||||
/*
|
||||
* we have reserved enough space when we start a new transaction,
|
||||
* so reserving metadata failure is impossible
|
||||
*/
|
||||
BUG_ON(ret);
|
||||
data_len = delayed_item->data_len + sizeof(struct btrfs_item);
|
||||
|
||||
mutex_lock(&delayed_node->mutex);
|
||||
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
|
||||
|
||||
if (delayed_node->index_item_leaves == 0 ||
|
||||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
|
||||
delayed_node->curr_index_batch_size = data_len;
|
||||
reserve_leaf_space = true;
|
||||
} else {
|
||||
delayed_node->curr_index_batch_size += data_len;
|
||||
reserve_leaf_space = false;
|
||||
}
|
||||
|
||||
if (reserve_leaf_space) {
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
|
||||
delayed_item);
|
||||
/*
|
||||
* Space was reserved for a dir index item insertion when we
|
||||
* started the transaction, so getting a failure here should be
|
||||
* impossible.
|
||||
*/
|
||||
if (WARN_ON(ret)) {
|
||||
mutex_unlock(&delayed_node->mutex);
|
||||
btrfs_release_delayed_item(delayed_item);
|
||||
goto release_node;
|
||||
}
|
||||
|
||||
delayed_node->index_item_leaves++;
|
||||
} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
|
||||
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
|
||||
|
||||
/*
|
||||
* Adding the new dir index item does not require touching another
|
||||
* leaf, so we can release 1 unit of metadata that was previously
|
||||
* reserved when starting the transaction. This applies only to
|
||||
* the case where we had a transaction start and excludes the
|
||||
* transaction join case (when replaying log trees).
|
||||
*/
|
||||
trace_btrfs_space_reservation(fs_info, "transaction",
|
||||
trans->transid, bytes, 0);
|
||||
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
|
||||
ASSERT(trans->bytes_reserved >= bytes);
|
||||
trans->bytes_reserved -= bytes;
|
||||
}
|
||||
|
||||
ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
|
||||
if (unlikely(ret)) {
|
||||
btrfs_err(trans->fs_info,
|
||||
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
|
||||
@@ -1417,8 +1549,37 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
|
||||
return 1;
|
||||
}
|
||||
|
||||
btrfs_delayed_item_release_metadata(node->root, item);
|
||||
/*
|
||||
* For delayed items to insert, we track reserved metadata bytes based
|
||||
* on the number of leaves that we will use.
|
||||
* See btrfs_insert_delayed_dir_index() and
|
||||
* btrfs_delayed_item_reserve_metadata().
|
||||
*/
|
||||
ASSERT(item->bytes_reserved == 0);
|
||||
ASSERT(node->index_item_leaves > 0);
|
||||
|
||||
/*
|
||||
* If there's only one leaf reserved, we can decrement this item from the
|
||||
* current batch, otherwise we can not because we don't know which leaf
|
||||
* it belongs to. With the current limit on delayed items, we rarely
|
||||
* accumulate enough dir index items to fill more than one leaf (even
|
||||
* when using a leaf size of 4K).
|
||||
*/
|
||||
if (node->index_item_leaves == 1) {
|
||||
const u32 data_len = item->data_len + sizeof(struct btrfs_item);
|
||||
|
||||
ASSERT(node->curr_index_batch_size >= data_len);
|
||||
node->curr_index_batch_size -= data_len;
|
||||
}
|
||||
|
||||
btrfs_release_delayed_item(item);
|
||||
|
||||
/* If we now have no more dir index items, we can release all leaves. */
|
||||
if (RB_EMPTY_ROOT(&node->ins_root.rb_root)) {
|
||||
btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
|
||||
node->index_item_leaves = 0;
|
||||
}
|
||||
|
||||
mutex_unlock(&node->mutex);
|
||||
return 0;
|
||||
}
|
||||
@@ -1451,6 +1612,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
item->key = item_key;
|
||||
item->ins_or_del = BTRFS_DELAYED_DELETION_ITEM;
|
||||
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
|
||||
/*
|
||||
@@ -1465,7 +1627,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
mutex_lock(&node->mutex);
|
||||
ret = __btrfs_add_delayed_deletion_item(node, item);
|
||||
ret = __btrfs_add_delayed_item(node, item);
|
||||
if (unlikely(ret)) {
|
||||
btrfs_err(trans->fs_info,
|
||||
"err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
|
||||
@@ -1833,12 +1995,17 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
|
||||
mutex_lock(&delayed_node->mutex);
|
||||
curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
|
||||
while (curr_item) {
|
||||
btrfs_delayed_item_release_metadata(root, curr_item);
|
||||
prev_item = curr_item;
|
||||
curr_item = __btrfs_next_delayed_item(prev_item);
|
||||
btrfs_release_delayed_item(prev_item);
|
||||
}
|
||||
|
||||
if (delayed_node->index_item_leaves > 0) {
|
||||
btrfs_delayed_item_release_leaves(delayed_node,
|
||||
delayed_node->index_item_leaves);
|
||||
delayed_node->index_item_leaves = 0;
|
||||
}
|
||||
|
||||
curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
|
||||
while (curr_item) {
|
||||
btrfs_delayed_item_release_metadata(root, curr_item);
|
||||
|
||||
@@ -58,6 +58,17 @@ struct btrfs_delayed_node {
|
||||
u64 index_cnt;
|
||||
unsigned long flags;
|
||||
int count;
|
||||
/*
|
||||
* The size of the next batch of dir index items to insert (if this
|
||||
* node is from a directory inode). Protected by @mutex.
|
||||
*/
|
||||
u32 curr_index_batch_size;
|
||||
/*
|
||||
* Number of leaves reserved for inserting dir index items (if this
|
||||
* node belongs to a directory inode). This may be larger than the
|
||||
* actual number of leaves we end up using. Protected by @mutex.
|
||||
*/
|
||||
u32 index_item_leaves;
|
||||
};
|
||||
|
||||
struct btrfs_delayed_item {
|
||||
|
||||
@@ -132,7 +132,7 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
|
||||
|
||||
spin_lock(&delayed_rsv->lock);
|
||||
delayed_rsv->size += num_bytes;
|
||||
delayed_rsv->full = 0;
|
||||
delayed_rsv->full = false;
|
||||
spin_unlock(&delayed_rsv->lock);
|
||||
trans->delayed_ref_updates = 0;
|
||||
}
|
||||
@@ -175,7 +175,7 @@ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
if (num_bytes)
|
||||
delayed_refs_rsv->reserved += num_bytes;
|
||||
if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
|
||||
delayed_refs_rsv->full = 1;
|
||||
delayed_refs_rsv->full = true;
|
||||
spin_unlock(&delayed_refs_rsv->lock);
|
||||
|
||||
if (num_bytes)
|
||||
|
||||
@@ -587,7 +587,8 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
ASSERT(!IS_ERR(em));
|
||||
map = em->map_lookup;
|
||||
|
||||
num_extents = cur_extent = 0;
|
||||
num_extents = 0;
|
||||
cur_extent = 0;
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
/* We have more device extent to copy */
|
||||
if (srcdev != map->stripes[i].dev)
|
||||
|
||||
@@ -51,7 +51,6 @@
|
||||
BTRFS_SUPER_FLAG_METADUMP |\
|
||||
BTRFS_SUPER_FLAG_METADUMP_V2)
|
||||
|
||||
static void end_workqueue_fn(struct btrfs_work *work);
|
||||
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
|
||||
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
@@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
|
||||
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/*
|
||||
* btrfs_end_io_wq structs are used to do processing in task context when an IO
|
||||
* is complete. This is used during reads to verify checksums, and it is used
|
||||
* by writes to insert metadata for new file extents after IO is complete.
|
||||
*/
|
||||
struct btrfs_end_io_wq {
|
||||
struct bio *bio;
|
||||
bio_end_io_t *end_io;
|
||||
void *private;
|
||||
struct btrfs_fs_info *info;
|
||||
blk_status_t status;
|
||||
enum btrfs_wq_endio_type metadata;
|
||||
struct btrfs_work work;
|
||||
};
|
||||
|
||||
static struct kmem_cache *btrfs_end_io_wq_cache;
|
||||
|
||||
int __init btrfs_end_io_wq_init(void)
|
||||
{
|
||||
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
|
||||
sizeof(struct btrfs_end_io_wq),
|
||||
0,
|
||||
SLAB_MEM_SPREAD,
|
||||
NULL);
|
||||
if (!btrfs_end_io_wq_cache)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cold btrfs_end_io_wq_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(btrfs_end_io_wq_cache);
|
||||
}
|
||||
|
||||
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (fs_info->csum_shash)
|
||||
@@ -256,8 +221,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
goto out;
|
||||
}
|
||||
btrfs_err_rl(eb->fs_info,
|
||||
"parent transid verify failed on %llu wanted %llu found %llu",
|
||||
eb->start,
|
||||
"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
|
||||
eb->start, eb->read_mirror,
|
||||
parent_transid, btrfs_header_generation(eb));
|
||||
ret = 1;
|
||||
clear_extent_buffer_uptodate(eb);
|
||||
@@ -587,21 +552,23 @@ static int validate_extent_buffer(struct extent_buffer *eb)
|
||||
|
||||
found_start = btrfs_header_bytenr(eb);
|
||||
if (found_start != eb->start) {
|
||||
btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
|
||||
eb->start, found_start);
|
||||
btrfs_err_rl(fs_info,
|
||||
"bad tree block start, mirror %u want %llu have %llu",
|
||||
eb->read_mirror, eb->start, found_start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (check_tree_block_fsid(eb)) {
|
||||
btrfs_err_rl(fs_info, "bad fsid on block %llu",
|
||||
eb->start);
|
||||
btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
|
||||
eb->start, eb->read_mirror);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
found_level = btrfs_header_level(eb);
|
||||
if (found_level >= BTRFS_MAX_LEVEL) {
|
||||
btrfs_err(fs_info, "bad tree block level %d on %llu",
|
||||
(int)btrfs_header_level(eb), eb->start);
|
||||
btrfs_err(fs_info,
|
||||
"bad tree block level, mirror %u level %d on logical %llu",
|
||||
eb->read_mirror, btrfs_header_level(eb), eb->start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@@ -612,8 +579,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
|
||||
|
||||
if (memcmp(result, header_csum, csum_size) != 0) {
|
||||
btrfs_warn_rl(fs_info,
|
||||
"checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
|
||||
eb->start,
|
||||
"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
|
||||
eb->start, eb->read_mirror,
|
||||
CSUM_FMT_VALUE(csum_size, header_csum),
|
||||
CSUM_FMT_VALUE(csum_size, result),
|
||||
btrfs_header_level(eb));
|
||||
@@ -638,8 +605,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
|
||||
set_extent_buffer_uptodate(eb);
|
||||
else
|
||||
btrfs_err(fs_info,
|
||||
"block=%llu read time tree block corruption detected",
|
||||
eb->start);
|
||||
"read time tree block corruption detected on logical %llu mirror %u",
|
||||
eb->start, eb->read_mirror);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -740,58 +707,6 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void end_workqueue_bio(struct bio *bio)
|
||||
{
|
||||
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct btrfs_workqueue *wq;
|
||||
|
||||
fs_info = end_io_wq->info;
|
||||
end_io_wq->status = bio->bi_status;
|
||||
|
||||
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
|
||||
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
|
||||
wq = fs_info->endio_meta_write_workers;
|
||||
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
|
||||
wq = fs_info->endio_freespace_worker;
|
||||
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
|
||||
wq = fs_info->endio_raid56_workers;
|
||||
else
|
||||
wq = fs_info->endio_write_workers;
|
||||
} else {
|
||||
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
|
||||
wq = fs_info->endio_raid56_workers;
|
||||
else if (end_io_wq->metadata)
|
||||
wq = fs_info->endio_meta_workers;
|
||||
else
|
||||
wq = fs_info->endio_workers;
|
||||
}
|
||||
|
||||
btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
|
||||
btrfs_queue_work(wq, &end_io_wq->work);
|
||||
}
|
||||
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata)
|
||||
{
|
||||
struct btrfs_end_io_wq *end_io_wq;
|
||||
|
||||
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
|
||||
if (!end_io_wq)
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
end_io_wq->private = bio->bi_private;
|
||||
end_io_wq->end_io = bio->bi_end_io;
|
||||
end_io_wq->info = info;
|
||||
end_io_wq->status = 0;
|
||||
end_io_wq->bio = bio;
|
||||
end_io_wq->metadata = metadata;
|
||||
|
||||
bio->bi_private = end_io_wq;
|
||||
bio->bi_end_io = end_workqueue_bio;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void run_one_async_start(struct btrfs_work *work)
|
||||
{
|
||||
struct async_submit_bio *async;
|
||||
@@ -816,7 +731,6 @@ static void run_one_async_done(struct btrfs_work *work)
|
||||
{
|
||||
struct async_submit_bio *async;
|
||||
struct inode *inode;
|
||||
blk_status_t ret;
|
||||
|
||||
async = container_of(work, struct async_submit_bio, work);
|
||||
inode = async->inode;
|
||||
@@ -834,11 +748,7 @@ static void run_one_async_done(struct btrfs_work *work)
|
||||
* This changes nothing when cgroups aren't in use.
|
||||
*/
|
||||
async->bio->bi_opf |= REQ_CGROUP_PUNT;
|
||||
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
|
||||
if (ret) {
|
||||
async->bio->bi_status = ret;
|
||||
bio_endio(async->bio);
|
||||
}
|
||||
btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
|
||||
}
|
||||
|
||||
static void run_one_async_free(struct btrfs_work *work)
|
||||
@@ -849,16 +759,23 @@ static void run_one_async_free(struct btrfs_work *work)
|
||||
kfree(async);
|
||||
}
|
||||
|
||||
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start)
|
||||
/*
|
||||
* Submit bio to an async queue.
|
||||
*
|
||||
* Return:
|
||||
* - true if the work has been successfully submitted
|
||||
* - false in case of error
|
||||
*/
|
||||
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
struct async_submit_bio *async;
|
||||
|
||||
async = kmalloc(sizeof(*async), GFP_NOFS);
|
||||
if (!async)
|
||||
return BLK_STS_RESOURCE;
|
||||
return false;
|
||||
|
||||
async->inode = inode;
|
||||
async->bio = bio;
|
||||
@@ -876,7 +793,7 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
btrfs_queue_work(fs_info->hipri_workers, &async->work);
|
||||
else
|
||||
btrfs_queue_work(fs_info->workers, &async->work);
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static blk_status_t btree_csum_one_bio(struct bio *bio)
|
||||
@@ -902,7 +819,7 @@ static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio,
|
||||
{
|
||||
/*
|
||||
* when we're called for a write, we're already in the async
|
||||
* submission context. Just jump into btrfs_map_bio
|
||||
* submission context. Just jump into btrfs_submit_bio.
|
||||
*/
|
||||
return btree_csum_one_bio(bio);
|
||||
}
|
||||
@@ -924,57 +841,54 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
blk_status_t ret;
|
||||
|
||||
bio->bi_opf |= REQ_META;
|
||||
|
||||
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
|
||||
/*
|
||||
* called for a read, do the setup so that checksum validation
|
||||
* can happen in the async kernel threads
|
||||
*/
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
BTRFS_WQ_ENDIO_METADATA);
|
||||
if (!ret)
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (!ret)
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
} else {
|
||||
/*
|
||||
* kthread helpers are used to submit writes so that
|
||||
* checksumming can happen in parallel across all CPUs
|
||||
*/
|
||||
ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
|
||||
btree_submit_bio_start);
|
||||
btrfs_submit_bio(fs_info, bio, mirror_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kthread helpers are used to submit writes so that checksumming can
|
||||
* happen in parallel across all CPUs.
|
||||
*/
|
||||
if (should_async_write(fs_info, BTRFS_I(inode)) &&
|
||||
btrfs_wq_submit_bio(inode, bio, mirror_num, 0, btree_submit_bio_start))
|
||||
return;
|
||||
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
btrfs_submit_bio(fs_info, bio, mirror_num);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MIGRATION
|
||||
static int btree_migratepage(struct address_space *mapping,
|
||||
struct page *newpage, struct page *page,
|
||||
enum migrate_mode mode)
|
||||
static int btree_migrate_folio(struct address_space *mapping,
|
||||
struct folio *dst, struct folio *src, enum migrate_mode mode)
|
||||
{
|
||||
/*
|
||||
* we can't safely write a btree page from here,
|
||||
* we haven't done the locking hook
|
||||
*/
|
||||
if (PageDirty(page))
|
||||
if (folio_test_dirty(src))
|
||||
return -EAGAIN;
|
||||
/*
|
||||
* Buffers may be managed in a filesystem specific way.
|
||||
* We must have no buffers or drop them.
|
||||
*/
|
||||
if (page_has_private(page) &&
|
||||
!try_to_release_page(page, GFP_KERNEL))
|
||||
if (folio_get_private(src) &&
|
||||
!filemap_release_folio(src, GFP_KERNEL))
|
||||
return -EAGAIN;
|
||||
return migrate_page(mapping, newpage, page, mode);
|
||||
return migrate_folio(mapping, dst, src, mode);
|
||||
}
|
||||
#else
|
||||
#define btree_migrate_folio NULL
|
||||
#endif
|
||||
|
||||
|
||||
static int btree_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
@@ -1074,10 +988,8 @@ static const struct address_space_operations btree_aops = {
|
||||
.writepages = btree_writepages,
|
||||
.release_folio = btree_release_folio,
|
||||
.invalidate_folio = btree_invalidate_folio,
|
||||
#ifdef CONFIG_MIGRATION
|
||||
.migratepage = btree_migratepage,
|
||||
#endif
|
||||
.dirty_folio = btree_dirty_folio,
|
||||
.migrate_folio = btree_migrate_folio,
|
||||
.dirty_folio = btree_dirty_folio,
|
||||
};
|
||||
|
||||
struct extent_buffer *btrfs_find_create_tree_block(
|
||||
@@ -1872,7 +1784,7 @@ again:
|
||||
fail:
|
||||
/*
|
||||
* If our caller provided us an anonymous device, then it's his
|
||||
* responsability to free it in case we fail. So we have to set our
|
||||
* responsibility to free it in case we fail. So we have to set our
|
||||
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
|
||||
* and once again by our caller.
|
||||
*/
|
||||
@@ -1955,25 +1867,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
|
||||
return root;
|
||||
}
|
||||
|
||||
/*
|
||||
* called by the kthread helper functions to finally call the bio end_io
|
||||
* functions. This is where read checksum verification actually happens
|
||||
*/
|
||||
static void end_workqueue_fn(struct btrfs_work *work)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct btrfs_end_io_wq *end_io_wq;
|
||||
|
||||
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
|
||||
bio = end_io_wq->bio;
|
||||
|
||||
bio->bi_status = end_io_wq->status;
|
||||
bio->bi_private = end_io_wq->private;
|
||||
bio->bi_end_io = end_io_wq->end_io;
|
||||
bio_endio(bio);
|
||||
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
|
||||
}
|
||||
|
||||
static int cleaner_kthread(void *arg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = arg;
|
||||
@@ -2280,10 +2173,14 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
||||
btrfs_destroy_workqueue(fs_info->delalloc_workers);
|
||||
btrfs_destroy_workqueue(fs_info->hipri_workers);
|
||||
btrfs_destroy_workqueue(fs_info->workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
|
||||
if (fs_info->endio_workers)
|
||||
destroy_workqueue(fs_info->endio_workers);
|
||||
if (fs_info->endio_raid56_workers)
|
||||
destroy_workqueue(fs_info->endio_raid56_workers);
|
||||
if (fs_info->rmw_workers)
|
||||
destroy_workqueue(fs_info->rmw_workers);
|
||||
if (fs_info->compressed_write_workers)
|
||||
destroy_workqueue(fs_info->compressed_write_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_write_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
|
||||
btrfs_destroy_workqueue(fs_info->delayed_workers);
|
||||
@@ -2297,8 +2194,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
||||
* the queues used for metadata I/O, since tasks from those other work
|
||||
* queues can do metadata I/O operations.
|
||||
*/
|
||||
btrfs_destroy_workqueue(fs_info->endio_meta_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
|
||||
if (fs_info->endio_meta_workers)
|
||||
destroy_workqueue(fs_info->endio_meta_workers);
|
||||
}
|
||||
|
||||
static void free_root_extent_buffers(struct btrfs_root *root)
|
||||
@@ -2428,7 +2325,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
|
||||
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
|
||||
memset(&BTRFS_I(inode)->location, 0, sizeof(struct btrfs_key));
|
||||
BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
|
||||
BTRFS_I(inode)->location.type = 0;
|
||||
BTRFS_I(inode)->location.offset = 0;
|
||||
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_insert_inode_hash(inode);
|
||||
}
|
||||
@@ -2477,25 +2376,18 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
||||
fs_info->fixup_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
|
||||
|
||||
/*
|
||||
* endios are largely parallel and should have a very
|
||||
* low idle thresh
|
||||
*/
|
||||
fs_info->endio_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
|
||||
alloc_workqueue("btrfs-endio", flags, max_active);
|
||||
fs_info->endio_meta_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
|
||||
max_active, 4);
|
||||
fs_info->endio_meta_write_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
|
||||
max_active, 2);
|
||||
alloc_workqueue("btrfs-endio-meta", flags, max_active);
|
||||
fs_info->endio_raid56_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
|
||||
max_active, 4);
|
||||
alloc_workqueue("btrfs-endio-raid56", flags, max_active);
|
||||
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
|
||||
fs_info->endio_write_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
|
||||
max_active, 2);
|
||||
fs_info->compressed_write_workers =
|
||||
alloc_workqueue("btrfs-compressed-write", flags, max_active);
|
||||
fs_info->endio_freespace_worker =
|
||||
btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
|
||||
max_active, 0);
|
||||
@@ -2510,7 +2402,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
||||
if (!(fs_info->workers && fs_info->hipri_workers &&
|
||||
fs_info->delalloc_workers && fs_info->flush_workers &&
|
||||
fs_info->endio_workers && fs_info->endio_meta_workers &&
|
||||
fs_info->endio_meta_write_workers &&
|
||||
fs_info->compressed_write_workers &&
|
||||
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
|
||||
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
|
||||
fs_info->caching_workers && fs_info->fixup_workers &&
|
||||
@@ -2537,6 +2429,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
|
||||
|
||||
fs_info->csum_shash = csum_shash;
|
||||
|
||||
btrfs_info(fs_info, "using %s (%s) checksum algorithm",
|
||||
btrfs_super_csum_name(csum_type),
|
||||
crypto_shash_driver_name(csum_shash));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3255,6 +3150,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
init_waitqueue_head(&fs_info->transaction_blocked_wait);
|
||||
init_waitqueue_head(&fs_info->async_submit_wait);
|
||||
init_waitqueue_head(&fs_info->delayed_iputs_wait);
|
||||
init_waitqueue_head(&fs_info->zone_finish_wait);
|
||||
|
||||
/* Usable values until the real ones are cached from the superblock */
|
||||
fs_info->nodesize = 4096;
|
||||
@@ -3262,6 +3158,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
fs_info->sectorsize_bits = ilog2(4096);
|
||||
fs_info->stripesize = 4096;
|
||||
|
||||
fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
|
||||
|
||||
spin_lock_init(&fs_info->swapfile_pins_lock);
|
||||
fs_info->swapfile_pins = RB_ROOT;
|
||||
|
||||
@@ -3593,16 +3491,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
*/
|
||||
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
|
||||
/*
|
||||
* Flag our filesystem as having big metadata blocks if they are bigger
|
||||
* than the page size.
|
||||
*/
|
||||
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
|
||||
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
|
||||
btrfs_info(fs_info,
|
||||
"flagging fs with big metadata feature");
|
||||
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
|
||||
}
|
||||
|
||||
/* Set up fs_info before parsing mount options */
|
||||
nodesize = btrfs_super_nodesize(disk_super);
|
||||
@@ -3640,8 +3528,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
|
||||
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
|
||||
|
||||
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
|
||||
btrfs_info(fs_info, "has skinny extents");
|
||||
/*
|
||||
* Flag our filesystem as having big metadata blocks if they are bigger
|
||||
* than the page size.
|
||||
*/
|
||||
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
|
||||
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
|
||||
|
||||
/*
|
||||
* mixed block groups end up with duplicate but slightly offset
|
||||
@@ -3670,6 +3562,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
/*
|
||||
* We have unsupported RO compat features, although RO mounted, we
|
||||
* should not cause any metadata write, including log replay.
|
||||
* Or we could screw up whatever the new feature requires.
|
||||
*/
|
||||
if (unlikely(features && btrfs_super_log_root(disk_super) &&
|
||||
!btrfs_test_opt(fs_info, NOLOGREPLAY))) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
|
||||
if (sectorsize < PAGE_SIZE) {
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
@@ -17,13 +17,6 @@
|
||||
*/
|
||||
#define BTRFS_BDEV_BLOCKSIZE (4096)
|
||||
|
||||
/*
 * Type tag accepted as the last argument of btrfs_bio_wq_end_io().
 * NOTE(review): presumably selects which end-io workqueue handles the bio
 * completion -- confirm against the implementation.
 */
enum btrfs_wq_endio_type {
|
||||
BTRFS_WQ_ENDIO_DATA,
|
||||
BTRFS_WQ_ENDIO_METADATA,
|
||||
BTRFS_WQ_ENDIO_FREE_SPACE,
|
||||
BTRFS_WQ_ENDIO_RAID56,
|
||||
};
|
||||
|
||||
static inline u64 btrfs_sb_offset(int mirror)
|
||||
{
|
||||
u64 start = SZ_16K;
|
||||
@@ -121,11 +114,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key);
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata);
|
||||
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start);
|
||||
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start);
|
||||
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
|
||||
int mirror_num);
|
||||
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
|
||||
@@ -145,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
|
||||
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
||||
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
|
||||
int btrfs_init_root_free_objectid(struct btrfs_root *root);
|
||||
int __init btrfs_end_io_wq_init(void);
|
||||
void __cold btrfs_end_io_wq_exit(void);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
void btrfs_set_buffer_lockdep_class(u64 objectid,
|
||||
|
||||
@@ -1269,7 +1269,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
|
||||
static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
|
||||
{
|
||||
struct btrfs_device *dev = stripe->dev;
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
@@ -1316,76 +1316,60 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 discarded_bytes = 0;
|
||||
u64 end = bytenr + num_bytes;
|
||||
u64 cur = bytenr;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bioc has devices
|
||||
* associated to its stripes that don't go away while we are discarding.
|
||||
* Avoid races with device replace and make sure the devices in the
|
||||
* stripes don't go away while we are discarding.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
while (cur < end) {
|
||||
struct btrfs_io_stripe *stripe;
|
||||
struct btrfs_discard_stripe *stripes;
|
||||
unsigned int num_stripes;
|
||||
int i;
|
||||
|
||||
num_bytes = end - cur;
|
||||
/* Tell the block device(s) that the sectors can be discarded */
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
|
||||
&num_bytes, &bioc, 0);
|
||||
/*
|
||||
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
|
||||
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
|
||||
* thus we can't continue anyway.
|
||||
*/
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
|
||||
if (IS_ERR(stripes)) {
|
||||
ret = PTR_ERR(stripes);
|
||||
if (ret == -EOPNOTSUPP)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
stripe = bioc->stripes;
|
||||
for (i = 0; i < bioc->num_stripes; i++, stripe++) {
|
||||
for (i = 0; i < num_stripes; i++) {
|
||||
struct btrfs_discard_stripe *stripe = stripes + i;
|
||||
u64 bytes;
|
||||
struct btrfs_device *device = stripe->dev;
|
||||
|
||||
if (!device->bdev) {
|
||||
if (!stripe->dev->bdev) {
|
||||
ASSERT(btrfs_test_opt(fs_info, DEGRADED));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
|
||||
if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
|
||||
&stripe->dev->dev_state))
|
||||
continue;
|
||||
|
||||
ret = do_discard_extent(stripe, &bytes);
|
||||
if (!ret) {
|
||||
discarded_bytes += bytes;
|
||||
} else if (ret != -EOPNOTSUPP) {
|
||||
if (ret) {
|
||||
/*
|
||||
* Logic errors or -ENOMEM, or -EIO, but
|
||||
* unlikely to happen.
|
||||
*
|
||||
* And since there are two loops, explicitly
|
||||
* go to out to avoid confusion.
|
||||
* Keep going if discard is not supported by the
|
||||
* device.
|
||||
*/
|
||||
btrfs_put_bioc(bioc);
|
||||
goto out;
|
||||
if (ret != -EOPNOTSUPP)
|
||||
break;
|
||||
ret = 0;
|
||||
} else {
|
||||
discarded_bytes += bytes;
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case we get back EOPNOTSUPP for some reason,
|
||||
* just ignore the return value so we don't screw up
|
||||
* people calling discard_extent.
|
||||
*/
|
||||
ret = 0;
|
||||
}
|
||||
btrfs_put_bioc(bioc);
|
||||
kfree(stripes);
|
||||
if (ret)
|
||||
break;
|
||||
cur += num_bytes;
|
||||
}
|
||||
out:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
|
||||
if (actual_bytes)
|
||||
*actual_bytes = discarded_bytes;
|
||||
|
||||
|
||||
if (ret == -EOPNOTSUPP)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3981,23 +3965,63 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
|
||||
}
|
||||
}
|
||||
|
||||
static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
/*
 * Decide whether a new chunk may be allocated under the zoned allocation
 * policy (called from can_allocate_chunk() for BTRFS_EXTENT_ALLOC_ZONED).
 *
 * Returns 0 if chunk allocation should go ahead: a zone can be activated,
 * btrfs_zone_finish_one_bg() returned 1 for a data allocation, or none of the
 * refusal conditions below applied.
 * Returns the negative value from btrfs_zone_finish_one_bg() if it failed,
 * -ENOSPC if ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size, and
 * -EAGAIN for a data allocation when even min_alloc_size is not available.
 */
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
/* If we can activate a new zone, just allocate a chunk and use it */
|
||||
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We already reached the max active zones. Try to finish one block
|
||||
* group to make room for a new block group. This is only possible
|
||||
* for a data block group because btrfs_zone_finish() may need to wait
|
||||
* for a running transaction which can cause a deadlock for metadata
|
||||
* allocation.
|
||||
*/
|
||||
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
|
||||
int ret = btrfs_zone_finish_one_bg(fs_info);
|
||||
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
else if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have enough free space left in an already active block group
|
||||
* and we can't activate any other zone now, do not allow allocating a
|
||||
* new chunk and let find_free_extent() retry with a smaller size.
|
||||
*/
|
||||
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
|
||||
return -ENOSPC;
|
||||
|
||||
/*
|
||||
* Even min_alloc_size is not left in any block groups. Since we cannot
|
||||
* activate a new block group, allocating it may not help. Let's tell a
|
||||
* caller to try again and hope it makes some progress by writing some
|
||||
* parts of the region. That is only possible for data block groups,
|
||||
* where a part of the region can be written.
|
||||
*/
|
||||
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
return -EAGAIN;
|
||||
|
||||
/*
|
||||
* We cannot activate a new block group and not enough space is left in any
|
||||
* block groups. So, allocating a new block group may not help. But,
|
||||
* there is nothing to do anyway, so let's go with it.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
return true;
|
||||
return 0;
|
||||
case BTRFS_EXTENT_ALLOC_ZONED:
|
||||
/*
|
||||
* If we have enough free space left in an already
|
||||
* active block group and we can't activate any other
|
||||
* zone now, do not allow allocating a new chunk and
|
||||
* let find_free_extent() retry with a smaller size.
|
||||
*/
|
||||
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
|
||||
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
|
||||
return false;
|
||||
return true;
|
||||
return can_allocate_chunk_zoned(fs_info, ffe_ctl);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
@@ -4079,8 +4103,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
int exist = 0;
|
||||
|
||||
/* Check if the allocation policy allows creating a new chunk */
|
||||
if (!can_allocate_chunk(fs_info, ffe_ctl))
|
||||
return -ENOSPC;
|
||||
ret = can_allocate_chunk(fs_info, ffe_ctl);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
trans = current->journal_info;
|
||||
if (trans)
|
||||
@@ -5992,7 +6017,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
|
||||
{
|
||||
u64 start = SZ_1M, len = 0, end = 0;
|
||||
u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
|
||||
int ret;
|
||||
|
||||
*trimmed = 0;
|
||||
@@ -6036,8 +6061,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Ensure we skip the reserved area in the first 1M */
|
||||
start = max_t(u64, start, SZ_1M);
|
||||
/* Ensure we skip the reserved space on each device. */
|
||||
start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
|
||||
|
||||
/*
|
||||
* If find_first_clear_extent_bit finds a range that spans the
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -57,6 +57,7 @@ enum {
|
||||
#define BITMAP_LAST_BYTE_MASK(nbits) \
|
||||
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
|
||||
|
||||
struct btrfs_bio;
|
||||
struct btrfs_root;
|
||||
struct btrfs_inode;
|
||||
struct btrfs_io_bio;
|
||||
@@ -142,15 +143,10 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
|
||||
|
||||
struct extent_map_tree;
|
||||
|
||||
typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 len);
|
||||
|
||||
int try_release_extent_mapping(struct page *page, gfp_t mask);
|
||||
int try_release_extent_buffer(struct page *page);
|
||||
|
||||
int btrfs_read_folio(struct file *file, struct folio *folio);
|
||||
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
|
||||
int extent_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc);
|
||||
@@ -247,7 +243,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
|
||||
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
|
||||
struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
|
||||
|
||||
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
|
||||
@@ -266,15 +261,13 @@ struct io_failure_record {
|
||||
u64 start;
|
||||
u64 len;
|
||||
u64 logical;
|
||||
enum btrfs_compression_type compress_type;
|
||||
int this_mirror;
|
||||
int failed_mirror;
|
||||
int num_copies;
|
||||
};
|
||||
|
||||
int btrfs_repair_one_sector(struct inode *inode,
|
||||
struct bio *failed_bio, u32 bio_offset,
|
||||
struct page *page, unsigned int pgoff,
|
||||
u64 start, int failed_mirror,
|
||||
int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
|
||||
u32 bio_offset, struct page *page, unsigned int pgoff,
|
||||
submit_bio_hook_t *submit_bio_hook);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
|
||||
@@ -1848,7 +1848,6 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC);
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
@@ -1901,15 +1900,6 @@ relock:
|
||||
goto buffered;
|
||||
}
|
||||
|
||||
/*
|
||||
* We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw()
|
||||
* calls generic_write_sync() (through iomap_dio_complete()), because
|
||||
* that results in calling fsync (btrfs_sync_file()) which will try to
|
||||
* lock the inode in exclusive/write mode.
|
||||
*/
|
||||
if (is_sync_write)
|
||||
iocb->ki_flags &= ~IOCB_DSYNC;
|
||||
|
||||
/*
|
||||
* The iov_iter can be mapped to the same file range we are writing to.
|
||||
* If that's the case, then we will deadlock in the iomap code, because
|
||||
@@ -1965,17 +1955,24 @@ again:
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
|
||||
/*
|
||||
* Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do
|
||||
* the fsync (call generic_write_sync()).
|
||||
* If 'err' is -ENOTBLK or we have not written all data, then it means
|
||||
* we must fallback to buffered IO.
|
||||
*/
|
||||
if (is_sync_write)
|
||||
iocb->ki_flags |= IOCB_DSYNC;
|
||||
|
||||
/* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */
|
||||
if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from))
|
||||
goto out;
|
||||
|
||||
buffered:
|
||||
/*
|
||||
* If we are in a NOWAIT context, then return -EAGAIN to signal the caller
|
||||
* it must retry the operation in a context where blocking is acceptable,
|
||||
* since we currently don't have NOWAIT semantics support for buffered IO
|
||||
* and may block there for many reasons (reserving space for example).
|
||||
*/
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pos = iocb->ki_pos;
|
||||
written_buffered = btrfs_buffered_write(iocb, from);
|
||||
if (written_buffered < 0) {
|
||||
@@ -2038,7 +2035,7 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
|
||||
ssize_t num_written, num_sync;
|
||||
const bool sync = iocb->ki_flags & IOCB_DSYNC;
|
||||
const bool sync = iocb_is_dsync(iocb);
|
||||
|
||||
/*
|
||||
* If the fs flips readonly due to some impossible error, although we
|
||||
@@ -2058,9 +2055,11 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
num_written = btrfs_encoded_write(iocb, from, encoded);
|
||||
num_sync = encoded->len;
|
||||
} else if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
num_written = num_sync = btrfs_direct_write(iocb, from);
|
||||
num_written = btrfs_direct_write(iocb, from);
|
||||
num_sync = num_written;
|
||||
} else {
|
||||
num_written = num_sync = btrfs_buffered_write(iocb, from);
|
||||
num_written = btrfs_buffered_write(iocb, from);
|
||||
num_sync = num_written;
|
||||
}
|
||||
|
||||
btrfs_set_inode_last_sub_trans(inode);
|
||||
@@ -2308,7 +2307,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
btrfs_release_log_ctx_extents(&ctx);
|
||||
if (ret < 0) {
|
||||
/* Fallthrough and commit/free transaction. */
|
||||
ret = 1;
|
||||
ret = BTRFS_LOG_FORCE_COMMIT;
|
||||
}
|
||||
|
||||
/* we've logged all the items and now have a consistent
|
||||
@@ -2734,7 +2733,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
|
||||
goto out;
|
||||
}
|
||||
rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1);
|
||||
rsv->failfast = 1;
|
||||
rsv->failfast = true;
|
||||
|
||||
/*
|
||||
* 1 - update the inode
|
||||
@@ -3100,7 +3099,8 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
||||
|
||||
ASSERT(trans != NULL);
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_mtime = inode->i_ctime = current_time(inode);
|
||||
inode->i_mtime = current_time(inode);
|
||||
inode->i_ctime = inode->i_mtime;
|
||||
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
|
||||
updated_inode = true;
|
||||
btrfs_end_transaction(trans);
|
||||
|
||||
@@ -3536,7 +3536,8 @@ int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
|
||||
* data, keep it dense.
|
||||
*/
|
||||
if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
|
||||
cont1_bytes = min_bytes = bytes + empty_size;
|
||||
cont1_bytes = bytes + empty_size;
|
||||
min_bytes = cont1_bytes;
|
||||
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
|
||||
cont1_bytes = bytes;
|
||||
min_bytes = fs_info->sectorsize;
|
||||
|
||||
791
fs/btrfs/inode.c
791
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
150
fs/btrfs/ioctl.c
150
fs/btrfs/ioctl.c
@@ -1230,16 +1230,18 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
|
||||
return em;
|
||||
}
|
||||
|
||||
static u32 get_extent_max_capacity(const struct extent_map *em)
|
||||
static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
|
||||
const struct extent_map *em)
|
||||
{
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
|
||||
return BTRFS_MAX_COMPRESSED;
|
||||
return BTRFS_MAX_EXTENT_SIZE;
|
||||
return fs_info->max_extent_size;
|
||||
}
|
||||
|
||||
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
|
||||
u32 extent_thresh, u64 newer_than, bool locked)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct extent_map *next;
|
||||
bool ret = false;
|
||||
|
||||
@@ -1263,7 +1265,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
|
||||
* If the next extent is at its max capacity, defragging current extent
|
||||
* makes no sense, as the total number of extents won't change.
|
||||
*/
|
||||
if (next->len >= get_extent_max_capacity(em))
|
||||
if (next->len >= get_extent_max_capacity(fs_info, em))
|
||||
goto out;
|
||||
/* Skip older extent */
|
||||
if (next->generation < newer_than)
|
||||
@@ -1400,6 +1402,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
|
||||
bool locked, struct list_head *target_list,
|
||||
u64 *last_scanned_ret)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
bool last_is_target = false;
|
||||
u64 cur = start;
|
||||
int ret = 0;
|
||||
@@ -1484,7 +1487,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
|
||||
* Skip extents already at their max capacity; this is mostly for
|
||||
* compressed extents, whose max capacity is only 128K.
|
||||
*/
|
||||
if (em->len >= get_extent_max_capacity(em))
|
||||
if (em->len >= get_extent_max_capacity(fs_info, em))
|
||||
goto next;
|
||||
|
||||
/*
|
||||
@@ -4243,26 +4246,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Append one (inum, offset, root) triple to the btrfs_data_container passed
 * in as @ctx.
 *
 * Each triple needs 3 * sizeof(u64) bytes. If that much is still available
 * in bytes_left, the three values are stored at val[elem_cnt..elem_cnt + 2]
 * and elem_cnt advances by 3. Otherwise nothing is stored: the shortfall is
 * added to bytes_missing, bytes_left is set to 0 and elem_missed grows by 3.
 *
 * Always returns 0.
 */
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
|
||||
{
|
||||
struct btrfs_data_container *inodes = ctx;
|
||||
const size_t c = 3 * sizeof(u64);
|
||||
|
||||
if (inodes->bytes_left >= c) {
|
||||
inodes->bytes_left -= c;
|
||||
inodes->val[inodes->elem_cnt] = inum;
|
||||
inodes->val[inodes->elem_cnt + 1] = offset;
|
||||
inodes->val[inodes->elem_cnt + 2] = root;
|
||||
inodes->elem_cnt += 3;
|
||||
} else {
|
||||
inodes->bytes_missing += c - inodes->bytes_left;
|
||||
inodes->bytes_left = 0;
|
||||
inodes->elem_missed += 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
|
||||
void __user *arg, int version)
|
||||
{
|
||||
@@ -4312,7 +4295,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
|
||||
build_ino_list, inodes, ignore_offset);
|
||||
inodes, ignore_offset);
|
||||
if (ret == -EINVAL)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
@@ -4355,13 +4338,79 @@ void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to acquire fs_info::balance_mutex as well as set BTRFS_EXCLOP_BALANCE as
|
||||
* required.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @excl_acquired: ptr to boolean value which is set to true if the exclusive
|
||||
* op was started here, false if a paused balance is being resumed or on failure
|
||||
*
|
||||
* Return 0 on success, in which case fs_info::balance_mutex is held and
|
||||
* exclusive ops are blocked. On failure the mutex is not held and an error code is returned.
|
||||
*/
|
||||
static int btrfs_try_lock_balance(struct btrfs_fs_info *fs_info, bool *excl_acquired)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Exclusive operation is locked. Three possibilities:
|
||||
* (1) some other op is running
|
||||
* (2) balance is running
|
||||
* (3) balance is paused -- special case (think resume)
|
||||
*/
|
||||
while (1) {
|
||||
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
*excl_acquired = true;
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (fs_info->balance_ctl) {
|
||||
/* This is either (2) or (3) */
|
||||
if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
|
||||
/* This is (2) */
|
||||
ret = -EINPROGRESS;
|
||||
goto out_failure;
|
||||
|
||||
} else {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
/*
|
||||
* Lock released to allow other waiters to
|
||||
* continue, we'll reexamine the status again.
|
||||
*/
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
|
||||
if (fs_info->balance_ctl &&
|
||||
!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
|
||||
/* This is (3) */
|
||||
*excl_acquired = false;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* This is (1) */
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
goto out_failure;
|
||||
}
|
||||
|
||||
/* State changed while the mutex was dropped: release it and retry. */
mutex_unlock(&fs_info->balance_mutex);
|
||||
}
|
||||
|
||||
out_failure:
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
*excl_acquired = false;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_ioctl_balance_args *bargs;
|
||||
struct btrfs_balance_control *bctl;
|
||||
bool need_unlock; /* for mut. excl. ops lock */
|
||||
bool need_unlock = true;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
@@ -4378,53 +4427,12 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
again:
|
||||
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
need_unlock = true;
|
||||
goto locked;
|
||||
}
|
||||
|
||||
/*
|
||||
* mut. excl. ops lock is locked. Three possibilities:
|
||||
* (1) some other op is running
|
||||
* (2) balance is running
|
||||
* (3) balance is paused -- special case (think resume)
|
||||
*/
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (fs_info->balance_ctl) {
|
||||
/* this is either (2) or (3) */
|
||||
if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
/*
|
||||
* Lock released to allow other waiters to continue,
|
||||
* we'll reexamine the status again.
|
||||
*/
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
|
||||
if (fs_info->balance_ctl &&
|
||||
!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
|
||||
/* this is (3) */
|
||||
need_unlock = false;
|
||||
goto locked;
|
||||
}
|
||||
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
goto again;
|
||||
} else {
|
||||
/* this is (2) */
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
ret = -EINPROGRESS;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/* this is (1) */
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
ret = btrfs_try_lock_balance(fs_info, &need_unlock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
locked:
|
||||
lockdep_assert_held(&fs_info->balance_mutex);
|
||||
|
||||
if (bargs->flags & BTRFS_BALANCE_RESUME) {
|
||||
if (!fs_info->balance_ctl) {
|
||||
ret = -ENOTCONN;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user