mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 02:50:49 +09:00
Merge 5dc921868c ("Merge tag 'for-5.19/drivers-2022-05-22' of git://git.kernel.dk/linux-block") into android-mainline
Steps on the way to 5.19-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Id21b66734cdd0ce488e238cf0490eeb2ec9c9563
This commit is contained in:
@@ -218,7 +218,6 @@ current *struct* is::
|
||||
int (*tray_move)(struct cdrom_device_info *, int);
|
||||
int (*lock_door)(struct cdrom_device_info *, int);
|
||||
int (*select_speed)(struct cdrom_device_info *, int);
|
||||
int (*select_disc)(struct cdrom_device_info *, int);
|
||||
int (*get_last_session) (struct cdrom_device_info *,
|
||||
struct cdrom_multisession *);
|
||||
int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
|
||||
@@ -419,15 +418,6 @@ this `auto-selection` capability, the decision should be made on the
|
||||
current disc loaded and the return value should be positive. A negative
|
||||
return value indicates an error.
|
||||
|
||||
::
|
||||
|
||||
int select_disc(struct cdrom_device_info *cdi, int number)
|
||||
|
||||
If the drive can store multiple discs (a juke-box) this function
|
||||
will perform disc selection. It should return the number of the
|
||||
selected disc on success, a negative value on error. Currently, only
|
||||
the ide-cd driver supports this functionality.
|
||||
|
||||
::
|
||||
|
||||
int get_last_session(struct cdrom_device_info *cdi,
|
||||
|
||||
@@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
|
||||
HRTIMER: 0 0 0 0
|
||||
RCU: 1678 1769 2178 2250
|
||||
|
||||
|
||||
1.3 IDE devices in /proc/ide
|
||||
----------------------------
|
||||
|
||||
The subdirectory /proc/ide contains information about all IDE devices of which
|
||||
the kernel is aware. There is one subdirectory for each IDE controller, the
|
||||
file drivers and a link for each IDE device, pointing to the device directory
|
||||
in the controller specific subtree.
|
||||
|
||||
The file 'drivers' contains general information about the drivers used for the
|
||||
IDE devices::
|
||||
|
||||
> cat /proc/ide/drivers
|
||||
ide-cdrom version 4.53
|
||||
ide-disk version 1.08
|
||||
|
||||
More detailed information can be found in the controller specific
|
||||
subdirectories. These are named ide0, ide1 and so on. Each of these
|
||||
directories contains the files shown in table 1-6.
|
||||
|
||||
|
||||
.. table:: Table 1-6: IDE controller info in /proc/ide/ide?
|
||||
|
||||
======= =======================================
|
||||
File Content
|
||||
======= =======================================
|
||||
channel IDE channel (0 or 1)
|
||||
config Configuration (only for PCI/IDE bridge)
|
||||
mate Mate name
|
||||
model Type/Chipset of IDE controller
|
||||
======= =======================================
|
||||
|
||||
Each device connected to a controller has a separate subdirectory in the
|
||||
controllers directory. The files listed in table 1-7 are contained in these
|
||||
directories.
|
||||
|
||||
|
||||
.. table:: Table 1-7: IDE device information
|
||||
|
||||
================ ==========================================
|
||||
File Content
|
||||
================ ==========================================
|
||||
cache The cache
|
||||
capacity Capacity of the medium (in 512Byte blocks)
|
||||
driver driver and version
|
||||
geometry physical and logical geometry
|
||||
identify device identify block
|
||||
media media type
|
||||
model device identifier
|
||||
settings device setup
|
||||
smart_thresholds IDE disk management thresholds
|
||||
smart_values IDE disk management values
|
||||
================ ==========================================
|
||||
|
||||
The most interesting file is ``settings``. This file contains a nice
|
||||
overview of the drive parameters::
|
||||
|
||||
# cat /proc/ide/ide0/hda/settings
|
||||
name value min max mode
|
||||
---- ----- --- --- ----
|
||||
bios_cyl 526 0 65535 rw
|
||||
bios_head 255 0 255 rw
|
||||
bios_sect 63 0 63 rw
|
||||
breada_readahead 4 0 127 rw
|
||||
bswap 0 0 1 r
|
||||
file_readahead 72 0 2097151 rw
|
||||
io_32bit 0 0 3 rw
|
||||
keepsettings 0 0 1 rw
|
||||
max_kb_per_request 122 1 127 rw
|
||||
multcount 0 0 8 rw
|
||||
nice1 1 0 1 rw
|
||||
nowerr 0 0 1 rw
|
||||
pio_mode write-only 0 255 w
|
||||
slow 0 0 1 rw
|
||||
unmaskirq 0 0 1 rw
|
||||
using_dma 0 0 1 rw
|
||||
|
||||
|
||||
1.4 Networking info in /proc/net
|
||||
1.3 Networking info in /proc/net
|
||||
--------------------------------
|
||||
|
||||
The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the
|
||||
@@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
|
||||
current slaves of the bond, the link status of the slaves, and how
|
||||
many times the slaves link has failed.
|
||||
|
||||
1.5 SCSI info
|
||||
1.4 SCSI info
|
||||
-------------
|
||||
|
||||
If you have a SCSI host adapter in your system, you'll find a subdirectory
|
||||
@@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
|
||||
Total transfers 0 (0 reads and 0 writes)
|
||||
|
||||
|
||||
1.6 Parallel port info in /proc/parport
|
||||
1.5 Parallel port info in /proc/parport
|
||||
---------------------------------------
|
||||
|
||||
The directory /proc/parport contains information about the parallel ports of
|
||||
@@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
|
||||
number or none).
|
||||
========= ====================================================================
|
||||
|
||||
1.7 TTY info in /proc/tty
|
||||
1.6 TTY info in /proc/tty
|
||||
-------------------------
|
||||
|
||||
Information about the available and actually used tty's can be found in the
|
||||
@@ -1463,7 +1385,7 @@ To see which tty's are currently in use, you can simply look into the file
|
||||
unknown /dev/tty 4 1-63 console
|
||||
|
||||
|
||||
1.8 Miscellaneous kernel statistics in /proc/stat
|
||||
1.7 Miscellaneous kernel statistics in /proc/stat
|
||||
-------------------------------------------------
|
||||
|
||||
Various pieces of information about kernel activity are available in the
|
||||
@@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
|
||||
softirq.
|
||||
|
||||
|
||||
1.9 Ext4 file system parameters
|
||||
1.8 Ext4 file system parameters
|
||||
-------------------------------
|
||||
|
||||
Information about mounted ext4 file systems can be found in
|
||||
@@ -1552,7 +1474,7 @@ in Table 1-12, below.
|
||||
mb_groups details of multiblock allocator buddy cache of free blocks
|
||||
============== ==========================================================
|
||||
|
||||
1.10 /proc/consoles
|
||||
1.9 /proc/consoles
|
||||
-------------------
|
||||
Shows registered system console lines.
|
||||
|
||||
|
||||
@@ -718,6 +718,9 @@ CDROMPLAYBLK
|
||||
|
||||
|
||||
CDROMGETSPINDOWN
|
||||
Obsolete, was ide-cd only
|
||||
|
||||
|
||||
usage::
|
||||
|
||||
char spindown;
|
||||
@@ -736,6 +739,9 @@ CDROMGETSPINDOWN
|
||||
|
||||
|
||||
CDROMSETSPINDOWN
|
||||
Obsolete, was ide-cd only
|
||||
|
||||
|
||||
usage::
|
||||
|
||||
char spindown
|
||||
|
||||
@@ -483,7 +483,6 @@ static void ubd_handler(void)
|
||||
if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
|
||||
blk_queue_max_discard_sectors(io_req->req->q, 0);
|
||||
blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
|
||||
}
|
||||
blk_mq_end_request(io_req->req, io_req->error);
|
||||
kfree(io_req);
|
||||
@@ -800,10 +799,8 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
}
|
||||
if (ubd_dev->no_trim == 0) {
|
||||
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
|
||||
ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
|
||||
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
|
||||
}
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
|
||||
return 0;
|
||||
|
||||
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
|
||||
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
|
||||
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
|
||||
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
|
||||
obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
|
||||
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
|
||||
|
||||
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
|
||||
s >>= bb->shift;
|
||||
target += (1<<bb->shift) - 1;
|
||||
target >>= bb->shift;
|
||||
sectors = target - s;
|
||||
}
|
||||
/* 'target' is now the first block after the bad range */
|
||||
|
||||
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
|
||||
s += (1<<bb->shift) - 1;
|
||||
s >>= bb->shift;
|
||||
target >>= bb->shift;
|
||||
sectors = target - s;
|
||||
}
|
||||
|
||||
write_seqlock_irq(&bb->lock);
|
||||
|
||||
16
block/bdev.c
16
block/bdev.c
@@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
|
||||
}
|
||||
}
|
||||
|
||||
if (!bdev->bd_openers)
|
||||
if (!atomic_read(&bdev->bd_openers))
|
||||
set_init_blocksize(bdev);
|
||||
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
|
||||
bdev_disk_changed(disk, false);
|
||||
bdev->bd_openers++;
|
||||
atomic_inc(&bdev->bd_openers);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
if (!--bdev->bd_openers)
|
||||
if (atomic_dec_and_test(&bdev->bd_openers))
|
||||
blkdev_flush_mapping(bdev);
|
||||
if (bdev->bd_disk->fops->release)
|
||||
bdev->bd_disk->fops->release(bdev->bd_disk, mode);
|
||||
@@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
|
||||
struct gendisk *disk = part->bd_disk;
|
||||
int ret;
|
||||
|
||||
if (part->bd_openers)
|
||||
if (atomic_read(&part->bd_openers))
|
||||
goto done;
|
||||
|
||||
ret = blkdev_get_whole(bdev_whole(part), mode);
|
||||
@@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
|
||||
disk->open_partitions++;
|
||||
set_init_blocksize(part);
|
||||
done:
|
||||
part->bd_openers++;
|
||||
atomic_inc(&part->bd_openers);
|
||||
return 0;
|
||||
|
||||
out_blkdev_put:
|
||||
@@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
|
||||
{
|
||||
struct block_device *whole = bdev_whole(part);
|
||||
|
||||
if (--part->bd_openers)
|
||||
if (!atomic_dec_and_test(&part->bd_openers))
|
||||
return;
|
||||
blkdev_flush_mapping(part);
|
||||
whole->bd_disk->open_partitions--;
|
||||
@@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
|
||||
* of the world and we want to avoid long (could be several minute)
|
||||
* syncs while holding the mutex.
|
||||
*/
|
||||
if (bdev->bd_openers == 1)
|
||||
if (atomic_read(&bdev->bd_openers) == 1)
|
||||
sync_blockdev(bdev);
|
||||
|
||||
mutex_lock(&disk->open_mutex);
|
||||
@@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
|
||||
bdev = I_BDEV(inode);
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (!bdev->bd_openers) {
|
||||
if (!atomic_read(&bdev->bd_openers)) {
|
||||
; /* skip */
|
||||
} else if (wait) {
|
||||
/*
|
||||
|
||||
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
|
||||
*/
|
||||
bfqg->bfqd = bfqd;
|
||||
bfqg->active_entities = 0;
|
||||
bfqg->online = true;
|
||||
bfqg->rq_pos_tree = RB_ROOT;
|
||||
}
|
||||
|
||||
@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
|
||||
entity->sched_data = &parent->sched_data;
|
||||
}
|
||||
|
||||
static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
|
||||
struct blkcg *blkcg)
|
||||
static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
blkg = blkg_lookup(blkcg, bfqd->queue);
|
||||
if (likely(blkg))
|
||||
return blkg_to_bfqg(blkg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
|
||||
struct blkcg *blkcg)
|
||||
{
|
||||
struct bfq_group *bfqg, *parent;
|
||||
struct bfq_group *parent;
|
||||
struct bfq_entity *entity;
|
||||
|
||||
bfqg = bfq_lookup_bfqg(bfqd, blkcg);
|
||||
|
||||
if (unlikely(!bfqg))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Update chain of bfq_groups as we might be handling a leaf group
|
||||
* which, along with some of its relatives, has not been hooked yet
|
||||
@@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
|
||||
bfq_group_set_parent(curr_bfqg, parent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bfqg;
|
||||
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
|
||||
{
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
struct bfq_group *bfqg;
|
||||
|
||||
while (blkg) {
|
||||
bfqg = blkg_to_bfqg(blkg);
|
||||
if (bfqg->online) {
|
||||
bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
|
||||
return bfqg;
|
||||
}
|
||||
blkg = blkg->parent;
|
||||
}
|
||||
bio_associate_blkg_from_css(bio,
|
||||
&bfqg_to_blkg(bfqd->root_group)->blkcg->css);
|
||||
return bfqd->root_group;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
|
||||
* sure that the reference to cgroup is valid across the call (see
|
||||
* comments in bfq_bic_update_cgroup on this issue)
|
||||
*
|
||||
* NOTE: an alternative approach might have been to store the current
|
||||
* cgroup in bfqq and getting a reference to it, reducing the lookup
|
||||
* time here, at the price of slightly more complex code.
|
||||
*/
|
||||
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
|
||||
struct bfq_io_cq *bic,
|
||||
struct blkcg *blkcg)
|
||||
static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
|
||||
struct bfq_io_cq *bic,
|
||||
struct bfq_group *bfqg)
|
||||
{
|
||||
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
|
||||
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
|
||||
struct bfq_group *bfqg;
|
||||
struct bfq_entity *entity;
|
||||
|
||||
bfqg = bfq_find_set_group(bfqd, blkcg);
|
||||
|
||||
if (unlikely(!bfqg))
|
||||
bfqg = bfqd->root_group;
|
||||
|
||||
if (async_bfqq) {
|
||||
entity = &async_bfqq->entity;
|
||||
|
||||
@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
|
||||
}
|
||||
|
||||
if (sync_bfqq) {
|
||||
entity = &sync_bfqq->entity;
|
||||
if (entity->sched_data != &bfqg->sched_data)
|
||||
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
|
||||
if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
|
||||
/* We are the only user of this bfqq, just move it */
|
||||
if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
|
||||
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
|
||||
} else {
|
||||
struct bfq_queue *bfqq;
|
||||
|
||||
/*
|
||||
* The queue was merged to a different queue. Check
|
||||
* that the merge chain still belongs to the same
|
||||
* cgroup.
|
||||
*/
|
||||
for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
|
||||
if (bfqq->entity.sched_data !=
|
||||
&bfqg->sched_data)
|
||||
break;
|
||||
if (bfqq) {
|
||||
/*
|
||||
* Some queue changed cgroup so the merge is
|
||||
* not valid anymore. We cannot easily just
|
||||
* cancel the merge (by clearing new_bfqq) as
|
||||
* there may be other processes using this
|
||||
* queue and holding refs to all queues below
|
||||
* sync_bfqq->new_bfqq. Similarly if the merge
|
||||
* already happened, we need to detach from
|
||||
* bfqq now so that we cannot merge bio to a
|
||||
* request from the old cgroup.
|
||||
*/
|
||||
bfq_put_cooperator(sync_bfqq);
|
||||
bfq_release_process_ref(bfqd, sync_bfqq);
|
||||
bic_set_bfqq(bic, NULL, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bfqg;
|
||||
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
|
||||
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
||||
{
|
||||
struct bfq_data *bfqd = bic_to_bfqd(bic);
|
||||
struct bfq_group *bfqg = NULL;
|
||||
struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
|
||||
uint64_t serial_nr;
|
||||
|
||||
rcu_read_lock();
|
||||
serial_nr = __bio_blkcg(bio)->css.serial_nr;
|
||||
serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
|
||||
|
||||
/*
|
||||
* Check whether blkcg has changed. The condition may trigger
|
||||
* spuriously on a newly created cic but there's no harm.
|
||||
*/
|
||||
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
|
||||
goto out;
|
||||
return;
|
||||
|
||||
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
|
||||
/*
|
||||
* New cgroup for this process. Make sure it is linked to bfq internal
|
||||
* cgroup hierarchy.
|
||||
*/
|
||||
bfq_link_bfqg(bfqd, bfqg);
|
||||
__bfq_bic_change_cgroup(bfqd, bic, bfqg);
|
||||
/*
|
||||
* Update blkg_path for bfq_log_* functions. We cache this
|
||||
* path, and update it here, for the following
|
||||
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
||||
*/
|
||||
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
|
||||
bic->blkcg_serial_nr = serial_nr;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
|
||||
|
||||
put_async_queues:
|
||||
bfq_put_async_queues(bfqd, bfqg);
|
||||
bfqg->online = false;
|
||||
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
/*
|
||||
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
|
||||
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
|
||||
}
|
||||
|
||||
struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
|
||||
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
|
||||
{
|
||||
return bfqd->root_group;
|
||||
}
|
||||
|
||||
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
|
||||
*/
|
||||
static const unsigned long bfq_late_stable_merging = 600;
|
||||
|
||||
#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
|
||||
#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
|
||||
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
|
||||
|
||||
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
|
||||
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
|
||||
*/
|
||||
void bfq_schedule_dispatch(struct bfq_data *bfqd)
|
||||
{
|
||||
lockdep_assert_held(&bfqd->lock);
|
||||
|
||||
if (bfqd->queued != 0) {
|
||||
bfq_log(bfqd, "schedule dispatch");
|
||||
blk_mq_run_hw_queues(bfqd->queue, true);
|
||||
@@ -2133,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
if (!bfqd->last_completed_rq_bfqq ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq ||
|
||||
bfq_bfqq_has_short_ttime(bfqq) ||
|
||||
bfqq->dispatched > 0 ||
|
||||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
|
||||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -2208,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
|
||||
|
||||
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
|
||||
bfqq->queued[rq_is_sync(rq)]++;
|
||||
bfqd->queued++;
|
||||
/*
|
||||
* Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
|
||||
* may be read without holding the lock in bfq_has_work().
|
||||
*/
|
||||
WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
|
||||
|
||||
if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
|
||||
if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
|
||||
bfq_check_waker(bfqd, bfqq, now_ns);
|
||||
|
||||
/*
|
||||
@@ -2400,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
|
||||
if (rq->queuelist.prev != &rq->queuelist)
|
||||
list_del_init(&rq->queuelist);
|
||||
bfqq->queued[sync]--;
|
||||
bfqd->queued--;
|
||||
/*
|
||||
* Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
|
||||
* may be read without holding the lock in bfq_has_work().
|
||||
*/
|
||||
WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
|
||||
elv_rb_del(&bfqq->sort_list, rq);
|
||||
|
||||
elv_rqhash_del(q, rq);
|
||||
@@ -2463,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
|
||||
if (bic)
|
||||
if (bic) {
|
||||
/*
|
||||
* Make sure cgroup info is up to date for current process before
|
||||
* considering the merge.
|
||||
*/
|
||||
bfq_bic_update_cgroup(bic, bio);
|
||||
|
||||
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
|
||||
else
|
||||
} else {
|
||||
bfqd->bio_bfqq = NULL;
|
||||
}
|
||||
bfqd->bio_bic = bic;
|
||||
|
||||
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
|
||||
@@ -2496,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
|
||||
return ELEVATOR_NO_MERGE;
|
||||
}
|
||||
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq);
|
||||
|
||||
static void bfq_request_merged(struct request_queue *q, struct request *req,
|
||||
enum elv_merge type)
|
||||
{
|
||||
@@ -2506,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
|
||||
blk_rq_pos(req) <
|
||||
blk_rq_pos(container_of(rb_prev(&req->rb_node),
|
||||
struct request, rb_node))) {
|
||||
struct bfq_queue *bfqq = bfq_init_rq(req);
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(req);
|
||||
struct bfq_data *bfqd;
|
||||
struct request *prev, *next_rq;
|
||||
|
||||
@@ -2558,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
|
||||
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
||||
struct request *next)
|
||||
{
|
||||
struct bfq_queue *bfqq = bfq_init_rq(rq),
|
||||
*next_bfqq = bfq_init_rq(next);
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq),
|
||||
*next_bfqq = RQ_BFQQ(next);
|
||||
|
||||
if (!bfqq)
|
||||
goto remove;
|
||||
@@ -2764,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
|
||||
if (process_refs == 0 || new_process_refs == 0)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Make sure merged queues belong to the same parent. Parents could
|
||||
* have changed since the time we decided the two queues are suitable
|
||||
* for merging.
|
||||
*/
|
||||
if (new_bfqq->entity.parent != bfqq->entity.parent)
|
||||
return NULL;
|
||||
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
|
||||
new_bfqq->pid);
|
||||
|
||||
@@ -2901,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct bfq_queue *new_bfqq =
|
||||
bfq_setup_merge(bfqq, stable_merge_bfqq);
|
||||
|
||||
bic->stably_merged = true;
|
||||
if (new_bfqq && new_bfqq->bic)
|
||||
new_bfqq->bic->stably_merged = true;
|
||||
if (new_bfqq) {
|
||||
bic->stably_merged = true;
|
||||
if (new_bfqq->bic)
|
||||
new_bfqq->bic->stably_merged =
|
||||
true;
|
||||
}
|
||||
return new_bfqq;
|
||||
} else
|
||||
return NULL;
|
||||
@@ -5045,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
|
||||
|
||||
/*
|
||||
* Avoiding lock: a race on bfqd->busy_queues should cause at
|
||||
* Avoiding lock: a race on bfqd->queued should cause at
|
||||
* most a call to dispatch for nothing
|
||||
*/
|
||||
return !list_empty_careful(&bfqd->dispatch) ||
|
||||
bfq_tot_busy_queues(bfqd) > 0;
|
||||
READ_ONCE(bfqd->queued);
|
||||
}
|
||||
|
||||
static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
||||
@@ -5310,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
|
||||
bfq_put_queue(bfqq);
|
||||
}
|
||||
|
||||
static void bfq_put_cooperator(struct bfq_queue *bfqq)
|
||||
void bfq_put_cooperator(struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_queue *__bfqq, *next;
|
||||
|
||||
@@ -5716,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq;
|
||||
struct bfq_group *bfqg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
|
||||
if (!bfqg) {
|
||||
bfqq = &bfqd->oom_bfqq;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bfqg = bfq_bio_bfqg(bfqd, bio);
|
||||
if (!is_sync) {
|
||||
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
|
||||
ioprio);
|
||||
@@ -5769,8 +5786,6 @@ out:
|
||||
|
||||
if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
|
||||
bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
|
||||
|
||||
rcu_read_unlock();
|
||||
return bfqq;
|
||||
}
|
||||
|
||||
@@ -6117,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
|
||||
unsigned int cmd_flags) {}
|
||||
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
|
||||
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq);
|
||||
|
||||
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
{
|
||||
@@ -6132,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bfqg_stats_update_legacy_io(q, rq);
|
||||
#endif
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
bfqq = bfq_init_rq(rq);
|
||||
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
blk_mq_free_requests(&free);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
bfqq = bfq_init_rq(rq);
|
||||
if (!bfqq || at_head) {
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
bfq_schedule_dispatch(bfqd);
|
||||
}
|
||||
|
||||
static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
|
||||
{
|
||||
bfqq_request_freed(bfqq);
|
||||
bfq_put_queue(bfqq);
|
||||
}
|
||||
|
||||
/*
|
||||
* The processes associated with bfqq may happen to generate their
|
||||
* cumulative I/O at a lower rate than the rate at which the device
|
||||
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
|
||||
|
||||
bfq_completed_request(bfqq, bfqd);
|
||||
}
|
||||
bfq_finish_requeue_request_body(bfqq);
|
||||
bfqq_request_freed(bfqq);
|
||||
bfq_put_queue(bfqq);
|
||||
RQ_BIC(rq)->requests--;
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
|
||||
/*
|
||||
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
|
||||
|
||||
bfqq_request_allocated(bfqq);
|
||||
bfqq->ref++;
|
||||
bic->requests++;
|
||||
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
|
||||
rq, bfqq, bfqq->ref);
|
||||
|
||||
@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
bfq_bfqq_expire(bfqd, bfqq, true, reason);
|
||||
|
||||
schedule_dispatch:
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
bfq_schedule_dispatch(bfqd);
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -468,6 +468,7 @@ struct bfq_io_cq {
|
||||
struct bfq_queue *stable_merge_bfqq;
|
||||
|
||||
bool stably_merged; /* non splittable if true */
|
||||
unsigned int requests; /* Number of requests this process has in flight */
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -928,6 +929,8 @@ struct bfq_group {
|
||||
|
||||
/* reference counter (see comments in bfq_bic_update_cgroup) */
|
||||
int ref;
|
||||
/* Is bfq_group still online? */
|
||||
bool online;
|
||||
|
||||
struct bfq_entity entity;
|
||||
struct bfq_sched_data sched_data;
|
||||
@@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bool compensate, enum bfqq_expiration reason);
|
||||
void bfq_put_queue(struct bfq_queue *bfqq);
|
||||
void bfq_put_cooperator(struct bfq_queue *bfqq);
|
||||
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
|
||||
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
void bfq_schedule_dispatch(struct bfq_data *bfqd);
|
||||
@@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
|
||||
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
|
||||
void bfq_end_wr_async(struct bfq_data *bfqd);
|
||||
struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
|
||||
struct blkcg *blkcg);
|
||||
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
|
||||
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
|
||||
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
|
||||
@@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
break; \
|
||||
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
|
||||
blk_add_cgroup_trace_msg((bfqd)->queue, \
|
||||
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
|
||||
&bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \
|
||||
"%s " fmt, pid_str, ##args); \
|
||||
} while (0)
|
||||
|
||||
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
|
||||
blk_add_cgroup_trace_msg((bfqd)->queue, \
|
||||
bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
|
||||
&bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
#else /* CONFIG_BFQ_GROUP_IOSCHED */
|
||||
|
||||
146
block/bio.c
146
block/bio.c
@@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit);
|
||||
static void bio_free(struct bio *bio)
|
||||
{
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
void *p;
|
||||
void *p = bio;
|
||||
|
||||
WARN_ON_ONCE(!bs);
|
||||
|
||||
bio_uninit(bio);
|
||||
|
||||
if (bs) {
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
|
||||
|
||||
/*
|
||||
* If we have front padding, adjust the bio pointer before freeing
|
||||
*/
|
||||
p = bio;
|
||||
p -= bs->front_pad;
|
||||
|
||||
mempool_free(p, &bs->bio_pool);
|
||||
} else {
|
||||
/* Bio was allocated by bio_kmalloc() */
|
||||
kfree(bio);
|
||||
}
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
|
||||
mempool_free(p - bs->front_pad, &bs->bio_pool);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -422,6 +411,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
queue_work(bs->rescue_workqueue, &bs->rescue_work);
|
||||
}
|
||||
|
||||
static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
|
||||
unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
|
||||
struct bio_set *bs)
|
||||
{
|
||||
struct bio_alloc_cache *cache;
|
||||
struct bio *bio;
|
||||
|
||||
cache = per_cpu_ptr(bs->cache, get_cpu());
|
||||
if (!cache->free_list) {
|
||||
put_cpu();
|
||||
return NULL;
|
||||
}
|
||||
bio = cache->free_list;
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
put_cpu();
|
||||
|
||||
bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
|
||||
bio->bi_pool = bs;
|
||||
return bio;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_alloc_bioset - allocate a bio for I/O
|
||||
* @bdev: block device to allocate the bio for (can be %NULL)
|
||||
@@ -454,6 +465,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
|
||||
* for per bio allocations.
|
||||
*
|
||||
* If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
|
||||
* context, not hard/soft IRQ.
|
||||
*
|
||||
* Returns: Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
|
||||
@@ -468,6 +482,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
|
||||
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
|
||||
return NULL;
|
||||
|
||||
if (opf & REQ_ALLOC_CACHE) {
|
||||
if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
|
||||
bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
|
||||
gfp_mask, bs);
|
||||
if (bio)
|
||||
return bio;
|
||||
/*
|
||||
* No cached bio available, bio returned below marked with
|
||||
* REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
|
||||
*/
|
||||
} else {
|
||||
opf &= ~REQ_ALLOC_CACHE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* submit_bio_noacct() converts recursion to iteration; this means if
|
||||
* we're running beneath it, any bios we allocate and submit will not be
|
||||
@@ -531,28 +560,28 @@ err_free:
|
||||
EXPORT_SYMBOL(bio_alloc_bioset);
|
||||
|
||||
/**
|
||||
* bio_kmalloc - kmalloc a bio for I/O
|
||||
* bio_kmalloc - kmalloc a bio
|
||||
* @nr_vecs: number of bio_vecs to allocate
|
||||
* @gfp_mask: the GFP_* mask given to the slab allocator
|
||||
* @nr_iovecs: number of iovecs to pre-allocate
|
||||
*
|
||||
* Use kmalloc to allocate and initialize a bio.
|
||||
* Use kmalloc to allocate a bio (including bvecs). The bio must be initialized
|
||||
* using bio_init() before use. To free a bio returned from this function use
|
||||
* kfree() after calling bio_uninit(). A bio returned from this function can
|
||||
* be reused by calling bio_uninit() before calling bio_init() again.
|
||||
*
|
||||
* Note that unlike bio_alloc() or bio_alloc_bioset() allocations from this
|
||||
* function are not backed by a mempool and can fail. Do not use this function
|
||||
* for allocations in the file system I/O path.
|
||||
*
|
||||
* Returns: Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
|
||||
struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
if (nr_iovecs > UIO_MAXIOV)
|
||||
if (nr_vecs > UIO_MAXIOV)
|
||||
return NULL;
|
||||
|
||||
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
|
||||
if (unlikely(!bio))
|
||||
return NULL;
|
||||
bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
|
||||
0);
|
||||
bio->bi_pool = NULL;
|
||||
return bio;
|
||||
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_kmalloc);
|
||||
|
||||
@@ -714,7 +743,7 @@ void bio_put(struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
|
||||
if (bio->bi_opf & REQ_ALLOC_CACHE) {
|
||||
struct bio_alloc_cache *cache;
|
||||
|
||||
bio_uninit(bio);
|
||||
@@ -735,14 +764,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
if (bio_flagged(bio_src, BIO_THROTTLED))
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
if (bio->bi_bdev == bio_src->bi_bdev &&
|
||||
bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio->bi_ioprio = bio_src->bi_ioprio;
|
||||
bio->bi_iter = bio_src->bi_iter;
|
||||
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
blkcg_bio_issue_init(bio);
|
||||
if (bio->bi_bdev) {
|
||||
if (bio->bi_bdev == bio_src->bi_bdev &&
|
||||
bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
}
|
||||
|
||||
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
|
||||
return -ENOMEM;
|
||||
@@ -1730,55 +1760,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
|
||||
flags |= BIOSET_NEED_BVECS;
|
||||
if (src->rescue_workqueue)
|
||||
flags |= BIOSET_NEED_RESCUER;
|
||||
if (src->cache)
|
||||
flags |= BIOSET_PERCPU_CACHE;
|
||||
|
||||
return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_init_from_src);
|
||||
|
||||
/**
|
||||
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
|
||||
* @kiocb: kiocb describing the IO
|
||||
* @bdev: block device to allocate the bio for (can be %NULL)
|
||||
* @nr_vecs: number of iovecs to pre-allocate
|
||||
* @opf: operation and flags for bio
|
||||
* @bs: bio_set to allocate from
|
||||
*
|
||||
* Description:
|
||||
* Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
|
||||
* used to check if we should dip into the per-cpu bio_set allocation
|
||||
* cache. The allocation uses GFP_KERNEL internally. On return, the
|
||||
* bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
|
||||
* MUST be done from process context, not hard/soft IRQ.
|
||||
*
|
||||
*/
|
||||
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
|
||||
unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
|
||||
{
|
||||
struct bio_alloc_cache *cache;
|
||||
struct bio *bio;
|
||||
|
||||
if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
|
||||
return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
|
||||
|
||||
cache = per_cpu_ptr(bs->cache, get_cpu());
|
||||
if (cache->free_list) {
|
||||
bio = cache->free_list;
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
put_cpu();
|
||||
bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
|
||||
nr_vecs, opf);
|
||||
bio->bi_pool = bs;
|
||||
bio_set_flag(bio, BIO_PERCPU_CACHE);
|
||||
return bio;
|
||||
}
|
||||
put_cpu();
|
||||
bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
|
||||
bio_set_flag(bio, BIO_PERCPU_CACHE);
|
||||
return bio;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
|
||||
|
||||
static int __init init_bio(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
57
block/blk-cgroup-fc-appid.c
Normal file
57
block/blk-cgroup-fc-appid.c
Normal file
@@ -0,0 +1,57 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "blk-cgroup.h"
|
||||
|
||||
/**
|
||||
* blkcg_set_fc_appid - set the fc_app_id field associted to blkcg
|
||||
* @app_id: application identifier
|
||||
* @cgrp_id: cgroup id
|
||||
* @app_id_len: size of application identifier
|
||||
*/
|
||||
int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct blkcg *blkcg;
|
||||
int ret = 0;
|
||||
|
||||
if (app_id_len > FC_APPID_LEN)
|
||||
return -EINVAL;
|
||||
|
||||
cgrp = cgroup_get_from_id(cgrp_id);
|
||||
if (!cgrp)
|
||||
return -ENOENT;
|
||||
css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
|
||||
if (!css) {
|
||||
ret = -ENOENT;
|
||||
goto out_cgrp_put;
|
||||
}
|
||||
blkcg = css_to_blkcg(css);
|
||||
/*
|
||||
* There is a slight race condition on setting the appid.
|
||||
* Worst case an I/O may not find the right id.
|
||||
* This is no different from the I/O we let pass while obtaining
|
||||
* the vmid from the fabric.
|
||||
* Adding the overhead of a lock is not necessary.
|
||||
*/
|
||||
strlcpy(blkcg->fc_app_id, app_id, app_id_len);
|
||||
css_put(css);
|
||||
out_cgrp_put:
|
||||
cgroup_put(cgrp);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
|
||||
|
||||
/**
|
||||
* blkcg_get_fc_appid - get the fc app identifier associated with a bio
|
||||
* @bio: target bio
|
||||
*
|
||||
* On success return the fc_app_id, on failure return NULL
|
||||
*/
|
||||
char *blkcg_get_fc_appid(struct bio *bio)
|
||||
{
|
||||
if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
|
||||
return NULL;
|
||||
return bio->bi_blkg->blkcg->fc_app_id;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
|
||||
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
|
||||
|
||||
#define BLKG_DESTROY_BATCH_SIZE 64
|
||||
|
||||
/**
|
||||
* blkcg_css - find the current css
|
||||
*
|
||||
* Find the css associated with either the kthread or the current task.
|
||||
* This may return a dying css, so it is up to the caller to use tryget logic
|
||||
* to confirm it is alive and well.
|
||||
*/
|
||||
static struct cgroup_subsys_state *blkcg_css(void)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
css = kthread_blkcg();
|
||||
if (css)
|
||||
return css;
|
||||
return task_css(current, io_cgrp_id);
|
||||
}
|
||||
|
||||
static bool blkcg_policy_enabled(struct request_queue *q,
|
||||
const struct blkcg_policy *pol)
|
||||
{
|
||||
@@ -155,6 +172,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_blkcg_css - return the blkcg CSS associated with a bio
|
||||
* @bio: target bio
|
||||
*
|
||||
* This returns the CSS for the blkcg associated with a bio, or %NULL if not
|
||||
* associated. Callers are expected to either handle %NULL or know association
|
||||
* has been done prior to calling this.
|
||||
*/
|
||||
struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
|
||||
{
|
||||
if (!bio || !bio->bi_blkg)
|
||||
return NULL;
|
||||
return &bio->bi_blkg->blkcg->css;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_blkcg_css);
|
||||
|
||||
/**
|
||||
* blkcg_parent - get the parent of a blkcg
|
||||
* @blkcg: blkcg of interest
|
||||
*
|
||||
* Return the parent blkcg of @blkcg. Can be called anytime.
|
||||
*/
|
||||
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
|
||||
{
|
||||
return css_to_blkcg(blkcg->css.parent);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_alloc - allocate a blkg
|
||||
* @blkcg: block cgroup the new blkg is associated with
|
||||
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
struct blkcg_gq *blkg;
|
||||
int i, ret;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
lockdep_assert_held(&q->queue_lock);
|
||||
|
||||
/* request_queue is dying, do not create/recreate a blkg */
|
||||
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
|
||||
{
|
||||
struct blkg_iostat_set *bis = &blkg->iostat;
|
||||
u64 rbytes, wbytes, rios, wios, dbytes, dios;
|
||||
bool has_stats = false;
|
||||
const char *dname;
|
||||
unsigned seq;
|
||||
int i;
|
||||
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
|
||||
} while (u64_stats_fetch_retry(&bis->sync, seq));
|
||||
|
||||
if (rbytes || wbytes || rios || wios) {
|
||||
has_stats = true;
|
||||
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
|
||||
rbytes, wbytes, rios, wios,
|
||||
dbytes, dios);
|
||||
}
|
||||
|
||||
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
|
||||
has_stats = true;
|
||||
seq_printf(s, " use_delay=%d delay_nsec=%llu",
|
||||
atomic_read(&blkg->use_delay),
|
||||
atomic64_read(&blkg->delay_nsec));
|
||||
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
|
||||
if (!blkg->pd[i] || !pol->pd_stat_fn)
|
||||
continue;
|
||||
|
||||
if (pol->pd_stat_fn(blkg->pd[i], s))
|
||||
has_stats = true;
|
||||
pol->pd_stat_fn(blkg->pd[i], s);
|
||||
}
|
||||
|
||||
if (has_stats)
|
||||
seq_printf(s, "\n");
|
||||
seq_puts(s, "\n");
|
||||
}
|
||||
|
||||
static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
|
||||
{
|
||||
return &css_to_blkcg(css)->cgwb_list;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* blkcg destruction is a three-stage process.
|
||||
*
|
||||
@@ -1015,25 +1060,6 @@ static struct cftype blkcg_legacy_files[] = {
|
||||
* This finally frees the blkcg.
|
||||
*/
|
||||
|
||||
/**
|
||||
* blkcg_css_offline - cgroup css_offline callback
|
||||
* @css: css of interest
|
||||
*
|
||||
* This function is called when @css is about to go away. Here the cgwbs are
|
||||
* offlined first and only once writeback associated with the blkcg has
|
||||
* finished do we start step 2 (see above).
|
||||
*/
|
||||
static void blkcg_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
|
||||
/* this prevents anyone from attaching or migrating to this blkcg */
|
||||
wb_blkcg_offline(blkcg);
|
||||
|
||||
/* put the base online pin allowing step 2 to be triggered */
|
||||
blkcg_unpin_online(blkcg);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_destroy_blkgs - responsible for shooting down blkgs
|
||||
* @blkcg: blkcg of interest
|
||||
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
|
||||
*
|
||||
* This is the blkcg counterpart of ioc_release_fn().
|
||||
*/
|
||||
void blkcg_destroy_blkgs(struct blkcg *blkcg)
|
||||
static void blkcg_destroy_blkgs(struct blkcg *blkcg)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_pin_online - pin online state
|
||||
* @blkcg_css: blkcg of interest
|
||||
*
|
||||
* While pinned, a blkcg is kept online. This is primarily used to
|
||||
* impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
|
||||
* while an associated cgwb is still active.
|
||||
*/
|
||||
void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
|
||||
{
|
||||
refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_unpin_online - unpin online state
|
||||
* @blkcg_css: blkcg of interest
|
||||
*
|
||||
* This is primarily used to impedance-match blkg and cgwb lifetimes so
|
||||
* that blkg doesn't go offline while an associated cgwb is still active.
|
||||
* When this count goes to zero, all active cgwbs have finished so the
|
||||
* blkcg can continue destruction by calling blkcg_destroy_blkgs().
|
||||
*/
|
||||
void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(blkcg_css);
|
||||
|
||||
do {
|
||||
if (!refcount_dec_and_test(&blkcg->online_pin))
|
||||
break;
|
||||
blkcg_destroy_blkgs(blkcg);
|
||||
blkcg = blkcg_parent(blkcg);
|
||||
} while (blkcg);
|
||||
}
|
||||
|
||||
/**
 * blkcg_css_offline - cgroup css_offline callback
 * @css: css of interest
 *
 * Called when @css is about to go away.  The cgwbs are offlined first; only
 * once writeback associated with the blkcg has finished does step 2 of the
 * destruction sequence (see above) start.
 */
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
	/* From here on nobody can attach or migrate to this blkcg. */
	wb_blkcg_offline(css);

	/* Drop the base online pin so that step 2 can be triggered. */
	blkcg_unpin_online(css);
}
|
||||
|
||||
static void blkcg_css_free(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
@@ -1163,8 +1240,7 @@ unlock:
|
||||
|
||||
static int blkcg_css_online(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg *parent = blkcg_parent(blkcg);
|
||||
struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
|
||||
|
||||
/*
|
||||
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
|
||||
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
|
||||
* parent so that offline always happens towards the root.
|
||||
*/
|
||||
if (parent)
|
||||
blkcg_pin_online(parent);
|
||||
blkcg_pin_online(css);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
|
||||
preloaded = !radix_tree_preload(GFP_KERNEL);
|
||||
|
||||
/* Make sure the root blkg exists. */
|
||||
rcu_read_lock();
|
||||
/* spin_lock_irq can serve as RCU read-side critical section. */
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
blkg = blkg_create(&blkcg_root, q, new_blkg);
|
||||
if (IS_ERR(blkg))
|
||||
goto err_unlock;
|
||||
q->root_blkg = blkg;
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (preloaded)
|
||||
radix_tree_preload_end();
|
||||
@@ -1234,7 +1309,6 @@ err_destroy_all:
|
||||
return ret;
|
||||
err_unlock:
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
if (preloaded)
|
||||
radix_tree_preload_end();
|
||||
return PTR_ERR(blkg);
|
||||
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
void blkcg_maybe_throttle_current(void)
|
||||
{
|
||||
struct request_queue *q = current->throttle_queue;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct blkcg *blkcg;
|
||||
struct blkcg_gq *blkg;
|
||||
bool use_memdelay = current->use_memdelay;
|
||||
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
|
||||
current->use_memdelay = false;
|
||||
|
||||
rcu_read_lock();
|
||||
css = kthread_blkcg();
|
||||
if (css)
|
||||
blkcg = css_to_blkcg(css);
|
||||
else
|
||||
blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
|
||||
|
||||
blkcg = css_to_blkcg(blkcg_css());
|
||||
if (!blkcg)
|
||||
goto out;
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
|
||||
rcu_read_lock();
|
||||
|
||||
if (bio->bi_blkg)
|
||||
css = &bio_blkcg(bio)->css;
|
||||
css = bio_blkcg_css(bio);
|
||||
else
|
||||
css = blkcg_css();
|
||||
|
||||
@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
bool blk_cgroup_congested(void)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
bool ret = false;
|
||||
|
||||
rcu_read_lock();
|
||||
for (css = blkcg_css(); css; css = css->parent) {
|
||||
if (atomic_read(&css->cgroup->congestion_count)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init blkcg_init(void)
|
||||
{
|
||||
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
|
||||
|
||||
@@ -15,13 +15,101 @@
|
||||
*/
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
struct blkcg_gq;
|
||||
struct blkg_policy_data;
|
||||
|
||||
|
||||
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
|
||||
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
/*
 * Index into the bytes[] and ios[] arrays of struct blkg_iostat.
 * BLKG_IOSTAT_NR is not a stat type; it is the number of entries and is used
 * as the array dimension.
 */
enum blkg_iostat_type {
|
||||
BLKG_IOSTAT_READ,
|
||||
BLKG_IOSTAT_WRITE,
|
||||
BLKG_IOSTAT_DISCARD,
|
||||
|
||||
BLKG_IOSTAT_NR,
|
||||
};
|
||||
|
||||
/* One set of I/O counters, with one slot per enum blkg_iostat_type value. */
struct blkg_iostat {
|
||||
/* presumably bytes transferred per I/O type -- inferred from the name */
u64 bytes[BLKG_IOSTAT_NR];
|
||||
/* presumably number of I/Os per I/O type -- inferred from the name */
u64 ios[BLKG_IOSTAT_NR];
|
||||
};
|
||||
|
||||
/* A pair of blkg_iostat snapshots together with their u64_stats sync object. */
struct blkg_iostat_set {
|
||||
/* readers loop on u64_stats_fetch_retry(&sync, ...) to get a stable view */
struct u64_stats_sync sync;
|
||||
/* NOTE(review): looks like the running totals -- confirm against updaters */
struct blkg_iostat cur;
|
||||
/* NOTE(review): looks like the previously propagated snapshot -- confirm */
struct blkg_iostat last;
|
||||
};
|
||||
|
||||
/* association between a blk cgroup and a request queue */
|
||||
struct blkcg_gq {
|
||||
/* Pointer to the associated request_queue */
|
||||
struct request_queue *q;
|
||||
struct list_head q_node;
|
||||
struct hlist_node blkcg_node;
|
||||
/* owning block cgroup; bio->bi_blkg->blkcg is how a bio reaches its blkcg */
struct blkcg *blkcg;
|
||||
|
||||
/* all non-root blkcg_gq's are guaranteed to have access to parent */
|
||||
struct blkcg_gq *parent;
|
||||
|
||||
/* reference count */
|
||||
struct percpu_ref refcnt;
|
||||
|
||||
/* is this blkg online? protected by both blkcg and q locks */
|
||||
bool online;
|
||||
|
||||
/* per-CPU counter sets (declared __percpu) */
struct blkg_iostat_set __percpu *iostat_cpu;
|
||||
/* counter set read by blkcg_print_one_stat() for rbytes/wbytes/rios/wios/... */
struct blkg_iostat_set iostat;
|
||||
|
||||
/* per-policy data, one slot per policy; a slot may be NULL */
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
|
||||
|
||||
/* NOTE(review): presumably guards async_bios -- confirm against users */
spinlock_t async_bio_lock;
|
||||
struct bio_list async_bios;
|
||||
/* the two work items share storage */
union {
|
||||
struct work_struct async_bio_work;
|
||||
struct work_struct free_work;
|
||||
};
|
||||
|
||||
/* use_delay and delay_nsec are printed by blkcg_print_one_stat() when blkcg_debug_stats is set */
atomic_t use_delay;
|
||||
atomic64_t delay_nsec;
|
||||
atomic64_t delay_start;
|
||||
u64 last_delay;
|
||||
int last_use;
|
||||
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/* Block-layer state attached to a cgroup; reached from a css via css_to_blkcg(). */
struct blkcg {
|
||||
/* embedded css; css_to_blkcg() recovers the blkcg with container_of() on this member */
struct cgroup_subsys_state css;
|
||||
spinlock_t lock;
|
||||
/* taken by blkcg_pin_online(), dropped by blkcg_unpin_online() */
refcount_t online_pin;
|
||||
|
||||
struct radix_tree_root blkg_tree;
|
||||
struct blkcg_gq __rcu *blkg_hint;
|
||||
struct hlist_head blkg_list;
|
||||
|
||||
/* per-policy blkcg data, one slot per policy */
struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
|
||||
|
||||
struct list_head all_blkcgs_node;
|
||||
#ifdef CONFIG_BLK_CGROUP_FC_APPID
|
||||
/* written by blkcg_set_fc_appid(), returned by blkcg_get_fc_appid() */
char fc_app_id[FC_APPID_LEN];
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
/* list head handed out by blkcg_get_cgwb_list() */
struct list_head cgwb_list;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
|
||||
{
|
||||
return css ? container_of(css, struct blkcg, css) : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
|
||||
* request_queue (q). This is used by blkcg policies which need to track
|
||||
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
|
||||
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
|
||||
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
|
||||
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
|
||||
typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
|
||||
typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
|
||||
struct seq_file *s);
|
||||
|
||||
struct blkcg_policy {
|
||||
@@ -122,53 +210,15 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
|
||||
|
||||
/**
|
||||
* blkcg_css - find the current css
|
||||
*
|
||||
* Find the css associated with either the kthread or the current task.
|
||||
* This may return a dying css, so it is up to the caller to use tryget logic
|
||||
* to confirm it is alive and well.
|
||||
*/
|
||||
static inline struct cgroup_subsys_state *blkcg_css(void)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
css = kthread_blkcg();
|
||||
if (css)
|
||||
return css;
|
||||
return task_css(current, io_cgrp_id);
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_blkcg - internal, inconsistent version to get blkcg
|
||||
*
|
||||
* DO NOT USE.
|
||||
* This function is inconsistent and consequently is dangerous to use. The
|
||||
* first part of the function returns a blkcg where a reference is owned by the
|
||||
* bio. This means it does not need to be rcu protected as it cannot go away
|
||||
* with the bio owning a reference to it. However, the latter potentially gets
|
||||
* it from task_css(). This can race against task migration and the cgroup
|
||||
* dying. It is also semantically different as it must be called rcu protected
|
||||
* and is susceptible to failure when trying to get a reference to it.
|
||||
* Therefore, it is not ok to assume that *_get() will always succeed on the
|
||||
* blkcg returned here.
|
||||
*/
|
||||
static inline struct blkcg *__bio_blkcg(struct bio *bio)
|
||||
{
|
||||
if (bio && bio->bi_blkg)
|
||||
return bio->bi_blkg->blkcg;
|
||||
return css_to_blkcg(blkcg_css());
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
|
||||
* @return: true if this bio needs to be submitted with the root blkg context.
|
||||
*
|
||||
* In order to avoid priority inversions we sometimes need to issue a bio as if
|
||||
* it were attached to the root blkg, and then backcharge to the actual owning
|
||||
* blkg. The idea is we do bio_blkcg() to look up the actual context for the
|
||||
* bio and attach the appropriate blkg to the bio. Then we call this helper and
|
||||
* if it is true run with the root blkg for that queue and then do any
|
||||
* blkg. The idea is we do bio_blkcg_css() to look up the actual context for
|
||||
* the bio and attach the appropriate blkg to the bio. Then we call this helper
|
||||
* and if it is true run with the root blkg for that queue and then do any
|
||||
* backcharging to the originating cgroup once the io is complete.
|
||||
*/
|
||||
static inline bool bio_issue_as_root_blkg(struct bio *bio)
|
||||
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
|
||||
struct blkcg_policy {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
struct blkcg {
|
||||
};
|
||||
|
||||
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
|
||||
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
|
||||
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
|
||||
static inline void blkcg_deactivate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol) { }
|
||||
|
||||
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
|
||||
|
||||
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
|
||||
struct blkcg_policy *pol) { return NULL; }
|
||||
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
|
||||
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
|
||||
#define blk_queue_for_each_rl(rl, q) \
|
||||
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
|
||||
|
||||
#endif /* CONFIG_BLOCK */
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
#endif /* _BLK_CGROUP_PRIVATE_H */
|
||||
|
||||
@@ -588,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
|
||||
(nr_sectors > maxsector ||
|
||||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
|
||||
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
|
||||
"%pg: rw=%d, want=%llu, limit=%llu\n",
|
||||
current->comm,
|
||||
bio->bi_bdev, bio->bi_opf,
|
||||
bio_end_sector(bio), maxsector);
|
||||
"%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
|
||||
current->comm, bio->bi_bdev, bio->bi_opf,
|
||||
bio->bi_iter.bi_sector, nr_sectors, maxsector);
|
||||
return -EIO;
|
||||
}
|
||||
return 0;
|
||||
@@ -816,11 +815,11 @@ void submit_bio_noacct(struct bio *bio)
|
||||
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_DISCARD:
|
||||
if (!blk_queue_discard(q))
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_SECURE_ERASE:
|
||||
if (!blk_queue_secure_erase(q))
|
||||
if (!bdev_max_secure_erase_sectors(bdev))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
@@ -889,19 +888,11 @@ void submit_bio(struct bio *bio)
|
||||
if (blkcg_punt_bio_submit(bio))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If it's a regular read/write or a barrier with data attached,
|
||||
* go through the normal accounting stuff before submission.
|
||||
*/
|
||||
if (bio_has_data(bio)) {
|
||||
unsigned int count = bio_sectors(bio);
|
||||
|
||||
if (op_is_write(bio_op(bio))) {
|
||||
count_vm_events(PGPGOUT, count);
|
||||
} else {
|
||||
task_io_account_read(bio->bi_iter.bi_size);
|
||||
count_vm_events(PGPGIN, count);
|
||||
}
|
||||
if (bio_op(bio) == REQ_OP_READ) {
|
||||
task_io_account_read(bio->bi_iter.bi_size);
|
||||
count_vm_events(PGPGIN, bio_sectors(bio));
|
||||
} else if (bio_op(bio) == REQ_OP_WRITE) {
|
||||
count_vm_events(PGPGOUT, bio_sectors(bio));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1018,21 +1009,22 @@ again:
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long __part_start_io_acct(struct block_device *part,
|
||||
unsigned int sectors, unsigned int op,
|
||||
unsigned long start_time)
|
||||
unsigned long bdev_start_io_acct(struct block_device *bdev,
|
||||
unsigned int sectors, unsigned int op,
|
||||
unsigned long start_time)
|
||||
{
|
||||
const int sgrp = op_stat_group(op);
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(part, start_time, false);
|
||||
part_stat_inc(part, ios[sgrp]);
|
||||
part_stat_add(part, sectors[sgrp], sectors);
|
||||
part_stat_local_inc(part, in_flight[op_is_write(op)]);
|
||||
update_io_ticks(bdev, start_time, false);
|
||||
part_stat_inc(bdev, ios[sgrp]);
|
||||
part_stat_add(bdev, sectors[sgrp], sectors);
|
||||
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
|
||||
part_stat_unlock();
|
||||
|
||||
return start_time;
|
||||
}
|
||||
EXPORT_SYMBOL(bdev_start_io_acct);
|
||||
|
||||
/**
|
||||
* bio_start_io_acct_time - start I/O accounting for bio based drivers
|
||||
@@ -1041,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
|
||||
*/
|
||||
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
|
||||
{
|
||||
__part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
|
||||
bio_op(bio), start_time);
|
||||
bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
|
||||
bio_op(bio), start_time);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
|
||||
|
||||
@@ -1054,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
|
||||
*/
|
||||
unsigned long bio_start_io_acct(struct bio *bio)
|
||||
{
|
||||
return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
|
||||
bio_op(bio), jiffies);
|
||||
return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
|
||||
bio_op(bio), jiffies);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_start_io_acct);
|
||||
|
||||
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
|
||||
unsigned int op)
|
||||
{
|
||||
return __part_start_io_acct(disk->part0, sectors, op, jiffies);
|
||||
}
|
||||
EXPORT_SYMBOL(disk_start_io_acct);
|
||||
|
||||
static void __part_end_io_acct(struct block_device *part, unsigned int op,
|
||||
unsigned long start_time)
|
||||
void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
|
||||
unsigned long start_time)
|
||||
{
|
||||
const int sgrp = op_stat_group(op);
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
unsigned long duration = now - start_time;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(part, now, true);
|
||||
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
|
||||
part_stat_local_dec(part, in_flight[op_is_write(op)]);
|
||||
update_io_ticks(bdev, now, true);
|
||||
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
|
||||
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(bdev_end_io_acct);
|
||||
|
||||
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
|
||||
struct block_device *orig_bdev)
|
||||
struct block_device *orig_bdev)
|
||||
{
|
||||
__part_end_io_acct(orig_bdev, bio_op(bio), start_time);
|
||||
bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
|
||||
|
||||
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
|
||||
unsigned long start_time)
|
||||
{
|
||||
__part_end_io_acct(disk->part0, op, start_time);
|
||||
}
|
||||
EXPORT_SYMBOL(disk_end_io_acct);
|
||||
|
||||
/**
|
||||
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
|
||||
* @q : the queue of the device being checked
|
||||
|
||||
@@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
|
||||
|
||||
src_bio->bi_status = enc_bio->bi_status;
|
||||
|
||||
bio_put(enc_bio);
|
||||
bio_uninit(enc_bio);
|
||||
kfree(enc_bio);
|
||||
bio_endio(src_bio);
|
||||
}
|
||||
|
||||
static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
|
||||
{
|
||||
unsigned int nr_segs = bio_segments(bio_src);
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bv;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
|
||||
bio = bio_kmalloc(nr_segs, GFP_NOIO);
|
||||
if (!bio)
|
||||
return NULL;
|
||||
bio->bi_bdev = bio_src->bi_bdev;
|
||||
bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs,
|
||||
bio_src->bi_opf);
|
||||
if (bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_ioprio = bio_src->bi_ioprio;
|
||||
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
|
||||
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
|
||||
@@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
|
||||
bio->bi_io_vec[bio->bi_vcnt++] = bv;
|
||||
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
bio_clone_skip_dm_default_key(bio, bio_src);
|
||||
|
||||
@@ -365,8 +366,8 @@ out_release_keyslot:
|
||||
blk_crypto_put_keyslot(slot);
|
||||
out_put_enc_bio:
|
||||
if (enc_bio)
|
||||
bio_put(enc_bio);
|
||||
|
||||
bio_uninit(enc_bio);
|
||||
kfree(enc_bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -533,8 +533,7 @@ struct ioc_gq {
|
||||
|
||||
/* statistics */
|
||||
struct iocg_pcpu_stat __percpu *pcpu_stat;
|
||||
struct iocg_stat local_stat;
|
||||
struct iocg_stat desc_stat;
|
||||
struct iocg_stat stat;
|
||||
struct iocg_stat last_stat;
|
||||
u64 last_stat_abs_vusage;
|
||||
u64 usage_delta_us;
|
||||
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
|
||||
return true;
|
||||
} else {
|
||||
if (iocg->indelay_since) {
|
||||
iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
|
||||
iocg->stat.indelay_us += now->now - iocg->indelay_since;
|
||||
iocg->indelay_since = 0;
|
||||
}
|
||||
iocg->delay = 0;
|
||||
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
|
||||
|
||||
/* if debt is paid in full, restore inuse */
|
||||
if (!iocg->abs_vdebt) {
|
||||
iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
|
||||
iocg->stat.indebt_us += now->now - iocg->indebt_since;
|
||||
iocg->indebt_since = 0;
|
||||
|
||||
propagate_weights(iocg, iocg->active, iocg->last_inuse,
|
||||
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
|
||||
|
||||
if (!waitqueue_active(&iocg->waitq)) {
|
||||
if (iocg->wait_since) {
|
||||
iocg->local_stat.wait_us += now->now - iocg->wait_since;
|
||||
iocg->stat.wait_us += now->now - iocg->wait_since;
|
||||
iocg->wait_since = 0;
|
||||
}
|
||||
return;
|
||||
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
|
||||
}
|
||||
}
|
||||
|
||||
/* propagate the deltas to the parent */
|
||||
static void iocg_flush_stat_upward(struct ioc_gq *iocg)
|
||||
{
|
||||
if (iocg->level > 0) {
|
||||
struct iocg_stat *parent_stat =
|
||||
&iocg->ancestors[iocg->level - 1]->stat;
|
||||
|
||||
parent_stat->usage_us +=
|
||||
iocg->stat.usage_us - iocg->last_stat.usage_us;
|
||||
parent_stat->wait_us +=
|
||||
iocg->stat.wait_us - iocg->last_stat.wait_us;
|
||||
parent_stat->indebt_us +=
|
||||
iocg->stat.indebt_us - iocg->last_stat.indebt_us;
|
||||
parent_stat->indelay_us +=
|
||||
iocg->stat.indelay_us - iocg->last_stat.indelay_us;
|
||||
}
|
||||
|
||||
iocg->last_stat = iocg->stat;
|
||||
}
|
||||
|
||||
/* collect per-cpu counters and propagate the deltas to the parent */
|
||||
static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
|
||||
static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
|
||||
{
|
||||
struct ioc *ioc = iocg->ioc;
|
||||
struct iocg_stat new_stat;
|
||||
u64 abs_vusage = 0;
|
||||
u64 vusage_delta;
|
||||
int cpu;
|
||||
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
|
||||
iocg->last_stat_abs_vusage = abs_vusage;
|
||||
|
||||
iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
|
||||
iocg->local_stat.usage_us += iocg->usage_delta_us;
|
||||
iocg->stat.usage_us += iocg->usage_delta_us;
|
||||
|
||||
/* propagate upwards */
|
||||
new_stat.usage_us =
|
||||
iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
|
||||
new_stat.wait_us =
|
||||
iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
|
||||
new_stat.indebt_us =
|
||||
iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
|
||||
new_stat.indelay_us =
|
||||
iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
|
||||
|
||||
/* propagate the deltas to the parent */
|
||||
if (iocg->level > 0) {
|
||||
struct iocg_stat *parent_stat =
|
||||
&iocg->ancestors[iocg->level - 1]->desc_stat;
|
||||
|
||||
parent_stat->usage_us +=
|
||||
new_stat.usage_us - iocg->last_stat.usage_us;
|
||||
parent_stat->wait_us +=
|
||||
new_stat.wait_us - iocg->last_stat.wait_us;
|
||||
parent_stat->indebt_us +=
|
||||
new_stat.indebt_us - iocg->last_stat.indebt_us;
|
||||
parent_stat->indelay_us +=
|
||||
new_stat.indelay_us - iocg->last_stat.indelay_us;
|
||||
}
|
||||
|
||||
iocg->last_stat = new_stat;
|
||||
iocg_flush_stat_upward(iocg);
|
||||
}
|
||||
|
||||
/* get stat counters ready for reading on all active iocgs */
|
||||
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
|
||||
|
||||
/* flush leaves and build inner node walk list */
|
||||
list_for_each_entry(iocg, target_iocgs, active_list) {
|
||||
iocg_flush_stat_one(iocg, now);
|
||||
iocg_flush_stat_leaf(iocg, now);
|
||||
iocg_build_inner_walk(iocg, &inner_walk);
|
||||
}
|
||||
|
||||
/* keep flushing upwards by walking the inner list backwards */
|
||||
list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
|
||||
iocg_flush_stat_one(iocg, now);
|
||||
iocg_flush_stat_upward(iocg);
|
||||
list_del_init(&iocg->walk_list);
|
||||
}
|
||||
}
|
||||
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
|
||||
|
||||
/* flush wait and indebt stat deltas */
|
||||
if (iocg->wait_since) {
|
||||
iocg->local_stat.wait_us += now->now - iocg->wait_since;
|
||||
iocg->stat.wait_us += now->now - iocg->wait_since;
|
||||
iocg->wait_since = now->now;
|
||||
}
|
||||
if (iocg->indebt_since) {
|
||||
iocg->local_stat.indebt_us +=
|
||||
iocg->stat.indebt_us +=
|
||||
now->now - iocg->indebt_since;
|
||||
iocg->indebt_since = now->now;
|
||||
}
|
||||
if (iocg->indelay_since) {
|
||||
iocg->local_stat.indelay_us +=
|
||||
iocg->stat.indelay_us +=
|
||||
now->now - iocg->indelay_since;
|
||||
iocg->indelay_since = now->now;
|
||||
}
|
||||
@@ -3005,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
|
||||
kfree(iocg);
|
||||
}
|
||||
|
||||
static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
{
|
||||
struct ioc_gq *iocg = pd_to_iocg(pd);
|
||||
struct ioc *ioc = iocg->ioc;
|
||||
|
||||
if (!ioc->enabled)
|
||||
return false;
|
||||
return;
|
||||
|
||||
if (iocg->level == 0) {
|
||||
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
|
||||
@@ -3027,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
iocg->last_stat.wait_us,
|
||||
iocg->last_stat.indebt_us,
|
||||
iocg->last_stat.indelay_us);
|
||||
return true;
|
||||
}
|
||||
|
||||
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
|
||||
@@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
{
|
||||
struct latency_stat stat;
|
||||
int cpu;
|
||||
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
(unsigned long long)stat.ps.missed,
|
||||
(unsigned long long)stat.ps.total,
|
||||
iolat->rq_depth.max_depth);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
{
|
||||
struct iolatency_grp *iolat = pd_to_lat(pd);
|
||||
unsigned long long avg_lat;
|
||||
unsigned long long cur_win;
|
||||
|
||||
if (!blkcg_debug_stats)
|
||||
return false;
|
||||
return;
|
||||
|
||||
if (iolat->ssd)
|
||||
return iolatency_ssd_stat(iolat, s);
|
||||
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
else
|
||||
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
|
||||
iolat->rq_depth.max_depth, avg_lat, cur_win);
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
|
||||
|
||||
126
block/blk-lib.c
126
block/blk-lib.c
@@ -10,30 +10,44 @@
|
||||
|
||||
#include "blk.h"
|
||||
|
||||
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, int flags,
|
||||
struct bio **biop)
|
||||
/*
 * Return how many sectors the next discard bio starting at @sector of @bdev
 * may cover.
 *
 * For a partition, @sector is first offset by the partition's bd_start_sect
 * so that alignment is checked against the discard granularity using that
 * shifted value.
 */
static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
{
	unsigned int granularity = bdev_discard_granularity(bdev);
	sector_t aligned;

	if (bdev_is_partition(bdev))
		sector += bdev->bd_start_sect;

	aligned = round_up(sector, granularity >> SECTOR_SHIFT);

	/*
	 * Already aligned: allow the largest byte size that fits in an
	 * unsigned int and is a multiple of the granularity, which keeps
	 * later splits in the driver on granularity boundaries.
	 */
	if (aligned == sector)
		return round_down(UINT_MAX, granularity) >> SECTOR_SHIFT;

	/*
	 * Unaligned start: stop at the next granularity boundary so that any
	 * following bio begins aligned.
	 */
	return aligned - sector;
}
|
||||
|
||||
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
struct bio *bio = *biop;
|
||||
unsigned int op;
|
||||
sector_t bs_mask, part_offset = 0;
|
||||
sector_t bs_mask;
|
||||
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
|
||||
if (flags & BLKDEV_DISCARD_SECURE) {
|
||||
if (!blk_queue_secure_erase(q))
|
||||
return -EOPNOTSUPP;
|
||||
op = REQ_OP_SECURE_ERASE;
|
||||
} else {
|
||||
if (!blk_queue_discard(q))
|
||||
return -EOPNOTSUPP;
|
||||
op = REQ_OP_DISCARD;
|
||||
}
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* In case the discard granularity isn't set by buggy device driver */
|
||||
if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
|
||||
if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
|
||||
char dev_name[BDEVNAME_SIZE];
|
||||
|
||||
bdevname(bdev, dev_name);
|
||||
@@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
if (!nr_sects)
|
||||
return -EINVAL;
|
||||
|
||||
/* In case the discard request is in a partition */
|
||||
if (bdev_is_partition(bdev))
|
||||
part_offset = bdev->bd_start_sect;
|
||||
|
||||
while (nr_sects) {
|
||||
sector_t granularity_aligned_lba, req_sects;
|
||||
sector_t sector_mapped = sector + part_offset;
|
||||
sector_t req_sects =
|
||||
min(nr_sects, bio_discard_limit(bdev, sector));
|
||||
|
||||
granularity_aligned_lba = round_up(sector_mapped,
|
||||
q->limits.discard_granularity >> SECTOR_SHIFT);
|
||||
|
||||
/*
|
||||
* Check whether the discard bio starts at a discard_granularity
|
||||
* aligned LBA,
|
||||
* - If no: set (granularity_aligned_lba - sector_mapped) to
|
||||
* bi_size of the first split bio, then the second bio will
|
||||
* start at a discard_granularity aligned LBA on the device.
|
||||
* - If yes: use bio_aligned_discard_max_sectors() as the max
|
||||
* possible bi_size of the first split bio. Then when this bio
|
||||
* is split in device drive, the split ones are very probably
|
||||
* to be aligned to discard_granularity of the device's queue.
|
||||
*/
|
||||
if (granularity_aligned_lba == sector_mapped)
|
||||
req_sects = min_t(sector_t, nr_sects,
|
||||
bio_aligned_discard_max_sectors(q));
|
||||
else
|
||||
req_sects = min_t(sector_t, nr_sects,
|
||||
granularity_aligned_lba - sector_mapped);
|
||||
|
||||
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio->bi_iter.bi_size = req_sects << 9;
|
||||
sector += req_sects;
|
||||
@@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
|
||||
* @sector: start sector
|
||||
* @nr_sects: number of sectors to discard
|
||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||
* @flags: BLKDEV_DISCARD_* flags to control behaviour
|
||||
*
|
||||
* Description:
|
||||
* Issue a discard request for the sectors in question.
|
||||
*/
|
||||
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
|
||||
sector_t nr_sects, gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio = NULL;
|
||||
struct blk_plug plug;
|
||||
int ret;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
|
||||
&bio);
|
||||
ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
|
||||
if (!ret && bio) {
|
||||
ret = submit_bio_wait(bio);
|
||||
if (ret == -EOPNOTSUPP)
|
||||
@@ -316,3 +301,42 @@ retry:
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blkdev_issue_zeroout);
|
||||
|
||||
/**
 * blkdev_issue_secure_erase - issue secure erase bios for a sector range
 * @bdev:	block device to operate on
 * @sector:	first sector of the range (512-byte units)
 * @nr_sects:	number of sectors to erase (512-byte units)
 * @gfp:	memory allocation flags passed to blk_next_bio()
 *
 * Splits the range into REQ_OP_SECURE_ERASE bios of at most
 * bdev_max_secure_erase_sectors() sectors each, chains them and waits for
 * the last one.
 *
 * Return: -EOPNOTSUPP if the device reports no secure erase capacity,
 * -EINVAL if @sector or @nr_sects is not aligned to the logical block size,
 * -EPERM if the device is read-only, otherwise the result of
 * submit_bio_wait() (0 when @nr_sects is 0 and nothing is submitted).
 */
int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp)
{
	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	/* bi_size is a byte count: make sure "len << SECTOR_SHIFT" fits */
	if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
		max_sectors = UINT_MAX >> SECTOR_SHIFT;
	/* keep every chunk a multiple of the logical block size */
	max_sectors &= ~bs_mask;

	if (max_sectors == 0)
		return -EOPNOTSUPP;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;
	if (bdev_read_only(bdev))
		return -EPERM;

	blk_start_plug(&plug);
	while (nr_sects) {
		/* chunk length in sectors */
		unsigned int len = min_t(sector_t, nr_sects, max_sectors);

		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
		bio->bi_iter.bi_sector = sector;
		/* bi_size is in bytes, the loop counters are in sectors */
		bio->bi_iter.bi_size = len << SECTOR_SHIFT;

		sector += len;
		nr_sects -= len;
		cond_resched();
	}
	if (bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_secure_erase);
|
||||
|
||||
@@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
|
||||
nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
|
||||
|
||||
ret = -ENOMEM;
|
||||
bio = bio_kmalloc(gfp_mask, nr_pages);
|
||||
bio = bio_kmalloc(nr_pages, gfp_mask);
|
||||
if (!bio)
|
||||
goto out_bmd;
|
||||
bio->bi_opf |= req_op(rq);
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));
|
||||
|
||||
if (map_data) {
|
||||
nr_pages = 1 << map_data->page_order;
|
||||
@@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
|
||||
cleanup:
|
||||
if (!map_data)
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
out_bmd:
|
||||
kfree(bmd);
|
||||
return ret;
|
||||
@@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
|
||||
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
int j;
|
||||
@@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
|
||||
if (!iov_iter_count(iter))
|
||||
return -EINVAL;
|
||||
|
||||
bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
|
||||
bio = bio_kmalloc(nr_vecs, gfp_mask);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
bio->bi_opf |= req_op(rq);
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
|
||||
|
||||
while (iov_iter_count(iter)) {
|
||||
struct page **pages;
|
||||
@@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
|
||||
|
||||
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
|
||||
|
||||
if (unlikely(offs & queue_dma_alignment(rq->q))) {
|
||||
ret = -EINVAL;
|
||||
if (unlikely(offs & queue_dma_alignment(rq->q)))
|
||||
j = 0;
|
||||
} else {
|
||||
else {
|
||||
for (j = 0; j < npages; j++) {
|
||||
struct page *page = pages[j];
|
||||
unsigned int n = PAGE_SIZE - offs;
|
||||
@@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
|
||||
|
||||
out_unmap:
|
||||
bio_release_pages(bio, false);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
|
||||
static void bio_map_kern_endio(struct bio *bio)
|
||||
{
|
||||
bio_invalidate_vmalloc_pages(bio);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
|
||||
int offset, i;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_kmalloc(gfp_mask, nr_pages);
|
||||
bio = bio_kmalloc(nr_pages, gfp_mask);
|
||||
if (!bio)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
|
||||
|
||||
if (is_vmalloc) {
|
||||
flush_kernel_vmap_range(data, len);
|
||||
@@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
|
||||
if (bio_add_pc_page(q, bio, page, bytes,
|
||||
offset) < bytes) {
|
||||
/* we don't support partial mappings */
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
@@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
|
||||
static void bio_copy_kern_endio(struct bio *bio)
|
||||
{
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
}
|
||||
|
||||
static void bio_copy_kern_endio_read(struct bio *bio)
|
||||
@@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
nr_pages = end - start;
|
||||
bio = bio_kmalloc(gfp_mask, nr_pages);
|
||||
bio = bio_kmalloc(nr_pages, gfp_mask);
|
||||
if (!bio)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
|
||||
|
||||
while (len) {
|
||||
struct page *page;
|
||||
@@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
|
||||
|
||||
cleanup:
|
||||
bio_free_pages(bio);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio)
|
||||
|
||||
next_bio = bio;
|
||||
bio = bio->bi_next;
|
||||
bio_put(next_bio);
|
||||
bio_uninit(next_bio);
|
||||
kfree(next_bio);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
bio->bi_opf |= req_op(rq);
|
||||
|
||||
ret = blk_rq_append_bio(rq, bio);
|
||||
if (unlikely(ret))
|
||||
bio_put(bio);
|
||||
if (unlikely(ret)) {
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_kern);
|
||||
|
||||
@@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = {
|
||||
QUEUE_FLAG_NAME(FAIL_IO),
|
||||
QUEUE_FLAG_NAME(NONROT),
|
||||
QUEUE_FLAG_NAME(IO_STAT),
|
||||
QUEUE_FLAG_NAME(DISCARD),
|
||||
QUEUE_FLAG_NAME(NOXMERGES),
|
||||
QUEUE_FLAG_NAME(ADD_RANDOM),
|
||||
QUEUE_FLAG_NAME(SECERASE),
|
||||
QUEUE_FLAG_NAME(SAME_FORCE),
|
||||
QUEUE_FLAG_NAME(DEAD),
|
||||
QUEUE_FLAG_NAME(INIT_DONE),
|
||||
|
||||
@@ -1083,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
|
||||
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
|
||||
|
||||
/*
|
||||
* For a polled request, always complete locallly, it's pointless
|
||||
* For a polled request, always complete locally, it's pointless
|
||||
* to redirect the completion.
|
||||
*/
|
||||
if (rq->cmd_flags & REQ_POLLED)
|
||||
|
||||
@@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim)
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_discard_sectors = 0;
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
lim->max_secure_erase_sectors = 0;
|
||||
lim->discard_granularity = 0;
|
||||
lim->discard_alignment = 0;
|
||||
lim->discard_misaligned = 0;
|
||||
@@ -176,6 +177,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
|
||||
* @q: the request queue for the device
|
||||
* @max_sectors: maximum number of sectors to secure_erase
|
||||
**/
|
||||
void blk_queue_max_secure_erase_sectors(struct request_queue *q,
|
||||
unsigned int max_sectors)
|
||||
{
|
||||
q->limits.max_secure_erase_sectors = max_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_write_zeroes_sectors - set max sectors for a single
|
||||
* write zeroes
|
||||
@@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_io_opt);
|
||||
|
||||
static int queue_limit_alignment_offset(struct queue_limits *lim,
|
||||
sector_t sector)
|
||||
{
|
||||
unsigned int granularity = max(lim->physical_block_size, lim->io_min);
|
||||
unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
|
||||
<< SECTOR_SHIFT;
|
||||
|
||||
return (granularity + lim->alignment_offset - alignment) % granularity;
|
||||
}
|
||||
|
||||
static unsigned int queue_limit_discard_alignment(struct queue_limits *lim,
|
||||
sector_t sector)
|
||||
{
|
||||
unsigned int alignment, granularity, offset;
|
||||
|
||||
if (!lim->max_discard_sectors)
|
||||
return 0;
|
||||
|
||||
/* Why are these in bytes, not sectors? */
|
||||
alignment = lim->discard_alignment >> SECTOR_SHIFT;
|
||||
granularity = lim->discard_granularity >> SECTOR_SHIFT;
|
||||
if (!granularity)
|
||||
return 0;
|
||||
|
||||
/* Offset of the partition start in 'granularity' sectors */
|
||||
offset = sector_div(sector, granularity);
|
||||
|
||||
/* And why do we do this modulus *again* in blkdev_issue_discard()? */
|
||||
offset = (granularity + alignment - offset) % granularity;
|
||||
|
||||
/* Turn it back into bytes, gaah */
|
||||
return offset << SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
|
||||
{
|
||||
sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
|
||||
@@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
|
||||
t->discard_granularity;
|
||||
}
|
||||
|
||||
t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
|
||||
b->max_secure_erase_sectors);
|
||||
t->zone_write_granularity = max(t->zone_write_granularity,
|
||||
b->zone_write_granularity);
|
||||
t->zoned = max(t->zoned, b->zoned);
|
||||
@@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
|
||||
|
||||
int bdev_alignment_offset(struct block_device *bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (q->limits.misaligned)
|
||||
return -1;
|
||||
if (bdev_is_partition(bdev))
|
||||
return queue_limit_alignment_offset(&q->limits,
|
||||
bdev->bd_start_sect);
|
||||
return q->limits.alignment_offset;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_alignment_offset);
|
||||
|
||||
unsigned int bdev_discard_alignment(struct block_device *bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (bdev_is_partition(bdev))
|
||||
return queue_limit_discard_alignment(&q->limits,
|
||||
bdev->bd_start_sect);
|
||||
return q->limits.discard_alignment;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_discard_alignment);
|
||||
|
||||
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
|
||||
break; \
|
||||
if ((__tg)) { \
|
||||
blk_add_cgroup_trace_msg(__td->queue, \
|
||||
tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
|
||||
&tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
|
||||
} else { \
|
||||
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
|
||||
} \
|
||||
@@ -2189,13 +2189,14 @@ again:
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
if (throttled || !td->track_bio_latency)
|
||||
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
|
||||
#endif
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
rcu_read_unlock();
|
||||
return throttled;
|
||||
}
|
||||
|
||||
21
block/blk.h
21
block/blk.h
@@ -346,20 +346,6 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
|
||||
return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
|
||||
}
|
||||
|
||||
/*
|
||||
* The max bio size which is aligned to q->limits.discard_granularity. This
|
||||
* is a hint to split large discard bio in generic block layer, then if device
|
||||
* driver needs to split the discard bio into smaller ones, their bi_size can
|
||||
* be very probably and easily aligned to discard_granularity of the device's
|
||||
* queue.
|
||||
*/
|
||||
static inline unsigned int bio_aligned_discard_max_sectors(
|
||||
struct request_queue *q)
|
||||
{
|
||||
return round_down(UINT_MAX, q->limits.discard_granularity) >>
|
||||
SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal io_context interface
|
||||
*/
|
||||
@@ -450,13 +436,6 @@ extern struct device_attribute dev_attr_events;
|
||||
extern struct device_attribute dev_attr_events_async;
|
||||
extern struct device_attribute dev_attr_events_poll_msecs;
|
||||
|
||||
static inline void bio_clear_polled(struct bio *bio)
|
||||
{
|
||||
/* can't support alloc cache if we turn off polling */
|
||||
bio_clear_flag(bio, BIO_PERCPU_CACHE);
|
||||
bio->bi_opf &= ~REQ_POLLED;
|
||||
}
|
||||
|
||||
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
|
||||
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
|
||||
|
||||
|
||||
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
|
||||
goto err_put;
|
||||
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
return bio;
|
||||
|
||||
|
||||
35
block/fops.c
35
block/fops.c
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
|
||||
|
||||
#define DIO_INLINE_BIO_VECS 4
|
||||
|
||||
static void blkdev_bio_end_io_simple(struct bio *bio)
|
||||
{
|
||||
struct task_struct *waiter = bio->bi_private;
|
||||
|
||||
WRITE_ONCE(bio->bi_private, NULL);
|
||||
blk_wake_io_task(waiter);
|
||||
}
|
||||
|
||||
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
|
||||
struct iov_iter *iter, unsigned int nr_pages)
|
||||
{
|
||||
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
|
||||
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
|
||||
}
|
||||
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
|
||||
bio.bi_private = current;
|
||||
bio.bi_end_io = blkdev_bio_end_io_simple;
|
||||
bio.bi_ioprio = iocb->ki_ioprio;
|
||||
|
||||
ret = bio_iov_iter_get_pages(&bio, iter);
|
||||
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
bio.bi_opf |= REQ_NOWAIT;
|
||||
if (iocb->ki_flags & IOCB_HIPRI)
|
||||
bio_set_polled(&bio, iocb);
|
||||
|
||||
submit_bio(&bio);
|
||||
for (;;) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (!READ_ONCE(bio.bi_private))
|
||||
break;
|
||||
if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
|
||||
blk_io_schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
submit_bio_wait(&bio);
|
||||
|
||||
bio_release_pages(&bio, should_dirty);
|
||||
if (unlikely(bio.bi_status))
|
||||
@@ -197,8 +177,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
(bdev_logical_block_size(bdev) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
|
||||
|
||||
if (iocb->ki_flags & IOCB_ALLOC_CACHE)
|
||||
opf |= REQ_ALLOC_CACHE;
|
||||
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
|
||||
&blkdev_dio_pool);
|
||||
dio = container_of(bio, struct blkdev_dio, bio);
|
||||
atomic_set(&dio->ref, 1);
|
||||
/*
|
||||
@@ -320,7 +302,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
|
||||
(bdev_logical_block_size(bdev) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
|
||||
if (iocb->ki_flags & IOCB_ALLOC_CACHE)
|
||||
opf |= REQ_ALLOC_CACHE;
|
||||
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
|
||||
&blkdev_dio_pool);
|
||||
dio = container_of(bio, struct blkdev_dio, bio);
|
||||
dio->flags = 0;
|
||||
dio->iocb = iocb;
|
||||
@@ -672,7 +657,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
|
||||
break;
|
||||
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
|
||||
error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
|
||||
len >> SECTOR_SHIFT, GFP_KERNEL, 0);
|
||||
len >> SECTOR_SHIFT, GFP_KERNEL);
|
||||
break;
|
||||
default:
|
||||
error = -EOPNOTSUPP;
|
||||
|
||||
@@ -1010,7 +1010,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
|
||||
return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
}
|
||||
|
||||
static ssize_t disk_discard_alignment_show(struct device *dev,
|
||||
@@ -1019,7 +1019,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
|
||||
return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
}
|
||||
|
||||
static ssize_t diskseq_show(struct device *dev,
|
||||
|
||||
@@ -83,18 +83,17 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
|
||||
#endif
|
||||
|
||||
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
|
||||
unsigned long arg, unsigned long flags)
|
||||
unsigned long arg)
|
||||
{
|
||||
uint64_t range[2];
|
||||
uint64_t start, len;
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
struct inode *inode = bdev->bd_inode;
|
||||
int err;
|
||||
|
||||
if (!(mode & FMODE_WRITE))
|
||||
return -EBADF;
|
||||
|
||||
if (!blk_queue_discard(q))
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
|
||||
@@ -115,15 +114,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
|
||||
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
|
||||
GFP_KERNEL, flags);
|
||||
|
||||
err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
|
||||
fail:
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
|
||||
void __user *argp)
|
||||
{
|
||||
uint64_t start, len;
|
||||
uint64_t range[2];
|
||||
int err;
|
||||
|
||||
if (!(mode & FMODE_WRITE))
|
||||
return -EBADF;
|
||||
if (!bdev_max_secure_erase_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
if (copy_from_user(range, argp, sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
start = range[0];
|
||||
len = range[1];
|
||||
if ((start & 511) || (len & 511))
|
||||
return -EINVAL;
|
||||
if (start + len > bdev_nr_bytes(bdev))
|
||||
return -EINVAL;
|
||||
|
||||
filemap_invalidate_lock(bdev->bd_inode->i_mapping);
|
||||
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
|
||||
if (!err)
|
||||
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
|
||||
GFP_KERNEL);
|
||||
filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
|
||||
unsigned long arg)
|
||||
{
|
||||
@@ -451,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
case BLKROSET:
|
||||
return blkdev_roset(bdev, mode, cmd, arg);
|
||||
case BLKDISCARD:
|
||||
return blk_ioctl_discard(bdev, mode, arg, 0);
|
||||
return blk_ioctl_discard(bdev, mode, arg);
|
||||
case BLKSECDISCARD:
|
||||
return blk_ioctl_discard(bdev, mode, arg,
|
||||
BLKDEV_DISCARD_SECURE);
|
||||
return blk_ioctl_secure_erase(bdev, mode, argp);
|
||||
case BLKZEROOUT:
|
||||
return blk_ioctl_zeroout(bdev, mode, arg);
|
||||
case BLKGETDISKSEQ:
|
||||
@@ -489,7 +515,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
queue_max_sectors(bdev_get_queue(bdev)));
|
||||
return put_ushort(argp, max_sectors);
|
||||
case BLKROTATIONAL:
|
||||
return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
|
||||
return put_ushort(argp, !bdev_nonrot(bdev));
|
||||
case BLKRASET:
|
||||
case BLKFRASET:
|
||||
if(!capable(CAP_SYS_ADMIN))
|
||||
|
||||
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
|
||||
#ifdef CONFIG_ACORN_PARTITION_RISCIX
|
||||
case PARTITION_RISCIX_SCSI:
|
||||
case PARTITION_RISCIX_MFM:
|
||||
slot = riscix_partition(state, start_sect, slot,
|
||||
riscix_partition(state, start_sect, slot,
|
||||
nr_sects);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PARTITION_LINUX:
|
||||
slot = linux_partition(state, start_sect, slot,
|
||||
linux_partition(state, start_sect, slot,
|
||||
nr_sects);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
|
||||
/* accept only GEM,BGM,RAW,LNX,SWP partitions */
|
||||
if (!((pi->flg & 1) && OK_id(pi->id)))
|
||||
continue;
|
||||
part_fmt = 2;
|
||||
put_partition (state, slot,
|
||||
be32_to_cpu(pi->st),
|
||||
be32_to_cpu(pi->siz));
|
||||
|
||||
@@ -200,21 +200,13 @@ static ssize_t part_ro_show(struct device *dev,
|
||||
static ssize_t part_alignment_offset_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct block_device *bdev = dev_to_bdev(dev);
|
||||
|
||||
return sprintf(buf, "%u\n",
|
||||
queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits,
|
||||
bdev->bd_start_sect));
|
||||
return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static ssize_t part_discard_alignment_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct block_device *bdev = dev_to_bdev(dev);
|
||||
|
||||
return sprintf(buf, "%u\n",
|
||||
queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits,
|
||||
bdev->bd_start_sect));
|
||||
return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
|
||||
@@ -486,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
|
||||
goto out_unlock;
|
||||
|
||||
ret = -EBUSY;
|
||||
if (part->bd_openers)
|
||||
if (atomic_read(&part->bd_openers))
|
||||
goto out_unlock;
|
||||
|
||||
delete_partition(part);
|
||||
|
||||
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
|
||||
len = r_cols;
|
||||
} else {
|
||||
r_stripe = 0;
|
||||
r_cols = 0;
|
||||
len = r_parent;
|
||||
}
|
||||
if (len < 0)
|
||||
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
|
||||
r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
|
||||
r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
|
||||
len = r_id2;
|
||||
} else {
|
||||
r_id1 = 0;
|
||||
r_id2 = 0;
|
||||
} else
|
||||
len = r_diskid;
|
||||
}
|
||||
if (len < 0)
|
||||
return false;
|
||||
|
||||
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
|
||||
r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
|
||||
r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
|
||||
len = r_id2;
|
||||
} else {
|
||||
r_id1 = 0;
|
||||
r_id2 = 0;
|
||||
} else
|
||||
len = r_name;
|
||||
}
|
||||
if (len < 0)
|
||||
return false;
|
||||
|
||||
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
|
||||
return false;
|
||||
}
|
||||
len = r_index;
|
||||
} else {
|
||||
r_index = 0;
|
||||
} else
|
||||
len = r_diskid;
|
||||
}
|
||||
if (len < 0) {
|
||||
ldm_error("len %d < 0", len);
|
||||
return false;
|
||||
|
||||
@@ -244,3 +244,5 @@ void aoenet_exit(void);
|
||||
void aoenet_xmit(struct sk_buff_head *);
|
||||
int is_aoe_netif(struct net_device *ifp);
|
||||
int set_aoe_iflist(const char __user *str, size_t size);
|
||||
|
||||
extern struct workqueue_struct *aoe_wq;
|
||||
|
||||
@@ -435,7 +435,7 @@ err_mempool:
|
||||
err:
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
d->flags &= ~DEVFL_GD_NOW;
|
||||
schedule_work(&d->work);
|
||||
queue_work(aoe_wq, &d->work);
|
||||
spin_unlock_irqrestore(&d->lock, flags);
|
||||
}
|
||||
|
||||
|
||||
@@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
|
||||
d->flags |= DEVFL_NEWSIZE;
|
||||
else
|
||||
d->flags |= DEVFL_GDALLOC;
|
||||
schedule_work(&d->work);
|
||||
queue_work(aoe_wq, &d->work);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
|
||||
specified = 1;
|
||||
}
|
||||
|
||||
flush_scheduled_work();
|
||||
flush_workqueue(aoe_wq);
|
||||
/* pass one: do aoedev_downdev, which might sleep */
|
||||
restart1:
|
||||
spin_lock_irqsave(&devlist_lock, flags);
|
||||
@@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
|
||||
void
|
||||
aoedev_exit(void)
|
||||
{
|
||||
flush_scheduled_work();
|
||||
flush_workqueue(aoe_wq);
|
||||
flush(NULL, 0, EXITING);
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
|
||||
MODULE_VERSION(VERSION);
|
||||
|
||||
static struct timer_list timer;
|
||||
struct workqueue_struct *aoe_wq;
|
||||
|
||||
static void discover_timer(struct timer_list *t)
|
||||
{
|
||||
@@ -35,6 +36,7 @@ aoe_exit(void)
|
||||
aoechr_exit();
|
||||
aoedev_exit();
|
||||
aoeblk_exit(); /* free cache after de-allocating bufs */
|
||||
destroy_workqueue(aoe_wq);
|
||||
}
|
||||
|
||||
static int __init
|
||||
@@ -42,9 +44,13 @@ aoe_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
|
||||
if (!aoe_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = aoedev_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
goto dev_fail;
|
||||
ret = aoechr_init();
|
||||
if (ret)
|
||||
goto chr_fail;
|
||||
@@ -77,6 +83,8 @@ aoe_init(void)
|
||||
aoechr_exit();
|
||||
chr_fail:
|
||||
aoedev_exit();
|
||||
dev_fail:
|
||||
destroy_workqueue(aoe_wq);
|
||||
|
||||
printk(KERN_INFO "aoe: initialisation failure.\n");
|
||||
return ret;
|
||||
|
||||
@@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
|
||||
}
|
||||
}
|
||||
|
||||
want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
|
||||
want = PFN_UP(words*sizeof(long));
|
||||
have = b->bm_number_of_pages;
|
||||
if (want == have) {
|
||||
D_ASSERT(device, b->bm_pages != NULL);
|
||||
|
||||
@@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
|
||||
}
|
||||
}
|
||||
|
||||
/* communicated if (agreed_features & DRBD_FF_WSAME) */
|
||||
static void
|
||||
assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
|
||||
struct request_queue *q)
|
||||
{
|
||||
if (q) {
|
||||
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
|
||||
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
|
||||
p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
|
||||
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
|
||||
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
|
||||
p->qlim->discard_enabled = blk_queue_discard(q);
|
||||
p->qlim->write_same_capable = 0;
|
||||
} else {
|
||||
q = device->rq_queue;
|
||||
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
|
||||
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
|
||||
p->qlim->alignment_offset = 0;
|
||||
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
|
||||
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
|
||||
p->qlim->discard_enabled = 0;
|
||||
p->qlim->write_same_capable = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
|
||||
{
|
||||
struct drbd_device *device = peer_device->device;
|
||||
@@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
|
||||
|
||||
memset(p, 0, packet_size);
|
||||
if (get_ldev_if_state(device, D_NEGOTIATING)) {
|
||||
struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
|
||||
struct block_device *bdev = device->ldev->backing_bdev;
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
d_size = drbd_get_max_capacity(device->ldev);
|
||||
rcu_read_lock();
|
||||
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
|
||||
@@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
|
||||
q_order_type = drbd_queue_order_type(device);
|
||||
max_bio_size = queue_max_hw_sectors(q) << 9;
|
||||
max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
|
||||
assign_p_sizes_qlim(device, p, q);
|
||||
p->qlim->physical_block_size =
|
||||
cpu_to_be32(bdev_physical_block_size(bdev));
|
||||
p->qlim->logical_block_size =
|
||||
cpu_to_be32(bdev_logical_block_size(bdev));
|
||||
p->qlim->alignment_offset =
|
||||
cpu_to_be32(bdev_alignment_offset(bdev));
|
||||
p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
|
||||
p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
|
||||
p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
|
||||
put_ldev(device);
|
||||
} else {
|
||||
struct request_queue *q = device->rq_queue;
|
||||
|
||||
p->qlim->physical_block_size =
|
||||
cpu_to_be32(queue_physical_block_size(q));
|
||||
p->qlim->logical_block_size =
|
||||
cpu_to_be32(queue_logical_block_size(q));
|
||||
p->qlim->alignment_offset = 0;
|
||||
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
|
||||
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
|
||||
p->qlim->discard_enabled = 0;
|
||||
|
||||
d_size = 0;
|
||||
u_size = 0;
|
||||
q_order_type = QUEUE_ORDERED_NONE;
|
||||
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
|
||||
assign_p_sizes_qlim(device, p, NULL);
|
||||
}
|
||||
|
||||
if (peer_device->connection->agreed_pro_version <= 94)
|
||||
@@ -3586,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
* when we want to support more than
|
||||
* one PRO_VERSION */
|
||||
static const char *cmdnames[] = {
|
||||
|
||||
[P_DATA] = "Data",
|
||||
[P_WSAME] = "WriteSame",
|
||||
[P_TRIM] = "Trim",
|
||||
[P_DATA_REPLY] = "DataReply",
|
||||
[P_RS_DATA_REPLY] = "RSDataReply",
|
||||
[P_BARRIER] = "Barrier",
|
||||
@@ -3599,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_DATA_REQUEST] = "DataRequest",
|
||||
[P_RS_DATA_REQUEST] = "RSDataRequest",
|
||||
[P_SYNC_PARAM] = "SyncParam",
|
||||
[P_SYNC_PARAM89] = "SyncParam89",
|
||||
[P_PROTOCOL] = "ReportProtocol",
|
||||
[P_UUIDS] = "ReportUUIDs",
|
||||
[P_SIZES] = "ReportSizes",
|
||||
@@ -3607,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_SYNC_UUID] = "ReportSyncUUID",
|
||||
[P_AUTH_CHALLENGE] = "AuthChallenge",
|
||||
[P_AUTH_RESPONSE] = "AuthResponse",
|
||||
[P_STATE_CHG_REQ] = "StateChgRequest",
|
||||
[P_PING] = "Ping",
|
||||
[P_PING_ACK] = "PingAck",
|
||||
[P_RECV_ACK] = "RecvAck",
|
||||
@@ -3617,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_NEG_DREPLY] = "NegDReply",
|
||||
[P_NEG_RS_DREPLY] = "NegRSDReply",
|
||||
[P_BARRIER_ACK] = "BarrierAck",
|
||||
[P_STATE_CHG_REQ] = "StateChgRequest",
|
||||
[P_STATE_CHG_REPLY] = "StateChgReply",
|
||||
[P_OV_REQUEST] = "OVRequest",
|
||||
[P_OV_REPLY] = "OVReply",
|
||||
[P_OV_RESULT] = "OVResult",
|
||||
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
|
||||
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
|
||||
[P_SYNC_PARAM89] = "SyncParam89",
|
||||
[P_COMPRESSED_BITMAP] = "CBitmap",
|
||||
[P_DELAY_PROBE] = "DelayProbe",
|
||||
[P_OUT_OF_SYNC] = "OutOfSync",
|
||||
[P_RETRY_WRITE] = "RetryWrite",
|
||||
[P_RS_CANCEL] = "RSCancel",
|
||||
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
|
||||
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
|
||||
[P_PROTOCOL_UPDATE] = "protocol_update",
|
||||
[P_TRIM] = "Trim",
|
||||
[P_RS_THIN_REQ] = "rs_thin_req",
|
||||
[P_RS_DEALLOCATED] = "rs_deallocated",
|
||||
[P_WSAME] = "WriteSame",
|
||||
[P_ZEROES] = "Zeroes",
|
||||
|
||||
/* enum drbd_packet, but not commands - obsoleted flags:
|
||||
* P_MAY_IGNORE
|
||||
|
||||
@@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
|
||||
struct set_role_parms parms;
|
||||
int err;
|
||||
enum drbd_ret_code retcode;
|
||||
enum drbd_state_rv rv;
|
||||
|
||||
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
|
||||
if (!adm_ctx.reply_skb)
|
||||
@@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
|
||||
mutex_lock(&adm_ctx.resource->adm_mutex);
|
||||
|
||||
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
|
||||
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
|
||||
R_PRIMARY, parms.assume_uptodate);
|
||||
rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
|
||||
else
|
||||
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
|
||||
R_SECONDARY, 0);
|
||||
rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
|
||||
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
genl_lock();
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
out:
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
return 0;
|
||||
@@ -1204,50 +1205,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
|
||||
}
|
||||
|
||||
static void decide_on_discard_support(struct drbd_device *device,
|
||||
struct request_queue *q,
|
||||
struct request_queue *b,
|
||||
bool discard_zeroes_if_aligned)
|
||||
struct drbd_backing_dev *bdev)
|
||||
{
|
||||
/* q = drbd device queue (device->rq_queue)
|
||||
* b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
|
||||
* or NULL if diskless
|
||||
struct drbd_connection *connection =
|
||||
first_peer_device(device)->connection;
|
||||
struct request_queue *q = device->rq_queue;
|
||||
|
||||
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
|
||||
goto not_supported;
|
||||
|
||||
if (connection->cstate >= C_CONNECTED &&
|
||||
!(connection->agreed_features & DRBD_FF_TRIM)) {
|
||||
drbd_info(connection,
|
||||
"peer DRBD too old, does not support TRIM: disabling discards\n");
|
||||
goto not_supported;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't care for the granularity, really.
|
||||
*
|
||||
* Stacking limits below should fix it for the local device. Whether or
|
||||
* not it is a suitable granularity on the remote device is not our
|
||||
* problem, really. If you care, you need to use devices with similar
|
||||
* topology on all peers.
|
||||
*/
|
||||
struct drbd_connection *connection = first_peer_device(device)->connection;
|
||||
bool can_do = b ? blk_queue_discard(b) : true;
|
||||
blk_queue_discard_granularity(q, 512);
|
||||
q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
|
||||
q->limits.max_write_zeroes_sectors =
|
||||
drbd_max_discard_sectors(connection);
|
||||
return;
|
||||
|
||||
if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
|
||||
can_do = false;
|
||||
drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
|
||||
}
|
||||
if (can_do) {
|
||||
/* We don't care for the granularity, really.
|
||||
* Stacking limits below should fix it for the local
|
||||
* device. Whether or not it is a suitable granularity
|
||||
* on the remote device is not our problem, really. If
|
||||
* you care, you need to use devices with similar
|
||||
* topology on all peers. */
|
||||
blk_queue_discard_granularity(q, 512);
|
||||
q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
|
||||
} else {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
q->limits.max_discard_sectors = 0;
|
||||
q->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void fixup_discard_if_not_supported(struct request_queue *q)
|
||||
{
|
||||
/* To avoid confusion, if this queue does not support discard, clear
|
||||
* max_discard_sectors, which is what lsblk -D reports to the user.
|
||||
* Older kernels got this wrong in "stack limits".
|
||||
* */
|
||||
if (!blk_queue_discard(q)) {
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
}
|
||||
not_supported:
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
q->limits.max_discard_sectors = 0;
|
||||
q->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
|
||||
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
|
||||
@@ -1273,7 +1264,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
|
||||
unsigned int max_segments = 0;
|
||||
struct request_queue *b = NULL;
|
||||
struct disk_conf *dc;
|
||||
bool discard_zeroes_if_aligned = true;
|
||||
|
||||
if (bdev) {
|
||||
b = bdev->backing_bdev->bd_disk->queue;
|
||||
@@ -1282,7 +1272,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
|
||||
rcu_read_lock();
|
||||
dc = rcu_dereference(device->ldev->disk_conf);
|
||||
max_segments = dc->max_bio_bvecs;
|
||||
discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
|
||||
rcu_read_unlock();
|
||||
|
||||
blk_set_stacking_limits(&q->limits);
|
||||
@@ -1292,13 +1281,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
|
||||
/* This is the workaround for "bio would need to, but cannot, be split" */
|
||||
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE-1);
|
||||
decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
|
||||
decide_on_discard_support(device, bdev);
|
||||
|
||||
if (b) {
|
||||
blk_stack_limits(&q->limits, &b->limits, 0);
|
||||
disk_update_readahead(device->vdisk);
|
||||
}
|
||||
fixup_discard_if_not_supported(q);
|
||||
fixup_write_zeroes(device, q);
|
||||
}
|
||||
|
||||
@@ -1437,14 +1425,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
|
||||
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
|
||||
struct drbd_backing_dev *nbc)
|
||||
{
|
||||
struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
|
||||
struct block_device *bdev = nbc->backing_bdev;
|
||||
|
||||
if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
|
||||
disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
|
||||
if (disk_conf->al_extents > drbd_al_extents_max(nbc))
|
||||
disk_conf->al_extents = drbd_al_extents_max(nbc);
|
||||
|
||||
if (!blk_queue_discard(q)) {
|
||||
if (!bdev_max_discard_sectors(bdev)) {
|
||||
if (disk_conf->rs_discard_granularity) {
|
||||
disk_conf->rs_discard_granularity = 0; /* disable feature */
|
||||
drbd_info(device, "rs_discard_granularity feature disabled\n");
|
||||
@@ -1453,16 +1441,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
|
||||
|
||||
if (disk_conf->rs_discard_granularity) {
|
||||
int orig_value = disk_conf->rs_discard_granularity;
|
||||
sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
|
||||
unsigned int discard_granularity = bdev_discard_granularity(bdev);
|
||||
int remainder;
|
||||
|
||||
if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
|
||||
disk_conf->rs_discard_granularity = q->limits.discard_granularity;
|
||||
if (discard_granularity > disk_conf->rs_discard_granularity)
|
||||
disk_conf->rs_discard_granularity = discard_granularity;
|
||||
|
||||
remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
|
||||
remainder = disk_conf->rs_discard_granularity %
|
||||
discard_granularity;
|
||||
disk_conf->rs_discard_granularity += remainder;
|
||||
|
||||
if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
|
||||
disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
|
||||
if (disk_conf->rs_discard_granularity > discard_size)
|
||||
disk_conf->rs_discard_granularity = discard_size;
|
||||
|
||||
if (disk_conf->rs_discard_granularity != orig_value)
|
||||
drbd_info(device, "rs_discard_granularity changed to %d\n",
|
||||
@@ -1611,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
|
||||
drbd_send_sync_param(peer_device);
|
||||
}
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
kfree(old_plan);
|
||||
mod_timer(&device->request_timer, jiffies + HZ);
|
||||
goto success;
|
||||
@@ -2443,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
|
||||
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
mutex_unlock(&connection->data.mutex);
|
||||
synchronize_rcu();
|
||||
kfree(old_net_conf);
|
||||
kvfree_rcu(old_net_conf);
|
||||
|
||||
if (connection->cstate >= C_WF_REPORT_PARAMS) {
|
||||
struct drbd_peer_device *peer_device;
|
||||
@@ -2502,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
struct drbd_resource *resource;
|
||||
struct drbd_connection *connection;
|
||||
enum drbd_ret_code retcode;
|
||||
enum drbd_state_rv rv;
|
||||
int i;
|
||||
int err;
|
||||
|
||||
@@ -2621,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
retcode = (enum drbd_ret_code)conn_request_state(connection,
|
||||
NS(conn, C_UNCONNECTED), CS_VERBOSE);
|
||||
rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
|
||||
|
||||
conn_reconfig_done(connection);
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
@@ -2734,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
|
||||
|
||||
mutex_lock(&adm_ctx.resource->adm_mutex);
|
||||
rv = conn_try_disconnect(connection, parms.force_disconnect);
|
||||
if (rv < SS_SUCCESS)
|
||||
retcode = (enum drbd_ret_code)rv;
|
||||
else
|
||||
retcode = NO_ERROR;
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
if (rv < SS_SUCCESS) {
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
}
|
||||
retcode = NO_ERROR;
|
||||
fail:
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
return 0;
|
||||
@@ -2857,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
new_disk_conf->disk_size = (sector_t)rs.resize_size;
|
||||
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
|
||||
mutex_unlock(&device->resource->conf_update);
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
new_disk_conf = NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_peer_request *peer_req;
|
||||
struct page *page = NULL;
|
||||
unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
unsigned int nr_pages = PFN_UP(payload_size);
|
||||
|
||||
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
|
||||
return NULL;
|
||||
@@ -1511,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
|
||||
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
|
||||
{
|
||||
struct block_device *bdev = device->ldev->backing_bdev;
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
sector_t tmp, nr;
|
||||
unsigned int max_discard_sectors, granularity;
|
||||
int alignment;
|
||||
@@ -1521,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
|
||||
goto zero_out;
|
||||
|
||||
/* Zero-sector (unknown) and one-sector granularities are the same. */
|
||||
granularity = max(q->limits.discard_granularity >> 9, 1U);
|
||||
granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
|
||||
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
|
||||
|
||||
max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
|
||||
max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
|
||||
max_discard_sectors -= max_discard_sectors % granularity;
|
||||
if (unlikely(!max_discard_sectors))
|
||||
goto zero_out;
|
||||
@@ -1548,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
|
||||
start = tmp;
|
||||
}
|
||||
while (nr_sectors >= max_discard_sectors) {
|
||||
err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
|
||||
err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
|
||||
GFP_NOIO);
|
||||
nr_sectors -= max_discard_sectors;
|
||||
start += max_discard_sectors;
|
||||
}
|
||||
@@ -1560,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
|
||||
nr = nr_sectors;
|
||||
nr -= (unsigned int)nr % granularity;
|
||||
if (nr) {
|
||||
err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
|
||||
err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
|
||||
nr_sectors -= nr;
|
||||
start += nr;
|
||||
}
|
||||
@@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
|
||||
|
||||
static bool can_do_reliable_discards(struct drbd_device *device)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
|
||||
struct disk_conf *dc;
|
||||
bool can_do;
|
||||
|
||||
if (!blk_queue_discard(q))
|
||||
if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -1629,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
struct bio *bio;
|
||||
struct page *page = peer_req->pages;
|
||||
sector_t sector = peer_req->i.sector;
|
||||
unsigned data_size = peer_req->i.size;
|
||||
unsigned n_bios = 0;
|
||||
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
unsigned int data_size = peer_req->i.size;
|
||||
unsigned int n_bios = 0;
|
||||
unsigned int nr_pages = PFN_UP(data_size);
|
||||
|
||||
/* TRIM/DISCARD: for now, always use the helper function
|
||||
* blkdev_issue_zeroout(..., discard=true).
|
||||
@@ -3751,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
||||
drbd_info(connection, "peer data-integrity-alg: %s\n",
|
||||
integrity_alg[0] ? integrity_alg : "(none)");
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_net_conf);
|
||||
kvfree_rcu(old_net_conf);
|
||||
return 0;
|
||||
|
||||
disconnect_rcu_unlock:
|
||||
@@ -3903,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
|
||||
drbd_err(device, "verify-alg of wrong size, "
|
||||
"peer wants %u, accepting only up to %u byte\n",
|
||||
data_size, SHARED_SECRET_MAX);
|
||||
err = -EIO;
|
||||
goto reconnect;
|
||||
}
|
||||
|
||||
@@ -4121,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
|
||||
|
||||
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
|
||||
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
|
||||
(unsigned long)p_usize, (unsigned long)my_usize);
|
||||
|
||||
@@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
|
||||
|
||||
switch (rbm) {
|
||||
case RB_CONGESTED_REMOTE:
|
||||
return 0;
|
||||
return false;
|
||||
case RB_LEAST_PENDING:
|
||||
return atomic_read(&device->local_cnt) >
|
||||
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
|
||||
|
||||
@@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||
conn_free_crypto(connection);
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_conf);
|
||||
kvfree_rcu(old_conf);
|
||||
}
|
||||
|
||||
if (ns_max.susp_fen) {
|
||||
|
||||
@@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
|
||||
{
|
||||
if (drbd_peer_req_has_active_page(peer_req)) {
|
||||
/* This might happen if sendpage() has not finished */
|
||||
int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
int i = PFN_UP(peer_req->i.size);
|
||||
atomic_add(i, &device->pp_in_use_by_net);
|
||||
atomic_sub(i, &device->pp_in_use);
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
|
||||
@@ -1,54 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* linux/drivers/block/loop.c
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
*
|
||||
* DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
|
||||
* more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
|
||||
*
|
||||
* Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
|
||||
* Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
|
||||
*
|
||||
* Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
|
||||
*
|
||||
* Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
|
||||
*
|
||||
* Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
|
||||
*
|
||||
* Loadable modules and other fixes by AK, 1998
|
||||
*
|
||||
* Make real block number available to downstream transfer functions, enables
|
||||
* CBC (and relatives) mode encryption requiring unique IVs per data block.
|
||||
* Reed H. Petty, rhp@draper.net
|
||||
*
|
||||
* Maximum number of loop devices now dynamic via max_loop module parameter.
|
||||
* Russell Kroll <rkroll@exploits.org> 19990701
|
||||
*
|
||||
* Maximum number of loop devices when compiled-in now selectable by passing
|
||||
* max_loop=<1-255> to the kernel on boot.
|
||||
* Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
|
||||
*
|
||||
* Completely rewrite request handling to be make_request_fn style and
|
||||
* non blocking, pushing work to a helper thread. Lots of fixes from
|
||||
* Al Viro too.
|
||||
* Jens Axboe <axboe@suse.de>, Nov 2000
|
||||
*
|
||||
* Support up to 256 loop devices
|
||||
* Heinz Mauelshagen <mge@sistina.com>, Feb 2002
|
||||
*
|
||||
* Support for falling back on the write file operation when the address space
|
||||
* operations write_begin is not available on the backing filesystem.
|
||||
* Anton Altaparmakov, 16 Feb 2005
|
||||
*
|
||||
* Still To Fix:
|
||||
* - Advisory locking is ignored here.
|
||||
* - Should use an own CAP_* category instead of CAP_SYS_ADMIN
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/sched.h>
|
||||
@@ -59,7 +12,6 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/swap.h>
|
||||
@@ -80,10 +32,62 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/statfs.h>
|
||||
|
||||
#include "loop.h"
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <uapi/linux/loop.h>
|
||||
|
||||
/* Possible states of device */
|
||||
enum {
|
||||
Lo_unbound,
|
||||
Lo_bound,
|
||||
Lo_rundown,
|
||||
Lo_deleting,
|
||||
};
|
||||
|
||||
struct loop_func_table;
|
||||
|
||||
struct loop_device {
|
||||
int lo_number;
|
||||
loff_t lo_offset;
|
||||
loff_t lo_sizelimit;
|
||||
int lo_flags;
|
||||
char lo_file_name[LO_NAME_SIZE];
|
||||
|
||||
struct file * lo_backing_file;
|
||||
struct block_device *lo_device;
|
||||
|
||||
gfp_t old_gfp_mask;
|
||||
|
||||
spinlock_t lo_lock;
|
||||
int lo_state;
|
||||
spinlock_t lo_work_lock;
|
||||
struct workqueue_struct *workqueue;
|
||||
struct work_struct rootcg_work;
|
||||
struct list_head rootcg_cmd_list;
|
||||
struct list_head idle_worker_list;
|
||||
struct rb_root worker_tree;
|
||||
struct timer_list timer;
|
||||
bool use_dio;
|
||||
bool sysfs_inited;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *lo_disk;
|
||||
struct mutex lo_mutex;
|
||||
bool idr_visible;
|
||||
};
|
||||
|
||||
struct loop_cmd {
|
||||
struct list_head list_entry;
|
||||
bool use_aio; /* use AIO interface to handle I/O */
|
||||
atomic_t ref; /* only for aio */
|
||||
long ret;
|
||||
struct kiocb iocb;
|
||||
struct bio_vec *bvec;
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
struct cgroup_subsys_state *memcg_css;
|
||||
};
|
||||
|
||||
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
|
||||
#define LOOP_DEFAULT_HW_Q_DEPTH (128)
|
||||
@@ -314,15 +318,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
|
||||
|
||||
mode |= FALLOC_FL_KEEP_SIZE;
|
||||
|
||||
if (!blk_queue_discard(lo->lo_queue)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
if (!bdev_max_discard_sectors(lo->lo_device))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
|
||||
if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
|
||||
ret = -EIO;
|
||||
out:
|
||||
return -EIO;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
/* suppress uevents while reconfiguring the device */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
|
||||
|
||||
is_loop = is_loop_device(file);
|
||||
error = loop_global_lock_killable(lo, is_loop);
|
||||
if (error)
|
||||
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
fput(old_file);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo);
|
||||
return 0;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
return error;
|
||||
|
||||
out_err:
|
||||
loop_global_unlock(lo, is_loop);
|
||||
out_putf:
|
||||
fput(file);
|
||||
return error;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* loop sysfs attributes */
|
||||
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
|
||||
struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
|
||||
|
||||
max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
|
||||
granularity = backingq->limits.discard_granularity ?:
|
||||
granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
|
||||
queue_physical_block_size(backingq);
|
||||
|
||||
/*
|
||||
@@ -787,14 +797,11 @@ static void loop_config_discard(struct loop_device *lo)
|
||||
q->limits.discard_granularity = granularity;
|
||||
blk_queue_max_discard_sectors(q, max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
} else {
|
||||
q->limits.discard_granularity = 0;
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
blk_queue_max_write_zeroes_sectors(q, 0);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
|
||||
}
|
||||
q->limits.discard_alignment = 0;
|
||||
}
|
||||
|
||||
struct loop_worker {
|
||||
@@ -808,8 +815,6 @@ struct loop_worker {
|
||||
};
|
||||
|
||||
static void loop_workfn(struct work_struct *work);
|
||||
static void loop_rootcg_workfn(struct work_struct *work);
|
||||
static void loop_free_idle_workers(struct timer_list *timer);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
|
||||
@@ -893,6 +898,39 @@ queue_work:
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
static void loop_set_timer(struct loop_device *lo)
|
||||
{
|
||||
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
|
||||
}
|
||||
|
||||
static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
|
||||
{
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
if (!delete_all &&
|
||||
time_is_after_jiffies(worker->last_ran_at +
|
||||
LOOP_IDLE_WORKER_TIMEOUT))
|
||||
break;
|
||||
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
if (!list_empty(&lo->idle_worker_list))
|
||||
loop_set_timer(lo);
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
static void loop_free_idle_workers_timer(struct timer_list *timer)
|
||||
{
|
||||
struct loop_device *lo = container_of(timer, struct loop_device, timer);
|
||||
|
||||
return loop_free_idle_workers(lo, false);
|
||||
}
|
||||
|
||||
static void loop_update_rotational(struct loop_device *lo)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
@@ -903,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
|
||||
|
||||
/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
|
||||
if (file_bdev)
|
||||
nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
|
||||
nonrot = bdev_nonrot(file_bdev);
|
||||
|
||||
if (nonrot)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
@@ -967,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
/* This is safe, since we have a reference from open(). */
|
||||
__module_get(THIS_MODULE);
|
||||
|
||||
/* suppress uevents while reconfiguring the device */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
|
||||
|
||||
/*
|
||||
* If we don't hold exclusive handle for the device, upgrade to it
|
||||
* here to avoid changing device under exclusive owner.
|
||||
@@ -1011,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
!file->f_op->write_iter)
|
||||
lo->lo_flags |= LO_FLAGS_READ_ONLY;
|
||||
|
||||
lo->workqueue = alloc_workqueue("loop%d",
|
||||
WQ_UNBOUND | WQ_FREEZABLE,
|
||||
0,
|
||||
lo->lo_number);
|
||||
if (!lo->workqueue) {
|
||||
error = -ENOMEM;
|
||||
goto out_unlock;
|
||||
lo->workqueue = alloc_workqueue("loop%d",
|
||||
WQ_UNBOUND | WQ_FREEZABLE,
|
||||
0, lo->lo_number);
|
||||
if (!lo->workqueue) {
|
||||
error = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
|
||||
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
|
||||
|
||||
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
|
||||
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
|
||||
INIT_LIST_HEAD(&lo->idle_worker_list);
|
||||
lo->worker_tree = RB_ROOT;
|
||||
timer_setup(&lo->timer, loop_free_idle_workers,
|
||||
TIMER_DEFERRABLE);
|
||||
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
|
||||
lo->lo_device = bdev;
|
||||
lo->lo_backing_file = file;
|
||||
@@ -1073,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
loop_reread_partitions(lo);
|
||||
if (!(mode & FMODE_EXCL))
|
||||
bd_abort_claiming(bdev, loop_configure);
|
||||
return 0;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
return error;
|
||||
|
||||
out_unlock:
|
||||
loop_global_unlock(lo, is_loop);
|
||||
@@ -1084,53 +1125,24 @@ out_putf:
|
||||
fput(file);
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
return error;
|
||||
goto done;
|
||||
}
|
||||
|
||||
static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
{
|
||||
struct file *filp;
|
||||
gfp_t gfp = lo->old_gfp_mask;
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
/*
|
||||
* Flush loop_configure() and loop_change_fd(). It is acceptable for
|
||||
* loop_validate_file() to succeed, for actual clear operation has not
|
||||
* started yet.
|
||||
*/
|
||||
mutex_lock(&loop_validate_mutex);
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
/*
|
||||
* loop_validate_file() now fails because l->lo_state != Lo_bound
|
||||
* became visible.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
|
||||
* after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
|
||||
* lo->lo_state has changed while waiting for lo->lo_mutex.
|
||||
*/
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
BUG_ON(lo->lo_state != Lo_rundown);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
|
||||
blk_queue_write_cache(lo->lo_queue, false, false);
|
||||
|
||||
/* freeze request queue during the transition */
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
destroy_workqueue(lo->workqueue);
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
del_timer_sync(&lo->timer);
|
||||
/*
|
||||
* Freeze the request queue when unbinding on a live file descriptor and
|
||||
* thus an open device. When called from ->release we are guaranteed
|
||||
* that there is no I/O in progress already.
|
||||
*/
|
||||
if (!release)
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
filp = lo->lo_backing_file;
|
||||
@@ -1151,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
mapping_set_gfp_mask(filp->f_mapping, gfp);
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
if (!release)
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
|
||||
|
||||
@@ -1202,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = mutex_lock_killable(&lo->lo_mutex);
|
||||
/*
|
||||
* Since lo_ioctl() is called without locks held, it is possible that
|
||||
* loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
|
||||
*
|
||||
* Therefore, use global lock when setting Lo_rundown state in order to
|
||||
* make sure that loop_validate_file() will fail if the "struct file"
|
||||
* which loop_configure()/loop_change_fd() found via fget() was this
|
||||
* loop device.
|
||||
*/
|
||||
err = loop_global_lock_killable(lo, true);
|
||||
if (err)
|
||||
return err;
|
||||
if (lo->lo_state != Lo_bound) {
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
return -ENXIO;
|
||||
}
|
||||
/*
|
||||
@@ -1219,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
|
||||
* command to fail with EBUSY.
|
||||
*/
|
||||
if (atomic_read(&lo->lo_refcnt) > 1) {
|
||||
if (disk_openers(lo->lo_disk) > 1) {
|
||||
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
return 0;
|
||||
}
|
||||
lo->lo_state = Lo_rundown;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
|
||||
__loop_clr_fd(lo, false);
|
||||
return 0;
|
||||
@@ -1257,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
|
||||
/* I/O need to be drained during transfer transition */
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
/* If any pages were dirtied after invalidate_bdev(), try again */
|
||||
err = -EAGAIN;
|
||||
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name,
|
||||
lo->lo_device->bd_inode->i_mapping->nrpages);
|
||||
goto out_unfreeze;
|
||||
}
|
||||
|
||||
prev_lo_flags = lo->lo_flags;
|
||||
|
||||
err = loop_set_status_from_info(lo, info);
|
||||
@@ -1476,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
invalidate_bdev(lo->lo_device);
|
||||
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
/* invalidate_bdev should have truncated all the pages */
|
||||
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
err = -EAGAIN;
|
||||
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name,
|
||||
lo->lo_device->bd_inode->i_mapping->nrpages);
|
||||
goto out_unfreeze;
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_physical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_io_min(lo->lo_queue, arg);
|
||||
loop_update_dio(lo);
|
||||
out_unfreeze:
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
return err;
|
||||
@@ -1720,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int lo_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
struct loop_device *lo = bdev->bd_disk->private_data;
|
||||
int err;
|
||||
|
||||
err = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (err)
|
||||
return err;
|
||||
if (lo->lo_state == Lo_deleting)
|
||||
err = -ENXIO;
|
||||
else
|
||||
atomic_inc(&lo->lo_refcnt);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (atomic_dec_return(&lo->lo_refcnt))
|
||||
goto out_unlock;
|
||||
if (disk_openers(disk) > 0)
|
||||
return;
|
||||
|
||||
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
|
||||
if (lo->lo_state != Lo_bound)
|
||||
goto out_unlock;
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
|
||||
lo->lo_state = Lo_rundown;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
/*
|
||||
@@ -1755,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||
*/
|
||||
__loop_clr_fd(lo, true);
|
||||
return;
|
||||
} else if (lo->lo_state == Lo_bound) {
|
||||
/*
|
||||
* Otherwise keep thread (if running) and config,
|
||||
* but flush possible ongoing bios in thread.
|
||||
*/
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
}
|
||||
|
||||
static void lo_free_disk(struct gendisk *disk)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
|
||||
if (lo->workqueue)
|
||||
destroy_workqueue(lo->workqueue);
|
||||
loop_free_idle_workers(lo, true);
|
||||
del_timer_sync(&lo->timer);
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
}
|
||||
|
||||
static const struct block_device_operations lo_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = lo_open,
|
||||
.release = lo_release,
|
||||
.ioctl = lo_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = lo_compat_ioctl,
|
||||
#endif
|
||||
.free_disk = lo_free_disk,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1834,12 +1821,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
cmd->blkcg_css = NULL;
|
||||
cmd->memcg_css = NULL;
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
if (rq->bio && rq->bio->bi_blkg) {
|
||||
cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
|
||||
if (rq->bio) {
|
||||
cmd->blkcg_css = bio_blkcg_css(rq->bio);
|
||||
#ifdef CONFIG_MEMCG
|
||||
cmd->memcg_css =
|
||||
cgroup_get_e_css(cmd->blkcg_css->cgroup,
|
||||
&memory_cgrp_subsys);
|
||||
if (cmd->blkcg_css) {
|
||||
cmd->memcg_css =
|
||||
cgroup_get_e_css(cmd->blkcg_css->cgroup,
|
||||
&memory_cgrp_subsys);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
@@ -1888,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
static void loop_set_timer(struct loop_device *lo)
|
||||
{
|
||||
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
|
||||
}
|
||||
|
||||
static void loop_process_work(struct loop_worker *worker,
|
||||
struct list_head *cmd_list, struct loop_device *lo)
|
||||
{
|
||||
@@ -1941,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
|
||||
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
|
||||
}
|
||||
|
||||
static void loop_free_idle_workers(struct timer_list *timer)
|
||||
{
|
||||
struct loop_device *lo = container_of(timer, struct loop_device, timer);
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
if (time_is_after_jiffies(worker->last_ran_at +
|
||||
LOOP_IDLE_WORKER_TIMEOUT))
|
||||
break;
|
||||
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
if (!list_empty(&lo->idle_worker_list))
|
||||
loop_set_timer(lo);
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops loop_mq_ops = {
|
||||
.queue_rq = loop_queue_rq,
|
||||
.complete = lo_complete_rq,
|
||||
@@ -1977,6 +1940,9 @@ static int loop_add(int i)
|
||||
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
|
||||
if (!lo)
|
||||
goto out;
|
||||
lo->worker_tree = RB_ROOT;
|
||||
INIT_LIST_HEAD(&lo->idle_worker_list);
|
||||
timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
|
||||
lo->lo_state = Lo_unbound;
|
||||
|
||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
||||
@@ -2046,11 +2012,12 @@ static int loop_add(int i)
|
||||
*/
|
||||
if (!part_shift)
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
atomic_set(&lo->lo_refcnt, 0);
|
||||
mutex_init(&lo->lo_mutex);
|
||||
lo->lo_number = i;
|
||||
spin_lock_init(&lo->lo_lock);
|
||||
spin_lock_init(&lo->lo_work_lock);
|
||||
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
|
||||
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
|
||||
disk->major = LOOP_MAJOR;
|
||||
disk->first_minor = i << part_shift;
|
||||
disk->minors = 1 << part_shift;
|
||||
@@ -2090,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
|
||||
{
|
||||
/* Make this loop device unreachable from pathname. */
|
||||
del_gendisk(lo->lo_disk);
|
||||
blk_cleanup_disk(lo->lo_disk);
|
||||
blk_cleanup_queue(lo->lo_disk->queue);
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
idr_remove(&loop_index_idr, lo->lo_number);
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
/* There is no route which can find this loop device. */
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
|
||||
put_disk(lo->lo_disk);
|
||||
}
|
||||
|
||||
static void loop_probe(dev_t dev)
|
||||
@@ -2137,13 +2103,12 @@ static int loop_control_remove(int idx)
|
||||
ret = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (ret)
|
||||
goto mark_visible;
|
||||
if (lo->lo_state != Lo_unbound ||
|
||||
atomic_read(&lo->lo_refcnt) > 0) {
|
||||
if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
ret = -EBUSY;
|
||||
goto mark_visible;
|
||||
}
|
||||
/* Mark this loop device no longer open()-able. */
|
||||
/* Mark this loop device as no more bound, but not quite unbound yet */
|
||||
lo->lo_state = Lo_deleting;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
* loop.h
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93.
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
*/
|
||||
#ifndef _LINUX_LOOP_H
|
||||
#define _LINUX_LOOP_H
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <uapi/linux/loop.h>
|
||||
|
||||
/* Possible states of device */
|
||||
enum {
|
||||
Lo_unbound,
|
||||
Lo_bound,
|
||||
Lo_rundown,
|
||||
Lo_deleting,
|
||||
};
|
||||
|
||||
struct loop_func_table;
|
||||
|
||||
struct loop_device {
|
||||
int lo_number;
|
||||
atomic_t lo_refcnt;
|
||||
loff_t lo_offset;
|
||||
loff_t lo_sizelimit;
|
||||
int lo_flags;
|
||||
char lo_file_name[LO_NAME_SIZE];
|
||||
|
||||
struct file * lo_backing_file;
|
||||
struct block_device *lo_device;
|
||||
|
||||
gfp_t old_gfp_mask;
|
||||
|
||||
spinlock_t lo_lock;
|
||||
int lo_state;
|
||||
spinlock_t lo_work_lock;
|
||||
struct workqueue_struct *workqueue;
|
||||
struct work_struct rootcg_work;
|
||||
struct list_head rootcg_cmd_list;
|
||||
struct list_head idle_worker_list;
|
||||
struct rb_root worker_tree;
|
||||
struct timer_list timer;
|
||||
bool use_dio;
|
||||
bool sysfs_inited;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *lo_disk;
|
||||
struct mutex lo_mutex;
|
||||
bool idr_visible;
|
||||
};
|
||||
|
||||
struct loop_cmd {
|
||||
struct list_head list_entry;
|
||||
bool use_aio; /* use AIO interface to handle I/O */
|
||||
atomic_t ref; /* only for aio */
|
||||
long ret;
|
||||
struct kiocb iocb;
|
||||
struct bio_vec *bvec;
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
struct cgroup_subsys_state *memcg_css;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
|
||||
{
|
||||
struct mtip_port *port = dd->port;
|
||||
|
||||
/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
|
||||
/* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
|
||||
port->block1 =
|
||||
dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
|
||||
&port->block1_dma, GFP_KERNEL);
|
||||
|
||||
@@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
|
||||
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
|
||||
nbd->disk->queue->limits.discard_granularity = blksize;
|
||||
nbd->disk->queue->limits.discard_alignment = blksize;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
}
|
||||
blk_queue_logical_block_size(nbd->disk->queue, blksize);
|
||||
@@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
|
||||
struct nbd_config *config = nbd->config;
|
||||
if (!config->dead_conn_timeout)
|
||||
return 0;
|
||||
if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
|
||||
|
||||
if (!wait_event_timeout(config->conn_wait,
|
||||
test_bit(NBD_RT_DISCONNECTED,
|
||||
&config->runtime_flags) ||
|
||||
atomic_read(&config->live_connections) > 0,
|
||||
config->dead_conn_timeout))
|
||||
return 0;
|
||||
return wait_event_timeout(config->conn_wait,
|
||||
atomic_read(&config->live_connections) > 0,
|
||||
config->dead_conn_timeout) > 0;
|
||||
|
||||
return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
|
||||
}
|
||||
|
||||
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
|
||||
@@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
static void nbd_bdev_reset(struct block_device *bdev)
|
||||
static void nbd_bdev_reset(struct nbd_device *nbd)
|
||||
{
|
||||
if (bdev->bd_openers > 1)
|
||||
if (disk_openers(nbd->disk) > 1)
|
||||
return;
|
||||
set_capacity(bdev->bd_disk, 0);
|
||||
set_capacity(nbd->disk, 0);
|
||||
}
|
||||
|
||||
static void nbd_parse_flags(struct nbd_device *nbd)
|
||||
@@ -1231,8 +1234,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
|
||||
set_disk_ro(nbd->disk, true);
|
||||
else
|
||||
set_disk_ro(nbd->disk, false);
|
||||
if (config->flags & NBD_FLAG_SEND_TRIM)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
|
||||
if (config->flags & NBD_FLAG_SEND_FLUSH) {
|
||||
if (config->flags & NBD_FLAG_SEND_FUA)
|
||||
blk_queue_write_cache(nbd->disk->queue, true, true);
|
||||
@@ -1318,9 +1319,7 @@ static void nbd_config_put(struct nbd_device *nbd)
|
||||
|
||||
nbd->tag_set.timeout = 0;
|
||||
nbd->disk->queue->limits.discard_granularity = 0;
|
||||
nbd->disk->queue->limits.discard_alignment = 0;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, 0);
|
||||
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
nbd_put(nbd);
|
||||
@@ -1389,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
|
||||
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
|
||||
}
|
||||
|
||||
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
|
||||
static int nbd_start_device_ioctl(struct nbd_device *nbd)
|
||||
{
|
||||
struct nbd_config *config = nbd->config;
|
||||
int ret;
|
||||
@@ -1408,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
|
||||
flush_workqueue(nbd->recv_workq);
|
||||
|
||||
mutex_lock(&nbd->config_lock);
|
||||
nbd_bdev_reset(bdev);
|
||||
nbd_bdev_reset(nbd);
|
||||
/* user requested, ignore socket errors */
|
||||
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
|
||||
ret = 0;
|
||||
@@ -1422,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
|
||||
{
|
||||
sock_shutdown(nbd);
|
||||
__invalidate_device(bdev, true);
|
||||
nbd_bdev_reset(bdev);
|
||||
nbd_bdev_reset(nbd);
|
||||
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
|
||||
&nbd->config->runtime_flags))
|
||||
nbd_config_put(nbd);
|
||||
@@ -1468,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
config->flags = arg;
|
||||
return 0;
|
||||
case NBD_DO_IT:
|
||||
return nbd_start_device_ioctl(nbd, bdev);
|
||||
return nbd_start_device_ioctl(nbd);
|
||||
case NBD_CLEAR_QUE:
|
||||
/*
|
||||
* This is for compatibility only. The queue is always cleared
|
||||
@@ -1579,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
|
||||
struct nbd_device *nbd = disk->private_data;
|
||||
|
||||
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
|
||||
disk->part0->bd_openers == 0)
|
||||
disk_openers(disk) == 0)
|
||||
nbd_disconnect_and_put(nbd);
|
||||
|
||||
nbd_config_put(nbd);
|
||||
@@ -1784,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
disk->queue->limits.discard_granularity = 0;
|
||||
disk->queue->limits.discard_alignment = 0;
|
||||
blk_queue_max_discard_sectors(disk->queue, 0);
|
||||
blk_queue_max_segment_size(disk->queue, UINT_MAX);
|
||||
blk_queue_max_segments(disk->queue, USHRT_MAX);
|
||||
@@ -2082,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
|
||||
mutex_lock(&nbd->config_lock);
|
||||
nbd_disconnect(nbd);
|
||||
sock_shutdown(nbd);
|
||||
wake_up(&nbd->config->conn_wait);
|
||||
/*
|
||||
* Make sure recv thread has finished, we can safely call nbd_clear_que()
|
||||
* to cancel the inflight I/Os.
|
||||
|
||||
@@ -11,6 +11,9 @@
|
||||
#include <linux/init.h>
|
||||
#include "null_blk.h"
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "null_blk: " fmt
|
||||
|
||||
#define FREE_BATCH 16
|
||||
|
||||
#define TICKS_PER_SEC 50ULL
|
||||
@@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
|
||||
static void null_free_dev(struct nullb_device *dev);
|
||||
static void null_del_dev(struct nullb *nullb);
|
||||
static int null_add_dev(struct nullb_device *dev);
|
||||
static struct nullb *null_find_dev_by_name(const char *name);
|
||||
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
|
||||
|
||||
static inline struct nullb_device *to_nullb_device(struct config_item *item)
|
||||
@@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
|
||||
{
|
||||
struct nullb_device *dev;
|
||||
|
||||
if (null_find_dev_by_name(name))
|
||||
return ERR_PTR(-EEXIST);
|
||||
|
||||
dev = null_alloc_dev();
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@@ -1765,9 +1772,7 @@ static void null_config_discard(struct nullb *nullb)
|
||||
}
|
||||
|
||||
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
|
||||
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
|
||||
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
|
||||
}
|
||||
|
||||
static const struct block_device_operations null_bio_ops = {
|
||||
@@ -2061,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
|
||||
null_config_discard(nullb);
|
||||
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
if (config_item_name(&dev->item)) {
|
||||
/* Use configfs dir name as the device name */
|
||||
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
|
||||
"%s", config_item_name(&dev->item));
|
||||
} else {
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
}
|
||||
|
||||
rv = null_gendisk_register(nullb);
|
||||
if (rv)
|
||||
@@ -2071,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
list_add_tail(&nullb->list, &nullb_list);
|
||||
mutex_unlock(&lock);
|
||||
|
||||
pr_info("disk %s created\n", nullb->disk_name);
|
||||
|
||||
return 0;
|
||||
out_cleanup_zone:
|
||||
null_free_zoned_dev(dev);
|
||||
@@ -2088,12 +2101,53 @@ out:
|
||||
return rv;
|
||||
}
|
||||
|
||||
static struct nullb *null_find_dev_by_name(const char *name)
|
||||
{
|
||||
struct nullb *nullb = NULL, *nb;
|
||||
|
||||
mutex_lock(&lock);
|
||||
list_for_each_entry(nb, &nullb_list, list) {
|
||||
if (strcmp(nb->disk_name, name) == 0) {
|
||||
nullb = nb;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return nullb;
|
||||
}
|
||||
|
||||
static int null_create_dev(void)
|
||||
{
|
||||
struct nullb_device *dev;
|
||||
int ret;
|
||||
|
||||
dev = null_alloc_dev();
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = null_add_dev(dev);
|
||||
if (ret) {
|
||||
null_free_dev(dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void null_destroy_dev(struct nullb *nullb)
|
||||
{
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
}
|
||||
|
||||
static int __init null_init(void)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned int i;
|
||||
struct nullb *nullb;
|
||||
struct nullb_device *dev;
|
||||
|
||||
if (g_bs > PAGE_SIZE) {
|
||||
pr_warn("invalid block size\n");
|
||||
@@ -2113,19 +2167,21 @@ static int __init null_init(void)
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_RQ) {
|
||||
pr_err("legacy IO path no longer available\n");
|
||||
pr_err("legacy IO path is no longer available\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
|
||||
if (g_submit_queues != nr_online_nodes) {
|
||||
pr_warn("submit_queues param is set to %u.\n",
|
||||
nr_online_nodes);
|
||||
nr_online_nodes);
|
||||
g_submit_queues = nr_online_nodes;
|
||||
}
|
||||
} else if (g_submit_queues > nr_cpu_ids)
|
||||
} else if (g_submit_queues > nr_cpu_ids) {
|
||||
g_submit_queues = nr_cpu_ids;
|
||||
else if (g_submit_queues <= 0)
|
||||
} else if (g_submit_queues <= 0) {
|
||||
g_submit_queues = 1;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
|
||||
ret = null_init_tag_set(NULL, &tag_set);
|
||||
@@ -2149,16 +2205,9 @@ static int __init null_init(void)
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_devices; i++) {
|
||||
dev = null_alloc_dev();
|
||||
if (!dev) {
|
||||
ret = -ENOMEM;
|
||||
ret = null_create_dev();
|
||||
if (ret)
|
||||
goto err_dev;
|
||||
}
|
||||
ret = null_add_dev(dev);
|
||||
if (ret) {
|
||||
null_free_dev(dev);
|
||||
goto err_dev;
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("module loaded\n");
|
||||
@@ -2167,9 +2216,7 @@ static int __init null_init(void)
|
||||
err_dev:
|
||||
while (!list_empty(&nullb_list)) {
|
||||
nullb = list_entry(nullb_list.next, struct nullb, list);
|
||||
dev = nullb->dev;
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
null_destroy_dev(nullb);
|
||||
}
|
||||
unregister_blkdev(null_major, "nullb");
|
||||
err_conf:
|
||||
@@ -2190,12 +2237,8 @@ static void __exit null_exit(void)
|
||||
|
||||
mutex_lock(&lock);
|
||||
while (!list_empty(&nullb_list)) {
|
||||
struct nullb_device *dev;
|
||||
|
||||
nullb = list_entry(nullb_list.next, struct nullb, list);
|
||||
dev = nullb->dev;
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
null_destroy_dev(nullb);
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
|
||||
@@ -16,13 +16,15 @@
|
||||
#include <linux/mutex.h>
|
||||
|
||||
struct nullb_cmd {
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
union {
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
};
|
||||
unsigned int tag;
|
||||
blk_status_t error;
|
||||
bool fake_timeout;
|
||||
struct nullb_queue *nq;
|
||||
struct hrtimer timer;
|
||||
bool fake_timeout;
|
||||
};
|
||||
|
||||
struct nullb_queue {
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "null_blk: " fmt
|
||||
|
||||
static inline sector_t mb_to_sects(unsigned long mb)
|
||||
{
|
||||
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
|
||||
@@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
dev->zone_capacity = dev->zone_size;
|
||||
|
||||
if (dev->zone_capacity > dev->zone_size) {
|
||||
pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
|
||||
dev->zone_capacity, dev->zone_size);
|
||||
pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
|
||||
dev->zone_capacity, dev->zone_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
* Theory of operation:
|
||||
*
|
||||
* At the lowest level, there is the standard driver for the CD/DVD device,
|
||||
* typically ide-cd.c or sr.c. This driver can handle read and write requests,
|
||||
* such as drivers/scsi/sr.c. This driver can handle read and write requests,
|
||||
* but it doesn't know anything about the special restrictions that apply to
|
||||
* packet writing. One restriction is that write requests must be aligned to
|
||||
* packet boundaries on the physical media, and the size of a write request
|
||||
@@ -522,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
|
||||
goto no_pkt;
|
||||
|
||||
pkt->frames = frames;
|
||||
pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
|
||||
pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
|
||||
if (!pkt->w_bio)
|
||||
goto no_bio;
|
||||
|
||||
@@ -536,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
|
||||
bio_list_init(&pkt->orig_bios);
|
||||
|
||||
for (i = 0; i < frames; i++) {
|
||||
struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
|
||||
if (!bio)
|
||||
pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
|
||||
if (!pkt->r_bios[i])
|
||||
goto no_rd_bio;
|
||||
|
||||
pkt->r_bios[i] = bio;
|
||||
}
|
||||
|
||||
return pkt;
|
||||
|
||||
no_rd_bio:
|
||||
for (i = 0; i < frames; i++) {
|
||||
struct bio *bio = pkt->r_bios[i];
|
||||
if (bio)
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
for (i = 0; i < frames; i++)
|
||||
kfree(pkt->r_bios[i]);
|
||||
no_page:
|
||||
for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
|
||||
if (pkt->pages[i])
|
||||
__free_page(pkt->pages[i]);
|
||||
bio_put(pkt->w_bio);
|
||||
kfree(pkt->w_bio);
|
||||
no_bio:
|
||||
kfree(pkt);
|
||||
no_pkt:
|
||||
@@ -570,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pkt->frames; i++) {
|
||||
struct bio *bio = pkt->r_bios[i];
|
||||
if (bio)
|
||||
bio_put(bio);
|
||||
}
|
||||
for (i = 0; i < pkt->frames; i++)
|
||||
kfree(pkt->r_bios[i]);
|
||||
for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
|
||||
__free_page(pkt->pages[i]);
|
||||
bio_put(pkt->w_bio);
|
||||
kfree(pkt->w_bio);
|
||||
kfree(pkt);
|
||||
}
|
||||
|
||||
@@ -951,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio)
|
||||
|
||||
if (bio->bi_status)
|
||||
atomic_inc(&pkt->io_errors);
|
||||
bio_uninit(bio);
|
||||
if (atomic_dec_and_test(&pkt->io_wait)) {
|
||||
atomic_inc(&pkt->run_sm);
|
||||
wake_up(&pd->wqueue);
|
||||
@@ -968,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
|
||||
|
||||
pd->stats.pkt_ended++;
|
||||
|
||||
bio_uninit(bio);
|
||||
pkt_bio_finished(pd);
|
||||
atomic_dec(&pkt->io_wait);
|
||||
atomic_inc(&pkt->run_sm);
|
||||
@@ -1022,7 +1015,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
|
||||
continue;
|
||||
|
||||
bio = pkt->r_bios[f];
|
||||
bio_reset(bio, pd->bdev, REQ_OP_READ);
|
||||
bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
|
||||
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
|
||||
bio->bi_end_io = pkt_end_io_read;
|
||||
bio->bi_private = pkt;
|
||||
@@ -1235,7 +1228,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
|
||||
{
|
||||
int f;
|
||||
|
||||
bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
|
||||
bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
|
||||
REQ_OP_WRITE);
|
||||
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
|
||||
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
|
||||
pkt->w_bio->bi_private = pkt;
|
||||
|
||||
@@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
|
||||
|
||||
if (rbd_dev->opts->trim) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
q->limits.discard_granularity = rbd_dev->opts->alloc_size;
|
||||
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
|
||||
blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
|
||||
|
||||
@@ -25,6 +25,7 @@ static int rnbd_client_major;
|
||||
static DEFINE_IDA(index_ida);
|
||||
static DEFINE_MUTEX(sess_lock);
|
||||
static LIST_HEAD(sess_list);
|
||||
static struct workqueue_struct *rnbd_clt_wq;
|
||||
|
||||
/*
|
||||
* Maximum number of partitions an instance can have.
|
||||
@@ -1364,11 +1365,9 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
|
||||
blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
|
||||
dev->queue->limits.discard_granularity = dev->discard_granularity;
|
||||
dev->queue->limits.discard_alignment = dev->discard_alignment;
|
||||
if (dev->max_discard_sectors)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
|
||||
if (dev->secure_discard)
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
|
||||
|
||||
blk_queue_max_secure_erase_sectors(dev->queue,
|
||||
dev->max_discard_sectors);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
|
||||
blk_queue_max_segments(dev->queue, dev->max_segments);
|
||||
@@ -1761,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
|
||||
* procedure takes minutes.
|
||||
*/
|
||||
INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
|
||||
queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
|
||||
queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
|
||||
}
|
||||
rnbd_clt_put_sess(sess);
|
||||
}
|
||||
/* Wait for all scheduled unmap works */
|
||||
flush_workqueue(system_long_wq);
|
||||
flush_workqueue(rnbd_clt_wq);
|
||||
WARN_ON(!list_empty(&sess_list));
|
||||
}
|
||||
|
||||
@@ -1791,6 +1790,14 @@ static int __init rnbd_client_init(void)
|
||||
pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
|
||||
err);
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
return err;
|
||||
}
|
||||
rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
|
||||
if (!rnbd_clt_wq) {
|
||||
pr_err("Failed to load module, alloc_workqueue failed.\n");
|
||||
rnbd_clt_destroy_sysfs_files();
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
err = -ENOMEM;
|
||||
}
|
||||
|
||||
return err;
|
||||
@@ -1801,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
|
||||
rnbd_destroy_sessions();
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
ida_destroy(&index_ida);
|
||||
destroy_workqueue(rnbd_clt_wq);
|
||||
}
|
||||
|
||||
module_init(rnbd_client_init);
|
||||
|
||||
@@ -44,16 +44,12 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
|
||||
|
||||
static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
|
||||
{
|
||||
return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
|
||||
return bdev_max_secure_erase_sectors(dev->bdev);
|
||||
}
|
||||
|
||||
static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
|
||||
{
|
||||
if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
|
||||
return 0;
|
||||
|
||||
return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
|
||||
REQ_OP_DISCARD);
|
||||
return bdev_max_discard_sectors(dev->bdev);
|
||||
}
|
||||
|
||||
static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
|
||||
@@ -63,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
|
||||
|
||||
static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
|
||||
{
|
||||
return bdev_get_queue(dev->bdev)->limits.discard_alignment;
|
||||
return bdev_discard_alignment(dev->bdev);
|
||||
}
|
||||
|
||||
#endif /* RNBD_SRV_DEV_H */
|
||||
|
||||
@@ -533,7 +533,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
|
||||
struct rnbd_srv_sess_dev *sess_dev)
|
||||
{
|
||||
struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
|
||||
struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
|
||||
|
||||
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
|
||||
rsp->device_id =
|
||||
@@ -558,9 +557,9 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
|
||||
rsp->secure_discard =
|
||||
cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
|
||||
rsp->cache_policy = 0;
|
||||
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
||||
if (bdev_write_cache(rnbd_dev->bdev))
|
||||
rsp->cache_policy |= RNBD_WRITEBACK;
|
||||
if (blk_queue_fua(q))
|
||||
if (bdev_fua(rnbd_dev->bdev))
|
||||
rsp->cache_policy |= RNBD_FUA;
|
||||
}
|
||||
|
||||
|
||||
@@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
blk_queue_io_opt(q, blk_size * opt_io_size);
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
q->limits.discard_granularity = blk_size;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
discard_sector_alignment, &v);
|
||||
q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
|
||||
if (v)
|
||||
q->limits.discard_granularity = v << SECTOR_SHIFT;
|
||||
else
|
||||
q->limits.discard_granularity = blk_size;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_discard_sectors, &v);
|
||||
@@ -888,8 +889,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
v = sg_elems;
|
||||
blk_queue_max_discard_segments(q,
|
||||
min(v, MAX_DISCARD_SEGMENTS));
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
|
||||
|
||||
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
|
||||
int status = BLKIF_RSP_OKAY;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
struct block_device *bdev = blkif->vbd.bdev;
|
||||
unsigned long secure;
|
||||
struct phys_req preq;
|
||||
|
||||
xen_blkif_get(blkif);
|
||||
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
|
||||
}
|
||||
ring->st_ds_req++;
|
||||
|
||||
secure = (blkif->vbd.discard_secure &&
|
||||
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
|
||||
BLKDEV_DISCARD_SECURE : 0;
|
||||
if (blkif->vbd.discard_secure &&
|
||||
(req->u.discard.flag & BLKIF_DISCARD_SECURE))
|
||||
err = blkdev_issue_secure_erase(bdev,
|
||||
req->u.discard.sector_number,
|
||||
req->u.discard.nr_sectors, GFP_KERNEL);
|
||||
else
|
||||
err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
|
||||
req->u.discard.nr_sectors, GFP_KERNEL);
|
||||
|
||||
err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
|
||||
req->u.discard.nr_sectors,
|
||||
GFP_KERNEL, secure);
|
||||
fail_response:
|
||||
if (err == -EOPNOTSUPP) {
|
||||
pr_debug("discard op failed, not supported\n");
|
||||
|
||||
@@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
|
||||
{
|
||||
struct xen_vbd *vbd;
|
||||
struct block_device *bdev;
|
||||
struct request_queue *q;
|
||||
|
||||
vbd = &blkif->vbd;
|
||||
vbd->handle = handle;
|
||||
@@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
|
||||
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
|
||||
vbd->type |= VDISK_REMOVABLE;
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
||||
if (bdev_write_cache(bdev))
|
||||
vbd->flush_support = true;
|
||||
|
||||
if (q && blk_queue_secure_erase(q))
|
||||
if (bdev_max_secure_erase_sectors(bdev))
|
||||
vbd->discard_secure = true;
|
||||
|
||||
vbd->feature_gnt_persistent = feature_persistent;
|
||||
@@ -578,22 +575,21 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
|
||||
int err;
|
||||
int state = 0;
|
||||
struct block_device *bdev = be->blkif->vbd.bdev;
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
|
||||
return;
|
||||
|
||||
if (blk_queue_discard(q)) {
|
||||
if (bdev_max_discard_sectors(bdev)) {
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"discard-granularity", "%u",
|
||||
q->limits.discard_granularity);
|
||||
bdev_discard_granularity(bdev));
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
|
||||
return;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"discard-alignment", "%u",
|
||||
q->limits.discard_alignment);
|
||||
bdev_discard_alignment(bdev));
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
|
||||
return;
|
||||
|
||||
@@ -944,13 +944,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
|
||||
blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
|
||||
|
||||
if (info->feature_discard) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
|
||||
blk_queue_max_discard_sectors(rq, get_capacity(gd));
|
||||
rq->limits.discard_granularity = info->discard_granularity ?:
|
||||
info->physical_sector_size;
|
||||
rq->limits.discard_alignment = info->discard_alignment;
|
||||
if (info->feature_secdiscard)
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
|
||||
blk_queue_max_secure_erase_sectors(rq,
|
||||
get_capacity(gd));
|
||||
}
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
@@ -1606,8 +1606,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
blkif_req(req)->error = BLK_STS_NOTSUPP;
|
||||
info->feature_discard = 0;
|
||||
info->feature_secdiscard = 0;
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
|
||||
blk_queue_max_discard_sectors(rq, 0);
|
||||
blk_queue_max_secure_erase_sectors(rq, 0);
|
||||
}
|
||||
break;
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
|
||||
@@ -1675,9 +1675,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
|
||||
bv.bv_len = PAGE_SIZE;
|
||||
bv.bv_offset = 0;
|
||||
|
||||
start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
|
||||
start_time = bdev_start_io_acct(bdev->bd_disk->part0,
|
||||
SECTORS_PER_PAGE, op, jiffies);
|
||||
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
|
||||
disk_end_io_acct(bdev->bd_disk, op, start_time);
|
||||
bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
|
||||
out:
|
||||
/*
|
||||
* If I/O fails, just return error(ie, non-zero) without
|
||||
@@ -1786,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
|
||||
int ret;
|
||||
unsigned short do_reset;
|
||||
struct zram *zram;
|
||||
struct block_device *bdev;
|
||||
struct gendisk *disk;
|
||||
|
||||
ret = kstrtou16(buf, 10, &do_reset);
|
||||
if (ret)
|
||||
@@ -1796,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
|
||||
return -EINVAL;
|
||||
|
||||
zram = dev_to_zram(dev);
|
||||
bdev = zram->disk->part0;
|
||||
disk = zram->disk;
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
/* Do not reset an active device or claimed device */
|
||||
if (bdev->bd_openers || zram->claim) {
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
if (disk_openers(disk) || zram->claim) {
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* From now on, anyone can't open /dev/zram[0-9] */
|
||||
zram->claim = true;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
|
||||
/* Make sure all the pending I/O are finished */
|
||||
sync_blockdev(bdev);
|
||||
sync_blockdev(disk->part0);
|
||||
zram_reset_device(zram);
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
zram->claim = false;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
|
||||
return len;
|
||||
}
|
||||
@@ -1952,7 +1953,6 @@ static int zram_add(void)
|
||||
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
|
||||
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
|
||||
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
|
||||
|
||||
/*
|
||||
* zram_bio_discard() will clear all logical blocks if logical block
|
||||
@@ -1987,19 +1987,18 @@ out_free_dev:
|
||||
|
||||
static int zram_remove(struct zram *zram)
|
||||
{
|
||||
struct block_device *bdev = zram->disk->part0;
|
||||
bool claimed;
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (bdev->bd_openers) {
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&zram->disk->open_mutex);
|
||||
if (disk_openers(zram->disk)) {
|
||||
mutex_unlock(&zram->disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
claimed = zram->claim;
|
||||
if (!claimed)
|
||||
zram->claim = true;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&zram->disk->open_mutex);
|
||||
|
||||
zram_debugfs_unregister(zram);
|
||||
|
||||
@@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
|
||||
;
|
||||
} else {
|
||||
/* Make sure all the pending I/O are finished */
|
||||
sync_blockdev(bdev);
|
||||
sync_blockdev(zram->disk->part0);
|
||||
zram_reset_device(zram);
|
||||
}
|
||||
|
||||
|
||||
@@ -14,15 +14,6 @@
|
||||
actually talk to the hardware. Suggestions are welcome.
|
||||
Patches that work are more welcome though. ;-)
|
||||
|
||||
To Do List:
|
||||
----------------------------------
|
||||
|
||||
-- Modify sysctl/proc interface. I plan on having one directory per
|
||||
drive, with entries for outputing general drive information, and sysctl
|
||||
based tunable parameters such as whether the tray should auto-close for
|
||||
that drive. Suggestions (or patches) for this welcome!
|
||||
|
||||
|
||||
Revision History
|
||||
----------------------------------
|
||||
1.00 Date Unknown -- David van Leeuwen <david@tm.tno.nl>
|
||||
@@ -648,6 +639,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
|
||||
mutex_unlock(&cdrom_mutex);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(register_cdrom);
|
||||
#undef ENSURE
|
||||
|
||||
void unregister_cdrom(struct cdrom_device_info *cdi)
|
||||
@@ -663,6 +655,7 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
|
||||
|
||||
cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_cdrom);
|
||||
|
||||
int cdrom_get_media_event(struct cdrom_device_info *cdi,
|
||||
struct media_event_desc *med)
|
||||
@@ -690,6 +683,7 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
|
||||
memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_get_media_event);
|
||||
|
||||
static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
|
||||
struct rwrt_feature_desc *rfd)
|
||||
@@ -1206,6 +1200,7 @@ err:
|
||||
cdi->use_count--;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_open);
|
||||
|
||||
/* This code is similar to that in open_for_data. The routine is called
|
||||
whenever an audio play operation is requested.
|
||||
@@ -1301,6 +1296,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
|
||||
cdo->tray_move(cdi, 1);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_release);
|
||||
|
||||
static int cdrom_read_mech_status(struct cdrom_device_info *cdi,
|
||||
struct cdrom_changer_info *buf)
|
||||
@@ -1382,6 +1378,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi)
|
||||
kfree(info);
|
||||
return nslots;
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_number_of_slots);
|
||||
|
||||
|
||||
/* If SLOT < 0, unload the current slot. Otherwise, try to load SLOT. */
|
||||
@@ -1581,6 +1578,7 @@ void init_cdrom_command(struct packet_command *cgc, void *buf, int len,
|
||||
cgc->data_direction = type;
|
||||
cgc->timeout = CDROM_DEF_TIMEOUT;
|
||||
}
|
||||
EXPORT_SYMBOL(init_cdrom_command);
|
||||
|
||||
/* DVD handling */
|
||||
|
||||
@@ -1999,6 +1997,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
|
||||
cgc->data_direction = CGC_DATA_READ;
|
||||
return cdo->generic_packet(cdi, cgc);
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_mode_sense);
|
||||
|
||||
int cdrom_mode_select(struct cdrom_device_info *cdi,
|
||||
struct packet_command *cgc)
|
||||
@@ -2014,6 +2013,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
|
||||
cgc->data_direction = CGC_DATA_WRITE;
|
||||
return cdo->generic_packet(cdi, cgc);
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_mode_select);
|
||||
|
||||
static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
|
||||
struct cdrom_subchnl *subchnl, int mcn)
|
||||
@@ -2443,14 +2443,6 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* ->select_disc is a hook to allow a driver-specific way of
|
||||
* seleting disc. However, since there is no equivalent hook for
|
||||
* cdrom_slot_status this may not actually be useful...
|
||||
*/
|
||||
if (cdi->ops->select_disc)
|
||||
return cdi->ops->select_disc(cdi, arg);
|
||||
|
||||
cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n");
|
||||
return cdrom_select_disc(cdi, arg);
|
||||
}
|
||||
@@ -2892,6 +2884,7 @@ use_toc:
|
||||
*last_written = toc.cdte_addr.lba;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(cdrom_get_last_written);
|
||||
|
||||
/* return the next writable block. also for udf file system. */
|
||||
static int cdrom_get_next_writable(struct cdrom_device_info *cdi,
|
||||
@@ -3429,18 +3422,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
|
||||
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(cdrom_get_last_written);
|
||||
EXPORT_SYMBOL(register_cdrom);
|
||||
EXPORT_SYMBOL(unregister_cdrom);
|
||||
EXPORT_SYMBOL(cdrom_open);
|
||||
EXPORT_SYMBOL(cdrom_release);
|
||||
EXPORT_SYMBOL(cdrom_ioctl);
|
||||
EXPORT_SYMBOL(cdrom_number_of_slots);
|
||||
EXPORT_SYMBOL(cdrom_mode_select);
|
||||
EXPORT_SYMBOL(cdrom_mode_sense);
|
||||
EXPORT_SYMBOL(init_cdrom_command);
|
||||
EXPORT_SYMBOL(cdrom_get_media_event);
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
|
||||
|
||||
@@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg)
|
||||
mutex_unlock(&ca->set->bucket_lock);
|
||||
blkdev_issue_discard(ca->bdev,
|
||||
bucket_to_sector(ca->set, bucket),
|
||||
ca->sb.bucket_size, GFP_KERNEL, 0);
|
||||
ca->sb.bucket_size, GFP_KERNEL);
|
||||
mutex_lock(&ca->set->bucket_lock);
|
||||
}
|
||||
|
||||
|
||||
@@ -107,15 +107,16 @@ void bch_btree_verify(struct btree *b)
|
||||
|
||||
void bch_data_verify(struct cached_dev *dc, struct bio *bio)
|
||||
{
|
||||
unsigned int nr_segs = bio_segments(bio);
|
||||
struct bio *check;
|
||||
struct bio_vec bv, cbv;
|
||||
struct bvec_iter iter, citer = { 0 };
|
||||
|
||||
check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
|
||||
check = bio_kmalloc(nr_segs, GFP_NOIO);
|
||||
if (!check)
|
||||
return;
|
||||
bio_set_dev(check, bio->bi_bdev);
|
||||
check->bi_opf = REQ_OP_READ;
|
||||
bio_init(check, bio->bi_bdev, check->bi_inline_vecs, nr_segs,
|
||||
REQ_OP_READ);
|
||||
check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
|
||||
check->bi_iter.bi_size = bio->bi_iter.bi_size;
|
||||
|
||||
@@ -146,7 +147,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
|
||||
|
||||
bio_free_pages(check);
|
||||
out_put:
|
||||
bio_put(check);
|
||||
bio_uninit(check);
|
||||
kfree(check);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
|
||||
bio_get(s->iop.bio);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_DISCARD &&
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
!bdev_max_discard_sectors(dc->bdev))
|
||||
goto insert_data;
|
||||
|
||||
/* I/O request sent to backing device */
|
||||
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
|
||||
bio->bi_private = ddip;
|
||||
|
||||
if ((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
!bdev_max_discard_sectors(dc->bdev))
|
||||
bio->bi_end_io(bio);
|
||||
else
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
@@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
|
||||
|
||||
blk_queue_write_cache(q, true, true);
|
||||
|
||||
@@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
|
||||
ca->bdev->bd_holder = ca;
|
||||
ca->sb_disk = sb_disk;
|
||||
|
||||
if (blk_queue_discard(bdev_get_queue(bdev)))
|
||||
if (bdev_max_discard_sectors((bdev)))
|
||||
ca->discard = CACHE_DISCARD(&ca->sb);
|
||||
|
||||
ret = cache_alloc(ca);
|
||||
|
||||
@@ -1151,7 +1151,7 @@ STORE(__bch_cache)
|
||||
if (attr == &sysfs_discard) {
|
||||
bool v = strtoul_or_return(buf);
|
||||
|
||||
if (blk_queue_discard(bdev_get_queue(ca->bdev)))
|
||||
if (bdev_max_discard_sectors(ca->bdev))
|
||||
ca->discard = v;
|
||||
|
||||
if (v != CACHE_DISCARD(&ca->sb)) {
|
||||
|
||||
@@ -611,7 +611,8 @@ static void bio_complete(struct bio *bio)
|
||||
{
|
||||
struct dm_buffer *b = bio->bi_private;
|
||||
blk_status_t status = bio->bi_status;
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
b->end_io(b, status);
|
||||
}
|
||||
|
||||
@@ -626,16 +627,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
|
||||
if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
|
||||
vec_size += 2;
|
||||
|
||||
bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
|
||||
bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
|
||||
if (!bio) {
|
||||
dmio:
|
||||
use_dmio(b, rw, sector, n_sectors, offset);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio_set_dev(bio, b->c->bdev);
|
||||
bio_set_op_attrs(bio, rw, 0);
|
||||
bio->bi_end_io = bio_complete;
|
||||
bio->bi_private = b;
|
||||
|
||||
|
||||
@@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti,
|
||||
return r;
|
||||
}
|
||||
|
||||
static bool origin_dev_supports_discard(struct block_device *origin_bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(origin_bdev);
|
||||
|
||||
return blk_queue_discard(q);
|
||||
}
|
||||
|
||||
/*
|
||||
* If discard_passdown was enabled verify that the origin device
|
||||
* supports discards. Disable discard_passdown if not.
|
||||
@@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
|
||||
if (!cache->features.discard_passdown)
|
||||
return;
|
||||
|
||||
if (!origin_dev_supports_discard(origin_bdev))
|
||||
if (!bdev_max_discard_sectors(origin_bdev))
|
||||
reason = "discard unsupported";
|
||||
|
||||
else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
|
||||
|
||||
@@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti)
|
||||
do_waker(&clone->waker.work);
|
||||
}
|
||||
|
||||
static bool bdev_supports_discards(struct block_device *bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
return (q && blk_queue_discard(q));
|
||||
}
|
||||
|
||||
/*
|
||||
* If discard_passdown was enabled verify that the destination device supports
|
||||
* discards. Disable discard_passdown if not.
|
||||
@@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
|
||||
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
|
||||
return;
|
||||
|
||||
if (!bdev_supports_discards(dest_dev))
|
||||
if (!bdev_max_discard_sectors(dest_dev))
|
||||
reason = "discard unsupported";
|
||||
else if (dest_limits->max_discard_sectors < clone->region_size)
|
||||
reason = "max discard sectors smaller than a region";
|
||||
|
||||
@@ -311,7 +311,7 @@ static void do_region(int op, int op_flags, unsigned region,
|
||||
* Reject unsupported discard and write zeroes requests.
|
||||
*/
|
||||
if (op == REQ_OP_DISCARD)
|
||||
special_cmd_max_sectors = q->limits.max_discard_sectors;
|
||||
special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
|
||||
else if (op == REQ_OP_WRITE_ZEROES)
|
||||
special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
|
||||
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
|
||||
|
||||
@@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
|
||||
static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
{
|
||||
struct log_writes_c *lc = ti->private;
|
||||
struct request_queue *q = bdev_get_queue(lc->dev->bdev);
|
||||
|
||||
if (!q || !blk_queue_discard(q)) {
|
||||
if (!bdev_max_discard_sectors(lc->dev->bdev)) {
|
||||
lc->device_supports_discard = false;
|
||||
limits->discard_granularity = lc->sectorsize;
|
||||
limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
|
||||
|
||||
@@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs)
|
||||
raid456 = rs_is_raid456(rs);
|
||||
|
||||
for (i = 0; i < rs->raid_disks; i++) {
|
||||
struct request_queue *q;
|
||||
|
||||
if (!rs->dev[i].rdev.bdev)
|
||||
continue;
|
||||
|
||||
q = bdev_get_queue(rs->dev[i].rdev.bdev);
|
||||
if (!q || !blk_queue_discard(q))
|
||||
if (!rs->dev[i].rdev.bdev ||
|
||||
!bdev_max_discard_sectors(rs->dev[i].rdev.bdev))
|
||||
return;
|
||||
|
||||
if (raid456) {
|
||||
|
||||
@@ -1886,9 +1886,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
|
||||
static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return !blk_queue_nonrot(q);
|
||||
return !bdev_nonrot(dev->bdev);
|
||||
}
|
||||
|
||||
static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
|
||||
@@ -1956,9 +1954,7 @@ static bool dm_table_supports_nowait(struct dm_table *t)
|
||||
static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return !blk_queue_discard(q);
|
||||
return !bdev_max_discard_sectors(dev->bdev);
|
||||
}
|
||||
|
||||
static bool dm_table_supports_discards(struct dm_table *t)
|
||||
@@ -1990,9 +1986,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
|
||||
struct dm_dev *dev, sector_t start,
|
||||
sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return !blk_queue_secure_erase(q);
|
||||
return !bdev_max_secure_erase_sectors(dev->bdev);
|
||||
}
|
||||
|
||||
static bool dm_table_supports_secure_erase(struct dm_table *t)
|
||||
@@ -2018,9 +2012,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
|
||||
struct dm_dev *dev, sector_t start,
|
||||
sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return blk_queue_stable_writes(q);
|
||||
return bdev_stable_writes(dev->bdev);
|
||||
}
|
||||
|
||||
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
@@ -2040,18 +2032,15 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
|
||||
|
||||
if (!dm_table_supports_discards(t)) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
|
||||
/* Must also clear discard limits... */
|
||||
q->limits.max_discard_sectors = 0;
|
||||
q->limits.max_hw_discard_sectors = 0;
|
||||
q->limits.discard_granularity = 0;
|
||||
q->limits.discard_alignment = 0;
|
||||
q->limits.discard_misaligned = 0;
|
||||
} else
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
}
|
||||
|
||||
if (dm_table_supports_secure_erase(t))
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
|
||||
if (!dm_table_supports_secure_erase(t))
|
||||
q->limits.max_secure_erase_sectors = 0;
|
||||
|
||||
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
|
||||
wc = true;
|
||||
|
||||
@@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
|
||||
sector_t s = block_to_sectors(tc->pool, data_b);
|
||||
sector_t len = block_to_sectors(tc->pool, data_e - data_b);
|
||||
|
||||
return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
|
||||
GFP_NOWAIT, 0, &op->bio);
|
||||
return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
|
||||
&op->bio);
|
||||
}
|
||||
|
||||
static void end_discard(struct discard_op *op, int r)
|
||||
@@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool)
|
||||
/*----------------------------------------------------------------
|
||||
* Binding of control targets to a pool object
|
||||
*--------------------------------------------------------------*/
|
||||
static bool data_dev_supports_discard(struct pool_c *pt)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
|
||||
|
||||
return blk_queue_discard(q);
|
||||
}
|
||||
|
||||
static bool is_factor(sector_t block_size, uint32_t n)
|
||||
{
|
||||
return !sector_div(block_size, n);
|
||||
@@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
|
||||
if (!pt->adjusted_pf.discard_passdown)
|
||||
return;
|
||||
|
||||
if (!data_dev_supports_discard(pt))
|
||||
if (!bdev_max_discard_sectors(pt->data_dev->bdev))
|
||||
reason = "discard unsupported";
|
||||
|
||||
else if (data_limits->max_discard_sectors < pool->sectors_per_block)
|
||||
@@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
/*
|
||||
* Must explicitly disallow stacking discard limits otherwise the
|
||||
* block layer will stack them if pool's data device has support.
|
||||
* QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
|
||||
* user to see that, so make sure to set all discard limits to 0.
|
||||
*/
|
||||
limits->discard_granularity = 0;
|
||||
return;
|
||||
|
||||
@@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
|
||||
blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
|
||||
|
||||
limits->discard_alignment = DMZ_BLOCK_SIZE;
|
||||
limits->discard_alignment = 0;
|
||||
limits->discard_granularity = DMZ_BLOCK_SIZE;
|
||||
limits->max_discard_sectors = chunk_sectors;
|
||||
limits->max_hw_discard_sectors = chunk_sectors;
|
||||
|
||||
@@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md)
|
||||
|
||||
/* device doesn't really support DISCARD, disable it */
|
||||
limits->max_discard_sectors = 0;
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
|
||||
}
|
||||
|
||||
void disable_write_zeroes(struct mapped_device *md)
|
||||
@@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio)
|
||||
|
||||
if (unlikely(error == BLK_STS_TARGET)) {
|
||||
if (bio_op(bio) == REQ_OP_DISCARD &&
|
||||
!q->limits.max_discard_sectors)
|
||||
!bdev_max_discard_sectors(bio->bi_bdev))
|
||||
disable_discard(md);
|
||||
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
|
||||
!q->limits.max_write_zeroes_sectors)
|
||||
|
||||
@@ -639,14 +639,6 @@ re_read:
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
|
||||
write_behind = le32_to_cpu(sb->write_behind);
|
||||
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
|
||||
/* Setup nodes/clustername only if bitmap version is
|
||||
* cluster-compatible
|
||||
*/
|
||||
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
|
||||
sb->cluster_name, 64);
|
||||
}
|
||||
|
||||
/* verify that the bitmap-specific fields are valid */
|
||||
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
|
||||
@@ -668,6 +660,16 @@ re_read:
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup nodes/clustername only if bitmap version is
|
||||
* cluster-compatible
|
||||
*/
|
||||
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strscpy(bitmap->mddev->bitmap_info.cluster_name,
|
||||
sb->cluster_name, 64);
|
||||
}
|
||||
|
||||
/* keep the array size field of the bitmap superblock up to date */
|
||||
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
|
||||
|
||||
@@ -695,14 +697,13 @@ re_read:
|
||||
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
|
||||
set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
|
||||
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
|
||||
err = 0;
|
||||
|
||||
out:
|
||||
kunmap_atomic(sb);
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
err = md_setup_cluster(bitmap->mddev, nodes);
|
||||
if (err) {
|
||||
pr_warn("%s: Could not setup cluster service (%d)\n",
|
||||
@@ -713,18 +714,18 @@ out:
|
||||
goto re_read;
|
||||
}
|
||||
|
||||
|
||||
out_no_sb:
|
||||
if (test_bit(BITMAP_STALE, &bitmap->flags))
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
|
||||
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
|
||||
bitmap->mddev->bitmap_info.nodes = nodes;
|
||||
if (bitmap->mddev->bitmap_info.space == 0 ||
|
||||
bitmap->mddev->bitmap_info.space > sectors_reserved)
|
||||
bitmap->mddev->bitmap_info.space = sectors_reserved;
|
||||
if (err) {
|
||||
if (err == 0) {
|
||||
if (test_bit(BITMAP_STALE, &bitmap->flags))
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
|
||||
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
|
||||
bitmap->mddev->bitmap_info.nodes = nodes;
|
||||
if (bitmap->mddev->bitmap_info.space == 0 ||
|
||||
bitmap->mddev->bitmap_info.space > sectors_reserved)
|
||||
bitmap->mddev->bitmap_info.space = sectors_reserved;
|
||||
} else {
|
||||
md_bitmap_print_sb(bitmap);
|
||||
if (bitmap->cluster_slot < 0)
|
||||
md_cluster_stop(bitmap->mddev);
|
||||
|
||||
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
|
||||
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
|
||||
goto out_err;
|
||||
}
|
||||
strlcpy(res->name, name, namelen + 1);
|
||||
strscpy(res->name, name, namelen + 1);
|
||||
if (with_lvb) {
|
||||
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
|
||||
if (!res->lksb.sb_lvbptr) {
|
||||
|
||||
@@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||
struct linear_conf *conf;
|
||||
struct md_rdev *rdev;
|
||||
int i, cnt;
|
||||
bool discard_supported = false;
|
||||
|
||||
conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
|
||||
if (!conf)
|
||||
@@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||
|
||||
conf->array_sectors += rdev->sectors;
|
||||
cnt++;
|
||||
|
||||
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
discard_supported = true;
|
||||
}
|
||||
if (cnt != raid_disks) {
|
||||
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
|
||||
@@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!discard_supported)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
|
||||
/*
|
||||
* Here we calculate the device offsets.
|
||||
*/
|
||||
@@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
start_sector + data_offset;
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
|
||||
!bdev_max_discard_sectors(bio->bi_bdev))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
|
||||
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
|
||||
|
||||
static bool does_sb_need_changing(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *rdev = NULL, *iter;
|
||||
struct mdp_superblock_1 *sb;
|
||||
int role;
|
||||
|
||||
/* Find a good rdev */
|
||||
rdev_for_each(rdev, mddev)
|
||||
if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
|
||||
rdev_for_each(iter, mddev)
|
||||
if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
|
||||
rdev = iter;
|
||||
break;
|
||||
}
|
||||
|
||||
/* No good device found. */
|
||||
if (!rdev)
|
||||
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
/* Device activated? */
|
||||
if (role == 0xffff && rdev->raid_disk >=0 &&
|
||||
if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
|
||||
!test_bit(Faulty, &rdev->flags))
|
||||
return true;
|
||||
/* Device turned faulty? */
|
||||
if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
|
||||
if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
|
||||
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
|
||||
md_error(rdev->mddev, rdev);
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
err = 0;
|
||||
else
|
||||
|
||||
if (test_bit(MD_BROKEN, &rdev->mddev->flags))
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "remove")) {
|
||||
if (rdev->mddev->pers) {
|
||||
clear_bit(Blocked, &rdev->flags);
|
||||
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
oldpriv = mddev->private;
|
||||
mddev->pers = pers;
|
||||
mddev->private = priv;
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
mddev->level = mddev->new_level;
|
||||
mddev->layout = mddev->new_layout;
|
||||
mddev->chunk_sectors = mddev->new_chunk_sectors;
|
||||
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
|
||||
* like active, but no writes have been seen for a while (100msec).
|
||||
*
|
||||
* broken
|
||||
* RAID0/LINEAR-only: same as clean, but array is missing a member.
|
||||
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
|
||||
* when a member is gone, so this state will at least alert the
|
||||
* user that something is wrong.
|
||||
* Array is failed. It's useful because mounted-arrays aren't stopped
|
||||
* when array is failed, so this state will at least alert the user that
|
||||
* something is wrong.
|
||||
*/
|
||||
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
|
||||
write_pending, active_idle, broken, bad_word};
|
||||
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
|
||||
len--;
|
||||
if (len >= DISK_NAME_LEN)
|
||||
return -E2BIG;
|
||||
strlcpy(buf, val, len+1);
|
||||
strscpy(buf, val, len+1);
|
||||
if (strncmp(buf, "md_", 3) == 0)
|
||||
return md_alloc(0, buf);
|
||||
if (strncmp(buf, "md", 2) == 0 &&
|
||||
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
|
||||
mddev->level = pers->level;
|
||||
mddev->new_level = pers->level;
|
||||
}
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
|
||||
if (mddev->reshape_position != MaxSector &&
|
||||
pers->start_reshape == NULL) {
|
||||
@@ -5991,8 +5993,7 @@ int md_run(struct mddev *mddev)
|
||||
bool nonrot = true;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
|
||||
if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
|
||||
nonrot = false;
|
||||
break;
|
||||
}
|
||||
@@ -7444,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
|
||||
err = -ENODEV;
|
||||
else {
|
||||
md_error(mddev, rdev);
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
if (test_bit(MD_BROKEN, &mddev->flags))
|
||||
err = -EBUSY;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@@ -7985,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
if (!mddev->pers || !mddev->pers->error_handler)
|
||||
return;
|
||||
mddev->pers->error_handler(mddev,rdev);
|
||||
if (mddev->degraded)
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
if (!test_bit(MD_BROKEN, &mddev->flags)) {
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
md_new_event();
|
||||
@@ -8585,7 +8589,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
{
|
||||
struct bio *discard_bio = NULL;
|
||||
|
||||
if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
|
||||
if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
|
||||
&discard_bio) || !discard_bio)
|
||||
return;
|
||||
|
||||
@@ -9671,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
|
||||
|
||||
if (test_bit(Candidate, &rdev2->flags)) {
|
||||
if (role == 0xfffe) {
|
||||
if (role == MD_DISK_ROLE_FAULTY) {
|
||||
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
|
||||
md_kick_rdev_from_array(rdev2);
|
||||
continue;
|
||||
@@ -9684,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
/*
|
||||
* got activated except reshape is happening.
|
||||
*/
|
||||
if (rdev2->raid_disk == -1 && role != 0xffff &&
|
||||
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
|
||||
!(le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RESHAPE_ACTIVE)) {
|
||||
rdev2->saved_raid_disk = role;
|
||||
@@ -9701,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
* as faulty. The recovery is performed by the
|
||||
* one who initiated the error.
|
||||
*/
|
||||
if ((role == 0xfffe) || (role == 0xfffd)) {
|
||||
if (role == MD_DISK_ROLE_FAULTY ||
|
||||
role == MD_DISK_ROLE_JOURNAL) {
|
||||
md_error(mddev, rdev2);
|
||||
clear_bit(Blocked, &rdev2->flags);
|
||||
}
|
||||
@@ -9791,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
void md_reload_sb(struct mddev *mddev, int nr)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *rdev = NULL, *iter;
|
||||
int err;
|
||||
|
||||
/* Find the rdev */
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
if (rdev->desc_nr == nr)
|
||||
rdev_for_each_rcu(iter, mddev) {
|
||||
if (iter->desc_nr == nr) {
|
||||
rdev = iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!rdev || rdev->desc_nr != nr) {
|
||||
if (!rdev) {
|
||||
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new);
|
||||
struct md_cluster_info;
|
||||
|
||||
/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
|
||||
/**
|
||||
* enum mddev_flags - md device flags.
|
||||
* @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
|
||||
* @MD_CLOSING: If set, we are closing the array, do not open it then.
|
||||
* @MD_JOURNAL_CLEAN: A raid with journal is already clean.
|
||||
* @MD_HAS_JOURNAL: The raid array has journal feature set.
|
||||
* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node already took
|
||||
* resync lock, need to release the lock.
|
||||
* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
|
||||
* calls to md_error() will never cause the array to
|
||||
* become failed.
|
||||
* @MD_HAS_PPL: The raid array has PPL feature set.
|
||||
* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
|
||||
* @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
|
||||
* without taking reconfig_mutex.
|
||||
* @MD_UPDATING_SB: md_check_recovery is updating the metadata without
|
||||
* explicitly holding reconfig_mutex.
|
||||
* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
|
||||
* array is ready yet.
|
||||
* @MD_BROKEN: This is used to stop writes and mark array as failed.
|
||||
*
|
||||
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
|
||||
*/
|
||||
enum mddev_flags {
|
||||
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
|
||||
MD_CLOSING, /* If set, we are closing the array, do not open
|
||||
* it then */
|
||||
MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
|
||||
MD_HAS_JOURNAL, /* The raid array has journal feature set */
|
||||
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
|
||||
* already took resync lock, need to
|
||||
* release the lock */
|
||||
MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
|
||||
* supported as calls to md_error() will
|
||||
* never cause the array to become failed.
|
||||
*/
|
||||
MD_HAS_PPL, /* The raid array has PPL feature set */
|
||||
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
|
||||
MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
|
||||
* the metadata without taking reconfig_mutex.
|
||||
*/
|
||||
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
|
||||
* without explicitly holding reconfig_mutex.
|
||||
*/
|
||||
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
|
||||
* must not report that array is ready yet
|
||||
*/
|
||||
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
|
||||
* I/O in case an array member is gone/failed.
|
||||
*/
|
||||
MD_ARRAY_FIRST_USE,
|
||||
MD_CLOSING,
|
||||
MD_JOURNAL_CLEAN,
|
||||
MD_HAS_JOURNAL,
|
||||
MD_CLUSTER_RESYNC_LOCKED,
|
||||
MD_FAILFAST_SUPPORTED,
|
||||
MD_HAS_PPL,
|
||||
MD_HAS_MULTIPLE_PPLS,
|
||||
MD_ALLOW_SB_UPDATE,
|
||||
MD_UPDATING_SB,
|
||||
MD_NOT_READY,
|
||||
MD_BROKEN,
|
||||
};
|
||||
|
||||
enum mddev_sb_flags {
|
||||
|
||||
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
pr_debug("md/raid0:%s: FINAL %d zones\n",
|
||||
mdname(mddev), conf->nr_strip_zones);
|
||||
|
||||
if (conf->nr_strip_zones == 1) {
|
||||
conf->layout = RAID0_ORIG_LAYOUT;
|
||||
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
|
||||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = mddev->layout;
|
||||
} else if (default_layout == RAID0_ORIG_LAYOUT ||
|
||||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = default_layout;
|
||||
} else {
|
||||
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
|
||||
mdname(mddev));
|
||||
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
|
||||
err = -ENOTSUPP;
|
||||
goto abort;
|
||||
}
|
||||
/*
|
||||
* now since we have the hard sector sizes, we can make sure
|
||||
* chunk size is a multiple of that sector size
|
||||
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
(unsigned long long)smallest->sectors);
|
||||
}
|
||||
|
||||
if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
|
||||
conf->layout = RAID0_ORIG_LAYOUT;
|
||||
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
|
||||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = mddev->layout;
|
||||
} else if (default_layout == RAID0_ORIG_LAYOUT ||
|
||||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = default_layout;
|
||||
} else {
|
||||
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
|
||||
mdname(mddev));
|
||||
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
|
||||
err = -EOPNOTSUPP;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
|
||||
*private_conf = conf;
|
||||
|
||||
@@ -399,7 +400,6 @@ static int raid0_run(struct mddev *mddev)
|
||||
conf = mddev->private;
|
||||
if (mddev->queue) {
|
||||
struct md_rdev *rdev;
|
||||
bool discard_supported = false;
|
||||
|
||||
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
@@ -412,13 +412,7 @@ static int raid0_run(struct mddev *mddev)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
discard_supported = true;
|
||||
}
|
||||
if (!discard_supported)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
}
|
||||
|
||||
/* calculate array device size */
|
||||
|
||||
@@ -165,9 +165,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||
* Allocate bios : 1 for reading, n-1 for writing
|
||||
*/
|
||||
for (j = pi->raid_disks ; j-- ; ) {
|
||||
bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
|
||||
bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
|
||||
if (!bio)
|
||||
goto out_free_bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
|
||||
r1_bio->bios[j] = bio;
|
||||
}
|
||||
/*
|
||||
@@ -206,8 +207,10 @@ out_free_pages:
|
||||
resync_free_pages(&rps[j]);
|
||||
|
||||
out_free_bio:
|
||||
while (++j < pi->raid_disks)
|
||||
bio_put(r1_bio->bios[j]);
|
||||
while (++j < pi->raid_disks) {
|
||||
bio_uninit(r1_bio->bios[j]);
|
||||
kfree(r1_bio->bios[j]);
|
||||
}
|
||||
kfree(rps);
|
||||
|
||||
out_free_r1bio:
|
||||
@@ -225,7 +228,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
|
||||
for (i = pi->raid_disks; i--; ) {
|
||||
rp = get_resync_pages(r1bio->bios[i]);
|
||||
resync_free_pages(rp);
|
||||
bio_put(r1bio->bios[i]);
|
||||
bio_uninit(r1bio->bios[i]);
|
||||
kfree(r1bio->bios[i]);
|
||||
}
|
||||
|
||||
/* resync pages array stored in the 1st bio's .bi_private */
|
||||
@@ -704,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
/* At least two disks to choose from so failfast is OK */
|
||||
set_bit(R1BIO_FailFast, &r1_bio->state);
|
||||
|
||||
nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
|
||||
nonrot = bdev_nonrot(rdev->bdev);
|
||||
has_nonrot_disk |= nonrot;
|
||||
pending = atomic_read(&rdev->nr_pending);
|
||||
dist = abs(this_sector - conf->mirrors[disk].head_position);
|
||||
@@ -802,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@@ -1637,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
|
||||
seq_printf(seq, "]");
|
||||
}
|
||||
|
||||
/**
|
||||
* raid1_error() - RAID1 error handler.
|
||||
* @mddev: affected md device.
|
||||
* @rdev: member device to fail.
|
||||
*
|
||||
* The routine acknowledges &rdev failure and determines new @mddev state.
|
||||
* If it failed, then:
|
||||
* - &MD_BROKEN flag is set in &mddev->flags.
|
||||
* - recovery is disabled.
|
||||
* Otherwise, it must be degraded:
|
||||
* - recovery is interrupted.
|
||||
* - &mddev->degraded is bumped.
|
||||
*
|
||||
* @rdev is marked as &Faulty excluding case when array is failed and
|
||||
* &mddev->fail_last_dev is off.
|
||||
*/
|
||||
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct r1conf *conf = mddev->private;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If it is not operational, then we have already marked it as dead
|
||||
* else if it is the last working disks with "fail_last_dev == false",
|
||||
* ignore the error, let the next level up know.
|
||||
* else mark the drive as failed
|
||||
*/
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
|
||||
&& (conf->raid_disks - mddev->degraded) == 1) {
|
||||
/*
|
||||
* Don't fail the drive, act as though we were just a
|
||||
* normal single drive.
|
||||
* However don't try a recovery from this drive as
|
||||
* it is very likely to fail.
|
||||
*/
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) &&
|
||||
(conf->raid_disks - mddev->degraded) == 1) {
|
||||
set_bit(MD_BROKEN, &mddev->flags);
|
||||
|
||||
if (!mddev->fail_last_dev) {
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags))
|
||||
@@ -1826,8 +1839,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
@@ -3106,7 +3117,6 @@ static int raid1_run(struct mddev *mddev)
|
||||
int i;
|
||||
struct md_rdev *rdev;
|
||||
int ret;
|
||||
bool discard_supported = false;
|
||||
|
||||
if (mddev->level != 1) {
|
||||
pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n",
|
||||
@@ -3141,8 +3151,6 @@ static int raid1_run(struct mddev *mddev)
|
||||
continue;
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
discard_supported = true;
|
||||
}
|
||||
|
||||
mddev->degraded = 0;
|
||||
@@ -3179,15 +3187,6 @@ static int raid1_run(struct mddev *mddev)
|
||||
|
||||
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
|
||||
|
||||
if (mddev->queue) {
|
||||
if (discard_supported)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
}
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret) {
|
||||
md_unregister_thread(&mddev->thread);
|
||||
|
||||
@@ -145,15 +145,17 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||
* Allocate bios.
|
||||
*/
|
||||
for (j = nalloc ; j-- ; ) {
|
||||
bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
|
||||
bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
|
||||
if (!bio)
|
||||
goto out_free_bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
|
||||
r10_bio->devs[j].bio = bio;
|
||||
if (!conf->have_replacement)
|
||||
continue;
|
||||
bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
|
||||
bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
|
||||
if (!bio)
|
||||
goto out_free_bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
|
||||
r10_bio->devs[j].repl_bio = bio;
|
||||
}
|
||||
/*
|
||||
@@ -197,9 +199,11 @@ out_free_pages:
|
||||
out_free_bio:
|
||||
for ( ; j < nalloc; j++) {
|
||||
if (r10_bio->devs[j].bio)
|
||||
bio_put(r10_bio->devs[j].bio);
|
||||
bio_uninit(r10_bio->devs[j].bio);
|
||||
kfree(r10_bio->devs[j].bio);
|
||||
if (r10_bio->devs[j].repl_bio)
|
||||
bio_put(r10_bio->devs[j].repl_bio);
|
||||
bio_uninit(r10_bio->devs[j].repl_bio);
|
||||
kfree(r10_bio->devs[j].repl_bio);
|
||||
}
|
||||
kfree(rps);
|
||||
out_free_r10bio:
|
||||
@@ -220,12 +224,15 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
|
||||
if (bio) {
|
||||
rp = get_resync_pages(bio);
|
||||
resync_free_pages(rp);
|
||||
bio_put(bio);
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
}
|
||||
|
||||
bio = r10bio->devs[j].repl_bio;
|
||||
if (bio)
|
||||
bio_put(bio);
|
||||
if (bio) {
|
||||
bio_uninit(bio);
|
||||
kfree(bio);
|
||||
}
|
||||
}
|
||||
|
||||
/* resync pages array stored in the 1st bio's .bi_private */
|
||||
@@ -796,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
||||
if (!do_balance)
|
||||
break;
|
||||
|
||||
nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
|
||||
nonrot = bdev_nonrot(rdev->bdev);
|
||||
has_nonrot_disk |= nonrot;
|
||||
pending = atomic_read(&rdev->nr_pending);
|
||||
if (min_pending > pending && nonrot) {
|
||||
@@ -888,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf)
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@@ -1083,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
!bdev_max_discard_sectors(bio->bi_bdev)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@@ -1963,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
|
||||
_enough(conf, 1, ignore);
|
||||
}
|
||||
|
||||
/**
|
||||
* raid10_error() - RAID10 error handler.
|
||||
* @mddev: affected md device.
|
||||
* @rdev: member device to fail.
|
||||
*
|
||||
* The routine acknowledges @rdev failure and determines the new @mddev state.
|
||||
* If it failed, then:
|
||||
* - &MD_BROKEN flag is set in &mddev->flags.
|
||||
* Otherwise, it must be degraded:
|
||||
* - recovery is interrupted.
|
||||
* - &mddev->degraded is bumped.
|
||||
|
||||
* @rdev is marked as &Faulty, except when the array has failed and
|
||||
* &mddev->fail_last_dev is off.
|
||||
*/
|
||||
static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct r10conf *conf = mddev->private;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If it is not operational, then we have already marked it as dead
|
||||
* else if it is the last working disk with "fail_last_dev == false",
|
||||
* ignore the error, let the next level up know.
|
||||
* else mark the drive as failed
|
||||
*/
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
|
||||
&& !enough(conf, rdev->raid_disk)) {
|
||||
/*
|
||||
* Don't fail the drive, just return an IO error.
|
||||
*/
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
|
||||
set_bit(MD_BROKEN, &mddev->flags);
|
||||
|
||||
if (!mddev->fail_last_dev) {
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags))
|
||||
mddev->degraded++;
|
||||
/*
|
||||
* If recovery is running, make sure it aborts.
|
||||
*/
|
||||
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
@@ -2144,8 +2159,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
rcu_assign_pointer(p->rdev, rdev);
|
||||
break;
|
||||
}
|
||||
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
|
||||
print_conf(conf);
|
||||
return err;
|
||||
@@ -4069,7 +4082,6 @@ static int raid10_run(struct mddev *mddev)
|
||||
sector_t size;
|
||||
sector_t min_offset_diff = 0;
|
||||
int first = 1;
|
||||
bool discard_supported = false;
|
||||
|
||||
if (mddev_init_writes_pending(mddev) < 0)
|
||||
return -ENOMEM;
|
||||
@@ -4140,20 +4152,9 @@ static int raid10_run(struct mddev *mddev)
|
||||
rdev->data_offset << 9);
|
||||
|
||||
disk->head_position = 0;
|
||||
|
||||
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
discard_supported = true;
|
||||
first = 0;
|
||||
}
|
||||
|
||||
if (mddev->queue) {
|
||||
if (discard_supported)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
}
|
||||
/* need to check that every block has at least one working mirror */
|
||||
if (!enough(conf, -1)) {
|
||||
pr_err("md/raid10:%s: not enough operational mirrors.\n",
|
||||
|
||||
@@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
|
||||
|
||||
r5l_write_super(log, end);
|
||||
|
||||
if (!blk_queue_discard(bdev_get_queue(bdev)))
|
||||
if (!bdev_max_discard_sectors(bdev))
|
||||
return;
|
||||
|
||||
mddev = log->rdev->mddev;
|
||||
@@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
|
||||
if (log->last_checkpoint < end) {
|
||||
blkdev_issue_discard(bdev,
|
||||
log->last_checkpoint + log->rdev->data_offset,
|
||||
end - log->last_checkpoint, GFP_NOIO, 0);
|
||||
end - log->last_checkpoint, GFP_NOIO);
|
||||
} else {
|
||||
blkdev_issue_discard(bdev,
|
||||
log->last_checkpoint + log->rdev->data_offset,
|
||||
log->device_size - log->last_checkpoint,
|
||||
GFP_NOIO, 0);
|
||||
GFP_NOIO);
|
||||
blkdev_issue_discard(bdev, log->rdev->data_offset, end,
|
||||
GFP_NOIO, 0);
|
||||
GFP_NOIO);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||
(unsigned long long)r_sector, dd_idx,
|
||||
(unsigned long long)sector);
|
||||
|
||||
rdev = conf->disks[dd_idx].rdev;
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
rdev = rcu_dereference_protected(
|
||||
conf->disks[dd_idx].rdev, 1);
|
||||
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
|
||||
sector >= rdev->recovery_offset)) {
|
||||
pr_debug("%s:%*s data member disk %d missing\n",
|
||||
@@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||
parity_sector = raid5_compute_sector(conf, r_sector_first + i,
|
||||
0, &disk, &sh);
|
||||
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
|
||||
parity_rdev = conf->disks[sh.pd_idx].rdev;
|
||||
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
parity_rdev = rcu_dereference_protected(
|
||||
conf->disks[sh.pd_idx].rdev, 1);
|
||||
|
||||
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
|
||||
pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
|
||||
@@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
|
||||
|
||||
for (i = 0; i < ppl_conf->count; i++) {
|
||||
struct ppl_log *log = &ppl_conf->child_logs[i];
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
struct md_rdev *rdev =
|
||||
rcu_dereference_protected(conf->disks[i].rdev, 1);
|
||||
|
||||
mutex_init(&log->io_mutex);
|
||||
spin_lock_init(&log->io_list_lock);
|
||||
|
||||
@@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
|
||||
}
|
||||
|
||||
static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
|
||||
__acquires(&conf->device_lock)
|
||||
{
|
||||
spin_lock_irq(conf->hash_locks + hash);
|
||||
spin_lock(&conf->device_lock);
|
||||
}
|
||||
|
||||
static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
|
||||
__releases(&conf->device_lock)
|
||||
{
|
||||
spin_unlock(&conf->device_lock);
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
}
|
||||
|
||||
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
__acquires(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
spin_lock_irq(conf->hash_locks);
|
||||
@@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
}
|
||||
|
||||
static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
__releases(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
spin_unlock(&conf->device_lock);
|
||||
@@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
|
||||
}
|
||||
|
||||
static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
|
||||
__must_hold(&sh->raid_conf->device_lock)
|
||||
{
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
struct r5worker_group *group;
|
||||
@@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
|
||||
|
||||
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
int injournal = 0; /* number of date pages with R5_InJournal */
|
||||
@@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
|
||||
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
if (atomic_dec_and_test(&sh->count))
|
||||
do_release_stripe(conf, sh, temp_inactive_list);
|
||||
@@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
|
||||
}
|
||||
}
|
||||
|
||||
/* should hold conf->device_lock already */
|
||||
static int release_stripe_list(struct r5conf *conf,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *sh, *t;
|
||||
int count = 0;
|
||||
@@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
|
||||
* This is because some failed devices may only affect one
|
||||
* of the two sections, and some non-in_sync devices may
|
||||
* be insync in the section most affected by failed devices.
|
||||
*
|
||||
* Most calls to this function hold &conf->device_lock. Calls
|
||||
* in raid5_run() do not require the lock as no other threads
|
||||
* have been started yet.
|
||||
*/
|
||||
int raid5_calc_degraded(struct r5conf *conf)
|
||||
{
|
||||
@@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||
return degraded;
|
||||
}
|
||||
|
||||
static int has_failed(struct r5conf *conf)
|
||||
static bool has_failed(struct r5conf *conf)
|
||||
{
|
||||
int degraded;
|
||||
int degraded = conf->mddev->degraded;
|
||||
|
||||
if (conf->mddev->reshape_position == MaxSector)
|
||||
return conf->mddev->degraded > conf->max_degraded;
|
||||
if (test_bit(MD_BROKEN, &conf->mddev->flags))
|
||||
return true;
|
||||
|
||||
degraded = raid5_calc_degraded(conf);
|
||||
if (degraded > conf->max_degraded)
|
||||
return 1;
|
||||
return 0;
|
||||
if (conf->mddev->reshape_position != MaxSector)
|
||||
degraded = raid5_calc_degraded(conf);
|
||||
|
||||
return degraded > conf->max_degraded;
|
||||
}
|
||||
|
||||
struct stripe_head *
|
||||
@@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
|
||||
conf->slab_cache = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and can be used when
|
||||
* it is known that the nr_pending of the rdev is elevated.
|
||||
*/
|
||||
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and should be used
|
||||
* when it is known that the mddev_lock() is held. This is safe
|
||||
* seeing raid5_remove_disk() has the same lock held.
|
||||
*/
|
||||
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
|
||||
struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
}
|
||||
|
||||
static void raid5_end_read_request(struct bio * bi)
|
||||
{
|
||||
struct stripe_head *sh = bi->bi_private;
|
||||
@@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
|
||||
* In that case it moved down to 'rdev'.
|
||||
* rdev is not removed until all requests are finished.
|
||||
*/
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (!rdev)
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
|
||||
if (use_new_offset(conf, sh))
|
||||
s = sh->sector + rdev->new_data_offset;
|
||||
@@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
|
||||
for (i = 0 ; i < disks; i++) {
|
||||
if (bi == &sh->dev[i].req) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
break;
|
||||
}
|
||||
if (bi == &sh->dev[i].rreq) {
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (rdev)
|
||||
replacement = 1;
|
||||
else
|
||||
@@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
* replaced it. rdev is not removed
|
||||
* until all requests are finished.
|
||||
*/
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
unsigned long flags;
|
||||
pr_debug("raid456: error called\n");
|
||||
|
||||
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
|
||||
mdname(mddev), bdevname(rdev->bdev, b));
|
||||
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) &&
|
||||
mddev->degraded == conf->max_degraded) {
|
||||
/*
|
||||
* Don't allow to achieve failed state
|
||||
* Don't try to recover this device
|
||||
*/
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
mddev->degraded = raid5_calc_degraded(conf);
|
||||
|
||||
if (has_failed(conf)) {
|
||||
set_bit(MD_BROKEN, &conf->mddev->flags);
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
|
||||
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
|
||||
mdname(mddev), mddev->degraded, conf->raid_disks);
|
||||
} else {
|
||||
pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev), conf->raid_disks - mddev->degraded);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_mask_bits(&mddev->sb_flags, 0,
|
||||
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
|
||||
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev),
|
||||
bdevname(rdev->bdev, b),
|
||||
mdname(mddev),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
r5c_update_on_rdev_error(mddev, rdev);
|
||||
}
|
||||
|
||||
@@ -5213,23 +5243,23 @@ finish:
|
||||
struct r5dev *dev = &sh->dev[i];
|
||||
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
||||
/* We own a safe reference to the rdev */
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (!rdev)
|
||||
/* rdev has been moved down */
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
@@ -5256,6 +5286,7 @@ finish:
|
||||
}
|
||||
|
||||
static void raid5_activate_delayed(struct r5conf *conf)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
|
||||
while (!list_empty(&conf->delayed_list)) {
|
||||
@@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
|
||||
}
|
||||
|
||||
static void activate_bit_delay(struct r5conf *conf,
|
||||
struct list_head *temp_inactive_list)
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
/* device_lock is held */
|
||||
struct list_head head;
|
||||
list_add(&head, &conf->bitmap_list);
|
||||
list_del_init(&conf->bitmap_list);
|
||||
@@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||
* handle_list.
|
||||
*/
|
||||
static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *sh, *tmp;
|
||||
struct list_head *handle_list = NULL;
|
||||
@@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
|
||||
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
|
||||
still_degraded = 1;
|
||||
@@ -6371,8 +6403,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
|
||||
static int handle_active_stripes(struct r5conf *conf, int group,
|
||||
struct r5worker *worker,
|
||||
struct list_head *temp_inactive_list)
|
||||
__releases(&conf->device_lock)
|
||||
__acquires(&conf->device_lock)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
|
||||
int i, batch_size = 0, hash;
|
||||
@@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
int i;
|
||||
int group_cnt;
|
||||
struct r5worker_group *new_group;
|
||||
int ret;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
if (mddev->new_level != 5
|
||||
&& mddev->new_level != 4
|
||||
@@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
spin_lock_init(&conf->device_lock);
|
||||
seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
|
||||
mutex_init(&conf->cache_size_mutex);
|
||||
|
||||
init_waitqueue_head(&conf->wait_for_quiescent);
|
||||
init_waitqueue_head(&conf->wait_for_stripe);
|
||||
init_waitqueue_head(&conf->wait_for_overlap);
|
||||
@@ -7242,7 +7274,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (test_bit(Journal, &rdev->flags))
|
||||
continue;
|
||||
if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
|
||||
if (bdev_nonrot(rdev->bdev)) {
|
||||
conf->batch_bio_dispatch = false;
|
||||
break;
|
||||
}
|
||||
@@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
|
||||
conf->level = mddev->new_level;
|
||||
conf->chunk_sectors = mddev->new_chunk_sectors;
|
||||
if (raid5_alloc_percpu(conf) != 0)
|
||||
ret = raid5_alloc_percpu(conf);
|
||||
if (ret)
|
||||
goto abort;
|
||||
|
||||
pr_debug("raid456: run(%s) called.\n", mdname(mddev));
|
||||
|
||||
ret = -EIO;
|
||||
rdev_for_each(rdev, mddev) {
|
||||
raid_disk = rdev->raid_disk;
|
||||
if (raid_disk >= max_disks
|
||||
@@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (test_bit(Replacement, &rdev->flags)) {
|
||||
if (disk->replacement)
|
||||
goto abort;
|
||||
disk->replacement = rdev;
|
||||
RCU_INIT_POINTER(disk->replacement, rdev);
|
||||
} else {
|
||||
if (disk->rdev)
|
||||
goto abort;
|
||||
disk->rdev = rdev;
|
||||
RCU_INIT_POINTER(disk->rdev, rdev);
|
||||
}
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags)) {
|
||||
@@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (grow_stripes(conf, conf->min_nr_stripes)) {
|
||||
pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
|
||||
mdname(mddev), memory);
|
||||
ret = -ENOMEM;
|
||||
goto abort;
|
||||
} else
|
||||
pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
|
||||
@@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
conf->shrinker.count_objects = raid5_cache_count;
|
||||
conf->shrinker.batch = 128;
|
||||
conf->shrinker.flags = 0;
|
||||
if (register_shrinker(&conf->shrinker)) {
|
||||
ret = register_shrinker(&conf->shrinker);
|
||||
if (ret) {
|
||||
pr_warn("md/raid:%s: couldn't register shrinker.\n",
|
||||
mdname(mddev));
|
||||
goto abort;
|
||||
@@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (!conf->thread) {
|
||||
pr_warn("md/raid:%s: couldn't allocate thread.\n",
|
||||
mdname(mddev));
|
||||
ret = -ENOMEM;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
return conf;
|
||||
|
||||
abort:
|
||||
if (conf) {
|
||||
if (conf)
|
||||
free_conf(conf);
|
||||
return ERR_PTR(-EIO);
|
||||
} else
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
|
||||
@@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
|
||||
|
||||
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
|
||||
i++) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
||||
if (!rdev && conf->disks[i].replacement) {
|
||||
/* The replacement is all we have yet */
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[i].replacement);
|
||||
conf->disks[i].replacement = NULL;
|
||||
clear_bit(Replacement, &rdev->flags);
|
||||
conf->disks[i].rdev = rdev;
|
||||
rcu_assign_pointer(conf->disks[i].rdev, rdev);
|
||||
}
|
||||
if (!rdev)
|
||||
continue;
|
||||
if (conf->disks[i].replacement &&
|
||||
if (rcu_access_pointer(conf->disks[i].replacement) &&
|
||||
conf->reshape_progress != MaxSector) {
|
||||
/* replacements and reshape simply do not mix. */
|
||||
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
|
||||
@@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
|
||||
*/
|
||||
stripe = stripe * PAGE_SIZE;
|
||||
stripe = roundup_pow_of_two(stripe);
|
||||
mddev->queue->limits.discard_alignment = stripe;
|
||||
mddev->queue->limits.discard_granularity = stripe;
|
||||
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
@@ -7776,14 +7811,10 @@ static int raid5_run(struct mddev *mddev)
|
||||
* A better idea might be to turn DISCARD into WRITE_ZEROES
|
||||
* requests, as that is required to be safe.
|
||||
*/
|
||||
if (devices_handle_discard_safely &&
|
||||
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
|
||||
mddev->queue->limits.discard_granularity >= stripe)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
if (!devices_handle_discard_safely ||
|
||||
mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
|
||||
mddev->queue->limits.discard_granularity < stripe)
|
||||
blk_queue_max_discard_sectors(mddev->queue, 0);
|
||||
|
||||
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
|
||||
}
|
||||
@@ -7832,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
|
||||
|
||||
static void print_raid5_conf (struct r5conf *conf)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int i;
|
||||
struct disk_info *tmp;
|
||||
|
||||
pr_debug("RAID conf printout:\n");
|
||||
if (!conf) {
|
||||
@@ -7844,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
|
||||
conf->raid_disks,
|
||||
conf->raid_disks - conf->mddev->degraded);
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
tmp = conf->disks + i;
|
||||
if (tmp->rdev)
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
if (rdev)
|
||||
pr_debug(" disk %d, o:%d, dev:%s\n",
|
||||
i, !test_bit(Faulty, &tmp->rdev->flags),
|
||||
bdevname(tmp->rdev->bdev, b));
|
||||
i, !test_bit(Faulty, &rdev->flags),
|
||||
bdevname(rdev->bdev, b));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int raid5_spare_active(struct mddev *mddev)
|
||||
{
|
||||
int i;
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct disk_info *tmp;
|
||||
struct md_rdev *rdev, *replacement;
|
||||
int count = 0;
|
||||
unsigned long flags;
|
||||
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
tmp = conf->disks + i;
|
||||
if (tmp->replacement
|
||||
&& tmp->replacement->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &tmp->replacement->flags)
|
||||
&& !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
|
||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
||||
replacement = rdev_mdlock_deref(mddev,
|
||||
conf->disks[i].replacement);
|
||||
if (replacement
|
||||
&& replacement->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &replacement->flags)
|
||||
&& !test_and_set_bit(In_sync, &replacement->flags)) {
|
||||
/* Replacement has just become active. */
|
||||
if (!tmp->rdev
|
||||
|| !test_and_clear_bit(In_sync, &tmp->rdev->flags))
|
||||
if (!rdev
|
||||
|| !test_and_clear_bit(In_sync, &rdev->flags))
|
||||
count++;
|
||||
if (tmp->rdev) {
|
||||
if (rdev) {
|
||||
/* Replaced device not technically faulty,
|
||||
* but we need to be sure it gets removed
|
||||
* and never re-added.
|
||||
*/
|
||||
set_bit(Faulty, &tmp->rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
sysfs_notify_dirent_safe(
|
||||
tmp->rdev->sysfs_state);
|
||||
rdev->sysfs_state);
|
||||
}
|
||||
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
|
||||
} else if (tmp->rdev
|
||||
&& tmp->rdev->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &tmp->rdev->flags)
|
||||
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
|
||||
sysfs_notify_dirent_safe(replacement->sysfs_state);
|
||||
} else if (rdev
|
||||
&& rdev->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_and_set_bit(In_sync, &rdev->flags)) {
|
||||
count++;
|
||||
sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
}
|
||||
}
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
@@ -7902,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
struct r5conf *conf = mddev->private;
|
||||
int err = 0;
|
||||
int number = rdev->raid_disk;
|
||||
struct md_rdev **rdevp;
|
||||
struct md_rdev __rcu **rdevp;
|
||||
struct disk_info *p = conf->disks + number;
|
||||
struct md_rdev *tmp;
|
||||
|
||||
print_raid5_conf(conf);
|
||||
if (test_bit(Journal, &rdev->flags) && conf->log) {
|
||||
@@ -7921,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
log_exit(conf);
|
||||
return 0;
|
||||
}
|
||||
if (rdev == p->rdev)
|
||||
if (rdev == rcu_access_pointer(p->rdev))
|
||||
rdevp = &p->rdev;
|
||||
else if (rdev == p->replacement)
|
||||
else if (rdev == rcu_access_pointer(p->replacement))
|
||||
rdevp = &p->replacement;
|
||||
else
|
||||
return 0;
|
||||
@@ -7943,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
!has_failed(conf) &&
|
||||
(!p->replacement || p->replacement == rdev) &&
|
||||
(!rcu_access_pointer(p->replacement) ||
|
||||
rcu_access_pointer(p->replacement) == rdev) &&
|
||||
number < conf->raid_disks) {
|
||||
err = -EBUSY;
|
||||
goto abort;
|
||||
}
|
||||
*rdevp = NULL;
|
||||
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
synchronize_rcu();
|
||||
if (atomic_read(&rdev->nr_pending)) {
|
||||
/* lost the race, try later */
|
||||
err = -EBUSY;
|
||||
*rdevp = rdev;
|
||||
rcu_assign_pointer(*rdevp, rdev);
|
||||
}
|
||||
}
|
||||
if (!err) {
|
||||
@@ -7962,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (err)
|
||||
goto abort;
|
||||
}
|
||||
if (p->replacement) {
|
||||
|
||||
tmp = rcu_access_pointer(p->replacement);
|
||||
if (tmp) {
|
||||
/* We must have just cleared 'rdev' */
|
||||
p->rdev = p->replacement;
|
||||
clear_bit(Replacement, &p->replacement->flags);
|
||||
rcu_assign_pointer(p->rdev, tmp);
|
||||
clear_bit(Replacement, &tmp->flags);
|
||||
smp_mb(); /* Make sure other CPUs may see both as identical
|
||||
* but will never see neither - if they are careful
|
||||
*/
|
||||
p->replacement = NULL;
|
||||
rcu_assign_pointer(p->replacement, NULL);
|
||||
|
||||
if (!err)
|
||||
err = log_modify(conf, p->rdev, true);
|
||||
err = log_modify(conf, tmp, true);
|
||||
}
|
||||
|
||||
clear_bit(WantReplacement, &rdev->flags);
|
||||
@@ -7988,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
int ret, err = -EEXIST;
|
||||
int disk;
|
||||
struct disk_info *p;
|
||||
struct md_rdev *tmp;
|
||||
int first = 0;
|
||||
int last = conf->raid_disks - 1;
|
||||
|
||||
@@ -8045,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
}
|
||||
for (disk = first; disk <= last; disk++) {
|
||||
p = conf->disks + disk;
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
tmp = rdev_mdlock_deref(mddev, p->rdev);
|
||||
if (test_bit(WantReplacement, &tmp->flags) &&
|
||||
p->replacement == NULL) {
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
@@ -8336,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
|
||||
static void raid5_finish_reshape(struct mddev *mddev)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
|
||||
@@ -8347,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
|
||||
for (d = conf->raid_disks ;
|
||||
d < conf->raid_disks - mddev->delta_disks;
|
||||
d++) {
|
||||
struct md_rdev *rdev = conf->disks[d].rdev;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[d].rdev);
|
||||
if (rdev)
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
rdev = conf->disks[d].replacement;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[d].replacement);
|
||||
if (rdev)
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
|
||||
@@ -473,7 +473,8 @@ enum {
|
||||
*/
|
||||
|
||||
struct disk_info {
|
||||
struct md_rdev *rdev, *replacement;
|
||||
struct md_rdev __rcu *rdev;
|
||||
struct md_rdev __rcu *replacement;
|
||||
struct page *extra_page; /* extra page to use in prexor */
|
||||
};
|
||||
|
||||
@@ -560,6 +561,16 @@ struct r5pending_data {
|
||||
struct bio_list bios;
|
||||
};
|
||||
|
||||
struct raid5_percpu {
|
||||
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
||||
void *scribble; /* space for constructing buffer
|
||||
* lists and performing address
|
||||
* conversions
|
||||
*/
|
||||
int scribble_obj_size;
|
||||
local_lock_t lock;
|
||||
};
|
||||
|
||||
struct r5conf {
|
||||
struct hlist_head *stripe_hashtbl;
|
||||
/* only protect corresponding hash list and inactive_list */
|
||||
@@ -635,15 +646,7 @@ struct r5conf {
|
||||
*/
|
||||
int recovery_disabled;
|
||||
/* per cpu variables */
|
||||
struct raid5_percpu {
|
||||
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
||||
void *scribble; /* space for constructing buffer
|
||||
* lists and performing address
|
||||
* conversions
|
||||
*/
|
||||
int scribble_obj_size;
|
||||
local_lock_t lock;
|
||||
} __percpu *percpu;
|
||||
struct raid5_percpu __percpu *percpu;
|
||||
int scribble_disks;
|
||||
int scribble_sectors;
|
||||
struct hlist_node node;
|
||||
|
||||
@@ -183,14 +183,13 @@ static void mmc_queue_setup_discard(struct request_queue *q,
|
||||
if (!max_discard)
|
||||
return;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_max_discard_sectors(q, max_discard);
|
||||
q->limits.discard_granularity = card->pref_erase << 9;
|
||||
/* granularity must not be greater than max. discard */
|
||||
if (card->pref_erase > max_discard)
|
||||
q->limits.discard_granularity = SECTOR_SIZE;
|
||||
if (mmc_can_secure_erase_trim(card))
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
|
||||
blk_queue_max_secure_erase_sectors(q, max_discard);
|
||||
}
|
||||
|
||||
static unsigned short mmc_get_max_segments(struct mmc_host *host)
|
||||
|
||||
@@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
|
||||
|
||||
if (tr->discard) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
|
||||
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
|
||||
new->rq->limits.discard_granularity = tr->blksize;
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include "nvme.h"
|
||||
|
||||
#ifdef CONFIG_NVME_VERBOSE_ERRORS
|
||||
@@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
|
||||
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
|
||||
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
|
||||
[NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
|
||||
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
|
||||
[NVME_SC_LBA_RANGE] = "LBA Out of Range",
|
||||
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
|
||||
@@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_COMPARE_FAILED] = "Compare Failure",
|
||||
[NVME_SC_ACCESS_DENIED] = "Access Denied",
|
||||
[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
|
||||
[NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
|
||||
[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
|
||||
[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
|
||||
[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
|
||||
[NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
|
||||
[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
|
||||
[NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
|
||||
};
|
||||
|
||||
const unsigned char *nvme_get_error_status_str(u16 status)
|
||||
|
||||
@@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
|
||||
|
||||
rq->timeout = ctrl->kato * HZ;
|
||||
rq->end_io_data = ctrl;
|
||||
rq->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
|
||||
}
|
||||
|
||||
@@ -1426,6 +1427,32 @@ out_free_id:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_id_ns_cs_indep **id)
|
||||
{
|
||||
struct nvme_command c = {
|
||||
.identify.opcode = nvme_admin_identify,
|
||||
.identify.nsid = cpu_to_le32(nsid),
|
||||
.identify.cns = NVME_ID_CNS_NS_CS_INDEP,
|
||||
};
|
||||
int ret;
|
||||
|
||||
*id = kmalloc(sizeof(**id), GFP_KERNEL);
|
||||
if (!*id)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device,
|
||||
"Identify namespace (CS independent) failed (%d)\n",
|
||||
ret);
|
||||
kfree(*id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
|
||||
unsigned int dword11, void *buffer, size_t buflen, u32 *result)
|
||||
{
|
||||
@@ -1621,20 +1648,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
|
||||
u32 size = queue_logical_block_size(queue);
|
||||
|
||||
if (ctrl->max_discard_sectors == 0) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
|
||||
blk_queue_max_discard_sectors(queue, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
|
||||
NVME_DSM_MAX_RANGES);
|
||||
|
||||
queue->limits.discard_alignment = 0;
|
||||
queue->limits.discard_granularity = size;
|
||||
|
||||
/* If discard is already enabled, don't reset queue limits */
|
||||
if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
|
||||
if (queue->limits.max_discard_sectors)
|
||||
return;
|
||||
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
|
||||
ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
|
||||
|
||||
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
|
||||
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
|
||||
|
||||
@@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
|
||||
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
|
||||
}
|
||||
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
|
||||
blk_queue_dma_alignment(q, 7);
|
||||
blk_queue_dma_alignment(q, 3);
|
||||
blk_queue_write_cache(q, vwc, vwc);
|
||||
}
|
||||
|
||||
@@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
|
||||
.pr_ops = &nvme_pr_ops,
|
||||
};
|
||||
|
||||
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
|
||||
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
|
||||
{
|
||||
unsigned long timeout =
|
||||
((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
|
||||
unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
|
||||
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
|
||||
int ret;
|
||||
|
||||
@@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
|
||||
usleep_range(1000, 2000);
|
||||
if (fatal_signal_pending(current))
|
||||
return -EINTR;
|
||||
if (time_after(jiffies, timeout)) {
|
||||
if (time_after(jiffies, timeout_jiffies)) {
|
||||
dev_err(ctrl->device,
|
||||
"Device not ready; aborting %s, CSTS=0x%x\n",
|
||||
enabled ? "initialisation" : "reset", csts);
|
||||
@@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
|
||||
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
|
||||
msleep(NVME_QUIRK_DELAY_AMOUNT);
|
||||
|
||||
return nvme_wait_ready(ctrl, ctrl->cap, false);
|
||||
return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
|
||||
|
||||
int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
unsigned dev_page_min;
|
||||
u32 timeout;
|
||||
int ret;
|
||||
|
||||
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
|
||||
@@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
ctrl->ctrl_config = NVME_CC_CSS_CSI;
|
||||
else
|
||||
ctrl->ctrl_config = NVME_CC_CSS_NVM;
|
||||
|
||||
if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
|
||||
u32 crto;
|
||||
|
||||
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
|
||||
if (ret) {
|
||||
dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
|
||||
ctrl->ctrl_config |= NVME_CC_CRIME;
|
||||
timeout = NVME_CRTO_CRIMT(crto);
|
||||
} else {
|
||||
timeout = NVME_CRTO_CRWMT(crto);
|
||||
}
|
||||
} else {
|
||||
timeout = NVME_CAP_TIMEOUT(ctrl->cap);
|
||||
}
|
||||
|
||||
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
|
||||
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
|
||||
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
|
||||
@@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
|
||||
if (ret)
|
||||
return ret;
|
||||
return nvme_wait_ready(ctrl, ctrl->cap, true);
|
||||
return nvme_wait_ready(ctrl, timeout, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
|
||||
|
||||
@@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
|
||||
|
||||
if (id->dmrl)
|
||||
ctrl->max_discard_segments = id->dmrl;
|
||||
if (id->dmrsl)
|
||||
ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
|
||||
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
|
||||
if (id->wzsl)
|
||||
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
|
||||
|
||||
@@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = nvme_init_non_mdts_limits(ctrl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = nvme_configure_apst(ctrl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@@ -4092,11 +4137,26 @@ out:
|
||||
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
{
|
||||
struct nvme_ns_ids ids = { };
|
||||
struct nvme_id_ns_cs_indep *id;
|
||||
struct nvme_ns *ns;
|
||||
bool ready = true;
|
||||
|
||||
if (nvme_identify_ns_descs(ctrl, nsid, &ids))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check if the namespace is ready. If not ignore it, we will get an
|
||||
* AEN once it becomes ready and restart the scan.
|
||||
*/
|
||||
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
|
||||
!nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
|
||||
ready = id->nstat & NVME_NSTAT_NRDY;
|
||||
kfree(id);
|
||||
}
|
||||
|
||||
if (!ready)
|
||||
return;
|
||||
|
||||
ns = nvme_find_get_ns(ctrl, nsid);
|
||||
if (ns) {
|
||||
nvme_validate_ns(ns, &ids);
|
||||
@@ -4239,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_ctrl *ctrl =
|
||||
container_of(work, struct nvme_ctrl, scan_work);
|
||||
int ret;
|
||||
|
||||
/* No tagset on a live ctrl means IO queues could not created */
|
||||
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Identify controller limits can change at controller reset due to
|
||||
* new firmware download, even though it is not common we cannot ignore
|
||||
* such scenario. Controller's non-mdts limits are reported in the unit
|
||||
* of logical blocks that is dependent on the format of attached
|
||||
* namespace. Hence re-read the limits at the time of ns allocation.
|
||||
*/
|
||||
ret = nvme_init_non_mdts_limits(ctrl);
|
||||
if (ret < 0) {
|
||||
dev_warn(ctrl->device,
|
||||
"reading non-mdts-limits failed: %d\n", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
|
||||
dev_info(ctrl->device, "rescanning namespaces.\n");
|
||||
nvme_clear_changed_ns_log(ctrl);
|
||||
@@ -4841,6 +4916,8 @@ static inline void _nvme_check_size(void)
|
||||
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
|
||||
NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
|
||||
@@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
|
||||
return ctrl->subsys->subnqn;
|
||||
}
|
||||
|
||||
static inline void nvmf_complete_timed_out_request(struct request *rq)
|
||||
{
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
}
|
||||
|
||||
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
|
||||
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
|
||||
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
|
||||
|
||||
@@ -3831,6 +3831,9 @@ process_local_list:
|
||||
return count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP_FC_APPID
|
||||
/* Parse the cgroup id from a buf and return the length of cgrpid */
|
||||
static int fc_parse_cgrpid(const char *buf, u64 *id)
|
||||
{
|
||||
@@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
|
||||
}
|
||||
|
||||
/*
|
||||
* fc_update_appid: Parse and update the appid in the blkcg associated with
|
||||
* cgroupid.
|
||||
* @buf: buf contains both cgrpid and appid info
|
||||
* @count: size of the buffer
|
||||
* Parse and update the appid in the blkcg associated with the cgroupid.
|
||||
*/
|
||||
static int fc_update_appid(const char *buf, size_t count)
|
||||
static ssize_t fc_appid_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
u64 cgrp_id;
|
||||
int appid_len = 0;
|
||||
@@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count)
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t fc_appid_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ret = fc_update_appid(buf, count);
|
||||
if (ret < 0)
|
||||
return -EINVAL;
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
|
||||
static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
|
||||
#endif /* CONFIG_BLK_CGROUP_FC_APPID */
|
||||
|
||||
static struct attribute *nvme_fc_attrs[] = {
|
||||
&dev_attr_nvme_discovery.attr,
|
||||
#ifdef CONFIG_BLK_CGROUP_FC_APPID
|
||||
&dev_attr_appid_store.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
|
||||
@@ -284,6 +284,7 @@ struct nvme_ctrl {
|
||||
#endif
|
||||
u16 crdt[3];
|
||||
u16 oncs;
|
||||
u32 dmrsl;
|
||||
u16 oacs;
|
||||
u16 sqsize;
|
||||
u32 max_namespaces;
|
||||
|
||||
@@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
|
||||
nvme_init_request(abort_req, &cmd);
|
||||
|
||||
abort_req->end_io_data = NULL;
|
||||
abort_req->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(abort_req, false, abort_endio);
|
||||
|
||||
/*
|
||||
@@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
|
||||
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
|
||||
if (IS_ERR(dev->ctrl.admin_q)) {
|
||||
blk_mq_free_tag_set(&dev->admin_tagset);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!blk_get_queue(dev->ctrl.admin_q)) {
|
||||
@@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
|
||||
req->end_io_data = nvmeq;
|
||||
|
||||
init_completion(&nvmeq->delete_done);
|
||||
req->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
|
||||
nvme_del_cq_end : nvme_del_queue_end);
|
||||
return 0;
|
||||
@@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
|
||||
mutex_lock(&dev->shutdown_lock);
|
||||
if (pci_is_enabled(pdev)) {
|
||||
if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
|
||||
u32 csts = readl(dev->bar + NVME_REG_CSTS);
|
||||
|
||||
if (dev->ctrl.state == NVME_CTRL_LIVE ||
|
||||
|
||||
@@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
|
||||
struct nvme_rdma_queue *queue = req->queue;
|
||||
|
||||
nvme_rdma_stop_queue(queue);
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
nvmf_complete_timed_out_request(rq);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
|
||||
@@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
|
||||
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
|
||||
|
||||
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
nvmf_complete_timed_out_request(rq);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user