mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 19:08:57 +09:00
Merge commit 750a02ab8d ("Merge tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block") into android-mainline
Conflicts: block/blk-core.c block/blk-crypto-fallback.c block/blk-crypto.c block/keyslot-manager.c drivers/md/dm.c include/linux/blk-crypto.h include/linux/blk_types.h include/linux/keyslot-manager.h Change-Id: Ie757c41fa41e6a9aacdf123d82d4f681623a02a8 Signed-off-by: Eric Biggers <ebiggers@google.com>
This commit is contained in:
@@ -146,6 +146,7 @@ config BLK_CGROUP_IOLATENCY
|
||||
config BLK_CGROUP_IOCOST
|
||||
bool "Enable support for cost model based cgroup IO controller"
|
||||
depends on BLK_CGROUP=y
|
||||
select BLK_RQ_IO_DATA_LEN
|
||||
select BLK_RQ_ALLOC_TIME
|
||||
---help---
|
||||
Enabling this option enables the .weight interface for cost
|
||||
|
||||
@@ -6073,7 +6073,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
|
||||
* comments on bfq_init_rq for the reason behind this delayed
|
||||
* preparation.
|
||||
*/
|
||||
static void bfq_prepare_request(struct request *rq, struct bio *bio)
|
||||
static void bfq_prepare_request(struct request *rq)
|
||||
{
|
||||
/*
|
||||
* Regardless of whether we have an icq attached, we have to
|
||||
|
||||
178
block/bio.c
178
block/bio.c
@@ -753,9 +753,14 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned len, unsigned offset,
|
||||
bool *same_page)
|
||||
/*
|
||||
* Try to merge a page into a segment, while obeying the hardware segment
|
||||
* size limit. This is not for normal read/write bios, but for passthrough
|
||||
* or Zone Append operations that we can't split.
|
||||
*/
|
||||
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned len,
|
||||
unsigned offset, bool *same_page)
|
||||
{
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
@@ -770,38 +775,32 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_add_pc_page - attempt to add page to passthrough bio
|
||||
* @q: the target queue
|
||||
* @bio: destination bio
|
||||
* @page: page to add
|
||||
* @len: vec entry length
|
||||
* @offset: vec entry offset
|
||||
* @same_page: return if the merge happen inside the same page
|
||||
* bio_add_hw_page - attempt to add a page to a bio with hw constraints
|
||||
* @q: the target queue
|
||||
* @bio: destination bio
|
||||
* @page: page to add
|
||||
* @len: vec entry length
|
||||
* @offset: vec entry offset
|
||||
* @max_sectors: maximum number of sectors that can be added
|
||||
* @same_page: return if the segment has been merged inside the same page
|
||||
*
|
||||
* Attempt to add a page to the bio_vec maplist. This can fail for a
|
||||
* number of reasons, such as the bio being full or target block device
|
||||
* limitations. The target block device must allow bio's up to PAGE_SIZE,
|
||||
* so it is always possible to add a single page to an empty bio.
|
||||
*
|
||||
* This should only be used by passthrough bios.
|
||||
* Add a page to a bio while respecting the hardware max_sectors, max_segment
|
||||
* and gap limitations.
|
||||
*/
|
||||
int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
bool *same_page)
|
||||
unsigned int max_sectors, bool *same_page)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
|
||||
/*
|
||||
* cloned bio must not modify vec list
|
||||
*/
|
||||
if (unlikely(bio_flagged(bio, BIO_CLONED)))
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return 0;
|
||||
|
||||
if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
|
||||
if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
|
||||
return 0;
|
||||
|
||||
if (bio->bi_vcnt > 0) {
|
||||
if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
|
||||
if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
|
||||
return len;
|
||||
|
||||
/*
|
||||
@@ -828,11 +827,27 @@ int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_add_pc_page - attempt to add page to passthrough bio
|
||||
* @q: the target queue
|
||||
* @bio: destination bio
|
||||
* @page: page to add
|
||||
* @len: vec entry length
|
||||
* @offset: vec entry offset
|
||||
*
|
||||
* Attempt to add a page to the bio_vec maplist. This can fail for a
|
||||
* number of reasons, such as the bio being full or target block device
|
||||
* limitations. The target block device must allow bio's up to PAGE_SIZE,
|
||||
* so it is always possible to add a single page to an empty bio.
|
||||
*
|
||||
* This should only be used by passthrough bios.
|
||||
*/
|
||||
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset)
|
||||
{
|
||||
bool same_page = false;
|
||||
return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
|
||||
return bio_add_hw_page(q, bio, page, len, offset,
|
||||
queue_max_hw_sectors(q), &same_page);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_add_pc_page);
|
||||
|
||||
@@ -941,6 +956,7 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
put_page(bvec->bv_page);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_release_pages);
|
||||
|
||||
static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
@@ -1015,6 +1031,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Obtain pages from @iter with iov_iter_get_pages() and add them to @bio one
 * page-sized piece at a time through bio_add_hw_page(), using the queue's
 * zone-append sector limit as the size bound.
 *
 * Returns 0 after all obtained pages were added and @iter was advanced by the
 * obtained size. Also returns 0 (after a one-time warning) when the queue
 * reports a zero zone-append limit. Returns the negative value from
 * iov_iter_get_pages(), or -EFAULT if that call returned 0, and -EINVAL when
 * bio_add_hw_page() did not accept a piece in full.
 *
 * NOTE(review): the -EINVAL return happens before iov_iter_advance() and
 * without a put_page() on the pages not yet added -- confirm that callers
 * cope with this.
 */
static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
struct page **pages = (struct page **)bv;
|
||||
ssize_t size, left;
|
||||
unsigned len, i;
|
||||
size_t offset;
|
||||
|
||||
if (WARN_ON_ONCE(!max_append_sectors))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Move page array up in the allocated memory for the bio vecs as far as
|
||||
* possible so that we can start filling biovecs from the beginning
|
||||
* without overwriting the temporary page array.
|
||||
*/
|
||||
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
|
||||
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
|
||||
|
||||
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
|
||||
if (unlikely(size <= 0))
|
||||
return size ? size : -EFAULT;
|
||||
|
||||
for (left = size, i = 0; left > 0; left -= len, i++) {
|
||||
struct page *page = pages[i];
|
||||
bool same_page = false;
|
||||
|
||||
len = min_t(size_t, PAGE_SIZE - offset, left);
|
||||
if (bio_add_hw_page(q, bio, page, len, offset,
|
||||
max_append_sectors, &same_page) != len)
|
||||
return -EINVAL;
|
||||
/* piece was merged into the same page: drop the extra reference */
if (same_page)
|
||||
put_page(page);
|
||||
/* only the first page starts at a non-zero offset */
offset = 0;
|
||||
}
|
||||
|
||||
iov_iter_advance(iter, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_iov_iter_get_pages - add user or kernel pages to a bio
|
||||
* @bio: bio to add pages to
|
||||
@@ -1044,16 +1104,23 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
if (is_bvec)
|
||||
ret = __bio_iov_bvec_add_pages(bio, iter);
|
||||
else
|
||||
ret = __bio_iov_iter_get_pages(bio, iter);
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
if (WARN_ON_ONCE(is_bvec))
|
||||
return -EINVAL;
|
||||
ret = __bio_iov_append_get_pages(bio, iter);
|
||||
} else {
|
||||
if (is_bvec)
|
||||
ret = __bio_iov_bvec_add_pages(bio, iter);
|
||||
else
|
||||
ret = __bio_iov_iter_get_pages(bio, iter);
|
||||
}
|
||||
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
|
||||
|
||||
if (is_bvec)
|
||||
bio_set_flag(bio, BIO_NO_PAGE_REF);
|
||||
return bio->bi_vcnt ? 0 : ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
|
||||
|
||||
static void submit_bio_wait_endio(struct bio *bio)
|
||||
{
|
||||
@@ -1309,55 +1376,6 @@ defer:
|
||||
schedule_work(&bio_dirty_work);
|
||||
}
|
||||
|
||||
void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
|
||||
{
|
||||
unsigned long stamp;
|
||||
again:
|
||||
stamp = READ_ONCE(part->stamp);
|
||||
if (unlikely(stamp != now)) {
|
||||
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
|
||||
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
|
||||
}
|
||||
}
|
||||
if (part->partno) {
|
||||
part = &part_to_disk(part)->part0;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
void generic_start_io_acct(struct request_queue *q, int op,
|
||||
unsigned long sectors, struct hd_struct *part)
|
||||
{
|
||||
const int sgrp = op_stat_group(op);
|
||||
|
||||
part_stat_lock();
|
||||
|
||||
update_io_ticks(part, jiffies, false);
|
||||
part_stat_inc(part, ios[sgrp]);
|
||||
part_stat_add(part, sectors[sgrp], sectors);
|
||||
part_inc_in_flight(q, part, op_is_write(op));
|
||||
|
||||
part_stat_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(generic_start_io_acct);
|
||||
|
||||
void generic_end_io_acct(struct request_queue *q, int req_op,
|
||||
struct hd_struct *part, unsigned long start_time)
|
||||
{
|
||||
unsigned long now = jiffies;
|
||||
unsigned long duration = now - start_time;
|
||||
const int sgrp = op_stat_group(req_op);
|
||||
|
||||
part_stat_lock();
|
||||
|
||||
update_io_ticks(part, now, true);
|
||||
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
|
||||
part_dec_in_flight(q, part, op_is_write(req_op));
|
||||
|
||||
part_stat_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(generic_end_io_acct);
|
||||
|
||||
static inline bool bio_remaining_done(struct bio *bio)
|
||||
{
|
||||
/*
|
||||
@@ -1451,6 +1469,10 @@ struct bio *bio_split(struct bio *bio, int sectors,
|
||||
BUG_ON(sectors <= 0);
|
||||
BUG_ON(sectors >= bio_sectors(bio));
|
||||
|
||||
/* Zone append commands cannot be split */
|
||||
if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
|
||||
return NULL;
|
||||
|
||||
split = bio_clone_fast(bio, gfp, bs);
|
||||
if (!split)
|
||||
return NULL;
|
||||
|
||||
@@ -1530,6 +1530,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
|
||||
{
|
||||
u64 old = atomic64_read(&blkg->delay_start);
|
||||
|
||||
/* negative use_delay means no scaling, see blkcg_set_delay() */
|
||||
if (atomic_read(&blkg->use_delay) < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We only want to scale down every second. The idea here is that we
|
||||
* want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
|
||||
@@ -1717,6 +1721,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
|
||||
*/
|
||||
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
|
||||
{
|
||||
if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
|
||||
return;
|
||||
blkcg_scale_delay(blkg, now);
|
||||
atomic64_add(delta, &blkg->delay_nsec);
|
||||
}
|
||||
|
||||
324
block/blk-core.c
324
block/blk-core.c
@@ -39,6 +39,7 @@
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
@@ -138,6 +139,7 @@ static const char *const blk_op_name[] = {
|
||||
REQ_OP_NAME(ZONE_OPEN),
|
||||
REQ_OP_NAME(ZONE_CLOSE),
|
||||
REQ_OP_NAME(ZONE_FINISH),
|
||||
REQ_OP_NAME(ZONE_APPEND),
|
||||
REQ_OP_NAME(WRITE_SAME),
|
||||
REQ_OP_NAME(WRITE_ZEROES),
|
||||
REQ_OP_NAME(SCSI_IN),
|
||||
@@ -243,6 +245,17 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
|
||||
|
||||
bio_advance(bio, nbytes);
|
||||
|
||||
if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
|
||||
/*
|
||||
* Partial zone append completions cannot be supported as the
|
||||
* BIO fragments may end up not being written sequentially.
|
||||
*/
|
||||
if (bio->bi_iter.bi_size)
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
else
|
||||
bio->bi_iter.bi_sector = rq->__sector;
|
||||
}
|
||||
|
||||
/* don't actually finish bio if it's part of flush sequence */
|
||||
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
|
||||
bio_endio(bio);
|
||||
@@ -443,6 +456,23 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Enter the request queue of the disk that @bio targets, passing
 * BLK_MQ_REQ_NOWAIT to blk_queue_enter() when the bio has REQ_NOWAIT set.
 *
 * Returns the result of blk_queue_enter(). On a non-zero result the bio is
 * failed here: with bio_wouldblock_error() if REQ_NOWAIT was set and the
 * queue is not dying, with bio_io_error() otherwise.
 */
static inline int bio_queue_enter(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
bool nowait = bio->bi_opf & REQ_NOWAIT;
|
||||
int ret;
|
||||
|
||||
ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
|
||||
if (unlikely(ret)) {
|
||||
if (nowait && !blk_queue_dying(q))
|
||||
bio_wouldblock_error(bio);
|
||||
else
|
||||
bio_io_error(bio);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void blk_queue_exit(struct request_queue *q)
|
||||
{
|
||||
percpu_ref_put(&q->q_usage_counter);
|
||||
@@ -487,7 +517,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
|
||||
if (ret)
|
||||
goto fail_id;
|
||||
|
||||
q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
|
||||
q->backing_dev_info = bdi_alloc(node_id);
|
||||
if (!q->backing_dev_info)
|
||||
goto fail_split;
|
||||
|
||||
@@ -497,7 +527,6 @@ struct request_queue *__blk_alloc_queue(int node_id)
|
||||
|
||||
q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
|
||||
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
|
||||
q->backing_dev_info->name = "block";
|
||||
q->node = node_id;
|
||||
|
||||
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
|
||||
@@ -608,6 +637,16 @@ void blk_put_request(struct request *req)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_put_request);
|
||||
|
||||
/*
 * Account a bio merge into @req: increment the merges counter of req->part
 * for the stat group of the request's operation. Does nothing when
 * blk_do_io_stat() reports that the request is not subject to I/O stats.
 */
static void blk_account_io_merge_bio(struct request *req)
|
||||
{
|
||||
if (!blk_do_io_stat(req))
|
||||
return;
|
||||
|
||||
part_stat_lock();
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
|
||||
unsigned int nr_segs)
|
||||
{
|
||||
@@ -628,7 +667,7 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
|
||||
|
||||
bio_crypt_free_ctx(bio);
|
||||
|
||||
blk_account_io_start(req, false);
|
||||
blk_account_io_merge_bio(req);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -654,7 +693,7 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
|
||||
|
||||
bio_crypt_do_front_merge(req, bio);
|
||||
|
||||
blk_account_io_start(req, false);
|
||||
blk_account_io_merge_bio(req);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -676,7 +715,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
|
||||
req->__data_len += bio->bi_iter.bi_size;
|
||||
req->nr_phys_segments = segments + 1;
|
||||
|
||||
blk_account_io_start(req, false);
|
||||
blk_account_io_merge_bio(req);
|
||||
return true;
|
||||
no_merge:
|
||||
req_set_nomerge(q, req);
|
||||
@@ -878,6 +917,41 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Check write append to a zoned block device.
 *
 * Returns BLK_STS_NOTSUPP if @q is not a zoned queue. Returns BLK_STS_IOERR
 * if the bio's start sector fails the zone-start / sequential-zone test, or
 * if the bio is larger than q->limits.chunk_sectors or
 * q->limits.max_zone_append_sectors. Otherwise sets REQ_NOMERGE on the bio
 * and returns BLK_STS_OK.
 */
static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
{
|
||||
sector_t pos = bio->bi_iter.bi_sector;
|
||||
int nr_sectors = bio_sectors(bio);
|
||||
|
||||
/* Only applicable to zoned block devices */
|
||||
if (!blk_queue_is_zoned(q))
|
||||
return BLK_STS_NOTSUPP;
|
||||
|
||||
/* The bio sector must point to the start of a sequential zone */
|
||||
if (pos & (blk_queue_zone_sectors(q) - 1) ||
|
||||
!blk_queue_zone_is_seq(q, pos))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
/*
|
||||
* Not allowed to cross zone boundaries. Otherwise, the BIO will be
|
||||
* split and could result in non-contiguous sectors being written in
|
||||
* different zones.
|
||||
*/
|
||||
if (nr_sectors > q->limits.chunk_sectors)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
/* Make sure the BIO is small enough and will not get split */
|
||||
if (nr_sectors > q->limits.max_zone_append_sectors)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
bio->bi_opf |= REQ_NOMERGE;
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static noinline_for_stack bool
|
||||
generic_make_request_checks(struct bio *bio)
|
||||
{
|
||||
@@ -947,6 +1021,11 @@ generic_make_request_checks(struct bio *bio)
|
||||
if (!q->limits.max_write_same_sectors)
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
status = blk_check_zone_append(q, bio);
|
||||
if (status != BLK_STS_OK)
|
||||
goto end_io;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_OPEN:
|
||||
case REQ_OP_ZONE_CLOSE:
|
||||
@@ -967,12 +1046,13 @@ generic_make_request_checks(struct bio *bio)
|
||||
}
|
||||
|
||||
/*
|
||||
* Various block parts want %current->io_context and lazy ioc
|
||||
* allocation ends up trading a lot of pain for a small amount of
|
||||
* memory. Just allocate it upfront. This may fail and block
|
||||
* layer knows how to live with it.
|
||||
* Various block parts want %current->io_context, so allocate it up
|
||||
* front rather than dealing with lots of pain to allocate it only
|
||||
* where needed. This may fail and the block layer knows how to live
|
||||
* with it.
|
||||
*/
|
||||
create_io_context(GFP_ATOMIC, q->node);
|
||||
if (unlikely(!current->io_context))
|
||||
create_task_io_context(current, GFP_ATOMIC, q->node);
|
||||
|
||||
if (!blkcg_bio_issue_check(q, bio))
|
||||
return false;
|
||||
@@ -994,29 +1074,28 @@ end_io:
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Pass @bio to its queue after running blk_crypto_bio_prep() on it.
 *
 * If the prep call returns non-zero, a queue without a ->make_request_fn is
 * handed to blk_mq_make_request() and that result is returned directly,
 * without calling blk_queue_exit() here; otherwise ->make_request_fn is
 * invoked. On every other path blk_queue_exit() is called before returning,
 * and the result is BLK_QC_T_NONE when the prep call returned zero.
 *
 * NOTE(review): presumably blk_mq_make_request() drops the queue reference
 * itself, which would explain the early return -- confirm.
 */
static blk_qc_t do_make_request(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
if (blk_crypto_bio_prep(&bio)) {
|
||||
if (!q->make_request_fn)
|
||||
return blk_mq_make_request(q, bio);
|
||||
ret = q->make_request_fn(q, bio);
|
||||
}
|
||||
blk_queue_exit(q);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* generic_make_request - hand a buffer to its device driver for I/O
|
||||
* generic_make_request - re-submit a bio to the block device layer for I/O
|
||||
* @bio: The bio describing the location in memory and on the device.
|
||||
*
|
||||
* generic_make_request() is used to make I/O requests of block
|
||||
* devices. It is passed a &struct bio, which describes the I/O that needs
|
||||
* to be done.
|
||||
*
|
||||
* generic_make_request() does not return any status. The
|
||||
* success/failure status of the request, along with notification of
|
||||
* completion, is delivered asynchronously through the bio->bi_end_io
|
||||
* function described (one day) else where.
|
||||
*
|
||||
* The caller of generic_make_request must make sure that bi_io_vec
|
||||
* are set to describe the memory buffer, and that bi_dev and bi_sector are
|
||||
* set to describe the device address, and the
|
||||
* bi_end_io and optionally bi_private are set to describe how
|
||||
* completion notification should be signaled.
|
||||
*
|
||||
* generic_make_request and the drivers it calls may use bi_next if this
|
||||
* bio happens to be merged with someone else, and may resubmit the bio to
|
||||
* a lower device by calling into generic_make_request recursively, which
|
||||
* means the bio should NOT be touched after the call to ->make_request_fn.
|
||||
* This is a version of submit_bio() that shall only be used for I/O that is
|
||||
* resubmitted to lower level drivers by stacking block drivers. All file
|
||||
* systems and other upper level users of the block layer should use
|
||||
* submit_bio() instead.
|
||||
*/
|
||||
blk_qc_t generic_make_request(struct bio *bio)
|
||||
{
|
||||
@@ -1067,19 +1146,14 @@ blk_qc_t generic_make_request(struct bio *bio)
|
||||
current->bio_list = bio_list_on_stack;
|
||||
do {
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
|
||||
BLK_MQ_REQ_NOWAIT : 0;
|
||||
|
||||
if (likely(blk_queue_enter(q, flags) == 0)) {
|
||||
if (likely(bio_queue_enter(bio) == 0)) {
|
||||
struct bio_list lower, same;
|
||||
|
||||
/* Create a fresh bio_list for all subordinate requests */
|
||||
bio_list_on_stack[1] = bio_list_on_stack[0];
|
||||
bio_list_init(&bio_list_on_stack[0]);
|
||||
if (blk_crypto_bio_prep(&bio))
|
||||
ret = q->make_request_fn(q, bio);
|
||||
|
||||
blk_queue_exit(q);
|
||||
ret = do_make_request(bio);
|
||||
|
||||
/* sort new bios into those for a lower level
|
||||
* and those for the same level
|
||||
@@ -1095,12 +1169,6 @@ blk_qc_t generic_make_request(struct bio *bio)
|
||||
bio_list_merge(&bio_list_on_stack[0], &lower);
|
||||
bio_list_merge(&bio_list_on_stack[0], &same);
|
||||
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
|
||||
} else {
|
||||
if (unlikely(!blk_queue_dying(q) &&
|
||||
(bio->bi_opf & REQ_NOWAIT)))
|
||||
bio_wouldblock_error(bio);
|
||||
else
|
||||
bio_io_error(bio);
|
||||
}
|
||||
bio = bio_list_pop(&bio_list_on_stack[0]);
|
||||
} while (bio);
|
||||
@@ -1117,30 +1185,25 @@ EXPORT_SYMBOL(generic_make_request);
|
||||
*
|
||||
* This function behaves like generic_make_request(), but does not protect
|
||||
* against recursion. Must only be used if the called driver is known
|
||||
* to not call generic_make_request (or direct_make_request) again from
|
||||
* its make_request function. (Calling direct_make_request again from
|
||||
* a workqueue is perfectly fine as that doesn't recurse).
|
||||
* to be blk-mq based.
|
||||
*/
|
||||
blk_qc_t direct_make_request(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
bool nowait = bio->bi_opf & REQ_NOWAIT;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
if (!generic_make_request_checks(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
|
||||
if (nowait && !blk_queue_dying(q))
|
||||
bio_wouldblock_error(bio);
|
||||
else
|
||||
bio_io_error(bio);
|
||||
if (WARN_ON_ONCE(q->make_request_fn)) {
|
||||
bio_io_error(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
if (blk_crypto_bio_prep(&bio))
|
||||
ret = q->make_request_fn(q, bio);
|
||||
blk_queue_exit(q);
|
||||
return ret;
|
||||
if (!generic_make_request_checks(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
if (unlikely(bio_queue_enter(bio)))
|
||||
return BLK_QC_T_NONE;
|
||||
if (!blk_crypto_bio_prep(&bio)) {
|
||||
blk_queue_exit(q);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
return blk_mq_make_request(q, bio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(direct_make_request);
|
||||
|
||||
@@ -1148,17 +1211,17 @@ EXPORT_SYMBOL_GPL(direct_make_request);
|
||||
* submit_bio - submit a bio to the block device layer for I/O
|
||||
* @bio: The &struct bio which describes the I/O
|
||||
*
|
||||
* submit_bio() is very similar in purpose to generic_make_request(), and
|
||||
* uses that function to do most of the work. Both are fairly rough
|
||||
* interfaces; @bio must be presetup and ready for I/O.
|
||||
* submit_bio() is used to submit I/O requests to block devices. It is passed a
|
||||
* fully set up &struct bio that describes the I/O that needs to be done. The
|
||||
* bio will be sent to the device described by the bi_disk and bi_partno fields.
|
||||
*
|
||||
* The success/failure status of the request, along with notification of
|
||||
* completion, is delivered asynchronously through the ->bi_end_io() callback
|
||||
* in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has
|
||||
* been called.
|
||||
*/
|
||||
blk_qc_t submit_bio(struct bio *bio)
|
||||
{
|
||||
bool workingset_read = false;
|
||||
unsigned long pflags;
|
||||
blk_qc_t ret;
|
||||
|
||||
if (blkcg_punt_bio_submit(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
@@ -1177,8 +1240,6 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
if (op_is_write(bio_op(bio))) {
|
||||
count_vm_events(PGPGOUT, count);
|
||||
} else {
|
||||
if (bio_flagged(bio, BIO_WORKINGSET))
|
||||
workingset_read = true;
|
||||
task_io_account_read(bio->bi_iter.bi_size);
|
||||
count_vm_events(PGPGIN, count);
|
||||
}
|
||||
@@ -1194,20 +1255,24 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're reading data that is part of the userspace
|
||||
* workingset, count submission time as memory stall. When the
|
||||
* device is congested, or the submitting cgroup IO-throttled,
|
||||
* submission can be a significant part of overall IO time.
|
||||
* If we're reading data that is part of the userspace workingset, count
|
||||
* submission time as memory stall. When the device is congested, or
|
||||
* the submitting cgroup IO-throttled, submission can be a significant
|
||||
* part of overall IO time.
|
||||
*/
|
||||
if (workingset_read)
|
||||
if (unlikely(bio_op(bio) == REQ_OP_READ &&
|
||||
bio_flagged(bio, BIO_WORKINGSET))) {
|
||||
unsigned long pflags;
|
||||
blk_qc_t ret;
|
||||
|
||||
psi_memstall_enter(&pflags);
|
||||
|
||||
ret = generic_make_request(bio);
|
||||
|
||||
if (workingset_read)
|
||||
ret = generic_make_request(bio);
|
||||
psi_memstall_leave(&pflags);
|
||||
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return generic_make_request(bio);
|
||||
}
|
||||
EXPORT_SYMBOL(submit_bio);
|
||||
|
||||
@@ -1272,7 +1337,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (blk_queue_io_stat(q))
|
||||
blk_account_io_start(rq, true);
|
||||
blk_account_io_start(rq);
|
||||
|
||||
/*
|
||||
* Since we have a scheduler attached on the top device,
|
||||
@@ -1324,7 +1389,22 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
|
||||
|
||||
void blk_account_io_completion(struct request *req, unsigned int bytes)
|
||||
static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
|
||||
{
|
||||
unsigned long stamp;
|
||||
again:
|
||||
stamp = READ_ONCE(part->stamp);
|
||||
if (unlikely(stamp != now)) {
|
||||
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
|
||||
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
|
||||
}
|
||||
if (part->partno) {
|
||||
part = &part_to_disk(part)->part0;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_account_io_completion(struct request *req, unsigned int bytes)
|
||||
{
|
||||
if (req->part && blk_do_io_stat(req)) {
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
@@ -1355,49 +1435,58 @@ void blk_account_io_done(struct request *req, u64 now)
|
||||
update_io_ticks(part, jiffies, true);
|
||||
part_stat_inc(part, ios[sgrp]);
|
||||
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
|
||||
part_dec_in_flight(req->q, part, rq_data_dir(req));
|
||||
part_stat_unlock();
|
||||
|
||||
hd_struct_put(part);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
void blk_account_io_start(struct request *rq, bool new_io)
|
||||
void blk_account_io_start(struct request *rq)
|
||||
{
|
||||
struct hd_struct *part;
|
||||
int rw = rq_data_dir(rq);
|
||||
|
||||
if (!blk_do_io_stat(rq))
|
||||
return;
|
||||
|
||||
rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
|
||||
|
||||
part_stat_lock();
|
||||
|
||||
if (!new_io) {
|
||||
part = rq->part;
|
||||
part_stat_inc(part, merges[rw]);
|
||||
} else {
|
||||
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
|
||||
if (!hd_struct_try_get(part)) {
|
||||
/*
|
||||
* The partition is already being removed,
|
||||
* the request will be accounted on the disk only
|
||||
*
|
||||
* We take a reference on disk->part0 although that
|
||||
* partition will never be deleted, so we can treat
|
||||
* it as any other partition.
|
||||
*/
|
||||
part = &rq->rq_disk->part0;
|
||||
hd_struct_get(part);
|
||||
}
|
||||
part_inc_in_flight(rq->q, part, rw);
|
||||
rq->part = part;
|
||||
}
|
||||
|
||||
update_io_ticks(part, jiffies, false);
|
||||
|
||||
update_io_ticks(rq->part, jiffies, false);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
/*
 * Start I/O accounting against the whole-disk partition (disk->part0).
 *
 * Under the part-stat lock this updates io_ticks, increments the I/O count
 * and adds @sectors for the stat group of @op, and increments the local
 * in-flight counter for the read or write direction of @op.
 *
 * Returns the jiffies value sampled at entry; disk_end_io_acct() takes a
 * start_time argument that it subtracts from its own jiffies sample.
 */
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
|
||||
unsigned int op)
|
||||
{
|
||||
struct hd_struct *part = &disk->part0;
|
||||
const int sgrp = op_stat_group(op);
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(part, now, false);
|
||||
part_stat_inc(part, ios[sgrp]);
|
||||
part_stat_add(part, sectors[sgrp], sectors);
|
||||
part_stat_local_inc(part, in_flight[op_is_write(op)]);
|
||||
part_stat_unlock();
|
||||
|
||||
return now;
|
||||
}
|
||||
EXPORT_SYMBOL(disk_start_io_acct);
|
||||
|
||||
/*
 * Finish I/O accounting against the whole-disk partition (disk->part0).
 *
 * @start_time is a jiffies value; the elapsed jiffies since then are
 * converted to nanoseconds and added to the nsecs counter for the stat group
 * of @op. io_ticks is updated and the local in-flight counter for the read
 * or write direction of @op is decremented, all under the part-stat lock.
 */
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
|
||||
unsigned long start_time)
|
||||
{
|
||||
struct hd_struct *part = &disk->part0;
|
||||
const int sgrp = op_stat_group(op);
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
unsigned long duration = now - start_time;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(part, now, true);
|
||||
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
|
||||
part_stat_local_dec(part, in_flight[op_is_write(op)]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(disk_end_io_acct);
|
||||
|
||||
/*
|
||||
* Steal bios from a request and add them to a bio list.
|
||||
* The request must not have been partially completed before.
|
||||
@@ -1646,7 +1735,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
||||
}
|
||||
rq->nr_phys_segments = rq_src->nr_phys_segments;
|
||||
rq->ioprio = rq_src->ioprio;
|
||||
rq->extra_len = rq_src->extra_len;
|
||||
|
||||
if (rq->bio)
|
||||
blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
|
||||
@@ -1791,6 +1879,18 @@ void blk_finish_plug(struct blk_plug *plug)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_finish_plug);
|
||||
|
||||
/*
 * Sleep waiting for I/O. When sysctl_hung_task_timeout_secs * HZ / 2 is
 * non-zero the sleep is bounded by that many jiffies through
 * io_schedule_timeout(); when it is zero, plain io_schedule() is used.
 */
void blk_io_schedule(void)
|
||||
{
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
|
||||
|
||||
if (timeout)
|
||||
io_schedule_timeout(timeout);
|
||||
else
|
||||
io_schedule();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_io_schedule);
|
||||
|
||||
int __init blk_dev_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
|
||||
|
||||
@@ -537,7 +537,7 @@ static bool blk_crypto_fallback_inited;
|
||||
static int blk_crypto_fallback_init(void)
|
||||
{
|
||||
int i;
|
||||
int err = -ENOMEM;
|
||||
int err;
|
||||
|
||||
if (blk_crypto_fallback_inited)
|
||||
return 0;
|
||||
|
||||
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
|
||||
rq->rq_disk = bd_disk;
|
||||
rq->end_io = done;
|
||||
|
||||
blk_account_io_start(rq, true);
|
||||
blk_account_io_start(rq);
|
||||
|
||||
/*
|
||||
* don't check dying flag for MQ because the request won't
|
||||
|
||||
@@ -258,7 +258,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
blk_flush_complete_seq(rq, fq, seq, error);
|
||||
}
|
||||
|
||||
fq->flush_queue_delayed = 0;
|
||||
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
|
||||
}
|
||||
|
||||
@@ -433,41 +432,20 @@ void blk_insert_flush(struct request *rq)
|
||||
* blkdev_issue_flush - queue a flush
|
||||
* @bdev: blockdev to issue flush for
|
||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||
* @error_sector: error sector
|
||||
*
|
||||
* Description:
|
||||
* Issue a flush for the block device in question. Caller can supply
|
||||
* room for storing the error offset in case of a flush error, if they
|
||||
* wish to.
|
||||
* Issue a flush for the block device in question.
|
||||
*/
|
||||
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
|
||||
sector_t *error_sector)
|
||||
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct bio *bio;
|
||||
int ret = 0;
|
||||
|
||||
if (bdev->bd_disk == NULL)
|
||||
return -ENXIO;
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!q)
|
||||
return -ENXIO;
|
||||
|
||||
bio = bio_alloc(gfp_mask, 0);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
||||
|
||||
ret = submit_bio_wait(bio);
|
||||
|
||||
/*
|
||||
* The driver must store the error location in ->bi_sector, if
|
||||
* it supports it. For non-stacked drivers, this should be
|
||||
* copied from blk_rq_pos(rq).
|
||||
*/
|
||||
if (error_sector)
|
||||
*error_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -260,6 +260,7 @@ enum {
|
||||
VTIME_PER_SEC_SHIFT = 37,
|
||||
VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT,
|
||||
VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC,
|
||||
VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC,
|
||||
|
||||
/* bound vrate adjustments within two orders of magnitude */
|
||||
VRATE_MIN_PPM = 10000, /* 1% */
|
||||
@@ -1206,14 +1207,14 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
|
||||
static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
|
||||
{
|
||||
struct ioc *ioc = iocg->ioc;
|
||||
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
|
||||
u64 vtime = atomic64_read(&iocg->vtime);
|
||||
u64 vmargin = ioc->margin_us * now->vrate;
|
||||
u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
|
||||
u64 expires, oexpires;
|
||||
u64 delta_ns, expires, oexpires;
|
||||
u32 hw_inuse;
|
||||
|
||||
lockdep_assert_held(&iocg->waitq.lock);
|
||||
@@ -1236,15 +1237,10 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
|
||||
return false;
|
||||
|
||||
/* use delay */
|
||||
if (cost) {
|
||||
u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC,
|
||||
now->vrate);
|
||||
blkcg_add_delay(blkg, now->now_ns, cost_ns);
|
||||
}
|
||||
blkcg_use_delay(blkg);
|
||||
|
||||
expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow,
|
||||
now->vrate) * NSEC_PER_USEC;
|
||||
delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
|
||||
now->vrate) * NSEC_PER_USEC;
|
||||
blkcg_set_delay(blkg, delta_ns);
|
||||
expires = now->now_ns + delta_ns;
|
||||
|
||||
/* if already active and close enough, don't bother */
|
||||
oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
|
||||
@@ -1265,7 +1261,7 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
|
||||
|
||||
spin_lock_irqsave(&iocg->waitq.lock, flags);
|
||||
ioc_now(iocg->ioc, &now);
|
||||
iocg_kick_delay(iocg, &now, 0);
|
||||
iocg_kick_delay(iocg, &now);
|
||||
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
@@ -1383,7 +1379,7 @@ static void ioc_timer_fn(struct timer_list *timer)
|
||||
if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
|
||||
/* might be oversleeping vtime / hweight changes, kick */
|
||||
iocg_kick_waitq(iocg, &now);
|
||||
iocg_kick_delay(iocg, &now, 0);
|
||||
iocg_kick_delay(iocg, &now);
|
||||
} else if (iocg_is_idle(iocg)) {
|
||||
/* no waiter and idle, deactivate */
|
||||
iocg->last_inuse = iocg->inuse;
|
||||
@@ -1543,19 +1539,39 @@ skip_surplus_transfers:
|
||||
if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
|
||||
missed_ppm[READ] > ppm_rthr ||
|
||||
missed_ppm[WRITE] > ppm_wthr) {
|
||||
/* clearly missing QoS targets, slow down vrate */
|
||||
ioc->busy_level = max(ioc->busy_level, 0);
|
||||
ioc->busy_level++;
|
||||
} else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
|
||||
missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
|
||||
missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
|
||||
/* take action iff there is contention */
|
||||
if (nr_shortages && !nr_lagging) {
|
||||
/* QoS targets are being met with >25% margin */
|
||||
if (nr_shortages) {
|
||||
/*
|
||||
* We're throttling while the device has spare
|
||||
* capacity. If vrate was being slowed down, stop.
|
||||
*/
|
||||
ioc->busy_level = min(ioc->busy_level, 0);
|
||||
/* redistribute surpluses first */
|
||||
if (!nr_surpluses)
|
||||
|
||||
/*
|
||||
* If there are IOs spanning multiple periods, wait
|
||||
* them out before pushing the device harder. If
|
||||
* there are surpluses, let redistribution work it
|
||||
* out first.
|
||||
*/
|
||||
if (!nr_lagging && !nr_surpluses)
|
||||
ioc->busy_level--;
|
||||
} else {
|
||||
/*
|
||||
* Nobody is being throttled and the users aren't
|
||||
* issuing enough IOs to saturate the device. We
|
||||
* simply don't know how close the device is to
|
||||
* saturation. Coast.
|
||||
*/
|
||||
ioc->busy_level = 0;
|
||||
}
|
||||
} else {
|
||||
/* inside the hysteresis margin, we're good */
|
||||
ioc->busy_level = 0;
|
||||
}
|
||||
|
||||
@@ -1678,6 +1694,31 @@ static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge)
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc,
|
||||
u64 *costp)
|
||||
{
|
||||
unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT;
|
||||
|
||||
switch (req_op(rq)) {
|
||||
case REQ_OP_READ:
|
||||
*costp = pages * ioc->params.lcoefs[LCOEF_RPAGE];
|
||||
break;
|
||||
case REQ_OP_WRITE:
|
||||
*costp = pages * ioc->params.lcoefs[LCOEF_WPAGE];
|
||||
break;
|
||||
default:
|
||||
*costp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
|
||||
{
|
||||
u64 cost;
|
||||
|
||||
calc_size_vtime_cost_builtin(rq, ioc, &cost);
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
|
||||
{
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
@@ -1762,7 +1803,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
|
||||
*/
|
||||
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
|
||||
iocg->abs_vdebt += abs_cost;
|
||||
if (iocg_kick_delay(iocg, &now, cost))
|
||||
if (iocg_kick_delay(iocg, &now))
|
||||
blkcg_schedule_throttle(rqos->q,
|
||||
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||
spin_unlock_irq(&iocg->waitq.lock);
|
||||
@@ -1850,7 +1891,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
|
||||
spin_lock_irqsave(&iocg->waitq.lock, flags);
|
||||
if (likely(!list_empty(&iocg->active_list))) {
|
||||
iocg->abs_vdebt += abs_cost;
|
||||
iocg_kick_delay(iocg, &now, cost);
|
||||
iocg_kick_delay(iocg, &now);
|
||||
} else {
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
}
|
||||
@@ -1868,7 +1909,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
|
||||
{
|
||||
struct ioc *ioc = rqos_to_ioc(rqos);
|
||||
u64 on_q_ns, rq_wait_ns;
|
||||
u64 on_q_ns, rq_wait_ns, size_nsec;
|
||||
int pidx, rw;
|
||||
|
||||
if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns)
|
||||
@@ -1889,8 +1930,10 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
|
||||
|
||||
on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
|
||||
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
|
||||
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
|
||||
|
||||
if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC)
|
||||
if (on_q_ns <= size_nsec ||
|
||||
on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
|
||||
this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
|
||||
else
|
||||
this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
|
||||
@@ -2297,6 +2340,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
spin_lock_irq(&ioc->lock);
|
||||
|
||||
if (enable) {
|
||||
blk_stat_enable_accounting(ioc->rqos.q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q);
|
||||
ioc->enabled = true;
|
||||
} else {
|
||||
|
||||
@@ -257,6 +257,7 @@ out_bmd:
|
||||
static struct bio *bio_map_user_iov(struct request_queue *q,
|
||||
struct iov_iter *iter, gfp_t gfp_mask)
|
||||
{
|
||||
unsigned int max_sectors = queue_max_hw_sectors(q);
|
||||
int j;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
@@ -294,8 +295,8 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
|
||||
if (n > bytes)
|
||||
n = bytes;
|
||||
|
||||
if (!__bio_add_pc_page(q, bio, page, n, offs,
|
||||
&same_page)) {
|
||||
if (!bio_add_hw_page(q, bio, page, n, offs,
|
||||
max_sectors, &same_page)) {
|
||||
if (same_page)
|
||||
put_page(page);
|
||||
break;
|
||||
@@ -655,8 +656,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
||||
bio = rq->bio;
|
||||
} while (iov_iter_count(&i));
|
||||
|
||||
if (!bio_flagged(bio, BIO_USER_MAPPED))
|
||||
rq->rq_flags |= RQF_COPY_USER;
|
||||
return 0;
|
||||
|
||||
unmap_rq:
|
||||
@@ -732,7 +731,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
{
|
||||
int reading = rq_data_dir(rq) == READ;
|
||||
unsigned long addr = (unsigned long) kbuf;
|
||||
int do_copy = 0;
|
||||
struct bio *bio, *orig_bio;
|
||||
int ret;
|
||||
|
||||
@@ -741,8 +739,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
if (!len || !kbuf)
|
||||
return -EINVAL;
|
||||
|
||||
do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
|
||||
if (do_copy)
|
||||
if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
|
||||
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
|
||||
else
|
||||
bio = bio_map_kern(q, kbuf, len, gfp_mask);
|
||||
@@ -753,9 +750,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
bio->bi_opf &= ~REQ_OP_MASK;
|
||||
bio->bi_opf |= req_op(rq);
|
||||
|
||||
if (do_copy)
|
||||
rq->rq_flags |= RQF_COPY_USER;
|
||||
|
||||
orig_bio = bio;
|
||||
ret = blk_rq_append_bio(rq, &bio);
|
||||
if (unlikely(ret)) {
|
||||
|
||||
@@ -336,16 +336,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
/* there is no chance to merge the split bio */
|
||||
split->bi_opf |= REQ_NOMERGE;
|
||||
|
||||
/*
|
||||
* Since we're recursing into make_request here, ensure
|
||||
* that we mark this bio as already having entered the queue.
|
||||
* If not, and the queue is going away, we can get stuck
|
||||
* forever on waiting for the queue reference to drop. But
|
||||
* that will never happen, as we're already holding a
|
||||
* reference to it.
|
||||
*/
|
||||
bio_set_flag(*bio, BIO_QUEUE_ENTERED);
|
||||
|
||||
bio_chain(split, *bio);
|
||||
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
|
||||
generic_make_request(*bio);
|
||||
@@ -519,44 +509,20 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
|
||||
* map a request to scatterlist, return number of sg entries setup. Caller
|
||||
* must make sure sg can hold rq->nr_phys_segments entries
|
||||
*/
|
||||
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
struct scatterlist *sglist)
|
||||
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
struct scatterlist *sglist, struct scatterlist **last_sg)
|
||||
{
|
||||
struct scatterlist *sg = NULL;
|
||||
int nsegs = 0;
|
||||
|
||||
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
|
||||
nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, &sg);
|
||||
nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
|
||||
else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
|
||||
nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, &sg);
|
||||
nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, last_sg);
|
||||
else if (rq->bio)
|
||||
nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
|
||||
nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
|
||||
|
||||
if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
|
||||
(blk_rq_bytes(rq) & q->dma_pad_mask)) {
|
||||
unsigned int pad_len =
|
||||
(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
|
||||
|
||||
sg->length += pad_len;
|
||||
rq->extra_len += pad_len;
|
||||
}
|
||||
|
||||
if (q->dma_drain_size && q->dma_drain_needed(rq)) {
|
||||
if (op_is_write(req_op(rq)))
|
||||
memset(q->dma_drain_buffer, 0, q->dma_drain_size);
|
||||
|
||||
sg_unmark_end(sg);
|
||||
sg = sg_next(sg);
|
||||
sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
|
||||
q->dma_drain_size,
|
||||
((unsigned long)q->dma_drain_buffer) &
|
||||
(PAGE_SIZE - 1));
|
||||
nsegs++;
|
||||
rq->extra_len += q->dma_drain_size;
|
||||
}
|
||||
|
||||
if (sg)
|
||||
sg_mark_end(sg);
|
||||
if (*last_sg)
|
||||
sg_mark_end(*last_sg);
|
||||
|
||||
/*
|
||||
* Something must have been wrong if the figured number of
|
||||
@@ -566,7 +532,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
|
||||
return nsegs;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_sg);
|
||||
EXPORT_SYMBOL(__blk_rq_map_sg);
|
||||
|
||||
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
|
||||
unsigned int nr_phys_segs)
|
||||
@@ -703,20 +669,17 @@ void blk_rq_set_mixed_merge(struct request *rq)
|
||||
rq->rq_flags |= RQF_MIXED_MERGE;
|
||||
}
|
||||
|
||||
static void blk_account_io_merge(struct request *req)
|
||||
static void blk_account_io_merge_request(struct request *req)
|
||||
{
|
||||
if (blk_do_io_stat(req)) {
|
||||
struct hd_struct *part;
|
||||
|
||||
part_stat_lock();
|
||||
part = req->part;
|
||||
|
||||
part_dec_in_flight(req->q, part, rq_data_dir(req));
|
||||
|
||||
hd_struct_put(part);
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
|
||||
hd_struct_put(req->part);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Two cases of handling DISCARD merge:
|
||||
* If max_discard_segments > 1, the driver takes every bio
|
||||
@@ -828,7 +791,7 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
/*
|
||||
* 'next' is going away, so update stats accordingly
|
||||
*/
|
||||
blk_account_io_merge(next);
|
||||
blk_account_io_merge_request(next);
|
||||
|
||||
/*
|
||||
* ownership of bio passed from next to req, return 'next' for
|
||||
|
||||
@@ -213,6 +213,7 @@ static const char *const hctx_state_name[] = {
|
||||
HCTX_STATE_NAME(STOPPED),
|
||||
HCTX_STATE_NAME(TAG_ACTIVE),
|
||||
HCTX_STATE_NAME(SCHED_RESTART),
|
||||
HCTX_STATE_NAME(INACTIVE),
|
||||
};
|
||||
#undef HCTX_STATE_NAME
|
||||
|
||||
@@ -239,6 +240,7 @@ static const char *const hctx_flag_name[] = {
|
||||
HCTX_FLAG_NAME(TAG_SHARED),
|
||||
HCTX_FLAG_NAME(BLOCKING),
|
||||
HCTX_FLAG_NAME(NO_SCHED),
|
||||
HCTX_FLAG_NAME(STACKING),
|
||||
};
|
||||
#undef HCTX_FLAG_NAME
|
||||
|
||||
@@ -292,7 +294,6 @@ static const char *const rqf_name[] = {
|
||||
RQF_NAME(MQ_INFLIGHT),
|
||||
RQF_NAME(DONTPREP),
|
||||
RQF_NAME(PREEMPT),
|
||||
RQF_NAME(COPY_USER),
|
||||
RQF_NAME(FAILED),
|
||||
RQF_NAME(QUIET),
|
||||
RQF_NAME(ELVPRIV),
|
||||
|
||||
@@ -80,16 +80,22 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
|
||||
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
|
||||
|
||||
/*
|
||||
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
||||
* its queue by itself in its completion handler, so we don't need to
|
||||
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
|
||||
*
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
*/
|
||||
static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
LIST_HEAD(rq_list);
|
||||
int ret = 0;
|
||||
|
||||
do {
|
||||
struct request *rq;
|
||||
@@ -97,12 +103,25 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
|
||||
break;
|
||||
|
||||
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!blk_mq_get_dispatch_budget(hctx))
|
||||
break;
|
||||
|
||||
rq = e->type->ops.dispatch_request(hctx);
|
||||
if (!rq) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
/*
|
||||
* We're releasing without dispatching. Holding the
|
||||
* budget could have blocked any "hctx"s with the
|
||||
* same queue and if we didn't dispatch then there's
|
||||
* no guarantee anyone will kick the queue. Kick it
|
||||
* ourselves.
|
||||
*/
|
||||
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -113,6 +132,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
*/
|
||||
list_add(&rq->queuelist, &rq_list);
|
||||
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
@@ -130,16 +151,25 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
||||
* its queue by itself in its completion handler, so we don't need to
|
||||
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
|
||||
*
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
*/
|
||||
static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
LIST_HEAD(rq_list);
|
||||
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
|
||||
int ret = 0;
|
||||
|
||||
do {
|
||||
struct request *rq;
|
||||
|
||||
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!sbitmap_any_bit_set(&hctx->ctx_map))
|
||||
break;
|
||||
|
||||
@@ -149,6 +179,14 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
|
||||
if (!rq) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
/*
|
||||
* We're releasing without dispatching. Holding the
|
||||
* budget could have blocked any "hctx"s with the
|
||||
* same queue and if we didn't dispatch then there's
|
||||
* no guarantee anyone will kick the queue. Kick it
|
||||
* ourselves.
|
||||
*/
|
||||
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -165,21 +203,17 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
|
||||
|
||||
WRITE_ONCE(hctx->dispatch_from, ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
|
||||
int ret = 0;
|
||||
LIST_HEAD(rq_list);
|
||||
|
||||
/* RCU or SRCU read lock is needed before checking quiesced flag */
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
|
||||
return;
|
||||
|
||||
hctx->run++;
|
||||
|
||||
/*
|
||||
* If we have previous entries on our dispatch list, grab them first for
|
||||
* more fair dispatch.
|
||||
@@ -208,19 +242,41 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
|
||||
if (has_sched_dispatch)
|
||||
blk_mq_do_dispatch_sched(hctx);
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
else
|
||||
blk_mq_do_dispatch_ctx(hctx);
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
}
|
||||
} else if (has_sched_dispatch) {
|
||||
blk_mq_do_dispatch_sched(hctx);
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
} else if (hctx->dispatch_busy) {
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
blk_mq_do_dispatch_ctx(hctx);
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
} else {
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(q, &rq_list, false);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
/* RCU or SRCU read lock is needed before checking quiesced flag */
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
|
||||
return;
|
||||
|
||||
hctx->run++;
|
||||
|
||||
/*
|
||||
* A return of -EAGAIN is an indication that hctx->dispatch is not
|
||||
* empty and we must run again in order to avoid starving flushes.
|
||||
*/
|
||||
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
|
||||
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
}
|
||||
|
||||
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
|
||||
|
||||
@@ -92,7 +92,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
|
||||
{
|
||||
if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
|
||||
!hctx_may_queue(data->hctx, bt))
|
||||
return -1;
|
||||
return BLK_MQ_NO_TAG;
|
||||
if (data->shallow_depth)
|
||||
return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
|
||||
else
|
||||
@@ -111,7 +111,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
||||
if (data->flags & BLK_MQ_REQ_RESERVED) {
|
||||
if (unlikely(!tags->nr_reserved_tags)) {
|
||||
WARN_ON_ONCE(1);
|
||||
return BLK_MQ_TAG_FAIL;
|
||||
return BLK_MQ_NO_TAG;
|
||||
}
|
||||
bt = &tags->breserved_tags;
|
||||
tag_offset = 0;
|
||||
@@ -121,11 +121,11 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
||||
}
|
||||
|
||||
tag = __blk_mq_get_tag(data, bt);
|
||||
if (tag != -1)
|
||||
if (tag != BLK_MQ_NO_TAG)
|
||||
goto found_tag;
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_NOWAIT)
|
||||
return BLK_MQ_TAG_FAIL;
|
||||
return BLK_MQ_NO_TAG;
|
||||
|
||||
ws = bt_wait_ptr(bt, data->hctx);
|
||||
do {
|
||||
@@ -143,13 +143,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
||||
* as running the queue may also have found completions.
|
||||
*/
|
||||
tag = __blk_mq_get_tag(data, bt);
|
||||
if (tag != -1)
|
||||
if (tag != BLK_MQ_NO_TAG)
|
||||
break;
|
||||
|
||||
sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
tag = __blk_mq_get_tag(data, bt);
|
||||
if (tag != -1)
|
||||
if (tag != BLK_MQ_NO_TAG)
|
||||
break;
|
||||
|
||||
bt_prev = bt;
|
||||
@@ -180,6 +180,14 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
||||
sbitmap_finish_wait(bt, ws, &wait);
|
||||
|
||||
found_tag:
|
||||
/*
|
||||
* Give up this allocation if the hctx is inactive. The caller will
|
||||
* retry on an active hctx.
|
||||
*/
|
||||
if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
|
||||
blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
|
||||
return BLK_MQ_NO_TAG;
|
||||
}
|
||||
return tag + tag_offset;
|
||||
}
|
||||
|
||||
@@ -256,14 +264,17 @@ struct bt_tags_iter_data {
|
||||
struct blk_mq_tags *tags;
|
||||
busy_tag_iter_fn *fn;
|
||||
void *data;
|
||||
bool reserved;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
#define BT_TAG_ITER_RESERVED (1 << 0)
|
||||
#define BT_TAG_ITER_STARTED (1 << 1)
|
||||
|
||||
static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct bt_tags_iter_data *iter_data = data;
|
||||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
bool reserved = iter_data->reserved;
|
||||
bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
|
||||
struct request *rq;
|
||||
|
||||
if (!reserved)
|
||||
@@ -274,10 +285,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
* test and set the bit before assigning ->rqs[].
|
||||
*/
|
||||
rq = tags->rqs[bitnr];
|
||||
if (rq && blk_mq_request_started(rq))
|
||||
return iter_data->fn(rq, iter_data->data, reserved);
|
||||
|
||||
return true;
|
||||
if (!rq)
|
||||
return true;
|
||||
if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
|
||||
!blk_mq_request_started(rq))
|
||||
return true;
|
||||
return iter_data->fn(rq, iter_data->data, reserved);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -290,39 +303,47 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
* @reserved) where rq is a pointer to a request. Return true
|
||||
* to continue iterating tags, false to stop.
|
||||
* @data: Will be passed as second argument to @fn.
|
||||
* @reserved: Indicates whether @bt is the breserved_tags member or the
|
||||
* bitmap_tags member of struct blk_mq_tags.
|
||||
* @flags: BT_TAG_ITER_*
|
||||
*/
|
||||
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
|
||||
busy_tag_iter_fn *fn, void *data, bool reserved)
|
||||
busy_tag_iter_fn *fn, void *data, unsigned int flags)
|
||||
{
|
||||
struct bt_tags_iter_data iter_data = {
|
||||
.tags = tags,
|
||||
.fn = fn,
|
||||
.data = data,
|
||||
.reserved = reserved,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
if (tags->rqs)
|
||||
sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
|
||||
}
|
||||
|
||||
static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
|
||||
busy_tag_iter_fn *fn, void *priv, unsigned int flags)
|
||||
{
|
||||
WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
|
||||
|
||||
if (tags->nr_reserved_tags)
|
||||
bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
|
||||
flags | BT_TAG_ITER_RESERVED);
|
||||
bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
|
||||
* blk_mq_all_tag_iter - iterate over all requests in a tag map
|
||||
* @tags: Tag map to iterate over.
|
||||
* @fn: Pointer to the function that will be called for each started
|
||||
* @fn: Pointer to the function that will be called for each
|
||||
* request. @fn will be called as follows: @fn(rq, @priv,
|
||||
* reserved) where rq is a pointer to a request. 'reserved'
|
||||
* indicates whether or not @rq is a reserved request. Return
|
||||
* true to continue iterating tags, false to stop.
|
||||
* @priv: Will be passed as second argument to @fn.
|
||||
*/
|
||||
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
|
||||
busy_tag_iter_fn *fn, void *priv)
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv)
|
||||
{
|
||||
if (tags->nr_reserved_tags)
|
||||
bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
|
||||
bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
|
||||
return __blk_mq_all_tag_iter(tags, fn, priv, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -342,7 +363,8 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
|
||||
for (i = 0; i < tagset->nr_hw_queues; i++) {
|
||||
if (tagset->tags && tagset->tags[i])
|
||||
blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
|
||||
__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
|
||||
BT_TAG_ITER_STARTED);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
|
||||
|
||||
@@ -34,6 +34,8 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
|
||||
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
|
||||
struct blk_mq_hw_ctx *hctx)
|
||||
@@ -44,9 +46,9 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
|
||||
}
|
||||
|
||||
enum {
|
||||
BLK_MQ_TAG_FAIL = -1U,
|
||||
BLK_MQ_NO_TAG = -1U,
|
||||
BLK_MQ_TAG_MIN = 1,
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1,
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
|
||||
};
|
||||
|
||||
extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
|
||||
|
||||
394
block/blk-mq.c
394
block/blk-mq.c
@@ -271,14 +271,14 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
|
||||
}
|
||||
|
||||
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
unsigned int tag, unsigned int op, u64 alloc_time_ns)
|
||||
unsigned int tag, u64 alloc_time_ns)
|
||||
{
|
||||
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
|
||||
struct request *rq = tags->static_rqs[tag];
|
||||
req_flags_t rq_flags = 0;
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_INTERNAL) {
|
||||
rq->tag = -1;
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
rq->internal_tag = tag;
|
||||
} else {
|
||||
if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
|
||||
@@ -286,7 +286,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
atomic_inc(&data->hctx->nr_active);
|
||||
}
|
||||
rq->tag = tag;
|
||||
rq->internal_tag = -1;
|
||||
rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
data->hctx->tags->rqs[rq->tag] = rq;
|
||||
}
|
||||
|
||||
@@ -295,7 +295,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
rq->mq_ctx = data->ctx;
|
||||
rq->mq_hctx = data->hctx;
|
||||
rq->rq_flags = rq_flags;
|
||||
rq->cmd_flags = op;
|
||||
rq->cmd_flags = data->cmd_flags;
|
||||
if (data->flags & BLK_MQ_REQ_PREEMPT)
|
||||
rq->rq_flags |= RQF_PREEMPT;
|
||||
if (blk_queue_io_stat(data->q))
|
||||
@@ -320,7 +320,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
#endif
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
/* tag was already set */
|
||||
rq->extra_len = 0;
|
||||
WRITE_ONCE(rq->deadline, 0);
|
||||
|
||||
rq->timeout = 0;
|
||||
@@ -328,35 +327,37 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
rq->end_io = NULL;
|
||||
rq->end_io_data = NULL;
|
||||
|
||||
data->ctx->rq_dispatched[op_is_sync(op)]++;
|
||||
data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
|
||||
refcount_set(&rq->ref, 1);
|
||||
|
||||
if (!op_is_flush(data->cmd_flags)) {
|
||||
struct elevator_queue *e = data->q->elevator;
|
||||
|
||||
rq->elv.icq = NULL;
|
||||
if (e && e->type->ops.prepare_request) {
|
||||
if (e->type->icq_cache)
|
||||
blk_mq_sched_assign_ioc(rq);
|
||||
|
||||
e->type->ops.prepare_request(rq);
|
||||
rq->rq_flags |= RQF_ELVPRIV;
|
||||
}
|
||||
}
|
||||
|
||||
data->hctx->queued++;
|
||||
return rq;
|
||||
}
|
||||
|
||||
static struct request *blk_mq_get_request(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
struct blk_mq_alloc_data *data)
|
||||
static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct request_queue *q = data->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct request *rq;
|
||||
unsigned int tag;
|
||||
bool clear_ctx_on_error = false;
|
||||
u64 alloc_time_ns = 0;
|
||||
|
||||
blk_queue_enter_live(q);
|
||||
unsigned int tag;
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
|
||||
data->q = q;
|
||||
if (likely(!data->ctx)) {
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
clear_ctx_on_error = true;
|
||||
}
|
||||
if (likely(!data->hctx))
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags,
|
||||
data->ctx);
|
||||
if (data->cmd_flags & REQ_NOWAIT)
|
||||
data->flags |= BLK_MQ_REQ_NOWAIT;
|
||||
|
||||
@@ -372,37 +373,43 @@ static struct request *blk_mq_get_request(struct request_queue *q,
|
||||
e->type->ops.limit_depth &&
|
||||
!(data->flags & BLK_MQ_REQ_RESERVED))
|
||||
e->type->ops.limit_depth(data->cmd_flags, data);
|
||||
} else {
|
||||
}
|
||||
|
||||
retry:
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
|
||||
if (!(data->flags & BLK_MQ_REQ_INTERNAL))
|
||||
blk_mq_tag_busy(data->hctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Waiting allocations only fail because of an inactive hctx. In that
|
||||
* case just retry the hctx assignment and tag allocation as CPU hotplug
|
||||
* should have migrated us to an online CPU by now.
|
||||
*/
|
||||
tag = blk_mq_get_tag(data);
|
||||
if (tag == BLK_MQ_TAG_FAIL) {
|
||||
if (clear_ctx_on_error)
|
||||
data->ctx = NULL;
|
||||
blk_queue_exit(q);
|
||||
return NULL;
|
||||
}
|
||||
if (tag == BLK_MQ_NO_TAG) {
|
||||
if (data->flags & BLK_MQ_REQ_NOWAIT)
|
||||
return NULL;
|
||||
|
||||
rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
|
||||
if (!op_is_flush(data->cmd_flags)) {
|
||||
rq->elv.icq = NULL;
|
||||
if (e && e->type->ops.prepare_request) {
|
||||
if (e->type->icq_cache)
|
||||
blk_mq_sched_assign_ioc(rq);
|
||||
|
||||
e->type->ops.prepare_request(rq, bio);
|
||||
rq->rq_flags |= RQF_ELVPRIV;
|
||||
}
|
||||
/*
|
||||
* Give up the CPU and sleep for a random short time to ensure
|
||||
* that threads using a realtime scheduling class are migrated
|
||||
* off the CPU, and thus off the hctx that is going away.
|
||||
*/
|
||||
msleep(3);
|
||||
goto retry;
|
||||
}
|
||||
data->hctx->queued++;
|
||||
return rq;
|
||||
return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
|
||||
}
|
||||
|
||||
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
|
||||
blk_mq_req_flags_t flags)
|
||||
{
|
||||
struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
|
||||
struct blk_mq_alloc_data data = {
|
||||
.q = q,
|
||||
.flags = flags,
|
||||
.cmd_flags = op,
|
||||
};
|
||||
struct request *rq;
|
||||
int ret;
|
||||
|
||||
@@ -410,34 +417,43 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
rq = blk_mq_get_request(q, NULL, &alloc_data);
|
||||
blk_queue_exit(q);
|
||||
|
||||
rq = __blk_mq_alloc_request(&data);
|
||||
if (!rq)
|
||||
return ERR_PTR(-EWOULDBLOCK);
|
||||
|
||||
goto out_queue_exit;
|
||||
rq->__data_len = 0;
|
||||
rq->__sector = (sector_t) -1;
|
||||
rq->bio = rq->biotail = NULL;
|
||||
return rq;
|
||||
out_queue_exit:
|
||||
blk_queue_exit(q);
|
||||
return ERR_PTR(-EWOULDBLOCK);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_alloc_request);
|
||||
|
||||
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
|
||||
{
|
||||
struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
|
||||
struct request *rq;
|
||||
struct blk_mq_alloc_data data = {
|
||||
.q = q,
|
||||
.flags = flags,
|
||||
.cmd_flags = op,
|
||||
};
|
||||
u64 alloc_time_ns = 0;
|
||||
unsigned int cpu;
|
||||
unsigned int tag;
|
||||
int ret;
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
|
||||
/*
|
||||
* If the tag allocator sleeps we could get an allocation for a
|
||||
* different hardware context. No need to complicate the low level
|
||||
* allocator for this for the rare use case of a command tied to
|
||||
* a specific queue.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
|
||||
if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (hctx_idx >= q->nr_hw_queues)
|
||||
@@ -451,21 +467,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
* Check if the hardware context is actually mapped to anything.
|
||||
* If not tell the caller that it should skip this queue.
|
||||
*/
|
||||
alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
|
||||
if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
|
||||
blk_queue_exit(q);
|
||||
return ERR_PTR(-EXDEV);
|
||||
}
|
||||
cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
|
||||
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
|
||||
ret = -EXDEV;
|
||||
data.hctx = q->queue_hw_ctx[hctx_idx];
|
||||
if (!blk_mq_hw_queue_mapped(data.hctx))
|
||||
goto out_queue_exit;
|
||||
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
|
||||
data.ctx = __blk_mq_get_ctx(q, cpu);
|
||||
|
||||
rq = blk_mq_get_request(q, NULL, &alloc_data);
|
||||
if (q->elevator)
|
||||
data.flags |= BLK_MQ_REQ_INTERNAL;
|
||||
else
|
||||
blk_mq_tag_busy(data.hctx);
|
||||
|
||||
ret = -EWOULDBLOCK;
|
||||
tag = blk_mq_get_tag(&data);
|
||||
if (tag == BLK_MQ_NO_TAG)
|
||||
goto out_queue_exit;
|
||||
return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);
|
||||
|
||||
out_queue_exit:
|
||||
blk_queue_exit(q);
|
||||
|
||||
if (!rq)
|
||||
return ERR_PTR(-EWOULDBLOCK);
|
||||
|
||||
return rq;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
|
||||
|
||||
@@ -479,9 +501,9 @@ static void __blk_mq_free_request(struct request *rq)
|
||||
blk_crypto_free_request(rq);
|
||||
blk_pm_mark_last_busy(rq);
|
||||
rq->mq_hctx = NULL;
|
||||
if (rq->tag != -1)
|
||||
if (rq->tag != BLK_MQ_NO_TAG)
|
||||
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
|
||||
if (sched_tag != -1)
|
||||
if (sched_tag != BLK_MQ_NO_TAG)
|
||||
blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
|
||||
blk_mq_sched_restart(hctx);
|
||||
blk_queue_exit(q);
|
||||
@@ -530,7 +552,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
blk_stat_add(rq, now);
|
||||
}
|
||||
|
||||
if (rq->internal_tag != -1)
|
||||
if (rq->internal_tag != BLK_MQ_NO_TAG)
|
||||
blk_mq_sched_completed_request(rq, now);
|
||||
|
||||
blk_account_io_done(rq, now);
|
||||
@@ -560,7 +582,17 @@ static void __blk_mq_complete_request_remote(void *data)
|
||||
q->mq_ops->complete(rq);
|
||||
}
|
||||
|
||||
static void __blk_mq_complete_request(struct request *rq)
|
||||
/**
|
||||
* blk_mq_force_complete_rq() - Force complete the request, bypassing any error
|
||||
* injection that could drop the completion.
|
||||
* @rq: Request to be force completed
|
||||
*
|
||||
* Drivers should use blk_mq_complete_request() to complete requests in their
|
||||
* normal IO path. For timeout error recovery, drivers may call this forced
|
||||
* completion routine after they've reclaimed timed out requests to bypass
|
||||
* potentially subsequent fake timeouts.
|
||||
*/
|
||||
void blk_mq_force_complete_rq(struct request *rq)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct request_queue *q = rq->q;
|
||||
@@ -606,6 +638,7 @@ static void __blk_mq_complete_request(struct request *rq)
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
|
||||
|
||||
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
|
||||
__releases(hctx->srcu)
|
||||
@@ -639,7 +672,7 @@ bool blk_mq_complete_request(struct request *rq)
|
||||
{
|
||||
if (unlikely(blk_should_fake_timeout(rq->q)))
|
||||
return false;
|
||||
__blk_mq_complete_request(rq);
|
||||
blk_mq_force_complete_rq(rq);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
@@ -670,15 +703,6 @@ void blk_mq_start_request(struct request *rq)
|
||||
blk_add_timer(rq);
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
|
||||
|
||||
if (q->dma_drain_size && blk_rq_bytes(rq)) {
|
||||
/*
|
||||
* Make sure space for the drain appears. We know we can do
|
||||
* this because max_hw_segments has been adjusted to be one
|
||||
* fewer than the device can handle.
|
||||
*/
|
||||
rq->nr_phys_segments++;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
|
||||
q->integrity.profile->prepare_fn(rq);
|
||||
@@ -698,8 +722,6 @@ static void __blk_mq_requeue_request(struct request *rq)
|
||||
if (blk_mq_request_started(rq)) {
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
|
||||
rq->rq_flags &= ~RQF_TIMED_OUT;
|
||||
if (q->dma_drain_size && blk_rq_bytes(rq))
|
||||
rq->nr_phys_segments--;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1040,7 +1062,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
|
||||
};
|
||||
bool shared;
|
||||
|
||||
if (rq->tag != -1)
|
||||
if (rq->tag != BLK_MQ_NO_TAG)
|
||||
return true;
|
||||
|
||||
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
|
||||
@@ -1056,7 +1078,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
|
||||
data.hctx->tags->rqs[rq->tag] = rq;
|
||||
}
|
||||
|
||||
return rq->tag != -1;
|
||||
return rq->tag != BLK_MQ_NO_TAG;
|
||||
}
|
||||
|
||||
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
|
||||
@@ -1198,6 +1220,19 @@ static void blk_mq_handle_dev_resource(struct request *rq,
|
||||
__blk_mq_requeue_request(rq);
|
||||
}
|
||||
|
||||
static void blk_mq_handle_zone_resource(struct request *rq,
|
||||
struct list_head *zone_list)
|
||||
{
|
||||
/*
|
||||
* If we end up here it is because we cannot dispatch a request to a
|
||||
* specific zone due to LLD level zone-write locking or other zone
|
||||
* related resource not being available. In this case, set the request
|
||||
* aside in zone_list for retrying it later.
|
||||
*/
|
||||
list_add(&rq->queuelist, zone_list);
|
||||
__blk_mq_requeue_request(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if we did some work AND can potentially do more.
|
||||
*/
|
||||
@@ -1209,6 +1244,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
bool no_tag = false;
|
||||
int errors, queued;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
bool no_budget_avail = false;
|
||||
LIST_HEAD(zone_list);
|
||||
|
||||
if (list_empty(list))
|
||||
return false;
|
||||
@@ -1227,6 +1264,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
hctx = rq->mq_hctx;
|
||||
if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
|
||||
blk_mq_put_driver_tag(rq);
|
||||
no_budget_avail = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1269,6 +1307,16 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
|
||||
blk_mq_handle_dev_resource(rq, list);
|
||||
break;
|
||||
} else if (ret == BLK_STS_ZONE_RESOURCE) {
|
||||
/*
|
||||
* Move the request to zone_list and keep going through
|
||||
* the dispatch list to find more requests the drive can
|
||||
* accept.
|
||||
*/
|
||||
blk_mq_handle_zone_resource(rq, &zone_list);
|
||||
if (list_empty(list))
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(ret != BLK_STS_OK)) {
|
||||
@@ -1280,6 +1328,9 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
queued++;
|
||||
} while (!list_empty(list));
|
||||
|
||||
if (!list_empty(&zone_list))
|
||||
list_splice_tail_init(&zone_list, list);
|
||||
|
||||
hctx->dispatched[queued_to_index(queued)]++;
|
||||
|
||||
/*
|
||||
@@ -1323,13 +1374,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
*
|
||||
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART
|
||||
* bit is set, run queue after a delay to avoid IO stalls
|
||||
* that could otherwise occur if the queue is idle.
|
||||
* that could otherwise occur if the queue is idle. We'll do
|
||||
* similar if we couldn't get budget and SCHED_RESTART is set.
|
||||
*/
|
||||
needs_restart = blk_mq_sched_needs_restart(hctx);
|
||||
if (!needs_restart ||
|
||||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
else if (needs_restart && (ret == BLK_STS_RESOURCE))
|
||||
else if (needs_restart && (ret == BLK_STS_RESOURCE ||
|
||||
no_budget_avail))
|
||||
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
|
||||
|
||||
blk_mq_update_dispatch_busy(hctx, true);
|
||||
@@ -1544,6 +1597,25 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queues);
|
||||
|
||||
/**
|
||||
* blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
|
||||
* @q: Pointer to the request queue to run.
|
||||
* @msecs: Milliseconds of delay to wait before running the queues.
|
||||
*/
|
||||
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
continue;
|
||||
|
||||
blk_mq_delay_run_hw_queue(hctx, msecs);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
|
||||
|
||||
/**
|
||||
* blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
|
||||
* @q: request queue.
|
||||
@@ -1787,7 +1859,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
|
||||
blk_rq_bio_prep(rq, bio, nr_segs);
|
||||
blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
|
||||
|
||||
blk_account_io_start(rq, true);
|
||||
blk_account_io_start(rq);
|
||||
}
|
||||
|
||||
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
@@ -1977,11 +2049,13 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*
|
||||
* Returns: Request queue cookie.
|
||||
*/
|
||||
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
const int is_flush_fua = op_is_flush(bio->bi_opf);
|
||||
struct blk_mq_alloc_data data = { .flags = 0};
|
||||
struct blk_mq_alloc_data data = {
|
||||
.q = q,
|
||||
};
|
||||
struct request *rq;
|
||||
struct blk_plug *plug;
|
||||
struct request *same_queue_rq = NULL;
|
||||
@@ -1993,24 +2067,24 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
__blk_queue_split(q, &bio, &nr_segs);
|
||||
|
||||
if (!bio_integrity_prep(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
goto queue_exit;
|
||||
|
||||
if (!is_flush_fua && !blk_queue_nomerges(q) &&
|
||||
blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
|
||||
return BLK_QC_T_NONE;
|
||||
goto queue_exit;
|
||||
|
||||
if (blk_mq_sched_bio_merge(q, bio, nr_segs))
|
||||
return BLK_QC_T_NONE;
|
||||
goto queue_exit;
|
||||
|
||||
rq_qos_throttle(q, bio);
|
||||
|
||||
data.cmd_flags = bio->bi_opf;
|
||||
rq = blk_mq_get_request(q, bio, &data);
|
||||
rq = __blk_mq_alloc_request(&data);
|
||||
if (unlikely(!rq)) {
|
||||
rq_qos_cleanup(q, bio);
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
bio_wouldblock_error(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
goto queue_exit;
|
||||
}
|
||||
|
||||
trace_block_getrq(q, bio, bio->bi_opf);
|
||||
@@ -2097,7 +2171,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
|
||||
return cookie;
|
||||
queue_exit:
|
||||
blk_queue_exit(q);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
|
||||
|
||||
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
unsigned int hctx_idx)
|
||||
@@ -2273,6 +2351,86 @@ fail:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
struct rq_iter_data {
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
bool has_rq;
|
||||
};
|
||||
|
||||
static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
|
||||
{
|
||||
struct rq_iter_data *iter_data = data;
|
||||
|
||||
if (rq->mq_hctx != iter_data->hctx)
|
||||
return true;
|
||||
iter_data->has_rq = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct blk_mq_tags *tags = hctx->sched_tags ?
|
||||
hctx->sched_tags : hctx->tags;
|
||||
struct rq_iter_data data = {
|
||||
.hctx = hctx,
|
||||
};
|
||||
|
||||
blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
|
||||
return data.has_rq;
|
||||
}
|
||||
|
||||
static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
|
||||
struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
|
||||
return false;
|
||||
if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
|
||||
struct blk_mq_hw_ctx, cpuhp_online);
|
||||
|
||||
if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
|
||||
!blk_mq_last_cpu_in_hctx(cpu, hctx))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Prevent new requests from being allocated on the current hctx.
|
||||
*
|
||||
* The smp_mb__after_atomic() pairs with the implied barrier in
|
||||
* test_and_set_bit_lock in sbitmap_get(). Ensures the inactive flag is
|
||||
* seen once we return from the tag allocator.
|
||||
*/
|
||||
set_bit(BLK_MQ_S_INACTIVE, &hctx->state);
|
||||
smp_mb__after_atomic();
|
||||
|
||||
/*
|
||||
* Try to grab a reference to the queue and wait for any outstanding
|
||||
* requests. If we could not grab a reference the queue has been
|
||||
* frozen and there are no requests.
|
||||
*/
|
||||
if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
|
||||
while (blk_mq_hctx_has_requests(hctx))
|
||||
msleep(5);
|
||||
percpu_ref_put(&hctx->queue->q_usage_counter);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
|
||||
struct blk_mq_hw_ctx, cpuhp_online);
|
||||
|
||||
if (cpumask_test_cpu(cpu, hctx->cpumask))
|
||||
clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* 'cpu' is going away. splice any existing rq_list entries from this
|
||||
* software queue to the hw queue dispatch list, and ensure that it
|
||||
@@ -2286,6 +2444,9 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
|
||||
enum hctx_type type;
|
||||
|
||||
hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
|
||||
if (!cpumask_test_cpu(cpu, hctx->cpumask))
|
||||
return 0;
|
||||
|
||||
ctx = __blk_mq_get_ctx(hctx->queue, cpu);
|
||||
type = hctx->type;
|
||||
|
||||
@@ -2309,6 +2470,9 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
|
||||
|
||||
static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!(hctx->flags & BLK_MQ_F_STACKING))
|
||||
cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
|
||||
&hctx->cpuhp_online);
|
||||
cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
|
||||
&hctx->cpuhp_dead);
|
||||
}
|
||||
@@ -2368,6 +2532,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
||||
{
|
||||
hctx->queue_num = hctx_idx;
|
||||
|
||||
if (!(hctx->flags & BLK_MQ_F_STACKING))
|
||||
cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
|
||||
&hctx->cpuhp_online);
|
||||
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
|
||||
|
||||
hctx->tags = set->tags[hctx_idx];
|
||||
@@ -2486,7 +2653,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
|
||||
}
|
||||
}
|
||||
|
||||
static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
|
||||
static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
|
||||
int hctx_idx)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@@ -2534,18 +2702,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
||||
* If the cpu isn't present, the cpu is mapped to first hctx.
|
||||
*/
|
||||
for_each_possible_cpu(i) {
|
||||
hctx_idx = set->map[HCTX_TYPE_DEFAULT].mq_map[i];
|
||||
/* unmapped hw queue can be remapped after CPU topo changed */
|
||||
if (!set->tags[hctx_idx] &&
|
||||
!__blk_mq_alloc_rq_map(set, hctx_idx)) {
|
||||
/*
|
||||
* If tags initialization fails for some hctx,
|
||||
* that hctx won't be brought online. In this
|
||||
* case, remap the current ctx to hctx[0] which
|
||||
* is guaranteed to always have tags allocated
|
||||
*/
|
||||
set->map[HCTX_TYPE_DEFAULT].mq_map[i] = 0;
|
||||
}
|
||||
|
||||
ctx = per_cpu_ptr(q->queue_ctx, i);
|
||||
for (j = 0; j < set->nr_maps; j++) {
|
||||
@@ -2554,6 +2710,18 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
||||
HCTX_TYPE_DEFAULT, i);
|
||||
continue;
|
||||
}
|
||||
hctx_idx = set->map[j].mq_map[i];
|
||||
/* unmapped hw queue can be remapped after CPU topo changed */
|
||||
if (!set->tags[hctx_idx] &&
|
||||
!__blk_mq_alloc_map_and_request(set, hctx_idx)) {
|
||||
/*
|
||||
* If tags initialization fails for some hctx,
|
||||
* that hctx won't be brought online. In this
|
||||
* case, remap the current ctx to hctx[0] which
|
||||
* is guaranteed to always have tags allocated
|
||||
*/
|
||||
set->map[j].mq_map[i] = 0;
|
||||
}
|
||||
|
||||
hctx = blk_mq_map_queue_type(q, j, i);
|
||||
ctx->hctxs[j] = hctx;
|
||||
@@ -2957,7 +3125,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
INIT_LIST_HEAD(&q->requeue_list);
|
||||
spin_lock_init(&q->requeue_lock);
|
||||
|
||||
q->make_request_fn = blk_mq_make_request;
|
||||
q->nr_requests = set->queue_depth;
|
||||
|
||||
/*
|
||||
@@ -3001,14 +3168,14 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < set->nr_hw_queues; i++)
|
||||
if (!__blk_mq_alloc_rq_map(set, i))
|
||||
if (!__blk_mq_alloc_map_and_request(set, i))
|
||||
goto out_unwind;
|
||||
|
||||
return 0;
|
||||
|
||||
out_unwind:
|
||||
while (--i >= 0)
|
||||
blk_mq_free_rq_map(set->tags[i]);
|
||||
blk_mq_free_map_and_requests(set, i);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
@@ -3018,7 +3185,7 @@ out_unwind:
|
||||
* may reduce the depth asked for, if memory is tight. set->queue_depth
|
||||
* will be updated to reflect the allocated depth.
|
||||
*/
|
||||
static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
|
||||
static int blk_mq_alloc_map_and_requests(struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int depth;
|
||||
int err;
|
||||
@@ -3178,7 +3345,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
if (ret)
|
||||
goto out_free_mq_map;
|
||||
|
||||
ret = blk_mq_alloc_rq_maps(set);
|
||||
ret = blk_mq_alloc_map_and_requests(set);
|
||||
if (ret)
|
||||
goto out_free_mq_map;
|
||||
|
||||
@@ -3360,14 +3527,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
blk_mq_sysfs_unregister(q);
|
||||
}
|
||||
|
||||
prev_nr_hw_queues = set->nr_hw_queues;
|
||||
if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
|
||||
0)
|
||||
goto reregister;
|
||||
|
||||
prev_nr_hw_queues = set->nr_hw_queues;
|
||||
set->nr_hw_queues = nr_hw_queues;
|
||||
blk_mq_update_queue_map(set);
|
||||
fallback:
|
||||
blk_mq_update_queue_map(set);
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||
blk_mq_realloc_hw_ctxs(set, q);
|
||||
if (q->nr_hw_queues != set->nr_hw_queues) {
|
||||
@@ -3622,6 +3789,9 @@ static int __init blk_mq_init(void)
|
||||
{
|
||||
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
|
||||
blk_mq_hctx_notify_dead);
|
||||
cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
|
||||
blk_mq_hctx_notify_online,
|
||||
blk_mq_hctx_notify_offline);
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(blk_mq_init);
|
||||
|
||||
@@ -201,7 +201,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
|
||||
rq->tag = -1;
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
|
||||
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
|
||||
rq->rq_flags &= ~RQF_MQ_INFLIGHT;
|
||||
@@ -211,7 +211,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
static inline void blk_mq_put_driver_tag(struct request *rq)
|
||||
{
|
||||
if (rq->tag == -1 || rq->internal_tag == -1)
|
||||
if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
|
||||
return;
|
||||
|
||||
__blk_mq_put_driver_tag(rq->mq_hctx, rq);
|
||||
|
||||
@@ -48,6 +48,7 @@ void blk_set_default_limits(struct queue_limits *lim)
|
||||
lim->chunk_sectors = 0;
|
||||
lim->max_write_same_sectors = 0;
|
||||
lim->max_write_zeroes_sectors = 0;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_discard_sectors = 0;
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
lim->discard_granularity = 0;
|
||||
@@ -83,6 +84,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
lim->max_dev_sectors = UINT_MAX;
|
||||
lim->max_write_same_sectors = UINT_MAX;
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
lim->max_zone_append_sectors = UINT_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
|
||||
@@ -221,6 +223,33 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_zone_append_sectors - set max sectors for a single zone append
|
||||
* @q: the request queue for the device
|
||||
* @max_zone_append_sectors: maximum number of sectors to write per command
|
||||
**/
|
||||
void blk_queue_max_zone_append_sectors(struct request_queue *q,
|
||||
unsigned int max_zone_append_sectors)
|
||||
{
|
||||
unsigned int max_sectors;
|
||||
|
||||
if (WARN_ON(!blk_queue_is_zoned(q)))
|
||||
return;
|
||||
|
||||
max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors);
|
||||
max_sectors = min(q->limits.chunk_sectors, max_sectors);
|
||||
|
||||
/*
|
||||
* Signal potential driver bugs resulting in the max_zone_append sectors limit
|
||||
* being 0 due to a 0 argument, the chunk_sectors limit (zone size) not set,
|
||||
* or the max_hw_sectors limit not set.
|
||||
*/
|
||||
WARN_ON(!max_sectors);
|
||||
|
||||
q->limits.max_zone_append_sectors = max_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_segments - set max hw segments for a request for this queue
|
||||
* @q: the request queue for the device
|
||||
@@ -470,6 +499,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
b->max_write_same_sectors);
|
||||
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
|
||||
b->max_write_zeroes_sectors);
|
||||
t->max_zone_append_sectors = min(t->max_zone_append_sectors,
|
||||
b->max_zone_append_sectors);
|
||||
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
|
||||
|
||||
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
|
||||
@@ -651,43 +682,6 @@ void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_update_dma_pad);
|
||||
|
||||
/**
|
||||
* blk_queue_dma_drain - Set up a drain buffer for excess dma.
|
||||
* @q: the request queue for the device
|
||||
* @dma_drain_needed: fn which returns non-zero if drain is necessary
|
||||
* @buf: physically contiguous buffer
|
||||
* @size: size of the buffer in bytes
|
||||
*
|
||||
* Some devices have excess DMA problems and can't simply discard (or
|
||||
* zero fill) the unwanted piece of the transfer. They have to have a
|
||||
* real area of memory to transfer it into. The use case for this is
|
||||
* ATAPI devices in DMA mode. If the packet command causes a transfer
|
||||
* bigger than the transfer size some HBAs will lock up if there
|
||||
* aren't DMA elements to contain the excess transfer. What this API
|
||||
* does is adjust the queue so that the buf is always appended
|
||||
* silently to the scatterlist.
|
||||
*
|
||||
* Note: This routine adjusts max_hw_segments to make room for appending
|
||||
* the drain buffer. If you call blk_queue_max_segments() after calling
|
||||
* this routine, you must set the limit to one fewer than your device
|
||||
* can support otherwise there won't be room for the drain buffer.
|
||||
*/
|
||||
int blk_queue_dma_drain(struct request_queue *q,
|
||||
dma_drain_needed_fn *dma_drain_needed,
|
||||
void *buf, unsigned int size)
|
||||
{
|
||||
if (queue_max_segments(q) < 2)
|
||||
return -EINVAL;
|
||||
/* make room for appending the drain */
|
||||
blk_queue_max_segments(q, queue_max_segments(q) - 1);
|
||||
q->dma_drain_needed = dma_drain_needed;
|
||||
q->dma_drain_buffer = buf;
|
||||
q->dma_drain_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
|
||||
|
||||
/**
|
||||
* blk_queue_segment_boundary - set boundary rules for segment merging
|
||||
* @q: the request queue for the device
|
||||
|
||||
@@ -218,6 +218,13 @@ static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
|
||||
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
|
||||
}
|
||||
|
||||
static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
|
||||
{
|
||||
unsigned long long max_sectors = q->limits.max_zone_append_sectors;
|
||||
|
||||
return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
||||
{
|
||||
@@ -639,6 +646,11 @@ static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
|
||||
.show = queue_write_zeroes_max_show,
|
||||
};
|
||||
|
||||
static struct queue_sysfs_entry queue_zone_append_max_entry = {
|
||||
.attr = {.name = "zone_append_max_bytes", .mode = 0444 },
|
||||
.show = queue_zone_append_max_show,
|
||||
};
|
||||
|
||||
static struct queue_sysfs_entry queue_nonrot_entry = {
|
||||
.attr = {.name = "rotational", .mode = 0644 },
|
||||
.show = queue_show_nonrot,
|
||||
@@ -749,6 +761,7 @@ static struct attribute *queue_attrs[] = {
|
||||
&queue_discard_zeroes_data_entry.attr,
|
||||
&queue_write_same_max_entry.attr,
|
||||
&queue_write_zeroes_max_entry.attr,
|
||||
&queue_zone_append_max_entry.attr,
|
||||
&queue_nonrot_entry.attr,
|
||||
&queue_zoned_entry.attr,
|
||||
&queue_nr_zones_entry.attr,
|
||||
|
||||
@@ -2358,69 +2358,6 @@ void blk_throtl_bio_endio(struct bio *bio)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Dispatch all bios from all children tg's queued on @parent_sq. On
|
||||
* return, @parent_sq is guaranteed to not have any active children tg's
|
||||
* and all bios from previously active tg's are on @parent_sq->bio_lists[].
|
||||
*/
|
||||
static void tg_drain_bios(struct throtl_service_queue *parent_sq)
|
||||
{
|
||||
struct throtl_grp *tg;
|
||||
|
||||
while ((tg = throtl_rb_first(parent_sq))) {
|
||||
struct throtl_service_queue *sq = &tg->service_queue;
|
||||
struct bio *bio;
|
||||
|
||||
throtl_dequeue_tg(tg);
|
||||
|
||||
while ((bio = throtl_peek_queued(&sq->queued[READ])))
|
||||
tg_dispatch_one_bio(tg, bio_data_dir(bio));
|
||||
while ((bio = throtl_peek_queued(&sq->queued[WRITE])))
|
||||
tg_dispatch_one_bio(tg, bio_data_dir(bio));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_throtl_drain - drain throttled bios
|
||||
* @q: request_queue to drain throttled bios for
|
||||
*
|
||||
* Dispatch all currently throttled bios on @q through ->make_request_fn().
|
||||
*/
|
||||
void blk_throtl_drain(struct request_queue *q)
|
||||
__releases(&q->queue_lock) __acquires(&q->queue_lock)
|
||||
{
|
||||
struct throtl_data *td = q->td;
|
||||
struct blkcg_gq *blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
struct bio *bio;
|
||||
int rw;
|
||||
|
||||
/* The blkg tree walk and both drain steps below run under RCU. */
rcu_read_lock();
|
||||
|
||||
/*
|
||||
* Drain each tg while doing post-order walk on the blkg tree, so
|
||||
* that all bios are propagated to td->service_queue. It'd be
|
||||
* better to walk service_queue tree directly but blkg walk is
|
||||
* easier.
|
||||
*/
|
||||
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
|
||||
tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
|
||||
|
||||
/* finally, transfer bios from top-level tg's into the td */
|
||||
tg_drain_bios(&td->service_queue);
|
||||
|
||||
rcu_read_unlock();
|
||||
/*
 * Drop q->queue_lock while the collected bios are resubmitted; it is
 * re-taken just before returning, matching the __releases/__acquires
 * annotations on this function.
 */
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
/* all bios now should be in td->service_queue, issue them */
|
||||
for (rw = READ; rw <= WRITE; rw++)
|
||||
while ((bio = throtl_pop_queued(&td->service_queue.queued[rw],
|
||||
NULL)))
|
||||
generic_make_request(bio);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
}
|
||||
|
||||
int blk_throtl_init(struct request_queue *q)
|
||||
{
|
||||
struct throtl_data *td;
|
||||
|
||||
@@ -405,7 +405,7 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
|
||||
rwb_arm_timer(rwb);
|
||||
}
|
||||
|
||||
static void __wbt_update_limits(struct rq_wb *rwb)
|
||||
static void wbt_update_limits(struct rq_wb *rwb)
|
||||
{
|
||||
struct rq_depth *rqd = &rwb->rq_depth;
|
||||
|
||||
@@ -418,14 +418,6 @@ static void __wbt_update_limits(struct rq_wb *rwb)
|
||||
rwb_wake_all(rwb);
|
||||
}
|
||||
|
||||
/*
 * Queue-level wrapper around __wbt_update_limits(): looks up the wbt
 * rq_qos attached to @q and recomputes its limits. Does nothing when
 * wbt_rq_qos() returns NULL.
 */
void wbt_update_limits(struct request_queue *q)
|
||||
{
|
||||
struct rq_qos *rqos = wbt_rq_qos(q);
|
||||
if (!rqos)
|
||||
return;
|
||||
__wbt_update_limits(RQWB(rqos));
|
||||
}
|
||||
|
||||
u64 wbt_get_min_lat(struct request_queue *q)
|
||||
{
|
||||
struct rq_qos *rqos = wbt_rq_qos(q);
|
||||
@@ -441,7 +433,7 @@ void wbt_set_min_lat(struct request_queue *q, u64 val)
|
||||
return;
|
||||
RQWB(rqos)->min_lat_nsec = val;
|
||||
RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
|
||||
__wbt_update_limits(RQWB(rqos));
|
||||
wbt_update_limits(RQWB(rqos));
|
||||
}
|
||||
|
||||
|
||||
@@ -685,7 +677,7 @@ static int wbt_data_dir(const struct request *rq)
|
||||
static void wbt_queue_depth_changed(struct rq_qos *rqos)
|
||||
{
|
||||
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
|
||||
__wbt_update_limits(RQWB(rqos));
|
||||
wbt_update_limits(RQWB(rqos));
|
||||
}
|
||||
|
||||
static void wbt_exit(struct rq_qos *rqos)
|
||||
@@ -843,7 +835,7 @@ int wbt_init(struct request_queue *q)
|
||||
rwb->enable_state = WBT_STATE_ON_DEFAULT;
|
||||
rwb->wc = 1;
|
||||
rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
|
||||
__wbt_update_limits(rwb);
|
||||
wbt_update_limits(rwb);
|
||||
|
||||
/*
|
||||
* Assign rwb and add the stats callback.
|
||||
|
||||
@@ -88,7 +88,6 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
|
||||
#ifdef CONFIG_BLK_WBT
|
||||
|
||||
int wbt_init(struct request_queue *);
|
||||
void wbt_update_limits(struct request_queue *);
|
||||
void wbt_disable_default(struct request_queue *);
|
||||
void wbt_enable_default(struct request_queue *);
|
||||
|
||||
@@ -108,9 +107,6 @@ static inline int wbt_init(struct request_queue *q)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
/*
 * Empty stub: nothing to update.
 * NOTE(review): presumably the variant built when CONFIG_BLK_WBT is not
 * set - the enclosing #else is outside this hunk, confirm in the header.
 */
static inline void wbt_update_limits(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
static inline void wbt_disable_default(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -82,6 +82,20 @@ bool blk_req_needs_zone_write_lock(struct request *rq)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
|
||||
|
||||
/*
 * Try to take the zone write lock for the zone targeted by @rq.
 *
 * Atomically sets the bit for blk_rq_zone_no(rq) in
 * rq->q->seq_zones_wlock. Returns false, leaving @rq untouched, if the
 * bit was already set; otherwise flags @rq with RQF_ZONE_WRITE_LOCKED
 * and returns true.
 */
bool blk_req_zone_write_trylock(struct request *rq)
|
||||
{
|
||||
unsigned int zno = blk_rq_zone_no(rq);
|
||||
|
||||
if (test_and_set_bit(zno, rq->q->seq_zones_wlock))
|
||||
return false;
|
||||
|
||||
/* We just won the bit, so @rq is not expected to carry the flag yet. */
WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
|
||||
rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);
|
||||
|
||||
void __blk_req_zone_write_lock(struct request *rq)
|
||||
{
|
||||
if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
|
||||
@@ -457,14 +471,19 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||
/**
|
||||
* blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
|
||||
* @disk: Target disk
|
||||
* @update_driver_data: Callback to update driver data on the frozen disk
|
||||
*
|
||||
* Helper function for low-level device drivers to (re) allocate and initialize
|
||||
* a disk request queue zone bitmaps. This function should normally be called
|
||||
* within the disk ->revalidate method for blk-mq based drivers. For BIO based
|
||||
* drivers only q->nr_zones needs to be updated so that the sysfs exposed value
|
||||
* is correct.
|
||||
* If the @update_driver_data callback function is not NULL, the callback is
|
||||
* executed with the device request queue frozen after all zones have been
|
||||
* checked.
|
||||
*/
|
||||
int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
int blk_revalidate_disk_zones(struct gendisk *disk,
|
||||
void (*update_driver_data)(struct gendisk *disk))
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct blk_revalidate_zone_args args = {
|
||||
@@ -498,6 +517,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
q->nr_zones = args.nr_zones;
|
||||
swap(q->seq_zones_wlock, args.seq_zones_wlock);
|
||||
swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
|
||||
if (update_driver_data)
|
||||
update_driver_data(disk);
|
||||
ret = 0;
|
||||
} else {
|
||||
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
|
||||
|
||||
86
block/blk.h
86
block/blk.h
@@ -19,7 +19,6 @@ extern struct dentry *blk_debugfs_root;
|
||||
#endif
|
||||
|
||||
struct blk_flush_queue {
|
||||
unsigned int flush_queue_delayed:1;
|
||||
unsigned int flush_pending_idx:1;
|
||||
unsigned int flush_running_idx:1;
|
||||
blk_status_t rq_status;
|
||||
@@ -64,17 +63,6 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
|
||||
|
||||
void blk_freeze_queue(struct request_queue *q);
|
||||
|
||||
/*
 * Take an additional reference on q->q_usage_counter unconditionally,
 * i.e. without any frozen/dead check (see the rationale in the body).
 */
static inline void blk_queue_enter_live(struct request_queue *q)
|
||||
{
|
||||
/*
|
||||
* Given that running in generic_make_request() context
|
||||
* guarantees that a live reference against q_usage_counter has
|
||||
* been established, further references under that same context
|
||||
* need not check that the queue has been frozen (marked dead).
|
||||
*/
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
static inline bool biovec_phys_mergeable(struct request_queue *q,
|
||||
struct bio_vec *vec1, struct bio_vec *vec2)
|
||||
{
|
||||
@@ -197,8 +185,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
|
||||
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs, struct request **same_queue_rq);
|
||||
|
||||
void blk_account_io_start(struct request *req, bool new_io);
|
||||
void blk_account_io_completion(struct request *req, unsigned int bytes);
|
||||
void blk_account_io_start(struct request *req);
|
||||
void blk_account_io_done(struct request *req, u64 now);
|
||||
|
||||
/*
|
||||
@@ -305,36 +292,14 @@ void ioc_clear_queue(struct request_queue *q);
|
||||
|
||||
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
|
||||
|
||||
/**
|
||||
* create_io_context - try to create task->io_context
|
||||
* @gfp_mask: allocation mask
|
||||
* @node: allocation node
|
||||
*
|
||||
* If %current->io_context is %NULL, allocate a new io_context and install
|
||||
* it. Returns the current %current->io_context which may be %NULL if
|
||||
* allocation failed.
|
||||
*
|
||||
* Note that this function can't be called with IRQ disabled because
|
||||
* task_lock which protects %current->io_context is IRQ-unsafe.
|
||||
*/
|
||||
static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
|
||||
{
|
||||
/* Flag (once) any caller that reaches here with IRQs disabled. */
WARN_ON_ONCE(irqs_disabled());
|
||||
/*
 * The return value of create_task_io_context() is deliberately not
 * checked here: a failed allocation simply shows up as
 * current->io_context still being NULL in the return below.
 */
if (unlikely(!current->io_context))
|
||||
create_task_io_context(current, gfp_mask, node);
|
||||
return current->io_context;
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal throttling interface
|
||||
*/
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
extern void blk_throtl_drain(struct request_queue *q);
|
||||
extern int blk_throtl_init(struct request_queue *q);
|
||||
extern void blk_throtl_exit(struct request_queue *q);
|
||||
extern void blk_throtl_register_queue(struct request_queue *q);
|
||||
#else /* CONFIG_BLK_DEV_THROTTLING */
|
||||
static inline void blk_throtl_drain(struct request_queue *q) { }
|
||||
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
|
||||
static inline void blk_throtl_exit(struct request_queue *q) { }
|
||||
static inline void blk_throtl_register_queue(struct request_queue *q) { }
|
||||
@@ -377,11 +342,6 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
|
||||
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
|
||||
#endif
|
||||
|
||||
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
|
||||
int rw);
|
||||
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
|
||||
int rw);
|
||||
void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
|
||||
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
|
||||
|
||||
int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
|
||||
@@ -391,44 +351,32 @@ char *disk_name(struct gendisk *hd, int partno, char *buf);
|
||||
#define ADDPART_FLAG_NONE 0
|
||||
#define ADDPART_FLAG_RAID 1
|
||||
#define ADDPART_FLAG_WHOLEDISK 2
|
||||
struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
|
||||
sector_t start, sector_t len, int flags,
|
||||
struct partition_meta_info *info);
|
||||
void __delete_partition(struct percpu_ref *ref);
|
||||
void delete_partition(struct gendisk *disk, int partno);
|
||||
void delete_partition(struct gendisk *disk, struct hd_struct *part);
|
||||
int bdev_add_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length);
|
||||
int bdev_del_partition(struct block_device *bdev, int partno);
|
||||
int bdev_resize_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length);
|
||||
int disk_expand_part_tbl(struct gendisk *disk, int target);
|
||||
int hd_ref_init(struct hd_struct *part);
|
||||
|
||||
/*
 * Initialise @part's percpu refcount, registering __delete_partition()
 * as the release callback. Any percpu_ref_init() failure is reported as
 * -ENOMEM; returns 0 on success.
 */
static inline int hd_ref_init(struct hd_struct *part)
|
||||
{
|
||||
if (percpu_ref_init(&part->ref, __delete_partition, 0,
|
||||
GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Take one reference on @part's percpu refcount. */
static inline void hd_struct_get(struct hd_struct *part)
|
||||
{
|
||||
percpu_ref_get(&part->ref);
|
||||
}
|
||||
|
||||
/* no need to get/put refcount of part0 */
|
||||
static inline int hd_struct_try_get(struct hd_struct *part)
|
||||
{
|
||||
return percpu_ref_tryget_live(&part->ref);
|
||||
if (part->partno)
|
||||
return percpu_ref_tryget_live(&part->ref);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void hd_struct_put(struct hd_struct *part)
|
||||
{
|
||||
percpu_ref_put(&part->ref);
|
||||
}
|
||||
|
||||
static inline void hd_struct_kill(struct hd_struct *part)
|
||||
{
|
||||
percpu_ref_kill(&part->ref);
|
||||
if (part->partno)
|
||||
percpu_ref_put(&part->ref);
|
||||
}
|
||||
|
||||
static inline void hd_free_part(struct hd_struct *part)
|
||||
{
|
||||
free_part_stats(part);
|
||||
free_percpu(part->dkstats);
|
||||
kfree(part->info);
|
||||
percpu_ref_exit(&part->ref);
|
||||
}
|
||||
@@ -486,8 +434,8 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
|
||||
|
||||
struct request_queue *__blk_alloc_queue(int node_id);
|
||||
|
||||
int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
bool *same_page);
|
||||
unsigned int max_sectors, bool *same_page);
|
||||
|
||||
#endif /* BLK_INTERNAL_H */
|
||||
|
||||
133
block/genhd.c
133
block/genhd.c
@@ -92,7 +92,6 @@ const char *bdevname(struct block_device *bdev, char *buf)
|
||||
}
|
||||
EXPORT_SYMBOL(bdevname);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
|
||||
{
|
||||
int cpu;
|
||||
@@ -112,44 +111,13 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
|
||||
stat->io_ticks += ptr->io_ticks;
|
||||
}
|
||||
}
|
||||
#else /* CONFIG_SMP */
|
||||
/*
 * Variant on the #else side of CONFIG_SMP: copies part->dkstats into
 * @stat with a single memcpy.
 */
static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
|
||||
{
|
||||
memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
 * Increment the in_flight[@rw] counter of @part. When @part has a
 * non-zero partno the increment is also applied to the disk's part0.
 * For blk-mq queues (queue_is_mq()) this is a no-op.
 */
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
|
||||
{
|
||||
if (queue_is_mq(q))
|
||||
return;
|
||||
|
||||
part_stat_local_inc(part, in_flight[rw]);
|
||||
if (part->partno)
|
||||
part_stat_local_inc(&part_to_disk(part)->part0, in_flight[rw]);
|
||||
}
|
||||
|
||||
/*
 * Decrement the in_flight[@rw] counter of @part. When @part has a
 * non-zero partno the decrement is also applied to the disk's part0.
 * For blk-mq queues (queue_is_mq()) this is a no-op.
 */
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
|
||||
{
|
||||
if (queue_is_mq(q))
|
||||
return;
|
||||
|
||||
part_stat_local_dec(part, in_flight[rw]);
|
||||
if (part->partno)
|
||||
part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
|
||||
}
|
||||
|
||||
static unsigned int part_in_flight(struct request_queue *q,
|
||||
struct hd_struct *part)
|
||||
{
|
||||
unsigned int inflight = 0;
|
||||
int cpu;
|
||||
unsigned int inflight;
|
||||
|
||||
if (queue_is_mq(q)) {
|
||||
return blk_mq_in_flight(q, part);
|
||||
}
|
||||
|
||||
inflight = 0;
|
||||
for_each_possible_cpu(cpu) {
|
||||
inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
|
||||
part_stat_local_read_cpu(part, in_flight[1], cpu);
|
||||
@@ -165,11 +133,6 @@ static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (queue_is_mq(q)) {
|
||||
blk_mq_in_flight_rw(q, part, inflight);
|
||||
return;
|
||||
}
|
||||
|
||||
inflight[0] = 0;
|
||||
inflight[1] = 0;
|
||||
for_each_possible_cpu(cpu) {
|
||||
@@ -344,11 +307,13 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector)
|
||||
* primarily used for stats accounting.
|
||||
*
|
||||
* CONTEXT:
|
||||
* RCU read locked. The returned partition pointer is valid only
|
||||
* while preemption is disabled.
|
||||
* RCU read locked. The returned partition pointer is always valid
|
||||
* because its refcount is grabbed except for part0, whose lifetime
|
||||
* is the same as the disk's.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found partition on success, part0 is returned if no partition matches
|
||||
* or the matched partition is being deleted.
|
||||
*/
|
||||
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
|
||||
{
|
||||
@@ -356,21 +321,33 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
|
||||
struct hd_struct *part;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
ptbl = rcu_dereference(disk->part_tbl);
|
||||
|
||||
part = rcu_dereference(ptbl->last_lookup);
|
||||
if (part && sector_in_part(part, sector))
|
||||
return part;
|
||||
if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
|
||||
goto out_unlock;
|
||||
|
||||
for (i = 1; i < ptbl->len; i++) {
|
||||
part = rcu_dereference(ptbl->part[i]);
|
||||
|
||||
if (part && sector_in_part(part, sector)) {
|
||||
/*
|
||||
* only live partition can be cached for lookup,
|
||||
* so use-after-free on cached & deleting partition
|
||||
* can be avoided
|
||||
*/
|
||||
if (!hd_struct_try_get(part))
|
||||
break;
|
||||
rcu_assign_pointer(ptbl->last_lookup, part);
|
||||
return part;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
return &disk->part0;
|
||||
|
||||
part = &disk->part0;
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
return part;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -840,13 +817,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||
} else {
|
||||
struct backing_dev_info *bdi = disk->queue->backing_dev_info;
|
||||
struct device *dev = disk_to_dev(disk);
|
||||
int ret;
|
||||
|
||||
/* Register BDI before referencing it from bdev */
|
||||
disk_to_dev(disk)->devt = devt;
|
||||
ret = bdi_register_owner(disk->queue->backing_dev_info,
|
||||
disk_to_dev(disk));
|
||||
dev->devt = devt;
|
||||
ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
|
||||
WARN_ON(ret);
|
||||
bdi_set_owner(bdi, dev);
|
||||
blk_register_region(disk_devt(disk), disk->minors, NULL,
|
||||
exact_match, exact_lock, disk);
|
||||
}
|
||||
@@ -878,6 +857,25 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
|
||||
}
|
||||
EXPORT_SYMBOL(device_add_disk_no_queue_reg);
|
||||
|
||||
/*
 * Sync and invalidate the block device behind partition @partno of
 * @disk, then unhash its inode. Returns silently when bdget_disk()
 * yields no block device. The result of __invalidate_device() is not
 * propagated.
 */
static void invalidate_partition(struct gendisk *disk, int partno)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
|
||||
bdev = bdget_disk(disk, partno);
|
||||
if (!bdev)
|
||||
return;
|
||||
|
||||
fsync_bdev(bdev);
|
||||
__invalidate_device(bdev, true);
|
||||
|
||||
/*
|
||||
* Unhash the bdev inode for this device so that it gets evicted as soon
|
||||
* as last inode reference is dropped.
|
||||
*/
|
||||
remove_inode_hash(bdev->bd_inode);
|
||||
/* Pairs with the bdget_disk() above. */
bdput(bdev);
|
||||
}
|
||||
|
||||
void del_gendisk(struct gendisk *disk)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
@@ -896,13 +894,11 @@ void del_gendisk(struct gendisk *disk)
|
||||
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
||||
while ((part = disk_part_iter_next(&piter))) {
|
||||
invalidate_partition(disk, part->partno);
|
||||
bdev_unhash_inode(part_devt(part));
|
||||
delete_partition(disk, part->partno);
|
||||
delete_partition(disk, part);
|
||||
}
|
||||
disk_part_iter_exit(&piter);
|
||||
|
||||
invalidate_partition(disk, 0);
|
||||
bdev_unhash_inode(disk_devt(disk));
|
||||
set_capacity(disk, 0);
|
||||
disk->flags &= ~GENHD_FL_UP;
|
||||
up_write(&disk->lookup_sem);
|
||||
@@ -1279,7 +1275,10 @@ ssize_t part_stat_show(struct device *dev,
|
||||
unsigned int inflight;
|
||||
|
||||
part_stat_read_all(p, &stat);
|
||||
inflight = part_in_flight(q, p);
|
||||
if (queue_is_mq(q))
|
||||
inflight = blk_mq_in_flight(q, p);
|
||||
else
|
||||
inflight = part_in_flight(q, p);
|
||||
|
||||
return sprintf(buf,
|
||||
"%8lu %8lu %8llu %8u "
|
||||
@@ -1318,7 +1317,11 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
|
||||
struct request_queue *q = part_to_disk(p)->queue;
|
||||
unsigned int inflight[2];
|
||||
|
||||
part_in_flight_rw(q, p, inflight);
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_in_flight_rw(q, p, inflight);
|
||||
else
|
||||
part_in_flight_rw(q, p, inflight);
|
||||
|
||||
return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
|
||||
}
|
||||
|
||||
@@ -1573,7 +1576,10 @@ static int diskstats_show(struct seq_file *seqf, void *v)
|
||||
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
|
||||
while ((hd = disk_part_iter_next(&piter))) {
|
||||
part_stat_read_all(hd, &stat);
|
||||
inflight = part_in_flight(gp->queue, hd);
|
||||
if (queue_is_mq(gp->queue))
|
||||
inflight = blk_mq_in_flight(gp->queue, hd);
|
||||
else
|
||||
inflight = part_in_flight(gp->queue, hd);
|
||||
|
||||
seq_printf(seqf, "%4d %7d %s "
|
||||
"%lu %lu %lu %u "
|
||||
@@ -1680,14 +1686,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
|
||||
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
|
||||
if (disk) {
|
||||
if (!init_part_stats(&disk->part0)) {
|
||||
disk->part0.dkstats = alloc_percpu(struct disk_stats);
|
||||
if (!disk->part0.dkstats) {
|
||||
kfree(disk);
|
||||
return NULL;
|
||||
}
|
||||
init_rwsem(&disk->lookup_sem);
|
||||
disk->node_id = node_id;
|
||||
if (disk_expand_part_tbl(disk, 0)) {
|
||||
free_part_stats(&disk->part0);
|
||||
free_percpu(disk->part0.dkstats);
|
||||
kfree(disk);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1703,7 +1710,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
* TODO: Ideally set_capacity() and get_capacity() should be
|
||||
* converted to make use of bd_mutex and sequence counters.
|
||||
*/
|
||||
seqcount_init(&disk->part0.nr_sects_seq);
|
||||
hd_sects_seq_init(&disk->part0);
|
||||
if (hd_ref_init(&disk->part0)) {
|
||||
hd_free_part(&disk->part0);
|
||||
kfree(disk);
|
||||
@@ -1806,20 +1813,6 @@ int bdev_read_only(struct block_device *bdev)
|
||||
|
||||
EXPORT_SYMBOL(bdev_read_only);
|
||||
|
||||
/*
 * Sync and invalidate the block device behind partition @partno of
 * @disk. Returns the result of __invalidate_device(), or 0 when
 * bdget_disk() yields no block device.
 */
int invalidate_partition(struct gendisk *disk, int partno)
|
||||
{
|
||||
int res = 0;
|
||||
struct block_device *bdev = bdget_disk(disk, partno);
|
||||
if (bdev) {
|
||||
fsync_bdev(bdev);
|
||||
res = __invalidate_device(bdev, true);
|
||||
/* Pairs with the bdget_disk() above. */
bdput(bdev);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(invalidate_partition);
|
||||
|
||||
/*
|
||||
* Disk events - monitor disk events like media change and eject request.
|
||||
*/
|
||||
|
||||
148
block/ioctl.c
148
block/ioctl.c
@@ -16,143 +16,45 @@
|
||||
static int blkpg_do_ioctl(struct block_device *bdev,
|
||||
struct blkpg_partition __user *upart, int op)
|
||||
{
|
||||
struct block_device *bdevp;
|
||||
struct gendisk *disk;
|
||||
struct hd_struct *part, *lpart;
|
||||
struct blkpg_partition p;
|
||||
struct disk_part_iter piter;
|
||||
long long start, length;
|
||||
int partno;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
|
||||
return -EFAULT;
|
||||
disk = bdev->bd_disk;
|
||||
if (bdev != bdev->bd_contains)
|
||||
return -EINVAL;
|
||||
partno = p.pno;
|
||||
if (partno <= 0)
|
||||
|
||||
if (p.pno <= 0)
|
||||
return -EINVAL;
|
||||
switch (op) {
|
||||
case BLKPG_ADD_PARTITION:
|
||||
start = p.start >> 9;
|
||||
length = p.length >> 9;
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) == sizeof(long) &&
|
||||
sizeof(long long) > sizeof(long)) {
|
||||
long pstart = start, plength = length;
|
||||
if (pstart != start || plength != length
|
||||
|| pstart < 0 || plength < 0 || partno > 65535)
|
||||
return -EINVAL;
|
||||
}
|
||||
/* check if partition is aligned to blocksize */
|
||||
if (p.start & (bdev_logical_block_size(bdev) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
if (op == BLKPG_DEL_PARTITION)
|
||||
return bdev_del_partition(bdev, p.pno);
|
||||
|
||||
/* overlap? */
|
||||
disk_part_iter_init(&piter, disk,
|
||||
DISK_PITER_INCL_EMPTY);
|
||||
while ((part = disk_part_iter_next(&piter))) {
|
||||
if (!(start + length <= part->start_sect ||
|
||||
start >= part->start_sect + part->nr_sects)) {
|
||||
disk_part_iter_exit(&piter);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
disk_part_iter_exit(&piter);
|
||||
start = p.start >> SECTOR_SHIFT;
|
||||
length = p.length >> SECTOR_SHIFT;
|
||||
|
||||
/* all seems OK */
|
||||
part = add_partition(disk, partno, start, length,
|
||||
ADDPART_FLAG_NONE, NULL);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
return PTR_ERR_OR_ZERO(part);
|
||||
case BLKPG_DEL_PARTITION:
|
||||
part = disk_get_part(disk, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) < sizeof(long long)) {
|
||||
long pstart = start, plength = length;
|
||||
|
||||
bdevp = bdget(part_devt(part));
|
||||
disk_put_part(part);
|
||||
if (!bdevp)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&bdevp->bd_mutex);
|
||||
if (bdevp->bd_openers) {
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
bdput(bdevp);
|
||||
return -EBUSY;
|
||||
}
|
||||
/* all seems OK */
|
||||
fsync_bdev(bdevp);
|
||||
invalidate_bdev(bdevp);
|
||||
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
delete_partition(disk, partno);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
bdput(bdevp);
|
||||
|
||||
return 0;
|
||||
case BLKPG_RESIZE_PARTITION:
|
||||
start = p.start >> 9;
|
||||
/* new length of partition in bytes */
|
||||
length = p.length >> 9;
|
||||
/* check for fit in a hd_struct */
|
||||
if (sizeof(sector_t) == sizeof(long) &&
|
||||
sizeof(long long) > sizeof(long)) {
|
||||
long pstart = start, plength = length;
|
||||
if (pstart != start || plength != length
|
||||
|| pstart < 0 || plength < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
part = disk_get_part(disk, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
bdevp = bdget(part_devt(part));
|
||||
if (!bdevp) {
|
||||
disk_put_part(part);
|
||||
return -ENOMEM;
|
||||
}
|
||||
mutex_lock(&bdevp->bd_mutex);
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
if (start != part->start_sect) {
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return -EINVAL;
|
||||
}
|
||||
/* overlap? */
|
||||
disk_part_iter_init(&piter, disk,
|
||||
DISK_PITER_INCL_EMPTY);
|
||||
while ((lpart = disk_part_iter_next(&piter))) {
|
||||
if (lpart->partno != partno &&
|
||||
!(start + length <= lpart->start_sect ||
|
||||
start >= lpart->start_sect + lpart->nr_sects)
|
||||
) {
|
||||
disk_part_iter_exit(&piter);
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
disk_part_iter_exit(&piter);
|
||||
part_nr_sects_write(part, (sector_t)length);
|
||||
i_size_write(bdevp->bd_inode, p.length);
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
disk_put_part(part);
|
||||
return 0;
|
||||
default:
|
||||
if (pstart != start || plength != length || pstart < 0 ||
|
||||
plength < 0 || p.pno > 65535)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case BLKPG_ADD_PARTITION:
|
||||
/* check if partition is aligned to blocksize */
|
||||
if (p.start & (bdev_logical_block_size(bdev) - 1))
|
||||
return -EINVAL;
|
||||
return bdev_add_partition(bdev, p.pno, start, length);
|
||||
case BLKPG_RESIZE_PARTITION:
|
||||
return bdev_resize_partition(bdev, p.pno, start, length);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int blkpg_ioctl(struct block_device *bdev,
|
||||
@@ -302,12 +204,12 @@ static int put_u64(u64 __user *argp, u64 val)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static int compat_put_long(compat_long_t *argp, long val)
|
||||
static int compat_put_long(compat_long_t __user *argp, long val)
|
||||
{
|
||||
return put_user(val, argp);
|
||||
}
|
||||
|
||||
static int compat_put_ulong(compat_ulong_t *argp, compat_ulong_t val)
|
||||
static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val)
|
||||
{
|
||||
return put_user(val, argp);
|
||||
}
|
||||
|
||||
@@ -579,7 +579,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
|
||||
return merged;
|
||||
}
|
||||
|
||||
static void kyber_prepare_request(struct request *rq, struct bio *bio)
|
||||
static void kyber_prepare_request(struct request *rq)
|
||||
{
|
||||
rq_set_domain_token(rq, -1);
|
||||
}
|
||||
|
||||
@@ -541,7 +541,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
* Nothing to do here. This is defined only to ensure that .finish_request
|
||||
* method is called upon request completion.
|
||||
*/
|
||||
static void dd_prepare_request(struct request *rq, struct bio *bio)
|
||||
static void dd_prepare_request(struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -274,10 +274,10 @@ struct device_type part_type = {
|
||||
.uevent = part_uevent,
|
||||
};
|
||||
|
||||
static void delete_partition_work_fn(struct work_struct *work)
|
||||
static void hd_struct_free_work(struct work_struct *work)
|
||||
{
|
||||
struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct,
|
||||
rcu_work);
|
||||
struct hd_struct *part =
|
||||
container_of(to_rcu_work(work), struct hd_struct, rcu_work);
|
||||
|
||||
part->start_sect = 0;
|
||||
part->nr_sects = 0;
|
||||
@@ -285,32 +285,42 @@ static void delete_partition_work_fn(struct work_struct *work)
|
||||
put_device(part_to_dev(part));
|
||||
}
|
||||
|
||||
void __delete_partition(struct percpu_ref *ref)
|
||||
static void hd_struct_free(struct percpu_ref *ref)
|
||||
{
|
||||
struct hd_struct *part = container_of(ref, struct hd_struct, ref);
|
||||
INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn);
|
||||
struct gendisk *disk = part_to_disk(part);
|
||||
struct disk_part_tbl *ptbl =
|
||||
rcu_dereference_protected(disk->part_tbl, 1);
|
||||
|
||||
rcu_assign_pointer(ptbl->last_lookup, NULL);
|
||||
put_device(disk_to_dev(disk));
|
||||
|
||||
INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
|
||||
queue_rcu_work(system_wq, &part->rcu_work);
|
||||
}
|
||||
|
||||
/*
 * Initialise @part's percpu refcount, registering hd_struct_free() as
 * the release callback. Any percpu_ref_init() failure is reported as
 * -ENOMEM; returns 0 on success.
 */
int hd_ref_init(struct hd_struct *part)
|
||||
{
|
||||
if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called either with bd_mutex held, before a disk can be opened or
|
||||
* after all disk users are gone.
|
||||
*/
|
||||
void delete_partition(struct gendisk *disk, int partno)
|
||||
void delete_partition(struct gendisk *disk, struct hd_struct *part)
|
||||
{
|
||||
struct disk_part_tbl *ptbl =
|
||||
rcu_dereference_protected(disk->part_tbl, 1);
|
||||
struct hd_struct *part;
|
||||
|
||||
if (partno >= ptbl->len)
|
||||
return;
|
||||
|
||||
part = rcu_dereference_protected(ptbl->part[partno], 1);
|
||||
if (!part)
|
||||
return;
|
||||
|
||||
rcu_assign_pointer(ptbl->part[partno], NULL);
|
||||
rcu_assign_pointer(ptbl->last_lookup, NULL);
|
||||
/*
|
||||
* ->part_tbl is referenced in this part's release handler, so
|
||||
* we have to hold the disk device
|
||||
*/
|
||||
get_device(disk_to_dev(part_to_disk(part)));
|
||||
rcu_assign_pointer(ptbl->part[part->partno], NULL);
|
||||
kobject_put(part->holder_dir);
|
||||
device_del(part_to_dev(part));
|
||||
|
||||
@@ -321,7 +331,7 @@ void delete_partition(struct gendisk *disk, int partno)
|
||||
* "in-use" until we really free the gendisk.
|
||||
*/
|
||||
blk_invalidate_devt(part_devt(part));
|
||||
hd_struct_kill(part);
|
||||
percpu_ref_kill(&part->ref);
|
||||
}
|
||||
|
||||
static ssize_t whole_disk_show(struct device *dev,
|
||||
@@ -335,7 +345,7 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
|
||||
* Must be called either with bd_mutex held, before a disk can be opened or
|
||||
* after all disk users are gone.
|
||||
*/
|
||||
struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
||||
static struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
||||
sector_t start, sector_t len, int flags,
|
||||
struct partition_meta_info *info)
|
||||
{
|
||||
@@ -377,12 +387,13 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
||||
if (!p)
|
||||
return ERR_PTR(-EBUSY);
|
||||
|
||||
if (!init_part_stats(p)) {
|
||||
p->dkstats = alloc_percpu(struct disk_stats);
|
||||
if (!p->dkstats) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
seqcount_init(&p->nr_sects_seq);
|
||||
hd_sects_seq_init(p);
|
||||
pdev = part_to_dev(p);
|
||||
|
||||
p->start_sect = start;
|
||||
@@ -458,7 +469,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
||||
out_free_info:
|
||||
kfree(p->info);
|
||||
out_free_stats:
|
||||
free_part_stats(p);
|
||||
free_percpu(p->dkstats);
|
||||
out_free:
|
||||
kfree(p);
|
||||
return ERR_PTR(err);
|
||||
@@ -472,6 +483,121 @@ out_put:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
/*
 * Return true if the sector range [@start, @start + @length) intersects
 * any partition of @disk whose partno differs from @skip_partno.
 * Partitions are walked with DISK_PITER_INCL_EMPTY.
 */
static bool partition_overlaps(struct gendisk *disk, sector_t start,
|
||||
sector_t length, int skip_partno)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
struct hd_struct *part;
|
||||
bool overlap = false;
|
||||
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
|
||||
while ((part = disk_part_iter_next(&piter))) {
|
||||
/*
 * Not a conflict: the excluded partition itself, a partition that
 * ends at or before @start, or one that begins at or after the end
 * of the requested range.
 */
if (part->partno == skip_partno ||
|
||||
start >= part->start_sect + part->nr_sects ||
|
||||
start + length <= part->start_sect)
|
||||
continue;
|
||||
overlap = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Always paired with disk_part_iter_init(), including after the break. */
disk_part_iter_exit(&piter);
|
||||
return overlap;
|
||||
}
|
||||
|
||||
int bdev_add_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length)
|
||||
{
|
||||
struct hd_struct *part;
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
part = add_partition(bdev->bd_disk, partno, start, length,
|
||||
ADDPART_FLAG_NONE, NULL);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
return PTR_ERR_OR_ZERO(part);
|
||||
}
|
||||
|
||||
int bdev_del_partition(struct block_device *bdev, int partno)
|
||||
{
|
||||
struct block_device *bdevp;
|
||||
struct hd_struct *part;
|
||||
int ret = 0;
|
||||
|
||||
part = disk_get_part(bdev->bd_disk, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
|
||||
ret = -ENOMEM;
|
||||
bdevp = bdget(part_devt(part));
|
||||
if (!bdevp)
|
||||
goto out_put_part;
|
||||
|
||||
mutex_lock(&bdevp->bd_mutex);
|
||||
|
||||
ret = -EBUSY;
|
||||
if (bdevp->bd_openers)
|
||||
goto out_unlock;
|
||||
|
||||
sync_blockdev(bdevp);
|
||||
invalidate_bdev(bdevp);
|
||||
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
delete_partition(bdev->bd_disk, part);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
bdput(bdevp);
|
||||
out_put_part:
|
||||
disk_put_part(part);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bdev_resize_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length)
|
||||
{
|
||||
struct block_device *bdevp;
|
||||
struct hd_struct *part;
|
||||
int ret = 0;
|
||||
|
||||
part = disk_get_part(bdev->bd_disk, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
|
||||
ret = -ENOMEM;
|
||||
bdevp = bdget(part_devt(part));
|
||||
if (!bdevp)
|
||||
goto out_put_part;
|
||||
|
||||
mutex_lock(&bdevp->bd_mutex);
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
|
||||
ret = -EINVAL;
|
||||
if (start != part->start_sect)
|
||||
goto out_unlock;
|
||||
|
||||
ret = -EBUSY;
|
||||
if (partition_overlaps(bdev->bd_disk, start, length, partno))
|
||||
goto out_unlock;
|
||||
|
||||
part_nr_sects_write(part, (sector_t)length);
|
||||
i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT);
|
||||
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&bdevp->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(bdevp);
|
||||
out_put_part:
|
||||
disk_put_part(part);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool disk_unlock_native_capacity(struct gendisk *disk)
|
||||
{
|
||||
const struct block_device_operations *bdops = disk->fops;
|
||||
@@ -488,27 +614,30 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
|
||||
}
|
||||
}
|
||||
|
||||
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
int blk_drop_partitions(struct block_device *bdev)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
struct hd_struct *part;
|
||||
int res;
|
||||
|
||||
if (!disk_part_scan_enabled(disk))
|
||||
if (!disk_part_scan_enabled(bdev->bd_disk))
|
||||
return 0;
|
||||
if (bdev->bd_part_count)
|
||||
return -EBUSY;
|
||||
res = invalidate_partition(disk, 0);
|
||||
if (res)
|
||||
return res;
|
||||
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
|
||||
sync_blockdev(bdev);
|
||||
invalidate_bdev(bdev);
|
||||
|
||||
disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
delete_partition(disk, part->partno);
|
||||
delete_partition(bdev->bd_disk, part);
|
||||
disk_part_iter_exit(&piter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#ifdef CONFIG_S390
|
||||
/* for historic reasons in the DASD driver */
|
||||
EXPORT_SYMBOL_GPL(blk_drop_partitions);
|
||||
#endif
|
||||
|
||||
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
|
||||
struct parsed_partitions *state, int p)
|
||||
|
||||
@@ -649,7 +649,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
|
||||
{
|
||||
struct scsi_cmnd *scmd = qc->scsicmd;
|
||||
|
||||
qc->extrabytes = scmd->request->extra_len;
|
||||
qc->extrabytes = scmd->extra_len;
|
||||
qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes;
|
||||
}
|
||||
|
||||
@@ -1017,16 +1017,11 @@ void ata_scsi_sdev_config(struct scsi_device *sdev)
|
||||
* RETURNS:
|
||||
* 1 if ; otherwise, 0.
|
||||
*/
|
||||
static int atapi_drain_needed(struct request *rq)
|
||||
bool ata_scsi_dma_need_drain(struct request *rq)
|
||||
{
|
||||
if (likely(!blk_rq_is_passthrough(rq)))
|
||||
return 0;
|
||||
|
||||
if (!blk_rq_bytes(rq) || op_is_write(req_op(rq)))
|
||||
return 0;
|
||||
|
||||
return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
|
||||
|
||||
int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
|
||||
{
|
||||
@@ -1039,21 +1034,21 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
|
||||
blk_queue_max_hw_sectors(q, dev->max_sectors);
|
||||
|
||||
if (dev->class == ATA_DEV_ATAPI) {
|
||||
void *buf;
|
||||
|
||||
sdev->sector_size = ATA_SECT_SIZE;
|
||||
|
||||
/* set DMA padding */
|
||||
blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
|
||||
|
||||
/* configure draining */
|
||||
buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
|
||||
if (!buf) {
|
||||
/* make room for appending the drain */
|
||||
blk_queue_max_segments(q, queue_max_segments(q) - 1);
|
||||
|
||||
sdev->dma_drain_len = ATAPI_MAX_DRAIN;
|
||||
sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len,
|
||||
q->bounce_gfp | GFP_KERNEL);
|
||||
if (!sdev->dma_drain_buf) {
|
||||
ata_dev_err(dev, "drain buffer allocation failed\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
|
||||
} else {
|
||||
sdev->sector_size = ata_id_logical_sector_size(dev->id);
|
||||
sdev->manage_start_stop = 1;
|
||||
@@ -1135,7 +1130,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
|
||||
void ata_scsi_slave_destroy(struct scsi_device *sdev)
|
||||
{
|
||||
struct ata_port *ap = ata_shost_to_port(sdev->host);
|
||||
struct request_queue *q = sdev->request_queue;
|
||||
unsigned long flags;
|
||||
struct ata_device *dev;
|
||||
|
||||
@@ -1152,9 +1146,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
|
||||
}
|
||||
spin_unlock_irqrestore(ap->lock, flags);
|
||||
|
||||
kfree(q->dma_drain_buffer);
|
||||
q->dma_drain_buffer = NULL;
|
||||
q->dma_drain_size = 0;
|
||||
kfree(sdev->dma_drain_buf);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
|
||||
|
||||
|
||||
@@ -3212,40 +3212,6 @@ error:
|
||||
return ERR_PTR(retval);
|
||||
}
|
||||
|
||||
/**
|
||||
* device_create_vargs - creates a device and registers it with sysfs
|
||||
* @class: pointer to the struct class that this device should be registered to
|
||||
* @parent: pointer to the parent struct device of this new device, if any
|
||||
* @devt: the dev_t for the char device to be added
|
||||
* @drvdata: the data to be added to the device for callbacks
|
||||
* @fmt: string for the device's name
|
||||
* @args: va_list for the device's name
|
||||
*
|
||||
* This function can be used by char device classes. A struct device
|
||||
* will be created in sysfs, registered to the specified class.
|
||||
*
|
||||
* A "dev" file will be created, showing the dev_t for the device, if
|
||||
* the dev_t is not 0,0.
|
||||
* If a pointer to a parent struct device is passed in, the newly created
|
||||
* struct device will be a child of that device in sysfs.
|
||||
* The pointer to the struct device will be returned from the call.
|
||||
* Any further sysfs files that might be required can be created using this
|
||||
* pointer.
|
||||
*
|
||||
* Returns &struct device pointer on success, or ERR_PTR() on error.
|
||||
*
|
||||
* Note: the struct class passed to this function must have previously
|
||||
* been created with a call to class_create().
|
||||
*/
|
||||
struct device *device_create_vargs(struct class *class, struct device *parent,
|
||||
dev_t devt, void *drvdata, const char *fmt,
|
||||
va_list args)
|
||||
{
|
||||
return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
|
||||
fmt, args);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(device_create_vargs);
|
||||
|
||||
/**
|
||||
* device_create - creates a device and registers it with sysfs
|
||||
* @class: pointer to the struct class that this device should be registered to
|
||||
@@ -3277,7 +3243,8 @@ struct device *device_create(struct class *class, struct device *parent,
|
||||
struct device *dev;
|
||||
|
||||
va_start(vargs, fmt);
|
||||
dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
|
||||
dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL,
|
||||
fmt, vargs);
|
||||
va_end(vargs);
|
||||
return dev;
|
||||
}
|
||||
|
||||
@@ -407,7 +407,6 @@ aoeblk_gdalloc(void *vp)
|
||||
WARN_ON(d->gd);
|
||||
WARN_ON(d->flags & DEVFL_UP);
|
||||
blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
|
||||
q->backing_dev_info->name = "aoe";
|
||||
q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
|
||||
d->bufpool = mp;
|
||||
d->blkq = gd->queue = q;
|
||||
|
||||
@@ -21,24 +21,6 @@
|
||||
|
||||
static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);
|
||||
|
||||
/* Update disk stats at start of I/O request */
|
||||
static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
|
||||
{
|
||||
struct request_queue *q = device->rq_queue;
|
||||
|
||||
generic_start_io_acct(q, bio_op(req->master_bio),
|
||||
req->i.size >> 9, &device->vdisk->part0);
|
||||
}
|
||||
|
||||
/* Update disk stats when completing request upwards */
|
||||
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
|
||||
{
|
||||
struct request_queue *q = device->rq_queue;
|
||||
|
||||
generic_end_io_acct(q, bio_op(req->master_bio),
|
||||
&device->vdisk->part0, req->start_jif);
|
||||
}
|
||||
|
||||
static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
|
||||
{
|
||||
struct drbd_request *req;
|
||||
@@ -263,7 +245,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
|
||||
start_new_tl_epoch(first_peer_device(device)->connection);
|
||||
|
||||
/* Update disk stats */
|
||||
_drbd_end_io_acct(device, req);
|
||||
bio_end_io_acct(req->master_bio, req->start_jif);
|
||||
|
||||
/* If READ failed,
|
||||
* have it be pushed back to the retry work queue,
|
||||
@@ -1222,16 +1204,15 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
|
||||
bio_endio(bio);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
req->start_jif = start_jif;
|
||||
|
||||
/* Update disk stats */
|
||||
req->start_jif = bio_start_io_acct(req->master_bio);
|
||||
|
||||
if (!get_ldev(device)) {
|
||||
bio_put(req->private_bio);
|
||||
req->private_bio = NULL;
|
||||
}
|
||||
|
||||
/* Update disk stats */
|
||||
_drbd_start_io_acct(device, req);
|
||||
|
||||
/* process discards always from our submitter thread */
|
||||
if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
|
||||
bio_op(bio) == REQ_OP_DISCARD)
|
||||
|
||||
@@ -2037,7 +2037,7 @@ static int loop_add(struct loop_device **l, int i)
|
||||
lo->tag_set.queue_depth = 128;
|
||||
lo->tag_set.numa_node = NUMA_NO_NODE;
|
||||
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
|
||||
lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
|
||||
lo->tag_set.driver_data = lo;
|
||||
|
||||
err = blk_mq_alloc_tag_set(&lo->tag_set);
|
||||
|
||||
@@ -1250,8 +1250,34 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
|
||||
return errno_to_blk_status(err);
|
||||
}
|
||||
|
||||
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
struct bio *bio;
|
||||
|
||||
if (dev->memory_backed)
|
||||
return;
|
||||
|
||||
if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
|
||||
zero_fill_bio(cmd->bio);
|
||||
} else if (req_op(cmd->rq) == REQ_OP_READ) {
|
||||
__rq_for_each_bio(bio, cmd->rq)
|
||||
zero_fill_bio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
|
||||
{
|
||||
/*
|
||||
* Since root privileges are required to configure the null_blk
|
||||
* driver, it is fine that this driver does not initialize the
|
||||
* data buffers of read commands. Zero-initialize these buffers
|
||||
* anyway if KMSAN is enabled to prevent that KMSAN complains
|
||||
* about null_blk not initializing read data buffers.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_KMSAN))
|
||||
nullb_zero_read_cmd_buffer(cmd);
|
||||
|
||||
/* Complete IO by inline, softirq or timer */
|
||||
switch (cmd->nq->dev->irqmode) {
|
||||
case NULL_IRQ_SOFTIRQ:
|
||||
@@ -1397,7 +1423,7 @@ static bool should_requeue_request(struct request *rq)
|
||||
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
|
||||
{
|
||||
pr_info("rq %p timed out\n", rq);
|
||||
blk_mq_complete_request(rq);
|
||||
blk_mq_force_complete_rq(rq);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
|
||||
@@ -74,13 +74,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
|
||||
int null_register_zoned_dev(struct nullb *nullb)
|
||||
{
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
if (queue_is_mq(q))
|
||||
return blk_revalidate_disk_zones(nullb->disk);
|
||||
if (queue_is_mq(q)) {
|
||||
int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
|
||||
|
||||
blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
|
||||
q->nr_zones = blkdev_nr_zones(nullb->disk);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
blk_queue_chunk_sectors(q, dev->zone_size_sects);
|
||||
q->nr_zones = blkdev_nr_zones(nullb->disk);
|
||||
}
|
||||
|
||||
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -142,7 +149,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
|
||||
}
|
||||
|
||||
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
unsigned int nr_sectors)
|
||||
unsigned int nr_sectors, bool append)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
unsigned int zno = null_zone_no(dev, sector);
|
||||
@@ -162,9 +169,21 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
/* Writes must be at the write pointer position */
|
||||
if (sector != zone->wp)
|
||||
/*
|
||||
* Regular writes must be at the write pointer position.
|
||||
* Zone append writes are automatically issued at the write
|
||||
* pointer and the position returned using the request or BIO
|
||||
* sector.
|
||||
*/
|
||||
if (append) {
|
||||
sector = zone->wp;
|
||||
if (cmd->bio)
|
||||
cmd->bio->bi_iter.bi_sector = sector;
|
||||
else
|
||||
cmd->rq->__sector = sector;
|
||||
} else if (sector != zone->wp) {
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
@@ -246,7 +265,9 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
|
||||
{
|
||||
switch (op) {
|
||||
case REQ_OP_WRITE:
|
||||
return null_zone_write(cmd, sector, nr_sectors);
|
||||
return null_zone_write(cmd, sector, nr_sectors, false);
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
return null_zone_write(cmd, sector, nr_sectors, true);
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
case REQ_OP_ZONE_OPEN:
|
||||
|
||||
@@ -1032,7 +1032,7 @@ static int __init pcd_init(void)
|
||||
|
||||
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
|
||||
if (cd->present) {
|
||||
register_cdrom(&cd->info);
|
||||
register_cdrom(cd->disk, &cd->info);
|
||||
cd->disk->private_data = cd;
|
||||
add_disk(cd->disk);
|
||||
}
|
||||
|
||||
@@ -96,20 +96,6 @@ static const struct block_device_operations rsxx_fops = {
|
||||
.ioctl = rsxx_blkdev_ioctl,
|
||||
};
|
||||
|
||||
static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
|
||||
{
|
||||
generic_start_io_acct(card->queue, bio_op(bio), bio_sectors(bio),
|
||||
&card->gendisk->part0);
|
||||
}
|
||||
|
||||
static void disk_stats_complete(struct rsxx_cardinfo *card,
|
||||
struct bio *bio,
|
||||
unsigned long start_time)
|
||||
{
|
||||
generic_end_io_acct(card->queue, bio_op(bio),
|
||||
&card->gendisk->part0, start_time);
|
||||
}
|
||||
|
||||
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
|
||||
void *cb_data,
|
||||
unsigned int error)
|
||||
@@ -121,7 +107,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
|
||||
|
||||
if (atomic_dec_and_test(&meta->pending_dmas)) {
|
||||
if (!card->eeh_state && card->gendisk)
|
||||
disk_stats_complete(card, meta->bio, meta->start_time);
|
||||
bio_end_io_acct(meta->bio, meta->start_time);
|
||||
|
||||
if (atomic_read(&meta->error))
|
||||
bio_io_error(meta->bio);
|
||||
@@ -167,10 +153,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
|
||||
bio_meta->bio = bio;
|
||||
atomic_set(&bio_meta->error, 0);
|
||||
atomic_set(&bio_meta->pending_dmas, 0);
|
||||
bio_meta->start_time = jiffies;
|
||||
|
||||
if (!unlikely(card->halt))
|
||||
disk_stats_start(card, bio);
|
||||
bio_meta->start_time = bio_start_io_acct(bio);
|
||||
|
||||
dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
|
||||
bio_data_dir(bio) ? 'W' : 'R', bio_meta,
|
||||
|
||||
@@ -1510,13 +1510,8 @@ static void zram_bio_discard(struct zram *zram, u32 index,
|
||||
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
|
||||
int offset, unsigned int op, struct bio *bio)
|
||||
{
|
||||
unsigned long start_time = jiffies;
|
||||
struct request_queue *q = zram->disk->queue;
|
||||
int ret;
|
||||
|
||||
generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
|
||||
&zram->disk->part0);
|
||||
|
||||
if (!op_is_write(op)) {
|
||||
atomic64_inc(&zram->stats.num_reads);
|
||||
ret = zram_bvec_read(zram, bvec, index, offset, bio);
|
||||
@@ -1526,8 +1521,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
|
||||
ret = zram_bvec_write(zram, bvec, index, offset, bio);
|
||||
}
|
||||
|
||||
generic_end_io_acct(q, op, &zram->disk->part0, start_time);
|
||||
|
||||
zram_slot_lock(zram, index);
|
||||
zram_accessed(zram, index);
|
||||
zram_slot_unlock(zram, index);
|
||||
@@ -1548,6 +1541,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
u32 index;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
unsigned long start_time;
|
||||
|
||||
index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
|
||||
offset = (bio->bi_iter.bi_sector &
|
||||
@@ -1563,6 +1557,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
break;
|
||||
}
|
||||
|
||||
start_time = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
struct bio_vec bv = bvec;
|
||||
unsigned int unwritten = bvec.bv_len;
|
||||
@@ -1571,8 +1566,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
|
||||
unwritten);
|
||||
if (zram_bvec_rw(zram, &bv, index, offset,
|
||||
bio_op(bio), bio) < 0)
|
||||
goto out;
|
||||
bio_op(bio), bio) < 0) {
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
bv.bv_offset += bv.bv_len;
|
||||
unwritten -= bv.bv_len;
|
||||
@@ -1580,12 +1577,8 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
update_position(&index, &offset, &bv);
|
||||
} while (unwritten);
|
||||
}
|
||||
|
||||
bio_end_io_acct(bio, start_time);
|
||||
bio_endio(bio);
|
||||
return;
|
||||
|
||||
out:
|
||||
bio_io_error(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1633,6 +1626,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
|
||||
u32 index;
|
||||
struct zram *zram;
|
||||
struct bio_vec bv;
|
||||
unsigned long start_time;
|
||||
|
||||
if (PageTransHuge(page))
|
||||
return -ENOTSUPP;
|
||||
@@ -1651,7 +1645,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
|
||||
bv.bv_len = PAGE_SIZE;
|
||||
bv.bv_offset = 0;
|
||||
|
||||
start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
|
||||
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
|
||||
disk_end_io_acct(bdev->bd_disk, op, start_time);
|
||||
out:
|
||||
/*
|
||||
* If I/O fails, just return error(ie, non-zero) without
|
||||
|
||||
@@ -586,7 +586,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int register_cdrom(struct cdrom_device_info *cdi)
|
||||
int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
|
||||
{
|
||||
static char banner_printed;
|
||||
const struct cdrom_device_ops *cdo = cdi->ops;
|
||||
@@ -601,6 +601,9 @@ int register_cdrom(struct cdrom_device_info *cdi)
|
||||
cdrom_sysctl_register();
|
||||
}
|
||||
|
||||
cdi->disk = disk;
|
||||
disk->cdi = cdi;
|
||||
|
||||
ENSURE(cdo, drive_status, CDC_DRIVE_STATUS);
|
||||
if (cdo->check_events == NULL && cdo->media_changed == NULL)
|
||||
WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC));
|
||||
@@ -2292,37 +2295,46 @@ retry:
|
||||
return cdrom_read_cdda_old(cdi, ubuf, lba, nframes);
|
||||
}
|
||||
|
||||
static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
|
||||
void __user *argp)
|
||||
int cdrom_multisession(struct cdrom_device_info *cdi,
|
||||
struct cdrom_multisession *info)
|
||||
{
|
||||
struct cdrom_multisession ms_info;
|
||||
u8 requested_format;
|
||||
int ret;
|
||||
|
||||
cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
|
||||
|
||||
if (!(cdi->ops->capability & CDC_MULTI_SESSION))
|
||||
return -ENOSYS;
|
||||
|
||||
if (copy_from_user(&ms_info, argp, sizeof(ms_info)))
|
||||
return -EFAULT;
|
||||
|
||||
requested_format = ms_info.addr_format;
|
||||
requested_format = info->addr_format;
|
||||
if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
|
||||
return -EINVAL;
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
info->addr_format = CDROM_LBA;
|
||||
|
||||
ret = cdi->ops->get_last_session(cdi, &ms_info);
|
||||
ret = cdi->ops->get_last_session(cdi, info);
|
||||
if (!ret)
|
||||
sanitize_format(&info->addr, &info->addr_format,
|
||||
requested_format);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdrom_multisession);
|
||||
|
||||
static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
|
||||
void __user *argp)
|
||||
{
|
||||
struct cdrom_multisession info;
|
||||
int ret;
|
||||
|
||||
cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
|
||||
|
||||
if (copy_from_user(&info, argp, sizeof(info)))
|
||||
return -EFAULT;
|
||||
ret = cdrom_multisession(cdi, &info);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
sanitize_format(&ms_info.addr, &ms_info.addr_format, requested_format);
|
||||
|
||||
if (copy_to_user(argp, &ms_info, sizeof(ms_info)))
|
||||
if (copy_to_user(argp, &info, sizeof(info)))
|
||||
return -EFAULT;
|
||||
|
||||
cd_dbg(CD_DO_IOCTL, "CDROMMULTISESSION successful\n");
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
|
||||
@@ -2663,32 +2675,37 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cdrom_read_tocentry(struct cdrom_device_info *cdi,
|
||||
struct cdrom_tocentry *entry)
|
||||
{
|
||||
u8 requested_format = entry->cdte_format;
|
||||
int ret;
|
||||
|
||||
if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
|
||||
return -EINVAL;
|
||||
|
||||
/* make interface to low-level uniform */
|
||||
entry->cdte_format = CDROM_MSF;
|
||||
ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, entry);
|
||||
if (!ret)
|
||||
sanitize_format(&entry->cdte_addr, &entry->cdte_format,
|
||||
requested_format);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdrom_read_tocentry);
|
||||
|
||||
static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi,
|
||||
void __user *argp)
|
||||
{
|
||||
struct cdrom_tocentry entry;
|
||||
u8 requested_format;
|
||||
int ret;
|
||||
|
||||
/* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */
|
||||
|
||||
if (copy_from_user(&entry, argp, sizeof(entry)))
|
||||
return -EFAULT;
|
||||
|
||||
requested_format = entry.cdte_format;
|
||||
if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
|
||||
return -EINVAL;
|
||||
/* make interface to low-level uniform */
|
||||
entry.cdte_format = CDROM_MSF;
|
||||
ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry);
|
||||
if (ret)
|
||||
return ret;
|
||||
sanitize_format(&entry.cdte_addr, &entry.cdte_format, requested_format);
|
||||
|
||||
if (copy_to_user(argp, &entry, sizeof(entry)))
|
||||
ret = cdrom_read_tocentry(cdi, &entry);
|
||||
if (!ret && copy_to_user(argp, &entry, sizeof(entry)))
|
||||
return -EFAULT;
|
||||
/* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int cdrom_ioctl_play_msf(struct cdrom_device_info *cdi,
|
||||
|
||||
@@ -770,7 +770,7 @@ static int probe_gdrom(struct platform_device *devptr)
|
||||
goto probe_fail_no_disk;
|
||||
}
|
||||
probe_gdrom_setupdisk();
|
||||
if (register_cdrom(gd.cd_info)) {
|
||||
if (register_cdrom(gd.disk, gd.cd_info)) {
|
||||
err = -ENODEV;
|
||||
goto probe_fail_cdrom_register;
|
||||
}
|
||||
|
||||
@@ -1034,8 +1034,8 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
|
||||
int format, char *buf, int buflen)
|
||||
static int ide_cdrom_read_tocentry(ide_drive_t *drive, int trackno,
|
||||
int msf_flag, int format, char *buf, int buflen)
|
||||
{
|
||||
unsigned char cmd[BLK_MAX_CDB];
|
||||
|
||||
@@ -1104,7 +1104,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
|
||||
sectors_per_frame << SECTOR_SHIFT);
|
||||
|
||||
/* first read just the header, so we know how long the TOC is */
|
||||
stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
|
||||
stat = ide_cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
|
||||
sizeof(struct atapi_toc_header));
|
||||
if (stat)
|
||||
return stat;
|
||||
@@ -1121,7 +1121,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
|
||||
ntracks = MAX_TRACKS;
|
||||
|
||||
/* now read the whole schmeer */
|
||||
stat = cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0,
|
||||
stat = ide_cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0,
|
||||
(char *)&toc->hdr,
|
||||
sizeof(struct atapi_toc_header) +
|
||||
(ntracks + 1) *
|
||||
@@ -1141,7 +1141,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
|
||||
* Heiko Eißfeldt.
|
||||
*/
|
||||
ntracks = 0;
|
||||
stat = cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0,
|
||||
stat = ide_cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0,
|
||||
(char *)&toc->hdr,
|
||||
sizeof(struct atapi_toc_header) +
|
||||
(ntracks + 1) *
|
||||
@@ -1181,7 +1181,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
|
||||
|
||||
if (toc->hdr.first_track != CDROM_LEADOUT) {
|
||||
/* read the multisession information */
|
||||
stat = cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp,
|
||||
stat = ide_cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp,
|
||||
sizeof(ms_tmp));
|
||||
if (stat)
|
||||
return stat;
|
||||
@@ -1195,7 +1195,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
|
||||
|
||||
if (drive->atapi_flags & IDE_AFLAG_TOCADDR_AS_BCD) {
|
||||
/* re-read multisession information using MSF format */
|
||||
stat = cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp,
|
||||
stat = ide_cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp,
|
||||
sizeof(ms_tmp));
|
||||
if (stat)
|
||||
return stat;
|
||||
@@ -1305,8 +1305,7 @@ static int ide_cdrom_register(ide_drive_t *drive, int nslots)
|
||||
if (drive->atapi_flags & IDE_AFLAG_NO_SPEED_SELECT)
|
||||
devinfo->mask |= CDC_SELECT_SPEED;
|
||||
|
||||
devinfo->disk = info->disk;
|
||||
return register_cdrom(devinfo);
|
||||
return register_cdrom(info->disk, devinfo);
|
||||
}
|
||||
|
||||
static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
|
||||
|
||||
@@ -233,10 +233,13 @@ static ide_startstop_t do_special(ide_drive_t *drive)
|
||||
void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
|
||||
{
|
||||
ide_hwif_t *hwif = drive->hwif;
|
||||
struct scatterlist *sg = hwif->sg_table;
|
||||
struct scatterlist *sg = hwif->sg_table, *last_sg = NULL;
|
||||
struct request *rq = cmd->rq;
|
||||
|
||||
cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
|
||||
cmd->sg_nents = __blk_rq_map_sg(drive->queue, rq, sg, &last_sg);
|
||||
if (blk_rq_bytes(rq) && (blk_rq_bytes(rq) & rq->q->dma_pad_mask))
|
||||
last_sg->length +=
|
||||
(rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ide_map_sg);
|
||||
|
||||
|
||||
@@ -21,16 +21,14 @@
|
||||
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct request_queue *q = pblk->dev->q;
|
||||
struct pblk_w_ctx w_ctx;
|
||||
sector_t lba = pblk_get_lba(bio);
|
||||
unsigned long start_time = jiffies;
|
||||
unsigned long start_time;
|
||||
unsigned int bpos, pos;
|
||||
int nr_entries = pblk_get_secs(bio);
|
||||
int i, ret;
|
||||
|
||||
generic_start_io_acct(q, REQ_OP_WRITE, bio_sectors(bio),
|
||||
&pblk->disk->part0);
|
||||
start_time = bio_start_io_acct(bio);
|
||||
|
||||
/* Update the write buffer head (mem) with the entries that we can
|
||||
* write. The write in itself cannot fail, so there is no need to
|
||||
@@ -79,7 +77,7 @@ retry:
|
||||
pblk_rl_inserted(&pblk->rl, nr_entries);
|
||||
|
||||
out:
|
||||
generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
|
||||
bio_end_io_acct(bio, start_time);
|
||||
pblk_write_should_kick(pblk);
|
||||
|
||||
if (ret == NVM_IO_DONE)
|
||||
|
||||
@@ -187,12 +187,11 @@ static void pblk_end_user_read(struct bio *bio, int error)
|
||||
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
bool put_line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *int_bio = rqd->bio;
|
||||
unsigned long start_time = r_ctx->start_time;
|
||||
|
||||
generic_end_io_acct(dev->q, REQ_OP_READ, &pblk->disk->part0, start_time);
|
||||
bio_end_io_acct(int_bio, start_time);
|
||||
|
||||
if (rqd->error)
|
||||
pblk_log_read_err(pblk, rqd);
|
||||
@@ -263,17 +262,15 @@ retry:
|
||||
|
||||
void pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct request_queue *q = dev->q;
|
||||
sector_t blba = pblk_get_lba(bio);
|
||||
unsigned int nr_secs = pblk_get_secs(bio);
|
||||
bool from_cache;
|
||||
struct pblk_g_ctx *r_ctx;
|
||||
struct nvm_rq *rqd;
|
||||
struct bio *int_bio, *split_bio;
|
||||
unsigned long start_time;
|
||||
|
||||
generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
|
||||
&pblk->disk->part0);
|
||||
start_time = bio_start_io_acct(bio);
|
||||
|
||||
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
|
||||
|
||||
@@ -283,7 +280,7 @@ void pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
||||
rqd->end_io = pblk_end_io_read;
|
||||
|
||||
r_ctx = nvm_rq_to_pdu(rqd);
|
||||
r_ctx->start_time = jiffies;
|
||||
r_ctx->start_time = start_time;
|
||||
r_ctx->lba = blba;
|
||||
|
||||
if (pblk_alloc_rqd_meta(pblk, rqd)) {
|
||||
|
||||
@@ -668,9 +668,7 @@ static void backing_request_endio(struct bio *bio)
|
||||
static void bio_complete(struct search *s)
|
||||
{
|
||||
if (s->orig_bio) {
|
||||
generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio),
|
||||
&s->d->disk->part0, s->start_time);
|
||||
|
||||
bio_end_io_acct(s->orig_bio, s->start_time);
|
||||
trace_bcache_request_end(s->d, s->orig_bio);
|
||||
s->orig_bio->bi_status = s->iop.status;
|
||||
bio_endio(s->orig_bio);
|
||||
@@ -730,7 +728,7 @@ static inline struct search *search_alloc(struct bio *bio,
|
||||
s->recoverable = 1;
|
||||
s->write = op_is_write(bio_op(bio));
|
||||
s->read_dirty_data = 0;
|
||||
s->start_time = jiffies;
|
||||
s->start_time = bio_start_io_acct(bio);
|
||||
|
||||
s->iop.c = d->c;
|
||||
s->iop.bio = NULL;
|
||||
@@ -1082,8 +1080,7 @@ static void detached_dev_end_io(struct bio *bio)
|
||||
bio->bi_end_io = ddip->bi_end_io;
|
||||
bio->bi_private = ddip->bi_private;
|
||||
|
||||
generic_end_io_acct(ddip->d->disk->queue, bio_op(bio),
|
||||
&ddip->d->disk->part0, ddip->start_time);
|
||||
bio_end_io_acct(bio, ddip->start_time);
|
||||
|
||||
if (bio->bi_status) {
|
||||
struct cached_dev *dc = container_of(ddip->d,
|
||||
@@ -1108,7 +1105,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
|
||||
*/
|
||||
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
|
||||
ddip->d = d;
|
||||
ddip->start_time = jiffies;
|
||||
ddip->start_time = bio_start_io_acct(bio);
|
||||
ddip->bi_end_io = bio->bi_end_io;
|
||||
ddip->bi_private = bio->bi_private;
|
||||
bio->bi_end_io = detached_dev_end_io;
|
||||
@@ -1190,11 +1187,6 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
generic_start_io_acct(q,
|
||||
bio_op(bio),
|
||||
bio_sectors(bio),
|
||||
&d->disk->part0);
|
||||
|
||||
bio_set_dev(bio, dc->bdev);
|
||||
bio->bi_iter.bi_sector += dc->sb.data_offset;
|
||||
|
||||
@@ -1311,8 +1303,6 @@ blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
|
||||
|
||||
s = search_alloc(bio, d);
|
||||
cl = &s->cl;
|
||||
bio = &s->bio.bio;
|
||||
@@ -1372,7 +1362,6 @@ void bch_flash_dev_request_init(struct bcache_device *d)
|
||||
{
|
||||
struct gendisk *g = d->disk;
|
||||
|
||||
g->queue->make_request_fn = flash_dev_make_request;
|
||||
g->queue->backing_dev_info->congested_fn = flash_dev_congested;
|
||||
d->cache_miss = flash_dev_cache_miss;
|
||||
d->ioctl = flash_dev_ioctl;
|
||||
|
||||
@@ -2657,7 +2657,7 @@ static void bitmap_flush_work(struct work_struct *work)
|
||||
|
||||
dm_integrity_flush_buffers(ic);
|
||||
if (ic->meta_dev)
|
||||
blkdev_issue_flush(ic->dev->bdev, GFP_NOIO, NULL);
|
||||
blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
|
||||
|
||||
limit = ic->provided_data_sectors;
|
||||
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
|
||||
|
||||
@@ -547,7 +547,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
|
||||
md->tag_set->ops = &dm_mq_ops;
|
||||
md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
|
||||
md->tag_set->numa_node = md->numa_node_id;
|
||||
md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
|
||||
md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
|
||||
md->tag_set->driver_data = md;
|
||||
|
||||
|
||||
@@ -281,7 +281,6 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
|
||||
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct queue_limits *limits = data;
|
||||
struct block_device *bdev = dev->bdev;
|
||||
sector_t dev_size =
|
||||
@@ -290,22 +289,6 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
|
||||
limits->logical_block_size >> SECTOR_SHIFT;
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
/*
|
||||
* Some devices exist without request functions,
|
||||
* such as loop devices not yet bound to backing files.
|
||||
* Forbid the use of such devices.
|
||||
*/
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!q || !q->make_request_fn) {
|
||||
DMWARN("%s: %s is not yet initialised: "
|
||||
"start=%llu, len=%llu, dev_size=%llu",
|
||||
dm_device_name(ti->table->md), bdevname(bdev, b),
|
||||
(unsigned long long)start,
|
||||
(unsigned long long)len,
|
||||
(unsigned long long)dev_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!dev_size)
|
||||
return 0;
|
||||
|
||||
|
||||
@@ -661,7 +661,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
|
||||
|
||||
ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page);
|
||||
if (ret == 0)
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -703,7 +703,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
|
||||
|
||||
/* Flush drive cache (this will also sync data) */
|
||||
if (ret == 0)
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -772,7 +772,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||
|
||||
/* If there are no dirty metadata blocks, just flush the device cache */
|
||||
if (list_empty(&write_list)) {
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
|
||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
||||
@@ -682,11 +682,7 @@ static void start_io_acct(struct dm_io *io)
|
||||
struct mapped_device *md = io->md;
|
||||
struct bio *bio = io->orig_bio;
|
||||
|
||||
io->start_time = jiffies;
|
||||
|
||||
generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
|
||||
&dm_disk(md)->part0);
|
||||
|
||||
io->start_time = bio_start_io_acct(bio);
|
||||
if (unlikely(dm_stats_used(&md->stats)))
|
||||
dm_stats_account_io(&md->stats, bio_data_dir(bio),
|
||||
bio->bi_iter.bi_sector, bio_sectors(bio),
|
||||
@@ -699,8 +695,7 @@ static void end_io_acct(struct dm_io *io)
|
||||
struct bio *bio = io->orig_bio;
|
||||
unsigned long duration = jiffies - io->start_time;
|
||||
|
||||
generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
|
||||
io->start_time);
|
||||
bio_end_io_acct(bio, io->start_time);
|
||||
|
||||
if (unlikely(dm_stats_used(&md->stats)))
|
||||
dm_stats_account_io(&md->stats, bio_data_dir(bio),
|
||||
@@ -1792,6 +1787,18 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
|
||||
int srcu_idx;
|
||||
struct dm_table *map;
|
||||
|
||||
if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
|
||||
/*
|
||||
* We are called with a live reference on q_usage_counter, but
|
||||
* that one will be released as soon as we return. Grab an
|
||||
* extra one as blk_mq_make_request expects to be able to
|
||||
* consume a reference (which lives until the request is freed
|
||||
* in case a request is allocated).
|
||||
*/
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
return blk_mq_make_request(q, bio);
|
||||
}
|
||||
|
||||
map = dm_get_live_table(md, &srcu_idx);
|
||||
|
||||
/* if we're suspended, we have to queue this io for later */
|
||||
|
||||
@@ -1037,7 +1037,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
|
||||
}
|
||||
|
||||
/* flush the disk cache after recovery if necessary */
|
||||
ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL, NULL);
|
||||
ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
|
||||
out:
|
||||
__free_page(page);
|
||||
return ret;
|
||||
|
||||
@@ -2036,11 +2036,10 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name)
|
||||
struct backing_dev_info *bdi;
|
||||
int ret;
|
||||
|
||||
bdi = bdi_alloc(GFP_KERNEL);
|
||||
bdi = bdi_alloc(NUMA_NO_NODE);
|
||||
if (!bdi)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bdi->name = name;
|
||||
/*
|
||||
* We put '-0' suffix to the name to get the same name format as we
|
||||
* used to get. Since this is called only once, we get a unique name.
|
||||
|
||||
@@ -178,7 +178,9 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
|
||||
bip = bio_integrity(bio);
|
||||
nsblk = q->queuedata;
|
||||
rw = bio_data_dir(bio);
|
||||
do_acct = nd_iostat_start(bio, &start);
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
unsigned int len = bvec.bv_len;
|
||||
|
||||
@@ -195,7 +197,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
}
|
||||
if (do_acct)
|
||||
nd_iostat_end(bio, start);
|
||||
bio_end_io_acct(bio, start);
|
||||
|
||||
bio_endio(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
@@ -1452,7 +1452,9 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
|
||||
if (!bio_integrity_prep(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
do_acct = nd_iostat_start(bio, &start);
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
unsigned int len = bvec.bv_len;
|
||||
|
||||
@@ -1477,7 +1479,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
}
|
||||
if (do_acct)
|
||||
nd_iostat_end(bio, start);
|
||||
bio_end_io_acct(bio, start);
|
||||
|
||||
bio_endio(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
@@ -396,25 +396,6 @@ static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
|
||||
#endif
|
||||
int nd_blk_region_init(struct nd_region *nd_region);
|
||||
int nd_region_activate(struct nd_region *nd_region);
|
||||
void __nd_iostat_start(struct bio *bio, unsigned long *start);
|
||||
static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
|
||||
if (!blk_queue_io_stat(disk->queue))
|
||||
return false;
|
||||
|
||||
*start = jiffies;
|
||||
generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
|
||||
&disk->part0);
|
||||
return true;
|
||||
}
|
||||
static inline void nd_iostat_end(struct bio *bio, unsigned long start)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
|
||||
generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
|
||||
}
|
||||
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
|
||||
unsigned int len)
|
||||
{
|
||||
|
||||
@@ -202,7 +202,9 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
ret = nvdimm_flush(nd_region, bio);
|
||||
|
||||
do_acct = nd_iostat_start(bio, &start);
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
if (op_is_write(bio_op(bio)))
|
||||
rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
|
||||
@@ -216,7 +218,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
}
|
||||
if (do_acct)
|
||||
nd_iostat_end(bio, start);
|
||||
bio_end_io_acct(bio, start);
|
||||
|
||||
if (bio->bi_opf & REQ_FUA)
|
||||
ret = nvdimm_flush(nd_region, bio);
|
||||
|
||||
@@ -310,7 +310,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
|
||||
return true;
|
||||
|
||||
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(req);
|
||||
blk_mq_force_complete_rq(req);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_cancel_request);
|
||||
|
||||
@@ -226,7 +226,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
|
||||
|
||||
u16 nvmet_bdev_flush(struct nvmet_req *req)
|
||||
{
|
||||
if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
|
||||
if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL))
|
||||
return NVME_SC_INTERNAL | NVME_SC_DNR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -143,9 +143,6 @@ int dasd_scan_partitions(struct dasd_block *block)
|
||||
*/
|
||||
void dasd_destroy_partitions(struct dasd_block *block)
|
||||
{
|
||||
/* The two structs have 168/176 byte on 31/64 bit. */
|
||||
struct blkpg_partition bpart;
|
||||
struct blkpg_ioctl_arg barg;
|
||||
struct block_device *bdev;
|
||||
|
||||
/*
|
||||
@@ -155,19 +152,10 @@ void dasd_destroy_partitions(struct dasd_block *block)
|
||||
bdev = block->bdev;
|
||||
block->bdev = NULL;
|
||||
|
||||
/*
|
||||
* See fs/partition/check.c:delete_partition
|
||||
* Can't call delete_partitions directly. Use ioctl.
|
||||
* The ioctl also does locking and invalidation.
|
||||
*/
|
||||
memset(&bpart, 0, sizeof(struct blkpg_partition));
|
||||
memset(&barg, 0, sizeof(struct blkpg_ioctl_arg));
|
||||
barg.data = (void __force __user *) &bpart;
|
||||
barg.op = BLKPG_DEL_PARTITION;
|
||||
for (bpart.pno = block->gdp->minors - 1; bpart.pno > 0; bpart.pno--)
|
||||
ioctl_by_bdev(bdev, BLKPG, (unsigned long) &barg);
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
blk_drop_partitions(bdev);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
|
||||
invalidate_partition(block->gdp, 0);
|
||||
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
|
||||
blkdev_put(bdev, FMODE_READ);
|
||||
set_capacity(block->gdp, 0);
|
||||
|
||||
@@ -978,28 +978,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
|
||||
scsi_io_completion_action(cmd, result);
|
||||
}
|
||||
|
||||
static blk_status_t scsi_init_sgtable(struct request *req,
|
||||
struct scsi_data_buffer *sdb)
|
||||
static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
|
||||
struct request *rq)
|
||||
{
|
||||
int count;
|
||||
|
||||
/*
|
||||
* If sg table allocation fails, requeue request later.
|
||||
*/
|
||||
if (unlikely(sg_alloc_table_chained(&sdb->table,
|
||||
blk_rq_nr_phys_segments(req), sdb->table.sgl,
|
||||
SCSI_INLINE_SG_CNT)))
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
/*
|
||||
* Next, walk the list, and fill in the addresses and sizes of
|
||||
* each segment.
|
||||
*/
|
||||
count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
|
||||
BUG_ON(count > sdb->table.nents);
|
||||
sdb->table.nents = count;
|
||||
sdb->length = blk_rq_payload_bytes(req);
|
||||
return BLK_STS_OK;
|
||||
return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
|
||||
!op_is_write(req_op(rq)) &&
|
||||
sdev->host->hostt->dma_need_drain(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1015,19 +999,62 @@ static blk_status_t scsi_init_sgtable(struct request *req,
|
||||
*/
|
||||
blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
|
||||
{
|
||||
struct scsi_device *sdev = cmd->device;
|
||||
struct request *rq = cmd->request;
|
||||
unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
|
||||
struct scatterlist *last_sg = NULL;
|
||||
blk_status_t ret;
|
||||
bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
|
||||
int count;
|
||||
|
||||
if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
|
||||
if (WARN_ON_ONCE(!nr_segs))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
ret = scsi_init_sgtable(rq, &cmd->sdb);
|
||||
if (ret)
|
||||
return ret;
|
||||
/*
|
||||
* Make sure there is space for the drain. The driver must adjust
|
||||
* max_hw_segments to be prepared for this.
|
||||
*/
|
||||
if (need_drain)
|
||||
nr_segs++;
|
||||
|
||||
/*
|
||||
* If sg table allocation fails, requeue request later.
|
||||
*/
|
||||
if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
|
||||
cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
/*
|
||||
* Next, walk the list, and fill in the addresses and sizes of
|
||||
* each segment.
|
||||
*/
|
||||
count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
|
||||
|
||||
if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
|
||||
unsigned int pad_len =
|
||||
(rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
|
||||
|
||||
last_sg->length += pad_len;
|
||||
cmd->extra_len += pad_len;
|
||||
}
|
||||
|
||||
if (need_drain) {
|
||||
sg_unmark_end(last_sg);
|
||||
last_sg = sg_next(last_sg);
|
||||
sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
|
||||
sg_mark_end(last_sg);
|
||||
|
||||
cmd->extra_len += sdev->dma_drain_len;
|
||||
count++;
|
||||
}
|
||||
|
||||
BUG_ON(count > cmd->sdb.table.nents);
|
||||
cmd->sdb.table.nents = count;
|
||||
cmd->sdb.length = blk_rq_payload_bytes(rq);
|
||||
|
||||
if (blk_integrity_rq(rq)) {
|
||||
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
|
||||
int ivecs, count;
|
||||
int ivecs;
|
||||
|
||||
if (WARN_ON_ONCE(!prot_sdb)) {
|
||||
/*
|
||||
@@ -1610,12 +1637,7 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct scsi_device *sdev = q->queuedata;
|
||||
|
||||
if (scsi_dev_queue_ready(q, sdev))
|
||||
return true;
|
||||
|
||||
if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
|
||||
blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
|
||||
return false;
|
||||
return scsi_dev_queue_ready(q, sdev);
|
||||
}
|
||||
|
||||
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
@@ -1684,6 +1706,7 @@ out_put_budget:
|
||||
case BLK_STS_OK:
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_ZONE_RESOURCE:
|
||||
if (atomic_read(&sdev->device_busy) ||
|
||||
scsi_device_blocked(sdev))
|
||||
ret = BLK_STS_DEV_RESOURCE;
|
||||
|
||||
@@ -1206,6 +1206,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
if (req_op(rq) == REQ_OP_ZONE_APPEND) {
|
||||
ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
|
||||
dix = scsi_prot_sg_count(cmd);
|
||||
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
|
||||
@@ -1287,6 +1293,7 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
|
||||
return sd_setup_flush_cmnd(cmd);
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
return sd_setup_read_write_cmnd(cmd);
|
||||
case REQ_OP_ZONE_RESET:
|
||||
return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
|
||||
@@ -2055,7 +2062,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
|
||||
|
||||
out:
|
||||
if (sd_is_zoned(sdkp))
|
||||
sd_zbc_complete(SCpnt, good_bytes, &sshdr);
|
||||
good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr);
|
||||
|
||||
SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
|
||||
"sd_done: completed %d of %d bytes\n",
|
||||
@@ -3372,6 +3379,10 @@ static int sd_probe(struct device *dev)
|
||||
sdkp->first_scan = 1;
|
||||
sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS;
|
||||
|
||||
error = sd_zbc_init_disk(sdkp);
|
||||
if (error)
|
||||
goto out_free_index;
|
||||
|
||||
sd_revalidate_disk(gd);
|
||||
|
||||
gd->flags = GENHD_FL_EXT_DEVT;
|
||||
@@ -3409,6 +3420,7 @@ static int sd_probe(struct device *dev)
|
||||
out_put:
|
||||
put_disk(gd);
|
||||
out_free:
|
||||
sd_zbc_release_disk(sdkp);
|
||||
kfree(sdkp);
|
||||
out:
|
||||
scsi_autopm_put_device(sdp);
|
||||
@@ -3485,6 +3497,8 @@ static void scsi_disk_release(struct device *dev)
|
||||
put_disk(disk);
|
||||
put_device(&sdkp->device->sdev_gendev);
|
||||
|
||||
sd_zbc_release_disk(sdkp);
|
||||
|
||||
kfree(sdkp);
|
||||
}
|
||||
|
||||
|
||||
@@ -79,6 +79,12 @@ struct scsi_disk {
|
||||
u32 zones_optimal_open;
|
||||
u32 zones_optimal_nonseq;
|
||||
u32 zones_max_open;
|
||||
u32 *zones_wp_offset;
|
||||
spinlock_t zones_wp_offset_lock;
|
||||
u32 *rev_wp_offset;
|
||||
struct mutex rev_mutex;
|
||||
struct work_struct zone_wp_offset_work;
|
||||
char *zone_wp_update_buf;
|
||||
#endif
|
||||
atomic_t openers;
|
||||
sector_t capacity; /* size in logical blocks */
|
||||
@@ -207,17 +213,35 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
|
||||
int sd_zbc_init_disk(struct scsi_disk *sdkp);
|
||||
void sd_zbc_release_disk(struct scsi_disk *sdkp);
|
||||
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
|
||||
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
|
||||
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
unsigned char op, bool all);
|
||||
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
struct scsi_sense_hdr *sshdr);
|
||||
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
struct scsi_sense_hdr *sshdr);
|
||||
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
|
||||
unsigned int nr_zones, report_zones_cb cb, void *data);
|
||||
|
||||
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
|
||||
unsigned int nr_blocks);
|
||||
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: nothing to set up, always returns 0. */
static inline int sd_zbc_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Stub for builds without CONFIG_BLK_DEV_ZONED: there is no per-disk zone
 * state to allocate, so @sdkp is ignored and 0 is always returned.
 */
static inline int sd_zbc_init_disk(struct scsi_disk *sdkp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: intentionally does nothing. */
static inline void sd_zbc_exit(void) {}
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: no zone state to release, so this is a no-op. */
static inline void sd_zbc_release_disk(struct scsi_disk *sdkp) {}
|
||||
|
||||
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
|
||||
unsigned char *buf)
|
||||
{
|
||||
@@ -233,9 +257,18 @@ static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
return BLK_STS_TARGET;
|
||||
}
|
||||
|
||||
static inline void sd_zbc_complete(struct scsi_cmnd *cmd,
|
||||
unsigned int good_bytes,
|
||||
struct scsi_sense_hdr *sshdr) {}
|
||||
/*
 * Stub for builds without CONFIG_BLK_DEV_ZONED.
 *
 * sd_done() replaces its good_bytes with the value returned here whenever
 * sd_is_zoned() is true, so the stub must hand @good_bytes back unchanged.
 * Returning 0 would make every command on a zoned disk look like it
 * transferred nothing.
 *
 * @cmd and @sshdr are unused.
 */
static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
			unsigned int good_bytes, struct scsi_sense_hdr *sshdr)
{
	return good_bytes;
}
|
||||
|
||||
/*
 * Stub for builds without CONFIG_BLK_DEV_ZONED: zone append cannot be
 * emulated, so every request is rejected with BLK_STS_TARGET and @lba is
 * left untouched.
 */
static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
|
||||
sector_t *lba,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
return BLK_STS_TARGET;
|
||||
}
|
||||
|
||||
#define sd_zbc_report_zones NULL
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
@@ -19,11 +20,36 @@
|
||||
|
||||
#include "sd.h"
|
||||
|
||||
static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
|
||||
{
|
||||
if (zone->type == ZBC_ZONE_TYPE_CONV)
|
||||
return 0;
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zone->wp - zone->start;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
return zone->len;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
default:
|
||||
/*
|
||||
* Offline and read-only zones do not have a valid
|
||||
* write pointer. Use 0 as for an empty zone.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
|
||||
unsigned int idx, report_zones_cb cb, void *data)
|
||||
{
|
||||
struct scsi_device *sdp = sdkp->device;
|
||||
struct blk_zone zone = { 0 };
|
||||
int ret;
|
||||
|
||||
zone.type = buf[0] & 0x0f;
|
||||
zone.cond = (buf[1] >> 4) & 0xf;
|
||||
@@ -39,7 +65,14 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
|
||||
zone.cond == ZBC_ZONE_COND_FULL)
|
||||
zone.wp = zone.start + zone.len;
|
||||
|
||||
return cb(&zone, idx, data);
|
||||
ret = cb(&zone, idx, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (sdkp->rev_wp_offset)
|
||||
sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -208,6 +241,136 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
|
||||
{
|
||||
struct request *rq = cmd->request;
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
sector_t sector = blk_rq_pos(rq);
|
||||
|
||||
if (!sd_is_zoned(sdkp))
|
||||
/* Not a zoned device */
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (sdkp->device->changed)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
|
||||
/* Unaligned request */
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
/*
 * Sentinel values stored in sdkp->zones_wp_offset[] in place of a real offset:
 * INVALID marks an entry that must be re-read from the device before the next
 * zone append; UPDATING marks an entry whose refresh work has been scheduled.
 */
#define SD_ZBC_INVALID_WP_OFST (~0u)
|
||||
#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
|
||||
|
||||
static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
struct scsi_disk *sdkp = data;
|
||||
|
||||
lockdep_assert_held(&sdkp->zones_wp_offset_lock);
|
||||
|
||||
sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
|
||||
{
|
||||
struct scsi_disk *sdkp;
|
||||
unsigned int zno;
|
||||
int ret;
|
||||
|
||||
sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
|
||||
|
||||
spin_lock_bh(&sdkp->zones_wp_offset_lock);
|
||||
for (zno = 0; zno < sdkp->nr_zones; zno++) {
|
||||
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
|
||||
continue;
|
||||
|
||||
spin_unlock_bh(&sdkp->zones_wp_offset_lock);
|
||||
ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
|
||||
SD_BUF_SIZE,
|
||||
zno * sdkp->zone_blocks, true);
|
||||
spin_lock_bh(&sdkp->zones_wp_offset_lock);
|
||||
if (!ret)
|
||||
sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
|
||||
zno, sd_zbc_update_wp_offset_cb,
|
||||
sdkp);
|
||||
}
|
||||
spin_unlock_bh(&sdkp->zones_wp_offset_lock);
|
||||
|
||||
scsi_device_put(sdkp->device);
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
|
||||
* @cmd: the command to setup
|
||||
* @lba: the LBA to patch
|
||||
* @nr_blocks: the number of LBAs to be written
|
||||
*
|
||||
* Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
|
||||
* @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
|
||||
* patching of the lba for an emulated ZONE_APPEND command.
|
||||
*
|
||||
* In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
|
||||
* schedule a REPORT ZONES command and return BLK_STS_IOERR.
|
||||
*/
|
||||
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
struct request *rq = cmd->request;
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
unsigned int wp_offset, zno = blk_rq_zone_no(rq);
|
||||
blk_status_t ret;
|
||||
|
||||
ret = sd_zbc_cmnd_checks(cmd);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
|
||||
if (!blk_rq_zone_is_seq(rq))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
/* Unlock of the write lock will happen in sd_zbc_complete() */
|
||||
if (!blk_req_zone_write_trylock(rq))
|
||||
return BLK_STS_ZONE_RESOURCE;
|
||||
|
||||
spin_lock_bh(&sdkp->zones_wp_offset_lock);
|
||||
wp_offset = sdkp->zones_wp_offset[zno];
|
||||
switch (wp_offset) {
|
||||
case SD_ZBC_INVALID_WP_OFST:
|
||||
/*
|
||||
* We are about to schedule work to update a zone write pointer
|
||||
* offset, which will cause the zone append command to be
|
||||
* requeued. So make sure that the scsi device does not go away
|
||||
* while the work is being processed.
|
||||
*/
|
||||
if (scsi_device_get(sdkp->device)) {
|
||||
ret = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
|
||||
schedule_work(&sdkp->zone_wp_offset_work);
|
||||
fallthrough;
|
||||
case SD_ZBC_UPDATING_WP_OFST:
|
||||
ret = BLK_STS_DEV_RESOURCE;
|
||||
break;
|
||||
default:
|
||||
wp_offset = sectors_to_logical(sdkp->device, wp_offset);
|
||||
if (wp_offset + nr_blocks > sdkp->zone_blocks) {
|
||||
ret = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
*lba += wp_offset;
|
||||
}
|
||||
spin_unlock_bh(&sdkp->zones_wp_offset_lock);
|
||||
if (ret)
|
||||
blk_req_zone_write_unlock(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
|
||||
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
|
||||
@@ -222,20 +385,14 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
unsigned char op, bool all)
|
||||
{
|
||||
struct request *rq = cmd->request;
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
sector_t sector = blk_rq_pos(rq);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
sector_t block = sectors_to_logical(sdkp->device, sector);
|
||||
blk_status_t ret;
|
||||
|
||||
if (!sd_is_zoned(sdkp))
|
||||
/* Not a zoned device */
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (sdkp->device->changed)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
|
||||
/* Unaligned request */
|
||||
return BLK_STS_IOERR;
|
||||
ret = sd_zbc_cmnd_checks(cmd);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
|
||||
cmd->cmd_len = 16;
|
||||
memset(cmd->cmnd, 0, cmd->cmd_len);
|
||||
@@ -254,16 +411,105 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static bool sd_zbc_need_zone_wp_update(struct request *rq)
|
||||
{
|
||||
switch (req_op(rq)) {
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
return true;
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE_SAME:
|
||||
return blk_rq_zone_is_seq(rq);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
|
||||
* @cmd: Completed command
|
||||
* @good_bytes: Command reply bytes
|
||||
*
|
||||
* Called from sd_zbc_complete() to handle the update of the cached zone write
|
||||
* pointer value in case an update is needed.
|
||||
*/
|
||||
static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
|
||||
unsigned int good_bytes)
|
||||
{
|
||||
int result = cmd->result;
|
||||
struct request *rq = cmd->request;
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
|
||||
unsigned int zno = blk_rq_zone_no(rq);
|
||||
enum req_opf op = req_op(rq);
|
||||
|
||||
/*
|
||||
* If we got an error for a command that needs updating the write
|
||||
* pointer offset cache, we must mark the zone wp offset entry as
|
||||
* invalid to force an update from disk the next time a zone append
|
||||
* command is issued.
|
||||
*/
|
||||
spin_lock_bh(&sdkp->zones_wp_offset_lock);
|
||||
|
||||
if (result && op != REQ_OP_ZONE_RESET_ALL) {
|
||||
if (op == REQ_OP_ZONE_APPEND) {
|
||||
/* Force complete completion (no retry) */
|
||||
good_bytes = 0;
|
||||
scsi_set_resid(cmd, blk_rq_bytes(rq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Force an update of the zone write pointer offset on
|
||||
* the next zone append access.
|
||||
*/
|
||||
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
|
||||
sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
|
||||
goto unlock_wp_offset;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
rq->__sector += sdkp->zones_wp_offset[zno];
|
||||
fallthrough;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE_SAME:
|
||||
case REQ_OP_WRITE:
|
||||
if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
|
||||
sdkp->zones_wp_offset[zno] +=
|
||||
good_bytes >> SECTOR_SHIFT;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET:
|
||||
sdkp->zones_wp_offset[zno] = 0;
|
||||
break;
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
memset(sdkp->zones_wp_offset, 0,
|
||||
sdkp->nr_zones * sizeof(unsigned int));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unlock_wp_offset:
|
||||
spin_unlock_bh(&sdkp->zones_wp_offset_lock);
|
||||
|
||||
return good_bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_complete - ZBC command post processing.
|
||||
* @cmd: Completed command
|
||||
* @good_bytes: Command reply bytes
|
||||
* @sshdr: command sense header
|
||||
*
|
||||
* Called from sd_done(). Process report zones reply and handle reset zone
|
||||
* and write commands errors.
|
||||
* Called from sd_done() to handle zone commands errors and updates to the
|
||||
* device queue zone write pointer offset cache.
|
||||
*/
|
||||
void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
struct scsi_sense_hdr *sshdr)
|
||||
{
|
||||
int result = cmd->result;
|
||||
@@ -279,7 +525,13 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
* so be quiet about the error.
|
||||
*/
|
||||
rq->rq_flags |= RQF_QUIET;
|
||||
}
|
||||
} else if (sd_zbc_need_zone_wp_update(rq))
|
||||
good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
|
||||
|
||||
if (req_op(rq) == REQ_OP_ZONE_APPEND)
|
||||
blk_req_zone_write_unlock(rq);
|
||||
|
||||
return good_bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -381,11 +633,67 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
|
||||
{
|
||||
struct scsi_disk *sdkp = scsi_disk(disk);
|
||||
|
||||
swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
|
||||
}
|
||||
|
||||
static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp,
|
||||
u32 zone_blocks,
|
||||
unsigned int nr_zones)
|
||||
{
|
||||
struct gendisk *disk = sdkp->disk;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Make sure revalidate zones are serialized to ensure exclusive
|
||||
* updates of the scsi disk data.
|
||||
*/
|
||||
mutex_lock(&sdkp->rev_mutex);
|
||||
|
||||
/*
|
||||
* Revalidate the disk zones to update the device request queue zone
|
||||
* bitmaps and the zone write pointer offset array. Do this only once
|
||||
* the device capacity is set on the second revalidate execution for
|
||||
* disk scan or if something changed when executing a normal revalidate.
|
||||
*/
|
||||
if (sdkp->first_scan) {
|
||||
sdkp->zone_blocks = zone_blocks;
|
||||
sdkp->nr_zones = nr_zones;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (sdkp->zone_blocks == zone_blocks &&
|
||||
sdkp->nr_zones == nr_zones &&
|
||||
disk->queue->nr_zones == nr_zones)
|
||||
goto unlock;
|
||||
|
||||
sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
|
||||
if (!sdkp->rev_wp_offset) {
|
||||
ret = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
|
||||
|
||||
kvfree(sdkp->rev_wp_offset);
|
||||
sdkp->rev_wp_offset = NULL;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&sdkp->rev_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
|
||||
{
|
||||
struct gendisk *disk = sdkp->disk;
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned int nr_zones;
|
||||
u32 zone_blocks = 0;
|
||||
u32 max_append;
|
||||
int ret;
|
||||
|
||||
if (!sd_is_zoned(sdkp))
|
||||
@@ -406,35 +714,31 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
|
||||
goto err;
|
||||
|
||||
/* The drive satisfies the kernel restrictions: set it up */
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
|
||||
blk_queue_required_elevator_features(sdkp->disk->queue,
|
||||
ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
|
||||
|
||||
/* READ16/WRITE16 is mandatory for ZBC disks */
|
||||
sdkp->device->use_16_for_rw = 1;
|
||||
sdkp->device->use_10_for_rw = 0;
|
||||
|
||||
/*
|
||||
* Revalidate the disk zone bitmaps once the block device capacity is
|
||||
* set on the second revalidate execution during disk scan and if
|
||||
* something changed when executing a normal revalidate.
|
||||
*/
|
||||
if (sdkp->first_scan) {
|
||||
sdkp->zone_blocks = zone_blocks;
|
||||
sdkp->nr_zones = nr_zones;
|
||||
return 0;
|
||||
}
|
||||
ret = sd_zbc_revalidate_zones(sdkp, zone_blocks, nr_zones);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (sdkp->zone_blocks != zone_blocks ||
|
||||
sdkp->nr_zones != nr_zones ||
|
||||
disk->queue->nr_zones != nr_zones) {
|
||||
ret = blk_revalidate_disk_zones(disk);
|
||||
if (ret != 0)
|
||||
goto err;
|
||||
sdkp->zone_blocks = zone_blocks;
|
||||
sdkp->nr_zones = nr_zones;
|
||||
}
|
||||
/*
|
||||
* On the first scan 'chunk_sectors' isn't setup yet, so calling
|
||||
* blk_queue_max_zone_append_sectors() will result in a WARN(). Defer
|
||||
* this setting to the second scan.
|
||||
*/
|
||||
if (sdkp->first_scan)
|
||||
return 0;
|
||||
|
||||
max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
|
||||
q->limits.max_segments << (PAGE_SHIFT - 9));
|
||||
max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
|
||||
|
||||
blk_queue_max_zone_append_sectors(q, max_append);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -460,3 +764,28 @@ void sd_zbc_print_zones(struct scsi_disk *sdkp)
|
||||
sdkp->nr_zones,
|
||||
sdkp->zone_blocks);
|
||||
}
|
||||
|
||||
int sd_zbc_init_disk(struct scsi_disk *sdkp)
|
||||
{
|
||||
if (!sd_is_zoned(sdkp))
|
||||
return 0;
|
||||
|
||||
sdkp->zones_wp_offset = NULL;
|
||||
spin_lock_init(&sdkp->zones_wp_offset_lock);
|
||||
sdkp->rev_wp_offset = NULL;
|
||||
mutex_init(&sdkp->rev_mutex);
|
||||
INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
|
||||
sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
|
||||
if (!sdkp->zone_wp_update_buf)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sd_zbc_release_disk(struct scsi_disk *sdkp)
|
||||
{
|
||||
kvfree(sdkp->zones_wp_offset);
|
||||
sdkp->zones_wp_offset = NULL;
|
||||
kfree(sdkp->zone_wp_update_buf);
|
||||
sdkp->zone_wp_update_buf = NULL;
|
||||
}
|
||||
|
||||
@@ -794,9 +794,8 @@ static int sr_probe(struct device *dev)
|
||||
set_capacity(disk, cd->capacity);
|
||||
disk->private_data = &cd->driver;
|
||||
disk->queue = sdev->request_queue;
|
||||
cd->cdi.disk = disk;
|
||||
|
||||
if (register_cdrom(&cd->cdi))
|
||||
if (register_cdrom(disk, &cd->cdi))
|
||||
goto fail_put;
|
||||
|
||||
/*
|
||||
|
||||
@@ -255,7 +255,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
|
||||
break;
|
||||
if (!(iocb->ki_flags & IOCB_HIPRI) ||
|
||||
!blk_poll(bdev_get_queue(bdev), qc, true))
|
||||
io_schedule();
|
||||
blk_io_schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
@@ -449,7 +449,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
|
||||
|
||||
if (!(iocb->ki_flags & IOCB_HIPRI) ||
|
||||
!blk_poll(bdev_get_queue(bdev), qc, true))
|
||||
io_schedule();
|
||||
blk_io_schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
@@ -671,7 +671,7 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
|
||||
* i_mutex and doing so causes performance issues with concurrent
|
||||
* O_SYNC writers to a block device.
|
||||
*/
|
||||
error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
|
||||
error = blkdev_issue_flush(bdev, GFP_KERNEL);
|
||||
if (error == -EOPNOTSUPP)
|
||||
error = 0;
|
||||
|
||||
@@ -712,7 +712,6 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
|
||||
blk_queue_exit(bdev->bd_queue);
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_read_page);
|
||||
|
||||
/**
|
||||
* bdev_write_page() - Start writing a page to a block device
|
||||
@@ -757,7 +756,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
|
||||
blk_queue_exit(bdev->bd_queue);
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_write_page);
|
||||
|
||||
/*
|
||||
* pseudo-fs
|
||||
@@ -881,21 +879,6 @@ static int bdev_set(struct inode *inode, void *data)
|
||||
|
||||
static LIST_HEAD(all_bdevs);
|
||||
|
||||
/*
|
||||
* If there is a bdev inode for this device, unhash it so that it gets evicted
|
||||
* as soon as last inode reference is dropped.
|
||||
*/
|
||||
void bdev_unhash_inode(dev_t dev)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
|
||||
if (inode) {
|
||||
remove_inode_hash(inode);
|
||||
iput(inode);
|
||||
}
|
||||
}
|
||||
|
||||
struct block_device *bdget(dev_t dev)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
@@ -1515,7 +1498,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
|
||||
lockdep_assert_held(&bdev->bd_mutex);
|
||||
|
||||
rescan:
|
||||
ret = blk_drop_partitions(disk, bdev);
|
||||
ret = blk_drop_partitions(bdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
||||
@@ -505,7 +505,7 @@ static struct bio *dio_await_one(struct dio *dio)
|
||||
spin_unlock_irqrestore(&dio->bio_lock, flags);
|
||||
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
|
||||
!blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
|
||||
io_schedule();
|
||||
blk_io_schedule();
|
||||
/* wake up sets us TASK_RUNNING */
|
||||
spin_lock_irqsave(&dio->bio_lock, flags);
|
||||
dio->waiter = NULL;
|
||||
|
||||
@@ -176,7 +176,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
|
||||
|
||||
if (needs_barrier) {
|
||||
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
@@ -1440,7 +1440,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
if (barrier)
|
||||
blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
|
||||
blkdev_issue_flush(sb->s_bdev, GFP_NOFS);
|
||||
|
||||
skip_zeroout:
|
||||
ext4_lock_group(sb, group);
|
||||
|
||||
@@ -5300,7 +5300,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
|
||||
needs_barrier = true;
|
||||
if (needs_barrier) {
|
||||
int err;
|
||||
err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
|
||||
err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
@@ -195,7 +195,7 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2319,7 +2319,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
|
||||
WARN(bdi_cap_writeback_dirty(wb->bdi) &&
|
||||
!test_bit(WB_registered, &wb->state),
|
||||
"bdi-%s not registered\n", wb->bdi->name);
|
||||
"bdi-%s not registered\n", bdi_dev_name(wb->bdi));
|
||||
|
||||
inode->dirtied_when = jiffies;
|
||||
if (dirtytime)
|
||||
|
||||
32
fs/hfs/mdb.c
32
fs/hfs/mdb.c
@@ -32,29 +32,35 @@
|
||||
static int hfs_get_last_session(struct super_block *sb,
|
||||
sector_t *start, sector_t *size)
|
||||
{
|
||||
struct cdrom_multisession ms_info;
|
||||
struct cdrom_tocentry te;
|
||||
int res;
|
||||
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
|
||||
|
||||
/* default values */
|
||||
*start = 0;
|
||||
*size = i_size_read(sb->s_bdev->bd_inode) >> 9;
|
||||
|
||||
if (HFS_SB(sb)->session >= 0) {
|
||||
struct cdrom_tocentry te;
|
||||
|
||||
if (!cdi)
|
||||
return -EINVAL;
|
||||
|
||||
te.cdte_track = HFS_SB(sb)->session;
|
||||
te.cdte_format = CDROM_LBA;
|
||||
res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te);
|
||||
if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
|
||||
*start = (sector_t)te.cdte_addr.lba << 2;
|
||||
return 0;
|
||||
if (cdrom_read_tocentry(cdi, &te) ||
|
||||
(te.cdte_ctrl & CDROM_DATA_TRACK) != 4) {
|
||||
pr_err("invalid session number or type of track\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_err("invalid session number or type of track\n");
|
||||
return -EINVAL;
|
||||
|
||||
*start = (sector_t)te.cdte_addr.lba << 2;
|
||||
} else if (cdi) {
|
||||
struct cdrom_multisession ms_info;
|
||||
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
|
||||
*start = (sector_t)ms_info.addr.lba << 2;
|
||||
}
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
|
||||
if (!res && ms_info.xa_flag)
|
||||
*start = (sector_t)ms_info.addr.lba << 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -340,7 +340,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
|
||||
}
|
||||
|
||||
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
|
||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
|
||||
inode_unlock(inode);
|
||||
|
||||
|
||||
@@ -239,7 +239,7 @@ out:
|
||||
mutex_unlock(&sbi->vh_mutex);
|
||||
|
||||
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
|
||||
blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
|
||||
blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -127,31 +127,34 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
|
||||
static int hfsplus_get_last_session(struct super_block *sb,
|
||||
sector_t *start, sector_t *size)
|
||||
{
|
||||
struct cdrom_multisession ms_info;
|
||||
struct cdrom_tocentry te;
|
||||
int res;
|
||||
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
|
||||
|
||||
/* default values */
|
||||
*start = 0;
|
||||
*size = i_size_read(sb->s_bdev->bd_inode) >> 9;
|
||||
|
||||
if (HFSPLUS_SB(sb)->session >= 0) {
|
||||
struct cdrom_tocentry te;
|
||||
|
||||
if (!cdi)
|
||||
return -EINVAL;
|
||||
|
||||
te.cdte_track = HFSPLUS_SB(sb)->session;
|
||||
te.cdte_format = CDROM_LBA;
|
||||
res = ioctl_by_bdev(sb->s_bdev,
|
||||
CDROMREADTOCENTRY, (unsigned long)&te);
|
||||
if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
|
||||
*start = (sector_t)te.cdte_addr.lba << 2;
|
||||
return 0;
|
||||
if (cdrom_read_tocentry(cdi, &te) ||
|
||||
(te.cdte_ctrl & CDROM_DATA_TRACK) != 4) {
|
||||
pr_err("invalid session number or type of track\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_err("invalid session number or type of track\n");
|
||||
return -EINVAL;
|
||||
*start = (sector_t)te.cdte_addr.lba << 2;
|
||||
} else if (cdi) {
|
||||
struct cdrom_multisession ms_info;
|
||||
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
|
||||
*start = (sector_t)ms_info.addr.lba << 2;
|
||||
}
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION,
|
||||
(unsigned long)&ms_info);
|
||||
if (!res && ms_info.xa_flag)
|
||||
*start = (sector_t)ms_info.addr.lba << 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -569,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
!dio->submit.last_queue ||
|
||||
!blk_poll(dio->submit.last_queue,
|
||||
dio->submit.cookie, true))
|
||||
io_schedule();
|
||||
blk_io_schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
|
||||
@@ -544,43 +544,41 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
|
||||
|
||||
static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
|
||||
{
|
||||
struct cdrom_multisession ms_info;
|
||||
unsigned int vol_desc_start;
|
||||
struct block_device *bdev = sb->s_bdev;
|
||||
int i;
|
||||
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
|
||||
unsigned int vol_desc_start = 0;
|
||||
|
||||
vol_desc_start=0;
|
||||
ms_info.addr_format=CDROM_LBA;
|
||||
if (session > 0) {
|
||||
struct cdrom_tocentry Te;
|
||||
Te.cdte_track=session;
|
||||
Te.cdte_format=CDROM_LBA;
|
||||
i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
|
||||
if (!i) {
|
||||
struct cdrom_tocentry te;
|
||||
|
||||
if (!cdi)
|
||||
return 0;
|
||||
|
||||
te.cdte_track = session;
|
||||
te.cdte_format = CDROM_LBA;
|
||||
if (cdrom_read_tocentry(cdi, &te) == 0) {
|
||||
printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
|
||||
session, Te.cdte_addr.lba,
|
||||
Te.cdte_ctrl&CDROM_DATA_TRACK);
|
||||
if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
|
||||
return Te.cdte_addr.lba;
|
||||
session, te.cdte_addr.lba,
|
||||
te.cdte_ctrl & CDROM_DATA_TRACK);
|
||||
if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4)
|
||||
return te.cdte_addr.lba;
|
||||
}
|
||||
|
||||
printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
|
||||
}
|
||||
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
|
||||
if (session > 0)
|
||||
printk(KERN_ERR "ISOFS: Invalid session number\n");
|
||||
#if 0
|
||||
printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
|
||||
if (i==0) {
|
||||
printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
|
||||
printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
|
||||
}
|
||||
#endif
|
||||
if (i==0)
|
||||
|
||||
if (cdi) {
|
||||
struct cdrom_multisession ms_info;
|
||||
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
if (cdrom_multisession(cdi, &ms_info) == 0) {
|
||||
#if WE_OBEY_THE_WRITTEN_STANDARDS
|
||||
if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
|
||||
/* necessary for a valid ms_info.addr */
|
||||
if (ms_info.xa_flag)
|
||||
#endif
|
||||
vol_desc_start=ms_info.addr.lba;
|
||||
vol_desc_start = ms_info.addr.lba;
|
||||
}
|
||||
}
|
||||
|
||||
return vol_desc_start;
|
||||
}
|
||||
|
||||
|
||||
@@ -414,7 +414,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
|
||||
* jbd2_cleanup_journal_tail() doesn't get called all that often.
|
||||
*/
|
||||
if (journal->j_flags & JBD2_BARRIER)
|
||||
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
|
||||
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
|
||||
|
||||
return __jbd2_update_log_tail(journal, first_tid, blocknr);
|
||||
}
|
||||
|
||||
@@ -775,7 +775,7 @@ start_journal_io:
|
||||
if (commit_transaction->t_need_data_flush &&
|
||||
(journal->j_fs_dev != journal->j_dev) &&
|
||||
(journal->j_flags & JBD2_BARRIER))
|
||||
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
|
||||
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
|
||||
|
||||
/* Done it all: now write the commit record asynchronously. */
|
||||
if (jbd2_has_feature_async_commit(journal)) {
|
||||
@@ -882,7 +882,7 @@ start_journal_io:
|
||||
stats.run.rs_blocks_logged++;
|
||||
if (jbd2_has_feature_async_commit(journal) &&
|
||||
journal->j_flags & JBD2_BARRIER) {
|
||||
blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
|
||||
blkdev_issue_flush(journal->j_dev, GFP_NOFS);
|
||||
}
|
||||
|
||||
if (err)
|
||||
|
||||
@@ -286,7 +286,7 @@ int jbd2_journal_recover(journal_t *journal)
|
||||
err = err2;
|
||||
/* Make sure all replayed data is on permanent storage */
|
||||
if (journal->j_flags & JBD2_BARRIER) {
|
||||
err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
|
||||
err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL);
|
||||
if (!err)
|
||||
err = err2;
|
||||
}
|
||||
|
||||
@@ -1115,7 +1115,7 @@ int generic_file_fsync(struct file *file, loff_t start, loff_t end,
|
||||
err = __generic_file_fsync(file, start, end, datasync);
|
||||
if (err)
|
||||
return err;
|
||||
return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
}
|
||||
EXPORT_SYMBOL(generic_file_fsync);
|
||||
|
||||
|
||||
@@ -375,7 +375,7 @@ static inline int nilfs_flush_device(struct the_nilfs *nilfs)
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL, NULL);
|
||||
err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL);
|
||||
if (err != -EIO)
|
||||
err = 0;
|
||||
return err;
|
||||
|
||||
@@ -194,7 +194,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
|
||||
needs_barrier = true;
|
||||
err = jbd2_complete_transaction(journal, commit_tid);
|
||||
if (needs_barrier) {
|
||||
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
if (!err)
|
||||
err = ret;
|
||||
}
|
||||
|
||||
@@ -159,7 +159,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
|
||||
barrier_done = reiserfs_commit_for_inode(inode);
|
||||
reiserfs_write_unlock(inode->i_sb);
|
||||
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
|
||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
inode_unlock(inode);
|
||||
if (barrier_done < 0)
|
||||
return barrier_done;
|
||||
|
||||
@@ -1598,12 +1598,10 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
|
||||
int err;
|
||||
va_list args;
|
||||
|
||||
bdi = bdi_alloc(GFP_KERNEL);
|
||||
bdi = bdi_alloc(NUMA_NO_NODE);
|
||||
if (!bdi)
|
||||
return -ENOMEM;
|
||||
|
||||
bdi->name = sb->s_type->name;
|
||||
|
||||
va_start(args, fmt);
|
||||
err = bdi_register_va(bdi, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
@@ -27,41 +27,38 @@
|
||||
|
||||
unsigned int udf_get_last_session(struct super_block *sb)
|
||||
{
|
||||
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
|
||||
struct cdrom_multisession ms_info;
|
||||
unsigned int vol_desc_start;
|
||||
struct block_device *bdev = sb->s_bdev;
|
||||
int i;
|
||||
|
||||
vol_desc_start = 0;
|
||||
if (!cdi) {
|
||||
udf_debug("CDROMMULTISESSION not supported.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ms_info.addr_format = CDROM_LBA;
|
||||
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
|
||||
|
||||
if (i == 0) {
|
||||
if (cdrom_multisession(cdi, &ms_info) == 0) {
|
||||
udf_debug("XA disk: %s, vol_desc_start=%d\n",
|
||||
ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
|
||||
if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
|
||||
vol_desc_start = ms_info.addr.lba;
|
||||
} else {
|
||||
udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i);
|
||||
return ms_info.addr.lba;
|
||||
}
|
||||
return vol_desc_start;
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long udf_get_last_block(struct super_block *sb)
|
||||
{
|
||||
struct block_device *bdev = sb->s_bdev;
|
||||
struct cdrom_device_info *cdi = disk_to_cdi(bdev->bd_disk);
|
||||
unsigned long lblock = 0;
|
||||
|
||||
/*
|
||||
* ioctl failed or returned obviously bogus value?
|
||||
* The cdrom layer call failed or returned obviously bogus value?
|
||||
* Try using the device size...
|
||||
*/
|
||||
if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
|
||||
lblock == 0)
|
||||
if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0)
|
||||
lblock = i_size_read(bdev->bd_inode) >> sb->s_blocksize_bits;
|
||||
|
||||
if (lblock)
|
||||
return lblock - 1;
|
||||
else
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -305,7 +305,7 @@ void
|
||||
xfs_blkdev_issue_flush(
|
||||
xfs_buftarg_t *buftarg)
|
||||
{
|
||||
blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
|
||||
blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <linux/mman.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
|
||||
#include "zonefs.h"
|
||||
|
||||
@@ -477,7 +478,7 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
|
||||
if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
|
||||
ret = file_write_and_wait_range(file, start, end);
|
||||
if (!ret)
|
||||
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
|
||||
|
||||
if (ret)
|
||||
zonefs_io_error(inode, true);
|
||||
@@ -595,6 +596,61 @@ static const struct iomap_dio_ops zonefs_write_dio_ops = {
|
||||
.end_io = zonefs_file_write_dio_end_io,
|
||||
};
|
||||
|
||||
static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
struct zonefs_inode_info *zi = ZONEFS_I(inode);
|
||||
struct block_device *bdev = inode->i_sb->s_bdev;
|
||||
unsigned int max;
|
||||
struct bio *bio;
|
||||
ssize_t size;
|
||||
int nr_pages;
|
||||
ssize_t ret;
|
||||
|
||||
nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
|
||||
if (!nr_pages)
|
||||
return 0;
|
||||
|
||||
max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
|
||||
max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
|
||||
iov_iter_truncate(from, max);
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_iter.bi_sector = zi->i_zsector;
|
||||
bio->bi_write_hint = iocb->ki_hint;
|
||||
bio->bi_ioprio = iocb->ki_ioprio;
|
||||
bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
|
||||
if (iocb->ki_flags & IOCB_DSYNC)
|
||||
bio->bi_opf |= REQ_FUA;
|
||||
|
||||
ret = bio_iov_iter_get_pages(bio, from);
|
||||
if (unlikely(ret)) {
|
||||
bio_io_error(bio);
|
||||
return ret;
|
||||
}
|
||||
size = bio->bi_iter.bi_size;
|
||||
task_io_account_write(ret);
|
||||
|
||||
if (iocb->ki_flags & IOCB_HIPRI)
|
||||
bio_set_polled(bio, iocb);
|
||||
|
||||
ret = submit_bio_wait(bio);
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
zonefs_file_write_dio_end_io(iocb, size, ret, 0);
|
||||
if (ret >= 0) {
|
||||
iocb->ki_pos += size;
|
||||
return size;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle direct writes. For sequential zone files, this is the only possible
|
||||
* write path. For these files, check that the user is issuing writes
|
||||
@@ -610,6 +666,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
struct zonefs_inode_info *zi = ZONEFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
bool sync = is_sync_kiocb(iocb);
|
||||
bool append = false;
|
||||
size_t count;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -618,7 +676,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
* as this can cause write reordering (e.g. the first aio gets EAGAIN
|
||||
* on the inode lock but the second goes through but is now unaligned).
|
||||
*/
|
||||
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
|
||||
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
|
||||
(iocb->ki_flags & IOCB_NOWAIT))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
@@ -642,16 +700,22 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
}
|
||||
|
||||
/* Enforce sequential writes (append only) in sequential zones */
|
||||
mutex_lock(&zi->i_truncate_mutex);
|
||||
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) {
|
||||
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
|
||||
mutex_lock(&zi->i_truncate_mutex);
|
||||
if (iocb->ki_pos != zi->i_wpoffset) {
|
||||
mutex_unlock(&zi->i_truncate_mutex);
|
||||
ret = -EINVAL;
|
||||
goto inode_unlock;
|
||||
}
|
||||
mutex_unlock(&zi->i_truncate_mutex);
|
||||
ret = -EINVAL;
|
||||
goto inode_unlock;
|
||||
append = sync;
|
||||
}
|
||||
mutex_unlock(&zi->i_truncate_mutex);
|
||||
|
||||
ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
|
||||
&zonefs_write_dio_ops, is_sync_kiocb(iocb));
|
||||
if (append)
|
||||
ret = zonefs_file_dio_append(iocb, from);
|
||||
else
|
||||
ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
|
||||
&zonefs_write_dio_ops, sync);
|
||||
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
|
||||
(ret > 0 || ret == -EIOCBQUEUED)) {
|
||||
if (ret > 0)
|
||||
|
||||
@@ -193,8 +193,6 @@ struct backing_dev_info {
|
||||
congested_fn *congested_fn; /* Function pointer if device is md/dm */
|
||||
void *congested_data; /* Pointer to aux data for congested func */
|
||||
|
||||
const char *name;
|
||||
|
||||
struct kref refcnt; /* Reference counter for the structure */
|
||||
unsigned int capabilities; /* Device capabilities */
|
||||
unsigned int min_ratio;
|
||||
|
||||
@@ -33,14 +33,10 @@ int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
|
||||
__printf(2, 0)
|
||||
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
|
||||
va_list args);
|
||||
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
|
||||
void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner);
|
||||
void bdi_unregister(struct backing_dev_info *bdi);
|
||||
|
||||
struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
|
||||
static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
|
||||
{
|
||||
return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
|
||||
}
|
||||
struct backing_dev_info *bdi_alloc(int node_id);
|
||||
|
||||
void wb_start_background_writeback(struct bdi_writeback *wb);
|
||||
void wb_workfn(struct work_struct *work);
|
||||
|
||||
@@ -70,7 +70,7 @@ static inline bool bio_has_data(struct bio *bio)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool bio_no_advance_iter(struct bio *bio)
|
||||
static inline bool bio_no_advance_iter(const struct bio *bio)
|
||||
{
|
||||
return bio_op(bio) == REQ_OP_DISCARD ||
|
||||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
|
||||
@@ -138,8 +138,8 @@ static inline bool bio_next_segment(const struct bio *bio,
|
||||
#define bio_for_each_segment_all(bvl, bio, iter) \
|
||||
for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); )
|
||||
|
||||
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
|
||||
unsigned bytes)
|
||||
static inline void bio_advance_iter(const struct bio *bio,
|
||||
struct bvec_iter *iter, unsigned int bytes)
|
||||
{
|
||||
iter->bi_sector += bytes >> 9;
|
||||
|
||||
@@ -417,6 +417,7 @@ static inline void bio_io_error(struct bio *bio)
|
||||
|
||||
static inline void bio_wouldblock_error(struct bio *bio)
|
||||
{
|
||||
bio_set_flag(bio, BIO_QUIET);
|
||||
bio->bi_status = BLK_STS_AGAIN;
|
||||
bio_endio(bio);
|
||||
}
|
||||
@@ -444,12 +445,6 @@ void bio_release_pages(struct bio *bio, bool mark_dirty);
|
||||
extern void bio_set_pages_dirty(struct bio *bio);
|
||||
extern void bio_check_pages_dirty(struct bio *bio);
|
||||
|
||||
void generic_start_io_acct(struct request_queue *q, int op,
|
||||
unsigned long sectors, struct hd_struct *part);
|
||||
void generic_end_io_acct(struct request_queue *q, int op,
|
||||
struct hd_struct *part,
|
||||
unsigned long start_time);
|
||||
|
||||
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||
struct bio *src, struct bvec_iter *src_iter);
|
||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||
|
||||
@@ -607,12 +607,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
u64_stats_update_begin(&bis->sync);
|
||||
|
||||
/*
|
||||
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
|
||||
* is a split bio and we would have already accounted for the
|
||||
* size of the bio.
|
||||
* If the bio is flagged with BIO_CGROUP_ACCT it means this is a
|
||||
* split bio and we would have already accounted for the size of
|
||||
* the bio.
|
||||
*/
|
||||
if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
|
||||
if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
|
||||
bio_set_flag(bio, BIO_CGROUP_ACCT);
|
||||
bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
|
||||
}
|
||||
bis->cur.ios[rwd]++;
|
||||
|
||||
u64_stats_update_end(&bis->sync);
|
||||
@@ -629,6 +631,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
|
||||
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
|
||||
{
|
||||
if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
|
||||
return;
|
||||
if (atomic_add_return(1, &blkg->use_delay) == 1)
|
||||
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
|
||||
}
|
||||
@@ -637,6 +641,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
|
||||
{
|
||||
int old = atomic_read(&blkg->use_delay);
|
||||
|
||||
if (WARN_ON_ONCE(old < 0))
|
||||
return 0;
|
||||
if (old == 0)
|
||||
return 0;
|
||||
|
||||
@@ -661,20 +667,39 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
|
||||
* @blkg: target blkg
|
||||
* @delay: delay duration in nsecs
|
||||
*
|
||||
* When enabled with this function, the delay is not decayed and must be
|
||||
* explicitly cleared with blkcg_clear_delay(). Must not be mixed with
|
||||
* blkcg_[un]use_delay() and blkcg_add_delay() usages.
|
||||
*/
|
||||
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
|
||||
{
|
||||
int old = atomic_read(&blkg->use_delay);
|
||||
|
||||
/* We only want 1 person setting the congestion count for this blkg. */
|
||||
if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
|
||||
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
|
||||
|
||||
atomic64_set(&blkg->delay_nsec, delay);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_clear_delay - Disable allocator delay mechanism
|
||||
* @blkg: target blkg
|
||||
*
|
||||
* Disable use_delay mechanism. See blkcg_set_delay().
|
||||
*/
|
||||
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
|
||||
{
|
||||
int old = atomic_read(&blkg->use_delay);
|
||||
if (!old)
|
||||
return;
|
||||
|
||||
/* We only want 1 person clearing the congestion count for this blkg. */
|
||||
while (old) {
|
||||
int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
|
||||
if (cur == old) {
|
||||
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
|
||||
break;
|
||||
}
|
||||
old = cur;
|
||||
}
|
||||
if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
|
||||
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
|
||||
}
|
||||
|
||||
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
|
||||
|
||||
@@ -121,6 +121,7 @@ static inline bool bio_has_crypt_ctx(struct bio *bio)
|
||||
|
||||
static inline void bio_clone_skip_dm_default_key(struct bio *dst,
|
||||
const struct bio *src);
|
||||
|
||||
void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask);
|
||||
static inline void bio_crypt_clone(struct bio *dst, struct bio *src,
|
||||
gfp_t gfp_mask)
|
||||
|
||||
@@ -140,6 +140,8 @@ struct blk_mq_hw_ctx {
|
||||
*/
|
||||
atomic_t nr_active;
|
||||
|
||||
/** @cpuhp_online: List to store request if CPU is going to die */
|
||||
struct hlist_node cpuhp_online;
|
||||
/** @cpuhp_dead: List to store request if some CPU dies. */
|
||||
struct hlist_node cpuhp_dead;
|
||||
/** @kobj: Kernel object for sysfs. */
|
||||
@@ -391,6 +393,11 @@ struct blk_mq_ops {
|
||||
enum {
|
||||
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
|
||||
BLK_MQ_F_TAG_SHARED = 1 << 1,
|
||||
/*
|
||||
* Set when this device requires underlying blk-mq device for
|
||||
* completing IO:
|
||||
*/
|
||||
BLK_MQ_F_STACKING = 1 << 2,
|
||||
BLK_MQ_F_BLOCKING = 1 << 5,
|
||||
BLK_MQ_F_NO_SCHED = 1 << 6,
|
||||
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
|
||||
@@ -400,6 +407,9 @@ enum {
|
||||
BLK_MQ_S_TAG_ACTIVE = 1,
|
||||
BLK_MQ_S_SCHED_RESTART = 2,
|
||||
|
||||
/* hw queue is inactive after all its CPUs become offline */
|
||||
BLK_MQ_S_INACTIVE = 3,
|
||||
|
||||
BLK_MQ_MAX_DEPTH = 10240,
|
||||
|
||||
BLK_MQ_CPU_WORK_BATCH = 8,
|
||||
@@ -494,6 +504,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
|
||||
void blk_mq_kick_requeue_list(struct request_queue *q);
|
||||
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
|
||||
bool blk_mq_complete_request(struct request *rq);
|
||||
void blk_mq_force_complete_rq(struct request *rq);
|
||||
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
|
||||
struct bio *bio, unsigned int nr_segs);
|
||||
bool blk_mq_queue_stopped(struct request_queue *q);
|
||||
@@ -508,6 +519,7 @@ void blk_mq_unquiesce_queue(struct request_queue *q);
|
||||
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
|
||||
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
|
||||
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
|
||||
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
|
||||
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
busy_tag_iter_fn *fn, void *priv);
|
||||
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
|
||||
@@ -577,4 +589,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
|
||||
rq->q->mq_ops->cleanup_rq(rq);
|
||||
}
|
||||
|
||||
blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -64,6 +64,18 @@ typedef u8 __bitwise blk_status_t;
|
||||
*/
|
||||
#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)
|
||||
|
||||
/*
|
||||
* BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
|
||||
* related resources are unavailable, but the driver can guarantee the queue
|
||||
* will be rerun in the future once the resources become available again.
|
||||
*
|
||||
* This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
|
||||
* a zone specific resource and IO to a different zone on the same device could
|
||||
* still be served. Examples of that are zones that are write-locked, but a read
|
||||
* to the same zone could be served.
|
||||
*/
|
||||
#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14)
|
||||
|
||||
/**
|
||||
* blk_path_error - returns true if error may be path related
|
||||
* @error: status the request was completed with
|
||||
@@ -229,7 +241,7 @@ enum {
|
||||
* throttling rules. Don't do it again. */
|
||||
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
|
||||
* of this bio. */
|
||||
BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */
|
||||
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
|
||||
BIO_TRACKED, /* set if bio goes through the rq_qos path */
|
||||
BIO_FLAG_LAST
|
||||
};
|
||||
@@ -305,6 +317,8 @@ enum req_opf {
|
||||
REQ_OP_ZONE_CLOSE = 11,
|
||||
/* Transition a zone to full */
|
||||
REQ_OP_ZONE_FINISH = 12,
|
||||
/* write data at the current zone write pointer */
|
||||
REQ_OP_ZONE_APPEND = 13,
|
||||
|
||||
/* SCSI passthrough using struct scsi_request */
|
||||
REQ_OP_SCSI_IN = 32,
|
||||
@@ -332,7 +346,6 @@ enum req_flag_bits {
|
||||
__REQ_RAHEAD, /* read ahead, can fail anytime */
|
||||
__REQ_BACKGROUND, /* background IO */
|
||||
__REQ_NOWAIT, /* Don't wait if request will block */
|
||||
__REQ_NOWAIT_INLINE, /* Return would-block error inline */
|
||||
/*
|
||||
* When a shared kthread needs to issue a bio for a cgroup, doing
|
||||
* so synchronously can lead to priority inversions as the kthread
|
||||
@@ -367,7 +380,6 @@ enum req_flag_bits {
|
||||
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
|
||||
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
|
||||
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
|
||||
#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE)
|
||||
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
|
||||
|
||||
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
|
||||
|
||||
@@ -83,8 +83,6 @@ typedef __u32 __bitwise req_flags_t;
|
||||
/* set for "ide_preempt" requests and also for requests for which the SCSI
|
||||
"quiesce" state must be ignored. */
|
||||
#define RQF_PREEMPT ((__force req_flags_t)(1 << 8))
|
||||
/* contains copies of user pages */
|
||||
#define RQF_COPY_USER ((__force req_flags_t)(1 << 9))
|
||||
/* vaguely specified driver internal error. Ignored by the block layer */
|
||||
#define RQF_FAILED ((__force req_flags_t)(1 << 10))
|
||||
/* don't warn about errors */
|
||||
@@ -232,8 +230,6 @@ struct request {
|
||||
unsigned short write_hint;
|
||||
unsigned short ioprio;
|
||||
|
||||
unsigned int extra_len; /* length of alignment and padding */
|
||||
|
||||
enum mq_rq_state state;
|
||||
refcount_t ref;
|
||||
|
||||
@@ -296,7 +292,6 @@ struct blk_queue_ctx;
|
||||
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
|
||||
|
||||
struct bio_vec;
|
||||
typedef int (dma_drain_needed_fn)(struct request *);
|
||||
|
||||
enum blk_eh_timer_return {
|
||||
BLK_EH_DONE, /* drivers has completed the command */
|
||||
@@ -342,6 +337,7 @@ struct queue_limits {
|
||||
unsigned int max_hw_discard_sectors;
|
||||
unsigned int max_write_same_sectors;
|
||||
unsigned int max_write_zeroes_sectors;
|
||||
unsigned int max_zone_append_sectors;
|
||||
unsigned int discard_granularity;
|
||||
unsigned int discard_alignment;
|
||||
|
||||
@@ -367,7 +363,8 @@ unsigned int blkdev_nr_zones(struct gendisk *disk);
|
||||
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
|
||||
sector_t sectors, sector_t nr_sectors,
|
||||
gfp_t gfp_mask);
|
||||
extern int blk_revalidate_disk_zones(struct gendisk *disk);
|
||||
int blk_revalidate_disk_zones(struct gendisk *disk,
|
||||
void (*update_driver_data)(struct gendisk *disk));
|
||||
|
||||
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
@@ -405,7 +402,6 @@ struct request_queue {
|
||||
struct rq_qos *rq_qos;
|
||||
|
||||
make_request_fn *make_request_fn;
|
||||
dma_drain_needed_fn *dma_drain_needed;
|
||||
|
||||
const struct blk_mq_ops *mq_ops;
|
||||
|
||||
@@ -475,8 +471,6 @@ struct request_queue {
|
||||
*/
|
||||
unsigned long nr_requests; /* Max # of requests */
|
||||
|
||||
unsigned int dma_drain_size;
|
||||
void *dma_drain_buffer;
|
||||
unsigned int dma_pad_mask;
|
||||
unsigned int dma_alignment;
|
||||
|
||||
@@ -740,6 +734,16 @@ static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline bool blk_queue_zone_is_seq(struct request_queue *q,
|
||||
sector_t sector)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline unsigned int blk_queue_zone_no(struct request_queue *q,
|
||||
sector_t sector)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
static inline bool rq_is_sync(struct request *rq)
|
||||
@@ -758,6 +762,9 @@ static inline bool rq_mergeable(struct request *rq)
|
||||
if (req_op(rq) == REQ_OP_WRITE_ZEROES)
|
||||
return false;
|
||||
|
||||
if (req_op(rq) == REQ_OP_ZONE_APPEND)
|
||||
return false;
|
||||
|
||||
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
|
||||
return false;
|
||||
if (rq->rq_flags & RQF_NOMERGE_FLAGS)
|
||||
@@ -1092,6 +1099,8 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q,
|
||||
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
|
||||
unsigned int max_write_same_sectors);
|
||||
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
|
||||
extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
|
||||
unsigned int max_zone_append_sectors);
|
||||
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
|
||||
extern void blk_queue_alignment_offset(struct request_queue *q,
|
||||
unsigned int alignment);
|
||||
@@ -1110,9 +1119,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
|
||||
sector_t offset);
|
||||
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
|
||||
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
|
||||
extern int blk_queue_dma_drain(struct request_queue *q,
|
||||
dma_drain_needed_fn *dma_drain_needed,
|
||||
void *buf, unsigned int size);
|
||||
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
|
||||
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
|
||||
extern void blk_queue_dma_alignment(struct request_queue *, int);
|
||||
@@ -1149,7 +1155,15 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
|
||||
return max_t(unsigned short, rq->nr_phys_segments, 1);
|
||||
}
|
||||
|
||||
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
|
||||
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
struct scatterlist *sglist, struct scatterlist **last_sg);
|
||||
static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
|
||||
struct scatterlist *sglist)
|
||||
{
|
||||
struct scatterlist *last_sg = NULL;
|
||||
|
||||
return __blk_rq_map_sg(q, rq, sglist, &last_sg);
|
||||
}
|
||||
extern void blk_dump_rq_flags(struct request *, char *);
|
||||
extern long nr_blockdev_pages(void);
|
||||
|
||||
@@ -1217,7 +1231,9 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
|
||||
!list_empty(&plug->cb_list));
|
||||
}
|
||||
|
||||
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
|
||||
extern void blk_io_schedule(void);
|
||||
|
||||
int blkdev_issue_flush(struct block_device *, gfp_t);
|
||||
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
|
||||
|
||||
@@ -1304,6 +1320,11 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
|
||||
return q->limits.max_segment_size;
|
||||
}
|
||||
|
||||
static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
|
||||
{
|
||||
return q->limits.max_zone_append_sectors;
|
||||
}
|
||||
|
||||
static inline unsigned queue_logical_block_size(const struct request_queue *q)
|
||||
{
|
||||
int retval = 512;
|
||||
@@ -1760,6 +1781,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *,
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
bool blk_req_needs_zone_write_lock(struct request *rq);
|
||||
bool blk_req_zone_write_trylock(struct request *rq);
|
||||
void __blk_req_zone_write_lock(struct request *rq);
|
||||
void __blk_req_zone_write_unlock(struct request *rq);
|
||||
|
||||
@@ -1850,8 +1872,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
|
||||
sector_t *error_sector)
|
||||
static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -1871,4 +1892,32 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
|
||||
wake_up_process(waiter);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
|
||||
unsigned int op);
|
||||
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
|
||||
unsigned long start_time);
|
||||
|
||||
/**
|
||||
* bio_start_io_acct - start I/O accounting for bio based drivers
|
||||
* @bio:	bio to start accounting for
|
||||
*
|
||||
* Returns the start time that should be passed back to bio_end_io_acct().
|
||||
*/
|
||||
static inline unsigned long bio_start_io_acct(struct bio *bio)
|
||||
{
|
||||
return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio));
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_end_io_acct - end I/O accounting for bio based drivers
|
||||
* @bio:	bio to end accounting for
|
||||
* @start_time:	start time returned by bio_start_io_acct()
|
||||
*/
|
||||
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
|
||||
{
|
||||
return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
|
||||
}
|
||||
#endif /* CONFIG_BLOCK */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -12,8 +12,17 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
/*
|
||||
* was unsigned short, but we might as well be ready for > 64kB I/O pages
|
||||
/**
|
||||
* struct bio_vec - a contiguous range of physical memory addresses
|
||||
* @bv_page: First page associated with the address range.
|
||||
* @bv_len: Number of bytes in the address range.
|
||||
* @bv_offset: Start of the address range relative to the start of @bv_page.
|
||||
*
|
||||
* The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len:
|
||||
*
|
||||
* nth_page(@bv_page, n) == @bv_page + n
|
||||
*
|
||||
* This holds because page_is_mergeable() checks the above property.
|
||||
*/
|
||||
struct bio_vec {
|
||||
struct page *bv_page;
|
||||
|
||||
@@ -94,6 +94,11 @@ struct cdrom_device_ops {
|
||||
struct packet_command *);
|
||||
};
|
||||
|
||||
int cdrom_multisession(struct cdrom_device_info *cdi,
|
||||
struct cdrom_multisession *info);
|
||||
int cdrom_read_tocentry(struct cdrom_device_info *cdi,
|
||||
struct cdrom_tocentry *entry);
|
||||
|
||||
/* the general block_device operations structure: */
|
||||
extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
|
||||
fmode_t mode);
|
||||
@@ -104,7 +109,7 @@ extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
|
||||
unsigned int clearing);
|
||||
extern int cdrom_media_changed(struct cdrom_device_info *);
|
||||
|
||||
extern int register_cdrom(struct cdrom_device_info *cdi);
|
||||
extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi);
|
||||
extern void unregister_cdrom(struct cdrom_device_info *cdi);
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -152,6 +152,7 @@ enum cpuhp_state {
|
||||
CPUHP_AP_SMPBOOT_THREADS,
|
||||
CPUHP_AP_X86_VDSO_VMA_ONLINE,
|
||||
CPUHP_AP_IRQ_AFFINITY_ONLINE,
|
||||
CPUHP_AP_BLK_MQ_ONLINE,
|
||||
CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
|
||||
CPUHP_AP_X86_INTEL_EPB_ONLINE,
|
||||
CPUHP_AP_PERF_ONLINE,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user