From 1e0dcca9e1aa3caa1a0dc4300db1a091078fe40b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:49 +0100 Subject: [PATCH 1/8] dm: use bdev_read_only to check if a device is read-only dm-thin and dm-cache also work on partitions, so use the proper interface to check if the device is read-only. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/dm-cache-metadata.c | 2 +- drivers/md/dm-thin-metadata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index af6d4f898e4c..89a73204dbf4 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -449,7 +449,7 @@ static int __check_incompat_features(struct cache_disk_superblock *disk_super, /* * Check for read-only metadata to skip the following RDWR checks. */ - if (get_disk_ro(cmd->bdev->bd_disk)) + if (bdev_read_only(cmd->bdev)) return 0; features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 6ebb2127f3e2..e75b20480e46 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -636,7 +636,7 @@ static int __check_incompat_features(struct thin_disk_superblock *disk_super, /* * Check for read-only metadata to skip the following RDWR checks. */ - if (get_disk_ro(pmd->bdev->bd_disk)) + if (bdev_read_only(pmd->bdev)) return 0; features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; From 6f0d9689b670bc9f9640ff87b3f9226b7806dea2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:50 +0100 Subject: [PATCH 2/8] block: remove the NULL bdev check in bdev_read_only Only a single caller can end up in bdev_read_only, so move the check there. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 3 --- fs/super.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 419548e92d82..484a474648d5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1657,11 +1657,8 @@ EXPORT_SYMBOL(set_disk_ro); int bdev_read_only(struct block_device *bdev) { - if (!bdev) - return 0; return bdev->bd_read_only; } - EXPORT_SYMBOL(bdev_read_only); /* diff --git a/fs/super.c b/fs/super.c index 2c6cdea2ab2d..5a1f384ffc74 100644 --- a/fs/super.c +++ b/fs/super.c @@ -865,7 +865,8 @@ int reconfigure_super(struct fs_context *fc) if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK - if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) + if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev && + bdev_read_only(sb->s_bdev)) return -EACCES; #endif From 52f019d43c229afd65dc11c8c1b05b6436bf6765 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:51 +0100 Subject: [PATCH 3/8] block: add a hard-readonly flag to struct gendisk Commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") addressed a long-standing problem with user read-only policy being overridden as a result of a device-initiated revalidate. The commit has since been reverted due to a regression that left some USB devices read-only indefinitely. To fix the underlying problems with revalidate we need to keep track of hardware state and user policy separately. The gendisk has been updated to reflect the current hardware state set by the device driver. This is done to allow returning the device to the hardware state once the user clears the BLKROSET flag. The resulting semantics are as follows: - If BLKROSET sets a given partition read-only, that partition will remain read-only even if the underlying storage stack initiates a revalidate. However, the BLKRRPART ioctl will cause the partition table to be dropped and any user policy on partitions will be lost. - If BLKROSET has not been set, both the whole disk device and any partitions will reflect the current write-protect state of the underlying device. Based on a patch from Martin K. Petersen . Reported-by: Oleksii Kurochko Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- block/genhd.c | 33 +++++++++++++++++++-------------- block/partitions/core.c | 3 +-- include/linux/genhd.h | 6 ++++-- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 7663a9b94b80..08ff8ca32529 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -694,9 +694,7 @@ static inline bool should_fail_request(struct block_device *part, static inline bool bio_check_ro(struct bio *bio, struct block_device *part) { - const int op = bio_op(bio); - - if (part->bd_read_only && op_is_write(op)) { + if (op_is_write(bio_op(bio)) && bdev_read_only(part)) { char b[BDEVNAME_SIZE]; if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) diff --git a/block/genhd.c b/block/genhd.c index 484a474648d5..1873e4571328 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1637,27 +1637,32 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); } -void set_disk_ro(struct gendisk *disk, int flag) +/** + * set_disk_ro - set a gendisk read-only + * @disk: gendisk to operate on + * @ready_only: %true to set the disk read-only, %false set the disk read/write + * + * This function is used to indicate whether a given disk device should have its + * read-only flag set. set_disk_ro() is typically used by device drivers to + * indicate whether the underlying physical device is write-protected. + */ +void set_disk_ro(struct gendisk *disk, bool read_only) { - struct disk_part_iter piter; - struct block_device *part; - - if (disk->part0->bd_read_only != flag) { - set_disk_ro_uevent(disk, flag); - disk->part0->bd_read_only = flag; + if (read_only) { + if (test_and_set_bit(GD_READ_ONLY, &disk->state)) + return; + } else { + if (!test_and_clear_bit(GD_READ_ONLY, &disk->state)) + return; } - - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); - while ((part = disk_part_iter_next(&piter))) - part->bd_read_only = flag; - disk_part_iter_exit(&piter); + set_disk_ro_uevent(disk, read_only); } - EXPORT_SYMBOL(set_disk_ro); int bdev_read_only(struct block_device *bdev) { - return bdev->bd_read_only; + return bdev->bd_read_only || + test_bit(GD_READ_ONLY, &bdev->bd_disk->state); } EXPORT_SYMBOL(bdev_read_only); diff --git a/block/partitions/core.c b/block/partitions/core.c index e7d776db803b..168d5906077c 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -195,7 +195,7 @@ static ssize_t part_start_show(struct device *dev, static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only); + return sprintf(buf, "%d\n", bdev_read_only(dev_to_bdev(dev))); } static ssize_t part_alignment_offset_show(struct device *dev, @@ -361,7 +361,6 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, bdev->bd_start_sect = start; bdev_set_nr_sectors(bdev, len); - bdev->bd_read_only = get_disk_ro(disk); if (info) { err = -ENOMEM; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 809aaa32d53c..a62ccbfac54b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -163,6 +163,7 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 struct kobject *slave_dir; struct timer_rand_state *random; @@ -249,11 +250,12 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) extern void del_gendisk(struct gendisk *gp); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_disk_ro(struct gendisk *disk, int flag); +void set_disk_ro(struct gendisk *disk, bool read_only); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0->bd_read_only; + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); } extern void disk_block_events(struct gendisk *disk); From 947139bf3cce097739380c9782a35de504f24203 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:52 +0100 Subject: [PATCH 4/8] block: propagate BLKROSET on the whole device to all partitions Change the policy so that a BLKROSET on the whole device also affects partitions. To quote Martin K. Petersen: It's very common for database folks to twiddle the read-only state of block devices and partitions. I know that our users will find it very counter-intuitive that setting /dev/sda read-only won't prevent writes to /dev/sda1. The existing behavior is inconsistent in the sense that doing: # blockdev --setro /dev/sda # echo foo > /dev/sda1 permits writes. But: # blockdev --setro /dev/sda # echo foo > /dev/sda1 doesn't. And a subsequent: # blockdev --setrw /dev/sda # echo foo > /dev/sda1 doesn't work either since sda1's read-only policy has been inherited from the whole-disk device. You need to do: # blockdev --rereadpt after setting the whole-disk device rw to effectuate the same change on the partitions, otherwise they are stuck being read-only indefinitely. However, setting the read-only policy on a partition does *not* require the revalidate step. As a matter of fact, doing the revalidate will blow away the policy setting you just made. So the user needs to take different actions depending on whether they are trying to read-protect a whole-disk device or a partition. Despite using the same ioctl. That is really confusing. I have lost count how many times our customers have had data clobbered because of ambiguity of the existing whole-disk device policy. The current behavior violates the principle of least surprise by letting the user think they write protected the whole disk when they actually didn't. Suggested-by: Martin K. Petersen Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 1873e4571328..ca5d880af512 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1661,8 +1661,7 @@ EXPORT_SYMBOL(set_disk_ro); int bdev_read_only(struct block_device *bdev) { - return bdev->bd_read_only || - test_bit(GD_READ_ONLY, &bdev->bd_disk->state); + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); } EXPORT_SYMBOL(bdev_read_only); From cbf72cce6370b3ec1a6073cf777ab9b6ba5bf5b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:53 +0100 Subject: [PATCH 5/8] rbd: remove the ->set_read_only method Now that the hardware read-only state can't be changed by the BLKROSET ioctl, the code in this method is not required anymore. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Ilya Dryomov Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 59cfe71d0b3a..bbb88eb009e0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -692,29 +692,10 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } -static int rbd_set_read_only(struct block_device *bdev, bool ro) -{ - struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - - /* - * Both images mapped read-only and snapshots can't be marked - * read-write. - */ - if (!ro) { - if (rbd_is_ro(rbd_dev)) - return -EROFS; - - rbd_assert(!rbd_is_snap(rbd_dev)); - } - - return 0; -} - static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, - .set_read_only = rbd_set_read_only, }; /* From d11cd28998e9d25389d8c20e7cce0e4b4f17bee1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 11:42:54 +0100 Subject: [PATCH 6/8] nvme: allow revalidate to set a namespace read-only Unconditionally call set_disk_ro now that it only updates the hardware state. This allows to properly set up the Linux devices read-only when the controller turns a previously writable namespace read-only. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8caf9b34734d..566788ba4e7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2125,9 +2125,8 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if ((id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)) - set_disk_ro(disk, true); + set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) From 74cb8994b22ad7b95ac38dad9c9609ae49e88ec1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Jan 2021 11:02:32 +0100 Subject: [PATCH 7/8] brd: remove the end of device check in brd_do_bvec The block layer already checks for this conditions in bio_check_eod before calling the driver. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/brd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c43a6ab4b1f3..c7c821419079 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -285,14 +285,10 @@ out: static blk_qc_t brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_disk->private_data; + sector_t sector = bio->bi_iter.bi_sector; struct bio_vec bvec; - sector_t sector; struct bvec_iter iter; - sector = bio->bi_iter.bi_sector; - if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) - goto io_error; - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; From cf9a978f9781fb30b778ee61ef6bd164c655d9ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Jan 2021 11:02:33 +0100 Subject: [PATCH 8/8] dcssblk: remove the end of device check in dcssblk_submit_bio The block layer already checks for this conditions in bio_check_eod before calling the driver. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/s390/block/dcssblk.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 299e77ec2c41..5c5cff3f2374 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -886,10 +886,6 @@ dcssblk_submit_bio(struct bio *bio) (bio->bi_iter.bi_size & 4095) != 0) /* Request is not page-aligned. */ goto fail; - if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) { - /* Request beyond end of DCSS segment. */ - goto fail; - } /* verify data transfer direction */ if (dev_info->is_shared) { switch (dev_info->segment_type) {