[PATCH alternative 2] block: fix the REQ_OP_SECURE_ERASE handling to not leak erased data

From: Christoph Hellwig
Date: Wed Mar 16 2022 - 05:39:02 EST


The support for this "secure erase" is completely broken, given that
the blk-lib code aligns it to the discard granularity and alignment
and thus skips parts of the two be discarded area, leaking plenty of
securely erased data. Fix this by adding a new blkdev_secure_erase
helper instead.

Note that even if with these rounding errors fixed, a LBA based
"secure erase" can't actually work on flash media. As flash media
requires erase cycles before writing instead of overwrites there
usually will be copied of this data left somewhere on the media.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
block/blk-lib.c | 55 +++++++++++++++++++++++------
block/ioctl.c | 43 +++++++++++++++++-----
drivers/block/xen-blkback/blkback.c | 15 ++++----
fs/f2fs/file.c | 9 ++---
include/linux/blkdev.h | 4 +--
5 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f09beadcbe30..5fc2c0bf5c940 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
- unsigned int op;
+ unsigned int op = REQ_OP_DISCARD;
sector_t bs_mask, part_offset = 0;

if (!q)
@@ -38,15 +38,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (bdev_read_only(bdev))
return -EPERM;

- if (flags & BLKDEV_DISCARD_SECURE) {
- if (!blk_queue_secure_erase(q))
- return -EOPNOTSUPP;
- op = REQ_OP_SECURE_ERASE;
- } else {
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
- op = REQ_OP_DISCARD;
- }
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;

/* In case the discard granularity isn't set by buggy device driver */
if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
@@ -440,3 +433,45 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ unsigned int max_sectors =
+ bdev_get_queue(bdev)->limits.max_discard_sectors;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ if (max_sectors == 0)
+ return -EOPNOTSUPP;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ blk_start_plug(&plug);
+ for (;;) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+ bio = blk_next_bio(bio, 0, gfp);
+ bio_set_dev(bio, bdev);
+ bio->bi_opf = REQ_OP_SECURE_ERASE;
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = len;
+
+ sector += len << SECTOR_SHIFT;
+ nr_sects -= len << SECTOR_SHIFT;
+ if (!nr_sects) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ break;
+ }
+ cond_resched();
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
diff --git a/block/ioctl.c b/block/ioctl.c
index 4a86340133e46..0821142f921d7 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -83,7 +83,7 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
#endif

static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
- unsigned long arg, unsigned long flags)
+ unsigned long arg)
{
uint64_t range[2];
uint64_t start, len;
@@ -115,15 +115,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
goto fail;
-
- err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
- GFP_KERNEL, flags);
-
+ err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
}

+static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
+ void __user *argp)
+{
+ uint64_t start, len;
+ uint64_t range[2];
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+ if (!blk_queue_discard(bdev_get_queue(bdev)))
+ return -EOPNOTSUPP;
+ if (copy_from_user(range, argp, sizeof(range)))
+ return -EFAULT;
+
+ start = range[0];
+ len = range[1];
+ if ((start & 511) || (len & 511))
+ return -EINVAL;
+ if (start + len > bdev_nr_bytes(bdev))
+ return -EINVAL;
+
+ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+ if (!err)
+ err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+ GFP_KERNEL);
+ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+ return err;
+}
+
+
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg)
{
@@ -451,10 +479,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKROSET:
return blkdev_roset(bdev, mode, cmd, arg);
case BLKDISCARD:
- return blk_ioctl_discard(bdev, mode, arg, 0);
+ return blk_ioctl_discard(bdev, mode, arg);
case BLKSECDISCARD:
- return blk_ioctl_discard(bdev, mode, arg,
- BLKDEV_DISCARD_SECURE);
+ return blk_ioctl_secure_erase(bdev, mode, argp);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
case BLKGETDISKSEQ:
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 14e452896d04c..12f741068bcdf 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
struct block_device *bdev = blkif->vbd.bdev;
- unsigned long secure;
struct phys_req preq;

xen_blkif_get(blkif);
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
}
ring->st_ds_req++;

- secure = (blkif->vbd.discard_secure &&
- (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
- BLKDEV_DISCARD_SECURE : 0;
+ if (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+ err = blkdev_issue_secure_erase(bdev,
+ req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
+ else
+ err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL, 0);

- err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
- req->u.discard.nr_sectors,
- GFP_KERNEL, secure);
fail_response:
if (err == -EOPNOTSUPP) {
pr_debug("discard op failed, not supported\n");
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c98ef6af97d1..a83548ad7171f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3691,10 +3691,11 @@ static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
if (!q)
return -ENXIO;

- if (flags & F2FS_TRIM_FILE_DISCARD)
- ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
- blk_queue_secure_erase(q) ?
- BLKDEV_DISCARD_SECURE : 0);
+ if ((flags & F2FS_TRIM_FILE_DISCARD) && blk_queue_secure_erase(q))
+ ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
+ GFP_NOFS);
+ else if (flags & F2FS_TRIM_FILE_DISCARD)
+ ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, 0);

if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
if (IS_ENCRYPTED(inode))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 16b47035e4b06..6cfc60090b119 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -846,13 +846,13 @@ extern void blk_io_schedule(void);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);

-#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */
-
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, int flags,
struct bio **biop);
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp);

#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
--
2.30.2