Re: [PATCH 1/2] block: revert runtime dax control of the raw block device
From: Ross Zwisler
Date: Fri Jan 29 2016 - 12:54:21 EST
On Fri, Jan 29, 2016 at 07:18:41AM -0800, Dan Williams wrote:
> Dynamically enabling DAX requires that the page cache first be flushed
> and invalidated. This must occur atomically with the change of DAX mode
> otherwise we confuse the fsync/msync tracking and violate data
> durability guarantees. Eliminate the possibilty of DAX-disabled to
> DAX-enabled transitions for now and revisit this for the next cycle.
>
> Cc: Jan Kara <jack@xxxxxxxx>
> Cc: Jeff Moyer <jmoyer@xxxxxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Dave Chinner <david@xxxxxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
Sure, makes sense.
Reviewed-by: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
> ---
> block/ioctl.c | 38 --------------------------------------
> fs/block_dev.c | 28 ----------------------------
> include/linux/fs.h | 3 ---
> include/uapi/linux/fs.h | 1 -
> 4 files changed, 70 deletions(-)
>
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 77f5d17779d6..d8996bbd7f12 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
>
> return true;
> }
> -
> -static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
> -{
> - unsigned long arg;
> - int rc = 0;
> -
> - if (!capable(CAP_SYS_ADMIN))
> - return -EACCES;
> -
> - if (get_user(arg, (int __user *)(argp)))
> - return -EFAULT;
> - arg = !!arg;
> - if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
> - return 0;
> -
> - if (arg)
> - arg = S_DAX;
> -
> - if (arg && !blkdev_dax_capable(bdev))
> - return -ENOTTY;
> -
> - inode_lock(bdev->bd_inode);
> - if (bdev->bd_map_count == 0)
> - inode_set_flags(bdev->bd_inode, arg, S_DAX);
> - else
> - rc = -EBUSY;
> - inode_unlock(bdev->bd_inode);
> - return rc;
> -}
> -#else
> -static int blkdev_daxset(struct block_device *bdev, int arg)
> -{
> - if (arg)
> - return -ENOTTY;
> - return 0;
> -}
> #endif
>
> static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
> @@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
> case BLKTRACESETUP:
> case BLKTRACETEARDOWN:
> return blk_trace_ioctl(bdev, cmd, argp);
> - case BLKDAXSET:
> - return blkdev_daxset(bdev, arg);
> case BLKDAXGET:
> return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
> break;
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 7b9cd49622b1..afb437484362 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1736,37 +1736,13 @@ static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
> return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
> }
>
> -static void blkdev_vm_open(struct vm_area_struct *vma)
> -{
> - struct inode *bd_inode = bdev_file_inode(vma->vm_file);
> - struct block_device *bdev = I_BDEV(bd_inode);
> -
> - inode_lock(bd_inode);
> - bdev->bd_map_count++;
> - inode_unlock(bd_inode);
> -}
> -
> -static void blkdev_vm_close(struct vm_area_struct *vma)
> -{
> - struct inode *bd_inode = bdev_file_inode(vma->vm_file);
> - struct block_device *bdev = I_BDEV(bd_inode);
> -
> - inode_lock(bd_inode);
> - bdev->bd_map_count--;
> - inode_unlock(bd_inode);
> -}
> -
> static const struct vm_operations_struct blkdev_dax_vm_ops = {
> - .open = blkdev_vm_open,
> - .close = blkdev_vm_close,
> .fault = blkdev_dax_fault,
> .pmd_fault = blkdev_dax_pmd_fault,
> .pfn_mkwrite = blkdev_dax_fault,
> };
>
> static const struct vm_operations_struct blkdev_default_vm_ops = {
> - .open = blkdev_vm_open,
> - .close = blkdev_vm_close,
> .fault = filemap_fault,
> .map_pages = filemap_map_pages,
> };
> @@ -1774,18 +1750,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
> static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
> {
> struct inode *bd_inode = bdev_file_inode(file);
> - struct block_device *bdev = I_BDEV(bd_inode);
>
> file_accessed(file);
> - inode_lock(bd_inode);
> - bdev->bd_map_count++;
> if (IS_DAX(bd_inode)) {
> vma->vm_ops = &blkdev_dax_vm_ops;
> vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
> } else {
> vma->vm_ops = &blkdev_default_vm_ops;
> }
> - inode_unlock(bd_inode);
>
> return 0;
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index b10002d4a5f5..ae681002100a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -484,9 +484,6 @@ struct block_device {
> int bd_fsfreeze_count;
> /* Mutex for freeze */
> struct mutex bd_fsfreeze_mutex;
> -#ifdef CONFIG_FS_DAX
> - int bd_map_count;
> -#endif
> };
>
> /*
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 41e0433b4a83..149bec83a907 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -222,7 +222,6 @@ struct fsxattr {
> #define BLKSECDISCARD _IO(0x12,125)
> #define BLKROTATIONAL _IO(0x12,126)
> #define BLKZEROOUT _IO(0x12,127)
> -#define BLKDAXSET _IO(0x12,128)
> #define BLKDAXGET _IO(0x12,129)
>
> #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
>