Re: [PATCH 3/7] vfs: add renameat2 syscall and cross-rename
From: Jan Kara
Date: Wed Oct 02 2013 - 08:27:06 EST
On Tue 01-10-13 18:00:35, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@xxxxxxx>
>
> Add new renameat2 syscall, which is the same as renameat with an added
> flags argument.
>
> If flags is zero then this is a plain overwriting rename. If flags contain
> RENAME_EXCHANGE then exchange source and destination files. There's no
> restriction on the type of the files; e.g. a directory can be exchanged
> with a symlink.
It's not completely clear to me what should happen if RENAME_EXCHANGE is
set but destination doesn't exist. Return -ENOENT or just do standard
rename?
Honza
>
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
> ---
> arch/x86/syscalls/syscall_64.tbl | 1 +
> fs/dcache.c | 46 ++++++++++---
> fs/namei.c | 139 ++++++++++++++++++++++++++++++---------
> include/linux/dcache.h | 1 +
> include/linux/fs.h | 2 +
> include/uapi/linux/fs.h | 2 +
> 6 files changed, 152 insertions(+), 39 deletions(-)
>
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index 38ae65d..fafd734 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -320,6 +320,7 @@
> 311 64 process_vm_writev sys_process_vm_writev
> 312 common kcmp sys_kcmp
> 313 common finit_module sys_finit_module
> +314 common renameat2 sys_renameat2
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 4100030..1735bac 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -2495,12 +2495,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
> dentry->d_name.name = dentry->d_iname;
> } else {
> /*
> - * Both are internal. Just copy target to dentry
> + * Both are internal.
> */
> - memcpy(dentry->d_iname, target->d_name.name,
> - target->d_name.len + 1);
> - dentry->d_name.len = target->d_name.len;
> - return;
> + unsigned int i;
> + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
> + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
> + swap(((long *) &dentry->d_iname)[i],
> + ((long *) &target->d_iname)[i]);
> + }
> }
> }
> swap(dentry->d_name.len, target->d_name.len);
> @@ -2557,13 +2559,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
> * __d_move - move a dentry
> * @dentry: entry to move
> * @target: new dentry
> + * @exchange: exchange the two dentries
> *
> * Update the dcache to reflect the move of a file name. Negative
> * dcache entries should not be moved in this way. Caller must hold
> * rename_lock, the i_mutex of the source and target directories,
> * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
> */
> -static void __d_move(struct dentry * dentry, struct dentry * target)
> +static void __d_move(struct dentry *dentry, struct dentry *target,
> + bool exchange)
> {
> if (!dentry->d_inode)
> printk(KERN_WARNING "VFS: moving negative dcache entry\n");
> @@ -2587,6 +2591,10 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
>
> /* Unhash the target: dput() will then get rid of it */
> __d_drop(target);
> + if (exchange) {
> + __d_rehash(target,
> + d_hash(dentry->d_parent, dentry->d_name.hash));
> + }
>
> list_del(&dentry->d_u.d_child);
> list_del(&target->d_u.d_child);
> @@ -2613,6 +2621,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
> write_seqcount_end(&dentry->d_seq);
>
> dentry_unlock_parents_for_move(dentry, target);
> + if (exchange)
> + fsnotify_d_move(target);
> spin_unlock(&target->d_lock);
> fsnotify_d_move(dentry);
> spin_unlock(&dentry->d_lock);
> @@ -2630,11 +2640,31 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
> void d_move(struct dentry *dentry, struct dentry *target)
> {
> write_seqlock(&rename_lock);
> - __d_move(dentry, target);
> + __d_move(dentry, target, false);
> write_sequnlock(&rename_lock);
> }
> EXPORT_SYMBOL(d_move);
>
> +/*
> + * d_exchange - exchange two dentries
> + * @dentry1: first dentry
> + * @dentry2: second dentry
> + */
> +void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
> +{
> + write_seqlock(&rename_lock);
> +
> + WARN_ON(!dentry1->d_inode);
> + WARN_ON(!dentry2->d_inode);
> + WARN_ON(IS_ROOT(dentry1));
> + WARN_ON(IS_ROOT(dentry2));
> +
> + __d_move(dentry1, dentry2, true);
> +
> + write_sequnlock(&rename_lock);
> +}
> +
> +
> /**
> * d_ancestor - search for an ancestor
> * @p1: ancestor dentry
> @@ -2682,7 +2712,7 @@ static struct dentry *__d_unalias(struct inode *inode,
> m2 = &alias->d_parent->d_inode->i_mutex;
> out_unalias:
> if (likely(!d_mountpoint(alias))) {
> - __d_move(alias, dentry);
> + __d_move(alias, dentry, false);
> ret = alias;
> }
> out_err:
> diff --git a/fs/namei.c b/fs/namei.c
> index 7ec6a12..55700b3 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3963,14 +3963,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
> * ->i_mutex on parents, which works but leads to some truly excessive
> * locking].
> */
> -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> - struct inode *new_dir, struct dentry *new_dentry)
> +static int vfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
> + struct inode *new_dir, struct dentry *new_dentry,
> + unsigned int flags)
> {
> int error;
> const unsigned char *old_name;
> struct inode *source = old_dentry->d_inode;
> struct inode *target = new_dentry->d_inode;
> bool is_dir = S_ISDIR(source->i_mode);
> + bool new_is_dir = target ? S_ISDIR(target->i_mode) : false;
> + bool overwrite = !(flags & RENAME_EXCHANGE);
> + unsigned max_links = new_dir->i_sb->s_max_links;
>
> if (source == target)
> return 0;
> @@ -3981,74 +3985,116 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
>
> if (!target)
> error = may_create(new_dir, new_dentry);
> - else
> + else if (overwrite)
> error = may_delete(new_dir, new_dentry, is_dir);
> + else
> + error = may_delete(new_dir, new_dentry, new_is_dir);
> if (error)
> return error;
>
> - if (!old_dir->i_op->rename)
> + if (!old_dir->i_op->rename && !old_dir->i_op->rename2)
> return -EPERM;
>
> + if (flags && !old_dir->i_op->rename2)
> + return -EOPNOTSUPP;
> +
> /*
> * If we are going to change the parent - check write permissions,
> * we'll need to flip '..'.
> */
> - if (is_dir && new_dir != old_dir) {
> - error = inode_permission(source, MAY_WRITE);
> - if (error)
> - return error;
> + if (new_dir != old_dir) {
> + if (is_dir) {
> + error = inode_permission(source, MAY_WRITE);
> + if (error)
> + return error;
> + }
> + if (!overwrite && new_is_dir) {
> + error = inode_permission(target, MAY_WRITE);
> + if (error)
> + return error;
> + }
> }
>
> error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
> if (error)
> return error;
>
> + if (!overwrite) {
> + error = security_inode_rename(new_dir, new_dentry,
> + old_dir, old_dentry);
> + if (error)
> + return error;
> + }
> +
> old_name = fsnotify_oldname_init(old_dentry->d_name.name);
> dget(new_dentry);
> - if (target)
> + if (overwrite && target)
> mutex_lock(&target->i_mutex);
>
> error = -EBUSY;
> if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
> goto out;
>
> - if (is_dir) {
> - unsigned max_links = new_dir->i_sb->s_max_links;
> -
> + if (max_links && new_dir != old_dir) {
> error = -EMLINK;
> - if (max_links && !target && new_dir != old_dir &&
> - new_dir->i_nlink >= max_links)
> + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
> goto out;
> + if (!overwrite && !is_dir && new_is_dir &&
> + old_dir->i_nlink > max_links)
> + goto out;
> + }
> +
> + if (overwrite && is_dir && target)
> + shrink_dcache_parent(new_dentry);
>
> - if (target)
> - shrink_dcache_parent(new_dentry);
> + if (old_dir->i_op->rename2) {
> + error = old_dir->i_op->rename2(old_dir, old_dentry,
> + new_dir, new_dentry, flags);
> + } else {
> + error = old_dir->i_op->rename(old_dir, old_dentry,
> + new_dir, new_dentry);
> }
>
> - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
> if (error)
> goto out;
>
> - if (target) {
> + if (overwrite && target) {
> if (is_dir)
> target->i_flags |= S_DEAD;
> dont_mount(new_dentry);
> }
> - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
> - d_move(old_dentry, new_dentry);
> + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
> + if (overwrite)
> + d_move(old_dentry, new_dentry);
> + else
> + d_exchange(old_dentry, new_dentry);
> + }
> out:
> - if (target)
> + if (overwrite && target)
> mutex_unlock(&target->i_mutex);
> dput(new_dentry);
> - if (!error)
> + if (!error) {
> fsnotify_move(old_dir, new_dir, old_name, is_dir,
> - target, old_dentry);
> + overwrite ? target : NULL, old_dentry);
> + if (!overwrite) {
> + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
> + new_is_dir, NULL, new_dentry);
> + }
> + }
> fsnotify_oldname_free(old_name);
>
> return error;
> }
>
> -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
> - int, newdfd, const char __user *, newname)
> +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> + struct inode *new_dir, struct dentry *new_dentry)
> +{
> + return vfs_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
> +}
> +
> +
> +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
> + int, newdfd, const char __user *, newname, unsigned int, flags)
> {
> struct dentry *old_dir, *new_dir;
> struct dentry *old_dentry, *new_dentry;
> @@ -4058,7 +4104,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
> struct filename *to;
> unsigned int lookup_flags = 0;
> bool should_retry = false;
> + bool overwrite = !(flags & RENAME_EXCHANGE);
> int error;
> +
> + error = -EOPNOTSUPP;
> + if (flags & ~RENAME_EXCHANGE)
> + goto exit;
> +
> retry:
> from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
> if (IS_ERR(from)) {
> @@ -4091,7 +4143,8 @@ retry:
>
> oldnd.flags &= ~LOOKUP_PARENT;
> newnd.flags &= ~LOOKUP_PARENT;
> - newnd.flags |= LOOKUP_RENAME_TARGET;
> + if (overwrite)
> + newnd.flags |= LOOKUP_RENAME_TARGET;
>
> trap = lock_rename(new_dir, old_dir);
>
> @@ -4108,7 +4161,7 @@ retry:
> error = -ENOTDIR;
> if (oldnd.last.name[oldnd.last.len])
> goto exit4;
> - if (newnd.last.name[newnd.last.len])
> + if (overwrite && newnd.last.name[newnd.last.len])
> goto exit4;
> }
> /* source should not be ancestor of target */
> @@ -4119,8 +4172,19 @@ retry:
> error = PTR_ERR(new_dentry);
> if (IS_ERR(new_dentry))
> goto exit4;
> + if (!overwrite) {
> + error = -ENOENT;
> + if (!new_dentry->d_inode)
> + goto exit4;
> +
> + if (!S_ISDIR(new_dentry->d_inode->i_mode)) {
> + error = -ENOTDIR;
> + if (newnd.last.name[newnd.last.len])
> + goto exit4;
> + }
> + }
> /* target should not be an ancestor of source */
> - error = -ENOTEMPTY;
> + error = overwrite ? -ENOTEMPTY : -EINVAL;
> if (new_dentry == trap)
> goto exit5;
>
> @@ -4128,8 +4192,15 @@ retry:
> &newnd.path, new_dentry);
> if (error)
> goto exit5;
> - error = vfs_rename(old_dir->d_inode, old_dentry,
> - new_dir->d_inode, new_dentry);
> + if (!overwrite) {
> + error = security_path_rename(&newnd.path, new_dentry,
> + &oldnd.path, old_dentry);
> + if (error)
> + goto exit5;
> + }
> +
> + error = vfs_rename2(old_dir->d_inode, old_dentry,
> + new_dir->d_inode, new_dentry, flags);
> exit5:
> dput(new_dentry);
> exit4:
> @@ -4154,9 +4225,15 @@ exit:
> return error;
> }
>
> +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
> + int, newdfd, const char __user *, newname)
> +{
> + return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
> +}
> +
> SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
> {
> - return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
> + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
> }
>
> int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index 59066e0..ce5ebed 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -297,6 +297,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *);
>
> /* used for rename() and baskets */
> extern void d_move(struct dentry *, struct dentry *);
> +extern void d_exchange(struct dentry *, struct dentry *);
> extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
>
> /* appendix may either be NULL or be used for transname suffixes */
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3f40547..71c6cf9 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1572,6 +1572,8 @@ struct inode_operations {
> int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
> int (*rename) (struct inode *, struct dentry *,
> struct inode *, struct dentry *);
> + int (*rename2) (struct inode *, struct dentry *,
> + struct inode *, struct dentry *, unsigned int);
> int (*setattr) (struct dentry *, struct iattr *);
> int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
> int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 6c28b61..ebdafb6 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -35,6 +35,8 @@
> #define SEEK_HOLE 4 /* seek to the next hole */
> #define SEEK_MAX SEEK_HOLE
>
> +#define RENAME_EXCHANGE (1 << 0) /* Exchange source and dest */
> +
> struct fstrim_range {
> __u64 start;
> __u64 len;
> --
> 1.8.1.4
>
--
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/