linux-next: manual merge of the zonefs tree with the vfs-idmapping tree

From: Stephen Rothwell
Date: Mon Jan 23 2023 - 17:00:07 EST


Hi all,

Today's linux-next merge of the zonefs tree got a conflict in:

fs/zonefs/super.c

between commits:

c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
f861646a6562 ("quota: port to mnt_idmap")

from the vfs-idmapping tree and commits:

4008e2a0b01a ("zonefs: Reorganize code")
d207794ababe ("zonefs: Dynamically create file inodes when needed")

from the zonefs tree.

This is a bit of a mess :-(

I fixed it up (I think - see below) and can carry the fix as necessary.
This is now fixed as far as linux-next is concerned, but any non-trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

--
Cheers,
Stephen Rothwell

diff --cc fs/zonefs/super.c
index 72ef97320b99,010b53545e5b..000000000000
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@@ -526,85 -402,145 +402,145 @@@ void __zonefs_io_error(struct inode *in
memalloc_noio_restore(noio_flag);
}

- static void zonefs_io_error(struct inode *inode, bool write)
+ static struct kmem_cache *zonefs_inode_cachep;
+
+ static struct inode *zonefs_alloc_inode(struct super_block *sb)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct zonefs_inode_info *zi;
+
+ zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
+ if (!zi)
+ return NULL;
+
+ inode_init_once(&zi->i_vnode);
+ mutex_init(&zi->i_truncate_mutex);
+ zi->i_wr_refcnt = 0;

- mutex_lock(&zi->i_truncate_mutex);
- __zonefs_io_error(inode, write);
- mutex_unlock(&zi->i_truncate_mutex);
+ return &zi->i_vnode;
}

- static int zonefs_file_truncate(struct inode *inode, loff_t isize)
+ static void zonefs_free_inode(struct inode *inode)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t old_isize;
- enum req_op op;
- int ret = 0;
+ kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
+ }

- /*
- * Only sequential zone files can be truncated and truncation is allowed
- * only down to a 0 size, which is equivalent to a zone reset, and to
- * the maximum file size, which is equivalent to a zone finish.
- */
- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return -EPERM;
+ /*
+ * File system stat.
+ */
+ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ {
+ struct super_block *sb = dentry->d_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ enum zonefs_ztype t;

- if (!isize)
- op = REQ_OP_ZONE_RESET;
- else if (isize == zi->i_max_size)
- op = REQ_OP_ZONE_FINISH;
+ buf->f_type = ZONEFS_MAGIC;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_namelen = ZONEFS_NAME_MAX;
+
+ spin_lock(&sbi->s_lock);
+
+ buf->f_blocks = sbi->s_blocks;
+ if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
+ buf->f_bfree = 0;
else
- return -EPERM;
+ buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
+ buf->f_bavail = buf->f_bfree;
+
+ for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+ if (sbi->s_zgroup[t].g_nr_zones)
+ buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
+ }
+ buf->f_ffree = 0;

- inode_dio_wait(inode);
+ spin_unlock(&sbi->s_lock);

- /* Serialize against page faults */
- filemap_invalidate_lock(inode->i_mapping);
+ buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);

- /* Serialize against zonefs_iomap_begin() */
- mutex_lock(&zi->i_truncate_mutex);
+ return 0;
+ }

- old_isize = i_size_read(inode);
- if (isize == old_isize)
- goto unlock;
+ enum {
+ Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
+ Opt_explicit_open, Opt_err,
+ };

- ret = zonefs_zone_mgmt(inode, op);
- if (ret)
- goto unlock;
+ static const match_table_t tokens = {
+ { Opt_errors_ro, "errors=remount-ro"},
+ { Opt_errors_zro, "errors=zone-ro"},
+ { Opt_errors_zol, "errors=zone-offline"},
+ { Opt_errors_repair, "errors=repair"},
+ { Opt_explicit_open, "explicit-open" },
+ { Opt_err, NULL}
+ };

- /*
- * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
- * take care of open zones.
- */
- if (zi->i_flags & ZONEFS_ZONE_OPEN) {
- /*
- * Truncating a zone to EMPTY or FULL is the equivalent of
- * closing the zone. For a truncation to 0, we need to
- * re-open the zone to ensure new writes can be processed.
- * For a truncation to the maximum file size, the zone is
- * closed and writes cannot be accepted anymore, so clear
- * the open flag.
- */
- if (!isize)
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
- else
- zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+ static int zonefs_parse_options(struct super_block *sb, char *options)
+ {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_errors_ro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
+ break;
+ case Opt_errors_zro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
+ break;
+ case Opt_errors_zol:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
+ break;
+ case Opt_errors_repair:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
+ break;
+ case Opt_explicit_open:
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ break;
+ default:
+ return -EINVAL;
+ }
}

- zonefs_update_stats(inode, isize);
- truncate_setsize(inode, isize);
- zi->i_wpoffset = isize;
- zonefs_account_active(inode);
+ return 0;
+ }
+
+ static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
+ {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
+
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
+ seq_puts(seq, ",errors=remount-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
+ seq_puts(seq, ",errors=zone-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
+ seq_puts(seq, ",errors=zone-offline");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
+ seq_puts(seq, ",errors=repair");

- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
- filemap_invalidate_unlock(inode->i_mapping);
+ return 0;
+ }

- return ret;
+ static int zonefs_remount(struct super_block *sb, int *flags, char *data)
+ {
+ sync_filesystem(sb);
+
+ return zonefs_parse_options(sb, data);
}

-static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+static int zonefs_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
@@@ -641,8 -577,16 +577,16 @@@
return ret;
}

- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);

+ if (S_ISREG(inode->i_mode)) {
+ struct zonefs_zone *z = zonefs_inode_zone(inode);
+
+ z->z_mode = inode->i_mode;
+ z->z_uid = inode->i_uid;
+ z->z_gid = inode->i_gid;
+ }
+
return 0;
}

@@@ -650,753 -594,194 +594,194 @@@ static const struct inode_operations zo
.setattr = zonefs_inode_setattr,
};

- static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
+ static long zonefs_fname_to_fno(const struct qstr *fname)
{
- struct inode *inode = file_inode(file);
- int ret = 0;
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return -EPERM;
+ const char *name = fname->name;
+ unsigned int len = fname->len;
+ long fno = 0, shift = 1;
+ const char *rname;
+ char c = *name;
+ unsigned int i;

/*
- * Since only direct writes are allowed in sequential files, page cache
- * flush is needed only for conventional zone files.
+ * File names are always a base-10 number string without any
+ * leading 0s.
*/
- if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
- ret = file_write_and_wait_range(file, start, end);
- if (!ret)
- ret = blkdev_issue_flush(inode->i_sb->s_bdev);
+ if (!isdigit(c))
+ return -ENOENT;

- if (ret)
- zonefs_io_error(inode, true);
+ if (len > 1 && c == '0')
+ return -ENOENT;

- return ret;
- }
+ if (len == 1)
+ return c - '0';

- static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
- {
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- vm_fault_t ret;
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return VM_FAULT_SIGBUS;
-
- /*
- * Sanity check: only conventional zone files can have shared
- * writeable mappings.
- */
- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
- return VM_FAULT_NOPAGE;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
-
- /* Serialize against truncates */
- filemap_invalidate_lock_shared(inode->i_mapping);
- ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
- filemap_invalidate_unlock_shared(inode->i_mapping);
-
- sb_end_pagefault(inode->i_sb);
- return ret;
- }
-
- static const struct vm_operations_struct zonefs_file_vm_ops = {
- .fault = filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = zonefs_filemap_page_mkwrite,
- };
-
- static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
- {
- /*
- * Conventional zones accept random writes, so their files can support
- * shared writable mappings. For sequential zone files, only read
- * mappings are possible since there are no guarantees for write
- * ordering between msync() and page cache writeback.
- */
- if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ &&
- (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
- return -EINVAL;
-
- file_accessed(file);
- vma->vm_ops = &zonefs_file_vm_ops;
-
- return 0;
- }
-
- static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
- {
- loff_t isize = i_size_read(file_inode(file));
-
- /*
- * Seeks are limited to below the zone size for conventional zones
- * and below the zone write pointer for sequential zones. In both
- * cases, this limit is the inode size.
- */
- return generic_file_llseek_size(file, offset, whence, isize, isize);
- }
-
- static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
- int error, unsigned int flags)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
-
- if (error) {
- zonefs_io_error(inode, true);
- return error;
- }
-
- if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) {
- /*
- * Note that we may be seeing completions out of order,
- * but that is not a problem since a write completed
- * successfully necessarily means that all preceding writes
- * were also successful. So we can safely increase the inode
- * size to the write end location.
- */
- mutex_lock(&zi->i_truncate_mutex);
- if (i_size_read(inode) < iocb->ki_pos + size) {
- zonefs_update_stats(inode, iocb->ki_pos + size);
- zonefs_i_size_write(inode, iocb->ki_pos + size);
- }
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- return 0;
- }
-
- static const struct iomap_dio_ops zonefs_write_dio_ops = {
- .end_io = zonefs_file_write_dio_end_io,
- };
-
- static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int max = bdev_max_zone_append_sectors(bdev);
- struct bio *bio;
- ssize_t size;
- int nr_pages;
- ssize_t ret;
-
- max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
- iov_iter_truncate(from, max);
-
- nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
- if (!nr_pages)
- return 0;
-
- bio = bio_alloc(bdev, nr_pages,
- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
- bio->bi_iter.bi_sector = zi->i_zsector;
- bio->bi_ioprio = iocb->ki_ioprio;
- if (iocb_is_dsync(iocb))
- bio->bi_opf |= REQ_FUA;
-
- ret = bio_iov_iter_get_pages(bio, from);
- if (unlikely(ret))
- goto out_release;
-
- size = bio->bi_iter.bi_size;
- task_io_account_write(size);
-
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(bio, iocb);
-
- ret = submit_bio_wait(bio);
-
- /*
- * If the file zone was written underneath the file system, the zone
- * write pointer may not be where we expect it to be, but the zone
- * append write can still succeed. So check manually that we wrote where
- * we intended to, that is, at zi->i_wpoffset.
- */
- if (!ret) {
- sector_t wpsector =
- zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
-
- if (bio->bi_iter.bi_sector != wpsector) {
- zonefs_warn(inode->i_sb,
- "Corrupted write pointer %llu for zone at %llu\n",
- wpsector, zi->i_zsector);
- ret = -EIO;
- }
- }
-
- zonefs_file_write_dio_end_io(iocb, size, ret, 0);
- trace_zonefs_file_dio_append(inode, size, ret);
-
- out_release:
- bio_release_pages(bio, false);
- bio_put(bio);
-
- if (ret >= 0) {
- iocb->ki_pos += size;
- return size;
- }
-
- return ret;
- }
-
- /*
- * Do not exceed the LFS limits nor the file zone size. If pos is under the
- * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
- */
- static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
- loff_t count)
- {
- struct inode *inode = file_inode(file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t limit = rlimit(RLIMIT_FSIZE);
- loff_t max_size = zi->i_max_size;
-
- if (limit != RLIM_INFINITY) {
- if (pos >= limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- count = min(count, limit - pos);
- }
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = min_t(loff_t, MAX_NON_LFS, max_size);
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
-
- return min(count, max_size - pos);
- }
-
- static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
- {
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t count;
-
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
-
- if (!iov_iter_count(from))
- return 0;
-
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
-
- if (iocb->ki_flags & IOCB_APPEND) {
- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return -EINVAL;
- mutex_lock(&zi->i_truncate_mutex);
- iocb->ki_pos = zi->i_wpoffset;
- mutex_unlock(&zi->i_truncate_mutex);
+ for (i = 0, rname = name + len - 1; i < len; i++, rname--) {
+ c = *rname;
+ if (!isdigit(c))
+ return -ENOENT;
+ fno += (c - '0') * shift;
+ shift *= 10;
}

- count = zonefs_write_check_limits(file, iocb->ki_pos,
- iov_iter_count(from));
- if (count < 0)
- return count;
-
- iov_iter_truncate(from, count);
- return iov_iter_count(from);
- }
-
- /*
- * Handle direct writes. For sequential zone files, this is the only possible
- * write path. For these files, check that the user is issuing writes
- * sequentially from the end of the file. This code assumes that the block layer
- * delivers write requests to the device in sequential order. This is always the
- * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
- * elevator feature is being used (e.g. mq-deadline). The block layer always
- * automatically select such an elevator for zoned block devices during the
- * device initialization.
- */
- static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
- bool sync = is_sync_kiocb(iocb);
- bool append = false;
- ssize_t ret, count;
-
- /*
- * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
- * as this can cause write reordering (e.g. the first aio gets EAGAIN
- * on the inode lock but the second goes through but is now unaligned).
- */
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
- (iocb->ki_flags & IOCB_NOWAIT))
- return -EOPNOTSUPP;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock(inode))
- return -EAGAIN;
- } else {
- inode_lock(inode);
- }
-
- count = zonefs_write_checks(iocb, from);
- if (count <= 0) {
- ret = count;
- goto inode_unlock;
- }
-
- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
- ret = -EINVAL;
- goto inode_unlock;
- }
-
- /* Enforce sequential writes (append only) in sequential zones */
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
- mutex_lock(&zi->i_truncate_mutex);
- if (iocb->ki_pos != zi->i_wpoffset) {
- mutex_unlock(&zi->i_truncate_mutex);
- ret = -EINVAL;
- goto inode_unlock;
- }
- mutex_unlock(&zi->i_truncate_mutex);
- append = sync;
- }
-
- if (append)
- ret = zonefs_file_dio_append(iocb, from);
- else
- ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
- &zonefs_write_dio_ops, 0, NULL, 0);
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
- (ret > 0 || ret == -EIOCBQUEUED)) {
- if (ret > 0)
- count = ret;
-
- /*
- * Update the zone write pointer offset assuming the write
- * operation succeeded. If it did not, the error recovery path
- * will correct it. Also do active seq file accounting.
- */
- mutex_lock(&zi->i_truncate_mutex);
- zi->i_wpoffset += count;
- zonefs_account_active(inode);
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- inode_unlock:
- inode_unlock(inode);
-
- return ret;
- }
-
- static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
- struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- ssize_t ret;
-
- /*
- * Direct IO writes are mandatory for sequential zone files so that the
- * write IO issuing order is preserved.
- */
- if (zi->i_ztype != ZONEFS_ZTYPE_CNV)
- return -EIO;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock(inode))
- return -EAGAIN;
- } else {
- inode_lock(inode);
- }
-
- ret = zonefs_write_checks(iocb, from);
- if (ret <= 0)
- goto inode_unlock;
-
- ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
- if (ret > 0)
- iocb->ki_pos += ret;
- else if (ret == -EIO)
- zonefs_io_error(inode, true);
-
- inode_unlock:
- inode_unlock(inode);
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
-
- return ret;
+ return fno;
}

- static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ static struct inode *zonefs_get_file_inode(struct inode *dir,
+ struct dentry *dentry)
{
- struct inode *inode = file_inode(iocb->ki_filp);
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return -EPERM;
-
- if (sb_rdonly(inode->i_sb))
- return -EROFS;
-
- /* Write operations beyond the zone size are not allowed */
- if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
- return -EFBIG;
-
- if (iocb->ki_flags & IOCB_DIRECT) {
- ssize_t ret = zonefs_file_dio_write(iocb, from);
- if (ret != -ENOTBLK)
- return ret;
- }
-
- return zonefs_file_buffered_write(iocb, from);
- }
-
- static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
- int error, unsigned int flags)
- {
- if (error) {
- zonefs_io_error(file_inode(iocb->ki_filp), false);
- return error;
- }
-
- return 0;
- }
-
- static const struct iomap_dio_ops zonefs_read_dio_ops = {
- .end_io = zonefs_file_read_dio_end_io,
- };
-
- static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
- loff_t isize;
- ssize_t ret;
-
- /* Offline zones cannot be read */
- if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
- return -EPERM;
-
- if (iocb->ki_pos >= zi->i_max_size)
- return 0;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock_shared(inode))
- return -EAGAIN;
- } else {
- inode_lock_shared(inode);
- }
+ struct zonefs_zone_group *zgroup = dir->i_private;
+ struct super_block *sb = dir->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct zonefs_zone *z;
+ struct inode *inode;
+ ino_t ino;
+ long fno;

- /* Limit read operations to written data */
- mutex_lock(&zi->i_truncate_mutex);
- isize = i_size_read(inode);
- if (iocb->ki_pos >= isize) {
- mutex_unlock(&zi->i_truncate_mutex);
- ret = 0;
- goto inode_unlock;
- }
- iov_iter_truncate(to, isize - iocb->ki_pos);
- mutex_unlock(&zi->i_truncate_mutex);
+ /* Get the file number from the file name */
+ fno = zonefs_fname_to_fno(&dentry->d_name);
+ if (fno < 0)
+ return ERR_PTR(fno);

- if (iocb->ki_flags & IOCB_DIRECT) {
- size_t count = iov_iter_count(to);
+ if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones)
+ return ERR_PTR(-ENOENT);

- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
- ret = -EINVAL;
- goto inode_unlock;
- }
- file_accessed(iocb->ki_filp);
- ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
- &zonefs_read_dio_ops, 0, NULL, 0);
- } else {
- ret = generic_file_read_iter(iocb, to);
- if (ret == -EIO)
- zonefs_io_error(inode, false);
+ z = &zgroup->g_zones[fno];
+ ino = z->z_sector >> sbi->s_zone_sectors_shift;
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW)) {
+ WARN_ON_ONCE(inode->i_private != z);
+ return inode;
}

- inode_unlock:
- inode_unlock_shared(inode);
-
- return ret;
- }
+ inode->i_ino = ino;
+ inode->i_mode = z->z_mode;
+ inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+ inode->i_uid = z->z_uid;
+ inode->i_gid = z->z_gid;
+ inode->i_size = z->z_wpoffset;
+ inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
+ inode->i_private = z;

- /*
- * Write open accounting is done only for sequential files.
- */
- static inline bool zonefs_seq_file_need_wro(struct inode *inode,
- struct file *file)
- {
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ inode->i_op = &zonefs_file_inode_operations;
+ inode->i_fop = &zonefs_file_operations;
+ inode->i_mapping->a_ops = &zonefs_file_aops;

- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return false;
+ /* Update the inode access rights depending on the zone condition */
+ zonefs_inode_update_mode(inode);

- if (!(file->f_mode & FMODE_WRITE))
- return false;
+ unlock_new_inode(inode);

- return true;
+ return inode;
}

- static int zonefs_seq_file_write_open(struct inode *inode)
+ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
+ enum zonefs_ztype ztype)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- int ret = 0;
-
- mutex_lock(&zi->i_truncate_mutex);
-
- if (!zi->i_wr_refcnt) {
- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
- unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
-
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
-
- if (sbi->s_max_wro_seq_files
- && wro > sbi->s_max_wro_seq_files) {
- atomic_dec(&sbi->s_wro_seq_files);
- ret = -EBUSY;
- goto unlock;
- }
+ struct inode *root = d_inode(sb->s_root);
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct inode *inode;
+ ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;

- if (i_size_read(inode) < zi->i_max_size) {
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
- if (ret) {
- atomic_dec(&sbi->s_wro_seq_files);
- goto unlock;
- }
- zi->i_flags |= ZONEFS_ZONE_OPEN;
- zonefs_account_active(inode);
- }
- }
- }
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ inode->i_ino = ino;
- inode_init_owner(&init_user_ns, inode, root, S_IFDIR | 0555);
++ inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
+ inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
+ inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime;
+ inode->i_private = &sbi->s_zgroup[ztype];
+ set_nlink(inode, 2);

- zi->i_wr_refcnt++;
+ inode->i_op = &zonefs_dir_inode_operations;
+ inode->i_fop = &zonefs_dir_operations;

- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
+ unlock_new_inode(inode);

- return ret;
+ return inode;
}

- static int zonefs_file_open(struct inode *inode, struct file *file)
- {
- int ret;
-
- ret = generic_file_open(inode, file);
- if (ret)
- return ret;
-
- if (zonefs_seq_file_need_wro(inode, file))
- return zonefs_seq_file_write_open(inode);
-
- return 0;
- }

- static void zonefs_seq_file_write_close(struct inode *inode)
+ static struct inode *zonefs_get_dir_inode(struct inode *dir,
+ struct dentry *dentry)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
+ struct super_block *sb = dir->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- int ret = 0;
-
- mutex_lock(&zi->i_truncate_mutex);
-
- zi->i_wr_refcnt--;
- if (zi->i_wr_refcnt)
- goto unlock;
+ const char *name = dentry->d_name.name;
+ enum zonefs_ztype ztype;

/*
- * The file zone may not be open anymore (e.g. the file was truncated to
- * its maximum size or it was fully written). For this case, we only
- * need to decrement the write open count.
+ * We only need to check for the "seq" directory and
+ * the "cnv" directory if we have conventional zones.
*/
- if (zi->i_flags & ZONEFS_ZONE_OPEN) {
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
- if (ret) {
- __zonefs_io_error(inode, false);
- /*
- * Leaving zones explicitly open may lead to a state
- * where most zones cannot be written (zone resources
- * exhausted). So take preventive action by remounting
- * read-only.
- */
- if (zi->i_flags & ZONEFS_ZONE_OPEN &&
- !(sb->s_flags & SB_RDONLY)) {
- zonefs_warn(sb,
- "closing zone at %llu failed %d\n",
- zi->i_zsector, ret);
- zonefs_warn(sb,
- "remounting filesystem read-only\n");
- sb->s_flags |= SB_RDONLY;
- }
- goto unlock;
- }
+ if (dentry->d_name.len != 3)
+ return ERR_PTR(-ENOENT);

- zi->i_flags &= ~ZONEFS_ZONE_OPEN;
- zonefs_account_active(inode);
+ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
+ if (sbi->s_zgroup[ztype].g_nr_zones &&
+ memcmp(name, zonefs_zgroup_name(ztype), 3) == 0)
+ break;
}
+ if (ztype == ZONEFS_ZTYPE_MAX)
+ return ERR_PTR(-ENOENT);

- atomic_dec(&sbi->s_wro_seq_files);
-
- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- static int zonefs_file_release(struct inode *inode, struct file *file)
- {
- /*
- * If we explicitly open a zone we must close it again as well, but the
- * zone management operation can fail (either due to an IO error or as
- * the zone has gone offline or read-only). Make sure we don't fail the
- * close(2) for user-space.
- */
- if (zonefs_seq_file_need_wro(inode, file))
- zonefs_seq_file_write_close(inode);
-
- return 0;
+ return zonefs_get_zgroup_inode(sb, ztype);
}

- static const struct file_operations zonefs_file_operations = {
- .open = zonefs_file_open,
- .release = zonefs_file_release,
- .fsync = zonefs_file_fsync,
- .mmap = zonefs_file_mmap,
- .llseek = zonefs_file_llseek,
- .read_iter = zonefs_file_read_iter,
- .write_iter = zonefs_file_write_iter,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .iopoll = iocb_bio_iopoll,
- };
-
- static struct kmem_cache *zonefs_inode_cachep;
-
- static struct inode *zonefs_alloc_inode(struct super_block *sb)
+ static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct zonefs_inode_info *zi;
-
- zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
- if (!zi)
- return NULL;
-
- inode_init_once(&zi->i_vnode);
- mutex_init(&zi->i_truncate_mutex);
- zi->i_wr_refcnt = 0;
- zi->i_flags = 0;
-
- return &zi->i_vnode;
- }
-
- static void zonefs_free_inode(struct inode *inode)
- {
- kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
- }
-
- /*
- * File system stat.
- */
- static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
- {
- struct super_block *sb = dentry->d_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- enum zonefs_ztype t;
+ struct inode *inode;

- buf->f_type = ZONEFS_MAGIC;
- buf->f_bsize = sb->s_blocksize;
- buf->f_namelen = ZONEFS_NAME_MAX;
+ if (dentry->d_name.len > ZONEFS_NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);

- spin_lock(&sbi->s_lock);
-
- buf->f_blocks = sbi->s_blocks;
- if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
- buf->f_bfree = 0;
+ if (dir == d_inode(dir->i_sb->s_root))
+ inode = zonefs_get_dir_inode(dir, dentry);
else
- buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
- buf->f_bavail = buf->f_bfree;
+ inode = zonefs_get_file_inode(dir, dentry);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);

- for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
- if (sbi->s_nr_files[t])
- buf->f_files += sbi->s_nr_files[t] + 1;
- }
- buf->f_ffree = 0;
-
- spin_unlock(&sbi->s_lock);
-
- buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
-
- return 0;
+ return d_splice_alias(inode, dentry);
}

- enum {
- Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
- Opt_explicit_open, Opt_err,
- };
-
- static const match_table_t tokens = {
- { Opt_errors_ro, "errors=remount-ro"},
- { Opt_errors_zro, "errors=zone-ro"},
- { Opt_errors_zol, "errors=zone-offline"},
- { Opt_errors_repair, "errors=repair"},
- { Opt_explicit_open, "explicit-open" },
- { Opt_err, NULL}
- };
-
- static int zonefs_parse_options(struct super_block *sb, char *options)
+ static int zonefs_readdir_root(struct file *file, struct dir_context *ctx)
{
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- substring_t args[MAX_OPT_ARGS];
- char *p;
+ enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV;
+ ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1;

- if (!options)
+ if (ctx->pos >= inode->i_size)
return 0;

- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ if (!dir_emit_dots(file, ctx))
+ return 0;

- if (!*p)
- continue;
+ if (ctx->pos == 2) {
+ if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones)
+ ztype = ZONEFS_ZTYPE_SEQ;

- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_errors_ro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
- break;
- case Opt_errors_zro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
- break;
- case Opt_errors_zol:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
- break;
- case Opt_errors_repair:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
- break;
- case Opt_explicit_open:
- sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
- break;
- default:
- return -EINVAL;
- }
+ if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
+ base_ino + ztype, DT_DIR))
+ return 0;
+ ctx->pos++;
}

- return 0;
- }
-
- static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
- {
- struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
-
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
- seq_puts(seq, ",errors=remount-ro");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
- seq_puts(seq, ",errors=zone-ro");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
- seq_puts(seq, ",errors=zone-offline");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
- seq_puts(seq, ",errors=repair");
+ if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) {
+ ztype = ZONEFS_ZTYPE_SEQ;
+ if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
+ base_ino + ztype, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }

return 0;
}

Attachment: pgpcUAaRgH8z4.pgp
Description: OpenPGP digital signature