[RFC PATCH 44/76] cachefiles: Implement read and write parts of new I/O API

From: David Howells
Date: Fri Nov 20 2020 - 10:14:42 EST


Implement writing into the cache and reading back from the cache inside
cachefiles using asynchronous direct I/O to or from the specified iterator.
The size and position of each request should be aligned to the reported
dio_block_size.

Errors and completion are reported by callback.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

fs/cachefiles/interface.c | 20 ++-
fs/cachefiles/internal.h | 2
fs/cachefiles/io.c | 270 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 277 insertions(+), 15 deletions(-)

diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 0e3d5b5ffc55..c14e2b4f5b24 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -12,8 +12,6 @@
#include "internal.h"

static int cachefiles_attr_changed(struct cachefiles_object *object);
-static void cachefiles_put_object(struct fscache_object *_object,
- enum fscache_obj_ref_trace why);

/*
* Allocate an object record for a cookie lookup and prepare the lookup data.
@@ -160,7 +158,8 @@ static void cachefiles_update_object(struct fscache_object *_object)
struct cachefiles_object *object;
struct cachefiles_cache *cache;
const struct cred *saved_cred;
- loff_t object_size;
+ struct inode *inode;
+ loff_t object_size, i_size;
int ret;

_enter("{OBJ%x}", _object->debug_id);
@@ -172,12 +171,14 @@ static void cachefiles_update_object(struct fscache_object *_object)
cachefiles_begin_secure(cache, &saved_cred);

object_size = object->fscache.cookie->object_size;
- if (i_size_read(d_inode(object->dentry)) > object_size) {
+ inode = d_inode(object->dentry);
+ i_size = i_size_read(inode);
+ if (i_size > object_size) {
struct path path = {
.mnt = cache->mnt,
.dentry = object->dentry
};
- _debug("trunc %llx -> %llx", i_size_read(d_inode(object->dentry)), object_size);
+ _debug("trunc %llx -> %llx", i_size, object_size);
ret = vfs_truncate(&path, object_size);
if (ret < 0) {
cachefiles_io_error_obj(object, "Trunc-to-size failed");
@@ -186,8 +187,9 @@ static void cachefiles_update_object(struct fscache_object *_object)
}

object_size = round_up(object_size, CACHEFILES_DIO_BLOCK_SIZE);
- _debug("trunc %llx -> %llx", i_size_read(d_inode(object->dentry)), object_size);
- if (i_size_read(d_inode(object->dentry)) < object_size) {
+ i_size = i_size_read(inode);
+ _debug("trunc %llx -> %llx", i_size, object_size);
+ if (i_size < object_size) {
ret = vfs_truncate(&path, object_size);
if (ret < 0) {
cachefiles_io_error_obj(object, "Trunc-to-dio-size failed");
@@ -283,8 +285,8 @@ static void cachefiles_drop_object(struct fscache_object *_object,
/*
* dispose of a reference to an object
*/
-static void cachefiles_put_object(struct fscache_object *_object,
- enum fscache_obj_ref_trace why)
+void cachefiles_put_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why)
{
struct cachefiles_object *object;
struct fscache_cache *cache;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index e9f45d5053b1..24a8aed2600d 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -150,6 +150,8 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
extern const struct fscache_cache_ops cachefiles_cache_ops;
extern struct fscache_object *cachefiles_grab_object(struct fscache_object *_object,
enum fscache_obj_ref_trace why);
+extern void cachefiles_put_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why);

/*
* io.c
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index bf1930699636..4c66b9183dd6 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -10,9 +10,52 @@
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/xattr.h>
+#include <linux/sched/mm.h>
#include "internal.h"
#include <trace/events/fscache.h>

+struct cachefiles_kiocb { /* State for one cache read or write request */
+ struct kiocb iocb; /* Embedded kiocb; completion handlers recover the wrapper via container_of() */
+ refcount_t ki_refcnt; /* Set to 2 at submission; dropped with cachefiles_put_kiocb() */
+ loff_t start; /* Write only: start position handed to cachefiles_mark_content_map() */
+ union {
+ size_t skipped; /* Read only: bytes zero-filled ahead of the data actually read */
+ size_t len; /* Write only: number of bytes the request asked to write */
+ };
+ struct cachefiles_object *object; /* Object the I/O is for; a ref is held while the I/O runs */
+ fscache_io_terminated_t term_func; /* Optional completion callback (may be NULL) */
+ void *term_func_priv; /* Opaque first argument passed to term_func */
+};
+
+static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
+{ /* Drop one reference on @ki; the final put tears the request down. */
+ if (refcount_dec_and_test(&ki->ki_refcnt)) {
+ cachefiles_put_object(&ki->object->fscache, fscache_obj_put_ioreq); /* object ref taken at submission */
+ fput(ki->iocb.ki_filp); /* backing-file ref taken with get_file() */
+ kfree(ki);
+ }
+}
+
+/* Handle completion of a read from the cache: report the outcome through
+ * term_func (if one was supplied), then drop the I/O count and a request ref.
+ */
+static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+
+ _enter("%ld,%ld", ret, ret2);
+
+ if (ki->term_func) {
+ if (ret < 0)
+ ki->term_func(ki->term_func_priv, ret); /* pass the error through unchanged */
+ else
+ ki->term_func(ki->term_func_priv, ki->skipped + ret); /* count the zero-filled gap as well */
+ }
+
+ fscache_uncount_io_operation(ki->object->fscache.cookie);
+ cachefiles_put_kiocb(ki);
+}
+
/*
* Initiate a read from the cache.
*/
@@ -23,11 +66,142 @@ int cachefiles_read(struct fscache_op_resources *opr,
fscache_io_terminated_t term_func,
void *term_func_priv)
{
- fscache_wait_for_operation(opr, FSCACHE_WANT_READ);
+ struct cachefiles_object *object =
+ container_of(opr->object, struct cachefiles_object, fscache);
+ struct cachefiles_kiocb *ki;
+ struct file *file;
+ unsigned int old_nofs;
+ ssize_t ret = -ENOBUFS;
+ size_t len = iov_iter_count(iter), skipped = 0;
+
+ spin_lock(&object->fscache.lock);
+ file = get_file(object->backing_file);
+ spin_unlock(&object->fscache.lock);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file->f_inode));
+
+ __fscache_wait_for_operation(opr, FSCACHE_WANT_READ);
fscache_count_io_operation(opr->object->cookie);
+
+ /* If the caller asked us to seek for data before doing the read, then
+ * we should do that now. If we find a gap, we fill it with zeros.
+ */
+ if (seek_data) {
+ loff_t off = start_pos, off2;
+
+ off2 = vfs_llseek(file, off, SEEK_DATA);
+ if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
+ skipped = 0;
+ ret = off2;
+ goto presubmission_error;
+ }
+
+ if (off2 == -ENXIO || off2 >= start_pos + len) {
+ /* The region is beyond the EOF or there's no more data
+ * in the region, so clear the rest of the buffer and
+ * return success.
+ */
+ iov_iter_zero(len, iter);
+ skipped = len;
+ fscache_uncount_io_operation(object->fscache.cookie);
+ ret = 0;
+ goto presubmission_error;
+ }
+
+ skipped = off2 - off;
+ iov_iter_zero(skipped, iter);
+ }
+
+ ret = -ENOBUFS;
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki)
+ goto presubmission_error;
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos + skipped;
+ ki->iocb.ki_flags = IOCB_DIRECT;
+ ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->skipped = skipped;
+ ki->object = object;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_read_complete;
+
+ ret = rw_verify_area(READ, file, &ki->iocb.ki_pos, len - skipped);
+ if (ret < 0)
+ goto presubmission_error_free;
+
+ cachefiles_grab_object(&object->fscache, fscache_obj_get_ioreq);
+
+ old_nofs = memalloc_nofs_save();
+ ret = call_read_iter(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ fallthrough;
+ default:
+ cachefiles_read_complete(&ki->iocb, ret, 0);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+
+presubmission_error_free:
+ kfree(ki);
+presubmission_error:
+ fput(file);
if (term_func)
- term_func(term_func_priv, -ENODATA);
- return -ENODATA;
+ term_func(term_func_priv, ret < 0 ? ret : skipped);
+ return ret;
+}
+
+/* Handle completion of a write to the cache: release the freeze protection
+ * taken at submission, mark the content map and report through term_func.
+ */
+static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+ struct inode *inode = file_inode(ki->iocb.ki_filp);
+
+ _enter("%ld,%ld", ret, ret2);
+
+ /* Tell lockdep we inherited freeze protection from submission thread */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+
+ if (ret < 0) {
+ if (ki->term_func)
+ ki->term_func(ki->term_func_priv, ret);
+ } else {
+ if (ret == ki->len) /* mark the content map only when the whole request was written */
+ cachefiles_mark_content_map(ki->object, ki->start, ki->len);
+ if (ki->term_func)
+ ki->term_func(ki->term_func_priv, ret);
+ }
+
+ fscache_uncount_io_operation(ki->object->fscache.cookie);
+ cachefiles_put_kiocb(ki);
+}

/*
@@ -39,11 +213,95 @@ int cachefiles_write(struct fscache_op_resources *opr,
fscache_io_terminated_t term_func,
void *term_func_priv)
{
- fscache_wait_for_operation(opr, FSCACHE_WANT_WRITE);
+ struct cachefiles_object *object =
+ container_of(opr->object, struct cachefiles_object, fscache);
+ struct cachefiles_kiocb *ki;
+ struct inode *inode;
+ struct file *file;
+ unsigned int old_nofs;
+ ssize_t ret = -ENOBUFS;
+ size_t len = iov_iter_count(iter);
+
+ spin_lock(&object->fscache.lock);
+ file = get_file(object->backing_file);
+ spin_unlock(&object->fscache.lock);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file->f_inode));
+
+ __fscache_wait_for_operation(opr, FSCACHE_WANT_WRITE);
fscache_count_io_operation(opr->object->cookie);
+
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki)
+ goto presubmission_error;
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos;
+ ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
+ ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->start = start_pos;
+ ki->len = len;
+ ki->object = object;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_write_complete;
+
+ ret = rw_verify_area(WRITE, file, &ki->iocb.ki_pos, iov_iter_count(iter));
+ if (ret < 0)
+ goto presubmission_error_free;
+
+ /* Open-code file_start_write here to grab freeze protection, which
+ * will be released in cachefiles_write_complete(), possibly by another
+ * thread. Fool lockdep by telling it the lock got released so that it
+ * doesn't complain about the held lock when we return to userspace.
+ */
+ inode = file_inode(file);
+ __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+
+ cachefiles_grab_object(&object->fscache, fscache_obj_get_ioreq);
+
+ old_nofs = memalloc_nofs_save();
+ ret = call_write_iter(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ /* Fall through */
+ default:
+ cachefiles_write_complete(&ki->iocb, ret, 0);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+
+presubmission_error_free:
+ kfree(ki);
+presubmission_error:
+ fput(file);
if (term_func)
- term_func(term_func_priv, -ENOBUFS);
- return -ENOBUFS;
+ term_func(term_func_priv, -ENOMEM);
+ return -ENOMEM;
}

/*