Re: [PATCH] fuse: implement cuse mmap

From: Luca Risolia
Date: Tue Jan 05 2016 - 15:35:52 EST


Hi Andrew,

I was wondering if there's a chance to have this patch merged anytime soon. Note that the FUSE maintainer stepped away some time ago and no one is taking care of kernel patches at the moment.

Please let Jader and me know if there are any problems.

Thanks

Jader H. Silva wrote:
Implement cuse mmap using shmem to provide the actual memory maps.
Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api.

Signed-off-by: Jader H. Silva <jaderhs5@xxxxxxxxx>
---
fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++-
fs/fuse/dev.c | 163 +---------------
fs/fuse/fuse_i.h | 34 +++-
fs/fuse/inode.c | 166 ++++++++++++++++-
include/uapi/linux/fuse.h | 26 +++
5 files changed, 688 insertions(+), 160 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11..7749c13 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,9 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
+#include <linux/mman.h>
+#include <linux/falloc.h>
+#include <linux/shmem_fs.h>

#include "fuse_i.h"

@@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
return fuse_do_ioctl(file, cmd, arg, flags);
}

+struct fuse_dmmap_region { /* one shmem-backed region, keyed by the mapid the server returns for FUSE_MMAP */
+ u64 mapid; /* ID taken from the server's FUSE_MMAP reply; lookup key on fc->dmmap_list */
+ u64 size; /* largest size requested so far; the backing file is grown via ->fallocate() */
+ struct file *filp; /* backing file created with shmem_file_setup() */
+ struct vm_operations_struct vm_ops; /* copy of the backing vm_ops with open/close/fault overridden */
+ const struct vm_operations_struct *vm_original_ops; /* vm_ops installed by the backing file's ->mmap() */
+ struct list_head list; /* link in fc->dmmap_list */
+ atomic_t ref; /* reference count; region is unlinked and freed when it reaches zero */
+};
+
+/*
+ * fuse_dmmap_vm represents the result of a single mmap() call, which
+ * can be shared by multiple client vmas created by forking.
+ */
+struct fuse_dmmap_vm {
+ u64 len; /* length of the mapping in bytes (vm_end - vm_start at mmap time) */
+ u64 off; /* byte offset of the mapping (vm_pgoff << PAGE_SHIFT) */
+ atomic_t open_count; /* number of vmas sharing this mapping; FUSE_MUNMAP is sent when it hits zero */
+ struct fuse_dmmap_region *region; /* backing region; one region reference is held per fuse_dmmap_vm */
+};
+
+/*
+ * Drop one reference on @fdr.
+ *
+ * When the last reference goes away the region is unlinked from
+ * fc->dmmap_list under fc->lock, then its backing file is released and
+ * the region itself is freed.
+ */
+static void fuse_dmmap_region_put(struct fuse_conn *fc,
+				  struct fuse_dmmap_region *fdr)
+{
+	/* fc->lock is only taken (and returned held) on the final put */
+	if (!atomic_dec_and_lock(&fdr->ref, &fc->lock))
+		return;
+
+	list_del(&fdr->list);
+	spin_unlock(&fc->lock);
+
+	fput(fdr->filp);
+	kfree(fdr);
+}
+
+/*
+ * vm_ops->open hook: a vma referring to this mapping has been duplicated.
+ * Count the additional user, then forward to the backing file's own
+ * ->open handler if it has one.
+ */
+static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *map = vma->vm_private_data;
+	struct fuse_dmmap_region *region = map->region;
+
+	/* one more vma now shares this mapping */
+	atomic_inc(&map->open_count);
+
+	if (!region->vm_original_ops->open)
+		return;
+
+	region->vm_original_ops->open(vma);
+}
+
+/*
+ * vm_ops->close hook: a vma referring to this mapping is going away.
+ *
+ * The backing file's ->close handler (if any) runs first.  Once the last
+ * vma sharing the mapping is gone, the server is told via FUSE_MUNMAP and
+ * the mapping's region reference and bookkeeping are released.
+ */
+static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *map = vma->vm_private_data;
+	struct fuse_dmmap_region *region = map->region;
+	struct file *file = vma->vm_file;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_munmap_in *arg;
+	struct fuse_req *req;
+
+	if (region->vm_original_ops->close)
+		region->vm_original_ops->close(vma);
+
+	/* other vmas still share this mapping */
+	if (!atomic_dec_and_test(&map->open_count))
+		return;
+
+	/*
+	 * Tell the server the mapping is gone.  A request that cannot
+	 * fail is used because skipping this notification might leak
+	 * resources in the server.
+	 */
+	req = fuse_get_req_nofail_nopages(fc, file);
+
+	arg = &req->misc.munmap_in;
+	arg->fh = ff->fh;
+	arg->mapid = region->mapid;
+	arg->size = map->len;
+	arg->offset = map->off;
+
+	req->in.h.opcode = FUSE_MUNMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+
+	fuse_request_send(fc, req);
+	fuse_put_request(fc, req);
+
+	fuse_dmmap_region_put(fc, region);
+	kfree(map);
+}
+
+static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* vm_ops->fault hook: delegates to the backing file's original ->fault */
+{
+ int ret;
+ struct file *filp = vma->vm_file; /* remember the file the vma really belongs to */
+ struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+ struct fuse_dmmap_region *fdr = fdvm->region;
+
+ vma->vm_file = fdr->filp; /* temporarily present the backing file to the original handler */
+ ret = fdr->vm_original_ops->fault(vma, vmf); /* NOTE(review): vm_file is swapped with no locking visible here - confirm concurrent faults on one vma are safe */
+
+ vma->vm_file = filp; /* restore the original file before returning */
+
+ return ret;
+}
+
+static const struct vm_operations_struct fuse_dmmap_vm_ops = { /* hooks that cuse_mmap() copies over each region's vm_ops */
+ .open = fuse_dmmap_vm_open,
+ .close = fuse_dmmap_vm_close,
+ .fault = fuse_dmmap_vm_fault,
+};
+
+/*
+ * Search fc->dmmap_list for the region whose ID is @mapid.
+ *
+ * Both callers in this file hold fc->lock around the call.  On a match
+ * the region's reference count is raised before it is returned; NULL is
+ * returned when no region has that ID.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
+							u64 mapid)
+{
+	struct fuse_dmmap_region *pos;
+
+	list_for_each_entry(pos, &fc->dmmap_list, list) {
+		if (pos->mapid != mapid)
+			continue;
+
+		atomic_inc(&pos->ref);
+		return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked wrapper around fuse_dmmap_find_locked(): takes fc->lock for the
+ * duration of the lookup.  Returns the referenced region, or NULL if no
+ * region with @mapid exists.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
+						 u64 mapid)
+{
+	struct fuse_dmmap_region *found;
+
+	spin_lock(&fc->lock);
+	found = fuse_dmmap_find_locked(fc, mapid);
+	spin_unlock(&fc->lock);
+
+	return found;
+}
+
+/*
+ * Return the region identified by @mapid, creating it if necessary.
+ *
+ * @fc:    connection owning the region list
+ * @file:  CUSE file being mapped; its path names the backing shmem file
+ * @mapid: region ID chosen by the server
+ * @size:  size the region must be able to hold
+ * @flags: passed through to shmem_file_setup()
+ *
+ * A new region is backed by a file from shmem_file_setup().  If another
+ * task inserted a region with the same ID in the meantime, the freshly
+ * built one is discarded and the existing one is used.  The backing file
+ * is grown with ->fallocate() whenever @size exceeds the recorded size.
+ *
+ * Returns the region with a reference held for the caller, or an
+ * ERR_PTR() on failure (no reference is held in that case).
+ */
+static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
+						struct file *file, u64 mapid,
+						u64 size, unsigned long flags)
+{
+	struct fuse_dmmap_region *fdr;
+	char *pathbuf, *filepath;
+	struct file *shmem_file;
+	long err;
+
+	fdr = fuse_dmmap_find(fc, mapid);
+	if (!fdr) {
+		struct fuse_dmmap_region *tmp;
+
+		fdr = kzalloc(sizeof(*fdr), GFP_KERNEL);
+		if (!fdr)
+			return ERR_PTR(-ENOMEM);
+
+		atomic_set(&fdr->ref, 1);
+		fdr->mapid = mapid;
+
+		pathbuf = kzalloc(PATH_MAX + 1, GFP_KERNEL);
+		if (!pathbuf) {
+			kfree(fdr);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		filepath = d_path(&file->f_path, pathbuf, PATH_MAX + 1);
+		if (IS_ERR(filepath)) {
+			kfree(fdr);
+			kfree(pathbuf);
+			return ERR_CAST(filepath);
+		}
+
+		shmem_file = shmem_file_setup(filepath, size, flags);
+		kfree(pathbuf);
+
+		if (IS_ERR(shmem_file)) {
+			kfree(fdr);
+			return ERR_CAST(shmem_file);
+		}
+
+		fdr->filp = shmem_file;
+
+		/* Re-check under the lock: someone may have raced us. */
+		spin_lock(&fc->lock);
+		tmp = fuse_dmmap_find_locked(fc, mapid);
+		if (tmp) {
+			fput(fdr->filp);
+			kfree(fdr);
+			fdr = tmp;
+		} else {
+			INIT_LIST_HEAD(&fdr->list);
+			list_add(&fdr->list, &fc->dmmap_list);
+		}
+		spin_unlock(&fc->lock);
+	}
+
+	/*
+	 * NOTE(review): fdr->size is read and updated without fc->lock;
+	 * confirm concurrent mmap() calls on one region are serialized
+	 * elsewhere.
+	 */
+	if (size > fdr->size) {
+		err = fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size);
+		if (err) {
+			/* don't record a size the backing file doesn't have */
+			fuse_dmmap_region_put(fc, fdr);
+			return ERR_PTR(err);
+		}
+		fdr->size = size;
+	}
+
+	return fdr;
+}
+
+/*
+ * ->mmap handler for CUSE character devices.
+ *
+ * Sends FUSE_MMAP to the server describing the requested mapping.  If the
+ * server accepts, the region named by the returned mapid is looked up or
+ * created, the vma is set up through the backing file's own ->mmap(), and
+ * the region's vm_ops copy (with open/close/fault overridden) is installed
+ * on the vma.
+ *
+ * Returns 0 on success or a negative errno.  -ENOSYS from the server is
+ * remembered in fc->no_dmmap so later calls fail without a round trip.
+ *
+ * NOTE(review): if setup fails after the server accepted FUSE_MMAP, no
+ * FUSE_MUNMAP is sent from this function - confirm the server copes.
+ */
+static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_dmmap_vm *fdvm;
+	struct fuse_dmmap_region *fdr;
+	struct fuse_req *req = NULL;
+	struct fuse_mmap_in inarg;
+	struct fuse_mmap_out outarg;
+
+	if (fc->no_dmmap)
+		return -ENOSYS;
+
+	req = fuse_get_req(fc, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* ask server whether this mmap is okay and what the size should be */
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.addr = vma->vm_start;
+	inarg.len = vma->vm_end - vma->vm_start;
+	inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
+		     ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
+		     ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+	inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0) |
+		      ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
+		      ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
+		      ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
+		      ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
+	inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+
+	req->in.h.opcode = FUSE_MMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		if (err == -ENOSYS)
+			fc->no_dmmap = 1;
+		goto free_req;
+	}
+
+	fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
+			     vma->vm_flags);
+	if (IS_ERR(fdr)) {
+		err = PTR_ERR(fdr);
+		goto free_req;
+	}
+
+	fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
+	if (!fdvm) {
+		err = -ENOMEM;
+		goto put_region;
+	}
+
+	atomic_set(&fdvm->open_count, 1);
+	fdvm->region = fdr;
+	fdvm->len = inarg.len;
+	fdvm->off = inarg.offset;
+
+	/*
+	 * Let the backing file set up the vma.  Its result must be checked:
+	 * on failure vma->vm_ops may not have been installed, and it is
+	 * dereferenced just below.
+	 */
+	err = fdr->filp->f_op->mmap(fdr->filp, vma);
+	if (err)
+		goto free_fdvm;
+
+	/* Wrap the backing vm_ops so open/close/fault pass through us. */
+	memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
+	fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
+	fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
+	fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
+
+	fdr->vm_original_ops = vma->vm_ops;
+
+	vma->vm_ops = &fdr->vm_ops;
+
+	vma->vm_private_data = fdvm;
+	vma->vm_flags |= VM_DONTEXPAND;	/* disallow expansion for now */
+
+	fuse_put_request(fc, req);
+	return 0;
+
+free_fdvm:
+	kfree(fdvm);
+put_region:
+	fuse_dmmap_region_put(fc, fdr);
+free_req:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+/*
+ * notify_store implementation for CUSE: copy data supplied by the server
+ * into the shmem pages backing a direct-mmap region.
+ *
+ * @fc:     connection
+ * @cs:     copy state positioned at the payload
+ * @nodeid: interpreted as the region's mapid
+ * @size:   number of payload bytes
+ * @pos:    byte offset within the region
+ *
+ * The range is clipped to the region's recorded size.  Returns 0 on
+ * success, -ENOENT if no such region exists, or the error from page
+ * lookup or fuse_copy_page().
+ */
+static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct fuse_dmmap_region *fdr;
+	struct file *filp;
+	pgoff_t index;
+	unsigned int off;
+	int err;
+
+	fdr = fuse_dmmap_find(fc, nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	index = pos >> PAGE_SHIFT;
+	off = pos & ~PAGE_MASK;
+	if (pos > fdr->size)
+		size = 0;
+	else if (size > fdr->size - pos)
+		size = fdr->size - pos;
+
+	filp = fdr->filp;
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		/*
+		 * shmem_read_mapping_page_gfp() hands back a referenced
+		 * but *unlocked* page, so there is no unlock_page() to
+		 * pair with it below.
+		 */
+		page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
+						   index, GFP_HIGHUSER);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_put_region;
+		}
+
+		this_num = min_t(unsigned, size, PAGE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+
+		/* shmem pages must be dirtied or reclaim may drop the data */
+		if (!err)
+			set_page_dirty(page);
+		page_cache_release(page);
+
+		if (err)
+			goto out_put_region;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	err = 0;
+
+out_put_region:
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req) /* req->end callback for dmmap NOTIFY_REPLY requests */
+{
+ release_pages(req->pages, req->num_pages, 0); /* drop the page references collected for the reply */
+}
+
+/*
+ * notify_retrieve implementation for CUSE: send the contents of a
+ * direct-mmap region back to the server in a FUSE_NOTIFY_REPLY request.
+ *
+ * @fc:     connection
+ * @outarg: retrieve request from the server; outarg->nodeid is
+ *          interpreted as the region's mapid
+ *
+ * The requested range is clipped to the region's recorded size and to
+ * FUSE_MAX_PAGES_PER_REQ pages.  Returns 0 once the reply is queued,
+ * -ENOENT if no such region exists, or a negative errno on failure.
+ */
+static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
+				struct fuse_notify_retrieve_out *outarg)
+{
+	struct fuse_dmmap_region *fdr;
+	struct address_space *mapping;
+	struct fuse_req *req;
+	pgoff_t index;
+	unsigned int num;
+	unsigned int offset;
+	unsigned int npages;
+	size_t total_len = 0;
+	int err;
+
+	fdr = fuse_dmmap_find(fc, outarg->nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	offset = outarg->offset & ~PAGE_MASK;
+	index = outarg->offset >> PAGE_SHIFT;
+
+	num = outarg->size;
+	if (outarg->offset > fdr->size)
+		num = 0;
+	else if (outarg->offset + num > fdr->size)
+		num = fdr->size - outarg->offset;
+
+	/*
+	 * Size the request for what will actually be sent: account for
+	 * the offset into the first page and never exceed the per-request
+	 * page limit (outarg->size is supplied by the server).
+	 */
+	npages = min_t(u64, ((u64)num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT,
+		       FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, npages);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto out_put_region;
+	}
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_descs[0].offset = offset;
+	req->end = fuse_retrieve_dmmap_end;
+
+	mapping = fdr->filp->f_inode->i_mapping;
+
+	while (num && req->num_pages < npages) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = shmem_read_mapping_page_gfp(mapping, index, GFP_KERNEL);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_free_req;
+		}
+
+		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts at a non-zero offset */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (!err)
+		goto out_put_region;
+
+out_free_req:
+	/* the request was never queued: drop its pages and the request */
+	fuse_retrieve_dmmap_end(fc, req);
+	fuse_put_request(fc, req);
+out_put_region:
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+
static const struct file_operations cuse_frontend_fops = {
.owner = THIS_MODULE,
.read_iter = cuse_read_iter,
@@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
.unlocked_ioctl = cuse_file_ioctl,
.compat_ioctl = cuse_file_compat_ioctl,
.poll = fuse_file_poll,
- .llseek = noop_llseek,
+ .llseek = noop_llseek,
+ .mmap = cuse_mmap,
};


@@ -468,10 +907,26 @@ err:

static void cuse_fc_release(struct fuse_conn *fc)
{
+ struct fuse_dmmap_region *fdr;
struct cuse_conn *cc = fc_to_cc(fc);
+
+ spin_lock(&fc->lock);
+ while (!list_empty(&fc->dmmap_list)) {
+
+ fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
+ fuse_dmmap_region_put(fc, fdr);
+ }
+ spin_unlock(&fc->lock);
+
kfree_rcu(cc, fc.rcu);
}

+static const struct fuse_conn_operations cuse_ops = { /* connection callbacks installed on CUSE connections in cuse_channel_open() */
+ .release = cuse_fc_release,
+ .notify_store = fuse_notify_store_to_dmmap,
+ .notify_retrieve = fuse_notify_retrieve_from_dmmap,
+};
+
/**
* cuse_channel_open - open method for /dev/cuse
* @inode: inode for /dev/cuse
@@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
}

INIT_LIST_HEAD(&cc->list);
- cc->fc.release = cuse_fc_release;
+ cc->fc.ops = &cuse_ops;

cc->fc.initialized = 1;
rc = cuse_send_init(cc);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 80cc1b3..0faf92c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
}
+EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
@@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
- struct fuse_req *req, u64 unique)
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique)
{
int err = -ENODEV;
struct fuse_iqueue *fiq = &fc->iq;
@@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
}
return err;
}
+EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);

/*
* Unlock request. If it was aborted while locked, caller is responsible
@@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
* Copy a page in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
struct page *page = *pagep;
@@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
flush_dcache_page(page);
return 0;
}
+EXPORT_SYMBOL_GPL(fuse_copy_page);

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
@@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_store_out outarg;
- struct inode *inode;
- struct address_space *mapping;
- u64 nodeid;
int err;
- pgoff_t index;
- unsigned int offset;
- unsigned int num;
- loff_t file_size;
- loff_t end;

err = -EINVAL;
if (size < sizeof(outarg))
@@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (size - sizeof(outarg) != outarg.size)
goto out_finish;

- nodeid = outarg.nodeid;
+ err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
+ outarg.offset);

- down_read(&fc->killsb);
-
- err = -ENOENT;
- if (!fc->sb)
- goto out_up_killsb;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (!inode)
- goto out_up_killsb;
-
- mapping = inode->i_mapping;
- index = outarg.offset >> PAGE_CACHE_SHIFT;
- offset = outarg.offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
- end = outarg.offset + outarg.size;
- if (end > file_size) {
- file_size = end;
- fuse_write_update_size(inode, file_size);
- }
-
- num = outarg.size;
- while (num) {
- struct page *page;
- unsigned int this_num;
-
- err = -ENOMEM;
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
- if (!page)
- goto out_iput;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!err && offset == 0 &&
- (this_num == PAGE_CACHE_SIZE || file_size == end))
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
-
- if (err)
- goto out_iput;
-
- num -= this_num;
- offset = 0;
- index++;
- }
-
- err = 0;
-
-out_iput:
- iput(inode);
-out_up_killsb:
- up_read(&fc->killsb);
out_finish:
fuse_copy_finish(cs);
return err;
}

-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
-{
- release_pages(req->pages, req->num_pages, false);
-}
-
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
- struct fuse_notify_retrieve_out *outarg)
-{
- int err;
- struct address_space *mapping = inode->i_mapping;
- struct fuse_req *req;
- pgoff_t index;
- loff_t file_size;
- unsigned int num;
- unsigned int offset;
- size_t total_len = 0;
- int num_pages;
-
- offset = outarg->offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
-
- num = outarg->size;
- if (outarg->offset > file_size)
- num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
-
- num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
-
- req = fuse_get_req(fc, num_pages);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_NOTIFY_REPLY;
- req->in.h.nodeid = outarg->nodeid;
- req->in.numargs = 2;
- req->in.argpages = 1;
- req->page_descs[0].offset = offset;
- req->end = fuse_retrieve_end;
-
- index = outarg->offset >> PAGE_CACHE_SHIFT;
-
- while (num && req->num_pages < num_pages) {
- struct page *page;
- unsigned int this_num;
-
- page = find_get_page(mapping, index);
- if (!page)
- break;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- req->pages[req->num_pages] = page;
- req->page_descs[req->num_pages].length = this_num;
- req->num_pages++;
-
- offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
- }
- req->misc.retrieve_in.offset = outarg->offset;
- req->misc.retrieve_in.size = total_len;
- req->in.args[0].size = sizeof(req->misc.retrieve_in);
- req->in.args[0].value = &req->misc.retrieve_in;
- req->in.args[1].size = total_len;
-
- err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
- if (err)
- fuse_retrieve_end(fc, req);
-
- return err;
-}
-
static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_retrieve_out outarg;
- struct inode *inode;
int err;

err = -EINVAL;
@@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,

fuse_copy_finish(cs);

- down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb) {
- u64 nodeid = outarg.nodeid;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (inode) {
- err = fuse_retrieve(fc, inode, &outarg);
- iput(inode);
- }
- }
- up_read(&fc->killsb);
+ err = fc->ops->notify_retrieve(fc, &outarg);

return err;

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4051131..a56222b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -337,6 +337,7 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
+ struct fuse_munmap_in munmap_in;
} misc;

/** page vector */
@@ -431,6 +432,21 @@ struct fuse_dev {
struct list_head entry;
};

+struct fuse_copy_state;
+
+struct fuse_conn_operations { /* per-connection callbacks, reached through fuse_conn.ops */
+ /** Called on final put, from fuse_conn_put() when the count drops to zero */
+ void (*release)(struct fuse_conn *);
+
+ /** Called by fuse_notify_store() to store size bytes at pos into the object named by nodeid */
+ int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
+ u64 nodeid, u32 size, u64 pos);
+
+ /** Called by fuse_notify_retrieve() to send data from a mapping back to the server */
+ int (*notify_retrieve)(struct fuse_conn *,
+ struct fuse_notify_retrieve_out *);
+};
+
/**
* A Fuse connection.
*
@@ -578,6 +594,9 @@ struct fuse_conn {
/** Is poll not implemented by fs? */
unsigned no_poll:1;

+ /** Is direct mmap not implemented by fs? */
+ unsigned no_dmmap:1;
+
/** Do multi-page cached writes */
unsigned big_writes:1;

@@ -635,9 +654,6 @@ struct fuse_conn {
/** Version counter for attribute changes */
u64 attr_version;

- /** Called on final put */
- void (*release)(struct fuse_conn *);
-
/** Super block for this connection. */
struct super_block *sb;

@@ -646,6 +662,12 @@ struct fuse_conn {

/** List of device instances belonging to this connection */
struct list_head devices;
+
+ /** List of direct mmaps (currently CUSE only) */
+ struct list_head dmmap_list;
+
+ /** Operations that fuse and cuse can implement differently */
+ const struct fuse_conn_operations *ops;
};

static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,

void fuse_set_initialized(struct fuse_conn *fc);

+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing);
+
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ac81f48..5284b84 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
fc->connected = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+ INIT_LIST_HEAD(&fc->dmmap_list);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

@@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (atomic_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
- fc->release(fc);
+ fc->ops->release(fc);
}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

+/*
+ * notify_store implementation for regular FUSE mounts: copy data supplied
+ * by the server into the page cache of an inode.
+ *
+ * @fc:     connection
+ * @cs:     copy state positioned at the payload
+ * @nodeid: node ID of the target inode
+ * @size:   number of payload bytes
+ * @pos:    byte offset within the file
+ *
+ * The file size is extended if the stored range ends beyond it.  Returns
+ * 0 on success, -ENOENT if the superblock or inode is gone, -ENOMEM if a
+ * page cannot be obtained, or the error from fuse_copy_page().
+ */
+static int fuse_notify_store_to_inode(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	pgoff_t index;
+	unsigned int off;
+	loff_t file_size;
+	loff_t end;
+	int err;
+
+	down_read(&fc->killsb);
+
+	err = -ENOENT;
+	if (!fc->sb)
+		goto out_up_killsb;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_up_killsb;
+
+	mapping = inode->i_mapping;
+	index = pos >> PAGE_CACHE_SHIFT;
+	off = pos & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+	end = pos + size;
+	if (end > file_size) {
+		file_size = end;
+		fuse_write_update_size(inode, file_size);
+	}
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		err = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+					   mapping_gfp_mask(mapping));
+		if (!page)
+			goto out_iput;
+
+		this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+		/*
+		 * Only mark the page uptodate when it was written from its
+		 * start and either completely filled or the store reaches
+		 * EOF.  A partially written page in the middle of the file
+		 * still has stale contents past the copied bytes.
+		 */
+		if (!err && off == 0 &&
+		    (this_num == PAGE_CACHE_SIZE || file_size == end))
+			SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			goto out_iput;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	err = 0;
+
+out_iput:
+	iput(inode);
+out_up_killsb:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) /* req->end callback for inode NOTIFY_REPLY requests */
+{
+ release_pages(req->pages, req->num_pages, 0); /* drop the page references collected for the reply */
+}
+
+/*
+ * Send cached pages of @inode back to the server in a FUSE_NOTIFY_REPLY
+ * request.
+ *
+ * @fc:     connection
+ * @inode:  inode whose page cache is read
+ * @outarg: retrieve request from the server (offset, size, notify_unique)
+ *
+ * The range is clipped to the file size and to FUSE_MAX_PAGES_PER_REQ
+ * pages, and collection stops at the first page missing from the cache.
+ * Returns 0 once the reply is queued or a negative errno.
+ */
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+			 struct fuse_notify_retrieve_out *outarg)
+{
+	int err;
+	struct address_space *mapping = inode->i_mapping;
+	struct fuse_req *req;
+	pgoff_t index;
+	loff_t file_size;
+	unsigned int num;
+	unsigned int offset;
+	unsigned int num_pages;
+	size_t total_len = 0;
+
+	offset = outarg->offset & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+
+	num = outarg->size;
+	if (outarg->offset > file_size)
+		num = 0;
+	else if (outarg->offset + num > file_size)
+		num = file_size - outarg->offset;
+
+	/*
+	 * The request must be allocated with room for every page stored
+	 * in req->pages[] below; asking for zero pages and then filling
+	 * the array would overrun it.
+	 */
+	num_pages = min_t(u64,
+			  ((u64)num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT,
+			  FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, num_pages);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_descs[0].offset = offset;
+	req->end = fuse_retrieve_end;
+
+	index = outarg->offset >> PAGE_CACHE_SHIFT;
+
+	while (num && req->num_pages < num_pages) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = find_get_page(mapping, index);
+		if (!page)
+			break;
+
+		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts at a non-zero offset */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (err) {
+		/* never queued: release the pages and the request itself */
+		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
+
+	return err;
+}
+
+/*
+ * notify_retrieve implementation for regular FUSE mounts: look up the
+ * inode named by outarg->nodeid and hand it to fuse_retrieve().
+ *
+ * fc->killsb is held for reading across the lookup and the retrieve.
+ * Returns -ENOENT when there is no superblock or no matching inode,
+ * otherwise the result of fuse_retrieve().
+ */
+static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
+				struct fuse_notify_retrieve_out *outarg)
+{
+	u64 nodeid = outarg->nodeid;
+	struct inode *inode;
+	int err = -ENOENT;
+
+	down_read(&fc->killsb);
+
+	if (!fc->sb)
+		goto out_unlock;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_unlock;
+
+	err = fuse_retrieve(fc, inode, outarg);
+	iput(inode);
+
+out_unlock:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+static const struct fuse_conn_operations fuse_default_ops = { /* connection callbacks installed on regular FUSE mounts in fuse_fill_super() */
+ .release = fuse_free_conn,
+ .notify_store = fuse_notify_store_to_inode,
+ .notify_retrieve = fuse_notify_retrieve_from_inode,
+};
+
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_dev *fud;
@@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_fput;

fuse_conn_init(fc);
- fc->release = fuse_free_conn;
+ fc->ops = &fuse_default_ops;

fud = fuse_dev_alloc(fc);
if (!fud)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index c9aca04..3f4c54b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -102,6 +102,7 @@
* - add ctime and ctimensec to fuse_setattr_in
* - add FUSE_RENAME2 request
* - add FUSE_NO_OPEN_SUPPORT flag
+ * - add FUSE_MMAP and FUSE_MUNMAP
*/

#ifndef _LINUX_FUSE_H
@@ -358,6 +359,8 @@ enum fuse_opcode {
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
+ FUSE_MMAP = 46,
+ FUSE_MUNMAP = 47,

/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -670,6 +673,29 @@ struct fuse_fallocate_in {
uint32_t padding;
};

+struct fuse_mmap_in { /* FUSE_MMAP request body */
+ __u64 fh; /* file handle of the open file being mapped */
+ __u64 addr; /* start address of the vma in the client */
+ __u64 len; /* length of the mapping in bytes */
+ __u32 prot; /* PROT_* bits derived from the vma flags */
+ __u32 flags; /* MAP_* bits derived from the vma flags */
+ __u64 offset; /* byte offset of the mapping (vm_pgoff << PAGE_SHIFT) */
+};
+
+struct fuse_mmap_out { /* FUSE_MMAP reply body */
+ __u64 mapid; /* Mmap ID, same namespace as Inode ID */
+ __u64 size; /* Size of memory region */
+ __u64 reserved; /* not read by the kernel code in this patch */
+};
+
+struct fuse_munmap_in { /* FUSE_MUNMAP request body */
+ __u64 fh; /* file handle of the open file that was mapped */
+ __u64 mapid; /* ID of the region being unmapped */
+ __u64 size; /* Size of memory region */
+ __u64 offset; /* byte offset the mapping was created with */
+ __u64 reserved; /* not set by the kernel code in this patch */
+};
+
struct fuse_in_header {
uint32_t len;
uint32_t opcode;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/