Re: iov_iter_pipe warning.

From: Dave Jones
Date: Fri Apr 28 2017 - 12:50:34 EST


On Fri, Apr 28, 2017 at 05:43:13PM +0100, Al Viro wrote:
> On Fri, Apr 28, 2017 at 11:29:55AM -0400, Dave Jones wrote:
> > On Fri, Apr 21, 2017 at 06:54:30PM +0100, Al Viro wrote:
> > > On Wed, Apr 12, 2017 at 03:03:18PM -0400, Dave Jones wrote:
> > >
> > > > Well it's been running an hour without incident, which looks promising.
> > > > I'll leave it run, but I'd say you're on the right track given how quick
> > > > it reproduced so far.
> > >
> > > Could you try this and see if it works? What happens is that unlike
> > > e.g. generic_file_read_iter/generic_file_write_iter, NFS O_DIRECT handling
> > > does not make sure that iov_iter had been advanced by the amount
> > > actually transferred - it is left advanced by the amount *requested*.
> >
> > Crap. So I never ran it long enough it seems. I can still hit that trace.
> > I re-added one of your earlier debug patches, and got this..
>
> Could you send me the diff against something from mainline (identified by
> sha1, ideally)?

currently running v4.11-rc8-75-gf83246089ca0

sunrpc bit is for the other unrelated problem I'm chasing.

note also, I saw the backtrace without the fs/splice.c changes.



diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aab32fc3d6a8..c1b5fed7c863 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -537,7 +537,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,

if (put_dreq(dreq))
nfs_direct_complete(dreq);
- return 0;
+ return requested_bytes;
}

/**
@@ -566,7 +566,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
- ssize_t result = -EINVAL;
+ ssize_t result = -EINVAL, requested;
size_t count = iov_iter_count(iter);
nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);

@@ -600,14 +600,19 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
nfs_start_io_direct(inode);

NFS_I(inode)->read_io += count;
- result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
+ requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);

nfs_end_io_direct(inode);

- if (!result) {
+ if (requested > 0) {
result = nfs_direct_wait(dreq);
- if (result > 0)
+ if (result > 0) {
+ requested -= result;
iocb->ki_pos += result;
+ }
+ iov_iter_revert(iter, requested);
+ } else {
+ result = requested;
}

out_release:
@@ -954,7 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,

if (put_dreq(dreq))
nfs_direct_write_complete(dreq);
- return 0;
+ return requested_bytes;
}

/**
@@ -979,7 +984,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
*/
ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
{
- ssize_t result = -EINVAL;
+ ssize_t result = -EINVAL, requested;
size_t count;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -1022,7 +1027,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)

nfs_start_io_direct(inode);

- result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);

if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
@@ -1031,13 +1036,17 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)

nfs_end_io_direct(inode);

- if (!result) {
+ if (requested > 0) {
result = nfs_direct_wait(dreq);
if (result > 0) {
+ requested -= result;
iocb->ki_pos = pos + result;
/* XXX: should check the generic_write_sync retval */
generic_write_sync(iocb, result);
}
+ iov_iter_revert(iter, requested);
+ } else {
+ result = requested;
}
out_release:
nfs_direct_req_release(dreq);
diff --git a/fs/splice.c b/fs/splice.c
index 006ba50f4ece..0e67ddf8618d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -284,6 +284,43 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
kfree(spd->partial);
}

+static bool test_it(struct pipe_inode_info *pipe, size_t len, long ret)
+{
+ int idx = pipe->curbuf;
+ int n = pipe->nrbufs;
+ size_t size = 0;
+ while (n--) {
+ size += pipe->bufs[idx++].len;
+ if (idx == pipe->buffers)
+ idx = 0;
+ }
+ if (WARN_ON(size != ret)) {
+ char c = '[';
+ printk(KERN_ERR "asked to read %zu, claims to have read %ld",
+ len, ret);
+ printk(KERN_CONT "actual size of data in pipe %zd ", size);
+ for (n = pipe->nrbufs, idx = pipe->curbuf; n--; ) {
+ printk(KERN_CONT "%c%d:%u", c, idx,
+ pipe->bufs[idx].len);
+ c = ',';
+ if (++idx == pipe->buffers)
+ idx = 0;
+ }
+ if (c != '[')
+ printk(KERN_CONT "]");
+ return true;
+ }
+ return false;
+}
+
+static inline bool insane_splice_read(struct pipe_inode_info *pipe,
+ size_t len, long ret)
+{
+ if (ret <= 0 || pipe != current->splice_pipe)
+ return false;
+ return test_it(pipe, len, ret);
+}
+
/**
* generic_file_splice_read - splice data from file to a pipe
* @in: file to splice from
@@ -311,8 +348,14 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
if (ret > 0) {
- *ppos = kiocb.ki_pos;
file_accessed(in);
+ if (unlikely(insane_splice_read(pipe, len, ret))) {
+ printk(KERN_ERR "f_op: %p, f_flags: %d, pos: %lld/%lld, size: %lld",
+ in->f_op, in->f_flags, (long long)*ppos,
+ (long long)kiocb.ki_pos,
+ (long long)i_size_read(file_inode(in)));
+ }
+ *ppos = kiocb.ki_pos;
} else if (ret < 0) {
to.idx = idx;
to.iov_offset = 0;
@@ -394,7 +437,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct page **pages;
unsigned int nr_pages;
size_t offset, dummy, copied = 0;
- ssize_t res;
+ ssize_t res, old_len = len;
int i;

if (pipe->nrbufs == pipe->buffers)
@@ -448,6 +491,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
put_page(pages[i]);
kvfree(pages);
iov_iter_advance(&to, copied); /* truncates and discards */
+ insane_splice_read(pipe, old_len, res);
return res;
}

@@ -970,6 +1014,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
while (len) {
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
+ if (WARN_ON(pipe->nrbufs)) {
+ printk(KERN_ERR "in->f_op = %p, ->splice_write = %p\n",
+ in->f_op,
+ sd->u.file->f_op->splice_write);
+ }

ret = do_splice_to(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7bfe1fb42add..eb5297573a8a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -927,6 +927,15 @@ int svc_send(struct svc_rqst *rqstp)
if (!xprt)
goto out;

+ if (WARN_ON(rqstp->rq_res.head[0].iov_len +
+ rqstp->rq_res.page_len +
+ rqstp->rq_res.tail[0].iov_len > rqstp->rq_reserved)) {
+
+ printk("dbg: rqstp->rq_res.head[0].iov_len:%ld\n", rqstp->rq_res.head[0].iov_len);
+ printk("dbg: rqstp->rq_res.page_len:%d\n", rqstp->rq_res.page_len);
+ printk("dbg: rqstp->rq_reserved:%d\n", rqstp->rq_reserved);
+ }
+
/* release the receive skb before sending the reply */
rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);

@@ -936,6 +945,8 @@ int svc_send(struct svc_rqst *rqstp)
xb->page_len +
xb->tail[0].iov_len;

+ WARN_ON(xb->len > rqstp->rq_reserved);
+
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
if (test_bit(XPT_DEAD, &xprt->xpt_flags)
@@ -944,6 +955,7 @@ int svc_send(struct svc_rqst *rqstp)
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
+
rpc_wake_up(&xprt->xpt_bc_pending);
svc_xprt_release(rqstp);