Re: [PATCH 02/11] mm: call import_iovec() instead of rw_copy_check_uvector() in process_vm_rw()
From: Al Viro
Date: Mon Sep 21 2020 - 11:02:29 EST
On Mon, Sep 21, 2020 at 04:34:25PM +0200, Christoph Hellwig wrote:
> From: David Laight <David.Laight@xxxxxxxxxx>
>
> This is the only direct call of rw_copy_check_uvector(). Removing it
> will allow rw_copy_check_uvector() to be inlined into import_iovec(),
> while only paying a minor price by setting up an otherwise unused
> iov_iter in the process_vm_readv/process_vm_writev syscalls that aren't
> in a super hot path.
> @@ -443,7 +443,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
> const struct iovec *iov, unsigned long nr_segs,
> size_t count)
> {
> - WARN_ON(direction & ~(READ | WRITE));
> + WARN_ON(direction & ~(READ | WRITE | CHECK_IOVEC_ONLY));
> direction &= READ | WRITE;
Ugh...
> - rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
> - iovstack_r, &iov_r);
> + rc = import_iovec(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, &iov_r,
> + &iter_r);
> if (rc <= 0)
> goto free_iovecs;
>
> - rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
> + rc = process_vm_rw_core(pid, &iter_l, iter_r.iov, iter_r.nr_segs,
> + flags, vm_write);
... and ugh^2, since now you are not only setting up a meaningless iov_iter,
you are also creating a new place that pokes directly into struct iov_iter
guts.
Sure, moving rw_copy_check_uvector() over to lib/iov_iter.c makes sense.
But I would rather split the access_ok()-related checks out of that thing
and bury CHECK_IOVEC_ONLY.
Step 1: move the damn thing to lib/iov_iter.c (same as you do, but without
making it static)
Step 2: split it in two:
/*
 * Copy an array of iovecs in from user space and clamp the total length.
 *
 * @uvector:      user-space array of @nr_segs iovecs
 * @nr_segs:      number of segments in @uvector
 * @fast_segs:    number of segments @fast_pointer has room for
 * @fast_pointer: caller-supplied array, used when @nr_segs <= @fast_segs
 * @ret_pointer:  set to the array actually used: @fast_pointer, or a
 *                kmalloc'ed array when @nr_segs > @fast_segs
 *
 * Returns the total number of bytes described by the segments (capped at
 * MAX_RW_COUNT, with the offending segment's iov_len truncated to fit),
 * 0 for zero segments, -EINVAL if @nr_segs exceeds UIO_MAXIOV or a segment
 * length is negative when cast to ssize_t, -ENOMEM if the array cannot be
 * allocated, or -EFAULT if the copy from user space fails.
 *
 * No access_ok() checks on the segment base pointers are done here.
 *
 * *ret_pointer is set on every return path.  If it differs from
 * @fast_pointer the caller owns the allocation and must kfree() it,
 * including when this function returns an error.
 */
ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	*ret_pointer = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}."  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return 0;

	/*
	 * Get the "struct iovec" array from user memory, allocating a
	 * bigger array if the caller's fast one is too small.
	 */
	if (nr_segs > UIO_MAXIOV)
		return -EINVAL;
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(*iov), GFP_KERNEL);
		if (!iov)
			return -ENOMEM;
		/* Publish before any later failure so the caller can free it. */
		*ret_pointer = iov;
	}
	if (copy_from_user(iov, uvector, nr_segs * sizeof(*uvector)))
		return -EFAULT;

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* Reject a length that is negative as ssize_t. */
		if (len < 0)
			return -EINVAL;
		/* Truncate the segment that would push us past the cap. */
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
	return ret;
}
/*
* This is merely an early sanity check; we do _not_ rely upon
* it when we get to the actual memory accesses.
*/
static bool check_iovecs(const struct iovec *iov, int nr_segs)
{
for (seg = 0; seg < nr_segs; seg++) {
void __user *buf = iov[seg].iov_base;
ssize_t len = (ssize_t)iov[seg].iov_len;
if (unlikely(!access_ok(buf, len)))
return false;
}
return true;
}
/*
 * Copy a user iovec array in, sanity-check it and set up an iov_iter.
 *
 * @type:      direction value handed through to iov_iter_init()
 * @uvector:   user-space array of @nr_segs iovecs
 * @nr_segs:   number of segments in @uvector
 * @fast_segs: number of segments the array at *@iov has room for
 * @iov:       in: caller-supplied fast array; out: the allocated array
 *             the caller must kfree(), or NULL if the fast array was
 *             used or an error occurred
 * @i:         iterator to initialise on success
 *
 * Returns the total byte count from rw_copy_check_uvector() on success,
 * or a negative errno (-EFAULT if check_iovecs() rejects a segment).
 * On failure any array allocated along the way has already been freed.
 */
ssize_t import_iovec(int type, const struct iovec __user * uvector,
		     unsigned nr_segs, unsigned fast_segs,
		     struct iovec **iov, struct iov_iter *i)
{
	struct iovec *fast = *iov;
	struct iovec *vec;
	ssize_t total;

	total = rw_copy_check_uvector(uvector, nr_segs, fast_segs, fast, &vec);
	if (total > 0 && !check_iovecs(vec, nr_segs))
		total = -EFAULT;

	if (total >= 0) {
		iov_iter_init(i, type, vec, nr_segs, total);
		/* Only hand back an array the caller actually has to free. */
		*iov = (vec != fast) ? vec : NULL;
		return total;
	}

	/* Error: drop whatever was allocated and report nothing to free. */
	if (vec != fast)
		kfree(vec);
	*iov = NULL;
	return total;
}
Step 3: kill CHECK_IOVEC_ONLY and use rw_copy_check_uvector() without the type
argument in mm/process_vm_access.c
Saner that way, IMO...