[PATCH 2.6.32 14/55] readv/writev: do the same MAX_RW_COUNT truncation that read/write does

From: Willy Tarreau
Date: Fri Mar 04 2016 - 11:13:51 EST


2.6.32-longterm review patch. If anyone has any objections, please let me know.

------------------

From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>

commit 435f49a518c78eec8e2edbbadd912737246cbe20 upstream.

We used to protect against overflow, but rather than return an error, do
what read/write does, namely to limit the total size to MAX_RW_COUNT.
This is not only more consistent, but it also means that any broken
low-level read/write routine that still keeps counts in 'int' can't
break.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
fs/read_write.c | 62 +++++++++++++++++++++++++++++-------------------------
include/linux/fs.h | 1 +
2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index b7f4a1f..ab80120 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -210,8 +210,6 @@ bad:
* them to something that fits in "int" so that others
* won't have to do range checks all the time.
*/
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
struct inode *inode;
@@ -546,65 +544,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
struct iovec **ret_pointer)
- {
+{
unsigned long seg;
- ssize_t ret;
+ ssize_t ret;
struct iovec *iov = fast_pointer;

- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
if (nr_segs == 0) {
ret = 0;
- goto out;
+ goto out;
}

- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
if (nr_segs > UIO_MAXIOV) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (nr_segs > fast_segs) {
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
if (iov == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out;
}
- }
+ }
if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
ret = -EFAULT;
- goto out;
+ goto out;
}

- /*
+ /*
* According to the Single Unix Specification we should return EINVAL
* if an element length is < 0 when cast to ssize_t or if the
* total length would overflow the ssize_t return value of the
* system call.
- */
+ *
+ * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+ * overflow case.
+ */
ret = 0;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;

/* see if we we're about to use an invalid len or if
* it's about to overflow ssize_t */
- if (len < 0 || (ret + len < ret)) {
+ if (len < 0) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
ret = -EFAULT;
- goto out;
+ goto out;
+ }
+ if (len > MAX_RW_COUNT - ret) {
+ len = MAX_RW_COUNT - ret;
+ iov[seg].iov_len = len;
}
-
ret += len;
- }
+ }
out:
*ret_pointer = iov;
return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 860cb6d..acfad75 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1817,6 +1817,7 @@ extern int current_umask(void);
/* /sys/fs */
extern struct kobject *fs_kobj;

+#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
extern int rw_verify_area(int, struct file *, loff_t *, size_t);

#define FLOCK_VERIFY_READ 1
--
1.7.12.2.21.g234cd45.dirty