Re: [patch] readv/writev rework

From: Hirokazu Takahashi (taka@valinux.co.jp)
Date: Fri Sep 13 2002 - 02:22:52 EST


Hello,

I updated the writev patch which may be easy to understand.
How about it?

But I have one question, Could let me know if you have any idea,
why does filemap_copy_from_user() try to call kamp()+__copy_from_user()
again after the first trial get fault.

Is there any meanings?

> > a_ops->prepare_write() and a_ops->commit_write will have a
> > penalty when I/O size isn't PAGE_SIZE.
> > With following patch generic_file_write_nolock() will try to
> > make each I/O size become PAGE_SIZE.
> >
>
> Certainly makes a lot of sense. If an application has a large
> number of small objects which are to be appended to a file, and
> they are not contiguous in user memory then this patch makes
> writev() a very attractive way of doing that. Tons faster.
>
> However I'd be a little concerned over the increased work which a boring
> old write() has to do. Perhaps we could add a special code path
> for it:

Thank you,
Hirokazu Takahashi.

--- linux/mm/filemap.c.ORG Wed Sep 11 19:48:00 2030
+++ linux/mm/filemap.c Fri Sep 13 16:08:51 2030
@@ -1940,6 +1940,48 @@ filemap_copy_from_user(struct page *page
         return left;
 }
 
+static inline int
+filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
+ const struct iovec *iov, size_t base, unsigned bytes)
+{
+ char *kaddr;
+ int left = 0;
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ while (bytes) {
+ char *buf = iov->iov_base + base;
+ int copy = min(bytes, iov->iov_len - base);
+ base = 0;
+ if ((left = __copy_from_user(kaddr + offset, buf, copy)))
+ break;
+ bytes -= copy;
+ offset += copy;
+ iov++;
+ }
+ kunmap_atomic(kaddr, KM_USER0);
+ return left;
+}
+
+static inline void
+filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, unsigned bytes)
+{
+ const struct iovec *iov = *iovp;
+ size_t base = *basep;
+
+ while (bytes) {
+ int copy = min(bytes, iov->iov_len - base);
+ bytes -= copy;
+ base += copy;
+ if (iov->iov_len == base) {
+ iov++;
+ base = 0;
+ }
+ }
+ *iovp = iov;
+ *basep = base;
+}
+
+
 /*
  * Write to a file through the page cache.
  *
@@ -1968,9 +2010,8 @@ generic_file_write_nolock(struct file *f
         unsigned bytes;
         time_t time_now;
         struct pagevec lru_pvec;
- struct iovec *cur_iov;
- unsigned iov_bytes; /* Cumulative count to the end of the
- current iovec */
+ const struct iovec *cur_iov = iov; /* current iovec */
+ unsigned iov_base = 0; /* offset in the current iovec */
         unsigned long seg;
         char *buf;
 
@@ -2102,9 +2143,7 @@ generic_file_write_nolock(struct file *f
                 goto out_status;
         }
 
- cur_iov = (struct iovec *)iov;
- iov_bytes = cur_iov->iov_len;
- buf = cur_iov->iov_base;
+ buf = iov->iov_base;
         do {
                 unsigned long index;
                 unsigned long offset;
@@ -2115,8 +2154,6 @@ generic_file_write_nolock(struct file *f
                 bytes = PAGE_CACHE_SIZE - offset;
                 if (bytes > count)
                         bytes = count;
- if (bytes + written > iov_bytes)
- bytes = iov_bytes - written;
 
                 /*
                  * Bring in the user page that we will copy from _first_.
@@ -2144,7 +2181,12 @@ generic_file_write_nolock(struct file *f
                                 vmtruncate(inode, inode->i_size);
                         break;
                 }
- page_fault = filemap_copy_from_user(page, offset, buf, bytes);
+
+ if (nr_segs == 1) {
+ page_fault = filemap_copy_from_user(page, offset, buf, bytes);
+ } else {
+ page_fault = filemap_copy_from_user_iovec(page, offset, cur_iov, iov_base, bytes);
+ }
                 status = a_ops->commit_write(file, page, offset, offset+bytes);
                 if (unlikely(page_fault)) {
                         status = -EFAULT;
@@ -2157,11 +2199,8 @@ generic_file_write_nolock(struct file *f
                                 count -= status;
                                 pos += status;
                                 buf += status;
- if (written == iov_bytes && count) {
- cur_iov++;
- iov_bytes += cur_iov->iov_len;
- buf = cur_iov->iov_base;
- }
+ if (nr_segs > 1)
+ filemap_set_next_iovec(&cur_iov, &iov_base, status);
                         }
                 }
                 if (!PageReferenced(page))
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Sep 15 2002 - 22:00:32 EST