Rewriting read_kmem()/write_kmem() ?

From: Tetsuo Handa
Date: Mon Aug 26 2019 - 11:02:27 EST


On 2019/08/26 20:19, Ingo Molnar wrote:
> Basically making IO primitives interruptible is the norm and it's a
> quality of implementation issue: it's only a historic accident that
> /dev/mem read()s aren't.
>
> So let's try and make it interruptible as the #3 patch I sent did - of
> course if anything breaks we'll have to undo it. But if we can get away
> with then by all means let's do so - even shorter reads can generate
> nasty long processing latencies.
>
> Ok?

This is how read_kmem()/write_kmem() could be rewritten (before doing
s/fatal_signal_pending/signal_pending/ change). Only compile tested.
Do we want to try this change using several patches?

drivers/char/mem.c | 202 +++++++++++++++++++----------------------------------
1 file changed, 73 insertions(+), 129 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 9eb564c..12bca2a 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -430,114 +430,32 @@ static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
return mmap_mem(file, vma);
}

-/*
- * This function reads the *virtual* memory as seen by the kernel.
- */
-static ssize_t read_kmem(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_copy_kmem(unsigned long p, char __user *buf, size_t count,
+ loff_t *ppos, const bool is_write)
{
- unsigned long p = *ppos;
- ssize_t low_count, read, sz;
- char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
- int err = 0;
-
- read = 0;
- if (p < (unsigned long) high_memory) {
- low_count = count;
- if (count > (unsigned long)high_memory - p)
- low_count = (unsigned long)high_memory - p;
+ ssize_t copied = 0;
+ ssize_t sz;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE && low_count > 0) {
- sz = size_inside_page(p, low_count);
- if (clear_user(buf, sz))
- return -EFAULT;
- buf += sz;
- p += sz;
- read += sz;
- low_count -= sz;
- count -= sz;
- }
-#endif
- while (low_count > 0) {
- sz = size_inside_page(p, low_count);
-
- /*
- * On ia64 if a page has been mapped somewhere as
- * uncached, then it must also be accessed uncached
- * by the kernel or data corruption may occur
- */
- kbuf = xlate_dev_kmem_ptr((void *)p);
- if (!virt_addr_valid(kbuf))
- return -ENXIO;
-
- if (copy_to_user(buf, kbuf, sz))
- return -EFAULT;
- buf += sz;
- p += sz;
- read += sz;
- low_count -= sz;
- count -= sz;
- if (should_stop_iteration()) {
- count = 0;
- break;
- }
- }
- }
-
- if (count > 0) {
- kbuf = (char *)__get_free_page(GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
- while (count > 0) {
- sz = size_inside_page(p, count);
- if (!is_vmalloc_or_module_addr((void *)p)) {
- err = -ENXIO;
- break;
- }
- sz = vread(kbuf, (char *)p, sz);
- if (!sz)
- break;
- if (copy_to_user(buf, kbuf, sz)) {
- err = -EFAULT;
- break;
- }
- count -= sz;
- buf += sz;
- read += sz;
- p += sz;
- if (should_stop_iteration())
- break;
- }
- free_page((unsigned long)kbuf);
- }
- *ppos = p;
- return read ? read : err;
-}
-
-
-static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t written, sz;
- unsigned long copied;
-
- written = 0;
-#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
sz = size_inside_page(p, count);
- /* Hmm. Do something? */
+ if (is_write) {
+ /* Hmm. Do something? */
+ } else {
+ if (clear_user(buf, sz))
+ return -EFAULT;
+ }
buf += sz;
p += sz;
count -= sz;
- written += sz;
+ copied += sz;
}
#endif

while (count > 0) {
void *ptr;
+ unsigned long n;

sz = size_inside_page(p, count);

@@ -550,78 +468,104 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
if (!virt_addr_valid(ptr))
return -ENXIO;

- copied = copy_from_user(ptr, buf, sz);
- if (copied) {
- written += sz - copied;
- if (written)
+ if (is_write)
+ n = copy_from_user(ptr, buf, sz);
+ else
+ n = copy_to_user(buf, buf, sz);
+ if (n) {
+ copied += sz - n;
+ if (copied)
break;
return -EFAULT;
}
buf += sz;
p += sz;
count -= sz;
- written += sz;
+ copied += sz;
if (should_stop_iteration())
break;
}

- *ppos += written;
- return written;
+ *ppos += copied;
+ return copied;
}

-/*
- * This function writes to the *virtual* memory as seen by the kernel.
- */
-static ssize_t write_kmem(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t copy_kmem(char __user *buf, size_t count, loff_t *ppos,
+ const bool is_write)
{
unsigned long p = *ppos;
- ssize_t wrote = 0;
- ssize_t virtr = 0;
- char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+ ssize_t copied = 0;
int err = 0;

if (p < (unsigned long) high_memory) {
- unsigned long to_write = min_t(unsigned long, count,
- (unsigned long)high_memory - p);
- wrote = do_write_kmem(p, buf, to_write, ppos);
- if (wrote != to_write)
- return wrote;
- p += wrote;
- buf += wrote;
- count -= wrote;
+ unsigned long to_copy = min_t(unsigned long, count,
+ (unsigned long)high_memory - p);
+
+ copied = do_copy_kmem(p, buf, to_copy, ppos, is_write);
+ if (copied != to_copy)
+ return copied;
+ p += copied;
+ buf += copied;
+ count -= copied;
}

if (count > 0) {
- kbuf = (char *)__get_free_page(GFP_KERNEL);
+ /* k-addr because vread()/vwrite() takes vmlist_lock rwlock */
+ char *kbuf = (char *)__get_free_page(GFP_KERNEL);
+
if (!kbuf)
- return wrote ? wrote : -ENOMEM;
+ return copied ? copied : -ENOMEM;
while (count > 0) {
- unsigned long sz = size_inside_page(p, count);
- unsigned long n;
+ ssize_t sz = size_inside_page(p, count);

if (!is_vmalloc_or_module_addr((void *)p)) {
err = -ENXIO;
break;
}
- n = copy_from_user(kbuf, buf, sz);
- if (n) {
- err = -EFAULT;
- break;
+ if (is_write) {
+ if (copy_from_user(kbuf, buf, sz)) {
+ err = -EFAULT;
+ break;
+ }
+ vwrite(kbuf, (char *)p, sz);
+ } else {
+ sz = vread(kbuf, (char *)p, sz);
+ if (!sz)
+ break;
+ if (copy_to_user(buf, kbuf, sz)) {
+ err = -EFAULT;
+ break;
+ }
}
- vwrite(kbuf, (char *)p, sz);
count -= sz;
buf += sz;
- virtr += sz;
+ copied += sz;
p += sz;
if (should_stop_iteration())
break;
}
free_page((unsigned long)kbuf);
}
-
*ppos = p;
- return virtr + wrote ? : err;
+ return copied ? copied : err;
+}
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return copy_kmem(buf, count, ppos, false);
+}
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return copy_kmem((char __user *) buf, count, ppos, true);
}

static ssize_t read_port(struct file *file, char __user *buf,
--
1.8.3.1