Re: [PATCH v2] x86: bring back rep movsq for user access on CPUs without ERMS

From: Linus Torvalds
Date: Sun Sep 03 2023 - 17:05:59 EST


On Sun, 3 Sept 2023 at 13:49, Mateusz Guzik <mjguzik@xxxxxxxxx> wrote:
>
> "real fstat" is syscall(5, fd, &sb).
>
> Sapphire Rapids, will-it-scale, ops/s
>
> stock fstat 5088199
> patched fstat 7625244 (+49%)
> real fstat 8540383 (+67% / +12%)
>
> It dodges lockref et al, but it does not dodge SMAP which accounts for
> the difference.

Side note, since I was looking at this, I hacked up a quick way for
architectures to do their own optimized cp_new_stat() that avoids the
double-buffering.

Sadly it *is* architecture-specific due to padding and
architecture-specific field sizes (and thus EOVERFLOW rules), but it
is what it is.

I don't know how much it matters, but it might make a difference. And
'stat()' is most certainly worth optimizing for, even if glibc has
made our life more difficult.

Want to try out another entirely untested patch? Attached.

Linus
arch/x86/kernel/sys_x86_64.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
fs/stat.c | 2 +-
include/linux/stat.h | 2 ++
3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index c783aeb37dce..fca647f61bc1 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -22,6 +22,50 @@
#include <asm/elf.h>
#include <asm/ia32.h>

+int cp_new_stat(struct kstat *stat, struct stat __user *ubuf)
+{ /* Write *stat into the user's struct stat field by field, with no temporary copy; returns 0 or -EFAULT. */
+ typeof(ubuf->st_uid) uid;
+ typeof(ubuf->st_gid) gid;
+ typeof(ubuf->st_dev) dev = new_encode_dev(stat->dev);
+ typeof(ubuf->st_rdev) rdev = new_encode_dev(stat->rdev);
+ /* NOTE(review): no -EOVERFLOW checks here; presumably not needed for these field widths - confirm. */
+ SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
+ SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
+ /* All values are computed above; from here on only stores to *ubuf happen, inside one begin/end pair. */
+ if (!user_write_access_begin(ubuf, sizeof(struct stat)))
+ return -EFAULT; /* access was never opened, so there is nothing to close */
+
+ unsafe_put_user(dev, &ubuf->st_dev, Efault);
+ unsafe_put_user(stat->ino, &ubuf->st_ino, Efault);
+ unsafe_put_user(stat->nlink, &ubuf->st_nlink, Efault);
+
+ unsafe_put_user(stat->mode, &ubuf->st_mode, Efault);
+ unsafe_put_user(uid, &ubuf->st_uid, Efault);
+ unsafe_put_user(gid, &ubuf->st_gid, Efault);
+ unsafe_put_user(0, &ubuf->__pad0, Efault); /* padding is written as zero explicitly */
+ unsafe_put_user(rdev, &ubuf->st_rdev, Efault);
+ unsafe_put_user(stat->size, &ubuf->st_size, Efault);
+ unsafe_put_user(stat->blksize, &ubuf->st_blksize, Efault);
+ unsafe_put_user(stat->blocks, &ubuf->st_blocks, Efault);
+ /* Timestamps: seconds and nanoseconds are stored to separate fields. */
+ unsafe_put_user(stat->atime.tv_sec, &ubuf->st_atime, Efault);
+ unsafe_put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec, Efault);
+ unsafe_put_user(stat->mtime.tv_sec, &ubuf->st_mtime, Efault);
+ unsafe_put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec, Efault);
+ unsafe_put_user(stat->ctime.tv_sec, &ubuf->st_ctime, Efault);
+ unsafe_put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec, Efault);
+ unsafe_put_user(0, &ubuf->__unused[0], Efault); /* the three trailing __unused slots are zeroed too */
+ unsafe_put_user(0, &ubuf->__unused[1], Efault);
+ unsafe_put_user(0, &ubuf->__unused[2], Efault);
+
+ user_write_access_end();
+ return 0;
+ /* Target label passed to every unsafe_put_user() above: close the access section, then report the fault. */
+Efault:
+ user_write_access_end();
+ return -EFAULT;
+}
+
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
*/
diff --git a/fs/stat.c b/fs/stat.c
index e187dc79a313..782ad646ed27 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -415,7 +415,7 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
# define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
#endif

-static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
+int __weak cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
{
struct stat tmp;

diff --git a/include/linux/stat.h b/include/linux/stat.h
index 52150570d37a..f6199aa3e1cb 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -63,4 +63,6 @@ struct kstat {
/* file attribute values */
#define STATX_ATTR_CHANGE_MONOTONIC 0x8000000000000000ULL /* version monotonically increases */

+int cp_new_stat(struct kstat *stat, struct stat __user *ubuf);
+
#endif