Re: [PATCH 4/4] mm: remove compat numa syscalls

From: Christoph Hellwig
Date: Sat Sep 19 2020 - 01:42:00 EST


On Fri, Sep 18, 2020 at 03:24:39PM +0200, Arnd Bergmann wrote:
> The compat implementations for mbind, get_mempolicy, set_mempolicy
> and migrate_pages are just there to handle the subtly different
> layout of bitmaps on 32-bit hosts.
>
> The compat implementation however lacks some of the checks that
> are present in the native one, in particular for checking that
> the extra bits are all zero when user space has a larger mask
> size than the kernel. Worse, those extra bits do not get cleared
> when copying in or out of the kernel, which can lead to incorrect
> data as well.
>
> Unify the implementation to handle the compat bitmap layout directly
> in the get_nodes() and copy_nodes_to_user() helpers. Splitting out
> the get_bitmap() helper from get_nodes() also helps readability of the
> native case.
>
> On x86, two additional problems are addressed by this: compat tasks can
> pass a bitmap at the end of a mapping, causing a fault when reading
> across the page boundary for a 64-bit word. x32 tasks might also run
> into problems with get_mempolicy corrupting data when an odd number of
> 32-bit words gets passed.
>
> On parisc the migrate_pages() system call apparently had the wrong
> calling convention, as big-endian architectures expect the words
> inside of a bitmap to be swapped. This is not a problem though
> since parisc has no NUMA support.
>
> Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>
> ---
> arch/arm64/include/asm/unistd32.h | 8 +-
> arch/mips/kernel/syscalls/syscall_n32.tbl | 8 +-
> arch/mips/kernel/syscalls/syscall_o32.tbl | 8 +-
> arch/parisc/kernel/syscalls/syscall.tbl | 6 +-
> arch/powerpc/kernel/syscalls/syscall.tbl | 8 +-
> arch/s390/kernel/syscalls/syscall.tbl | 8 +-
> arch/sparc/kernel/syscalls/syscall.tbl | 8 +-
> arch/x86/entry/syscalls/syscall_32.tbl | 2 +-
> include/linux/compat.h | 15 --
> include/uapi/asm-generic/unistd.h | 8 +-
> kernel/kexec.c | 6 +-
> kernel/sys_ni.c | 4 -
> mm/mempolicy.c | 193 +++++-----------------
> 13 files changed, 79 insertions(+), 203 deletions(-)
>
> diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
> index af793775ba98..31479f7120a0 100644
> --- a/arch/arm64/include/asm/unistd32.h
> +++ b/arch/arm64/include/asm/unistd32.h
> @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
> #define __NR_inotify_rm_watch 318
> __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
> #define __NR_mbind 319
> -__SYSCALL(__NR_mbind, compat_sys_mbind)
> +__SYSCALL(__NR_mbind, sys_mbind)
> #define __NR_get_mempolicy 320
> -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
> +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
> #define __NR_set_mempolicy 321
> -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
> +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
> #define __NR_openat 322
> __SYSCALL(__NR_openat, compat_sys_openat)
> #define __NR_mkdirat 323
> @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
> #define __NR_io_pgetevents 399
> __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
> #define __NR_migrate_pages 400
> -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
> +__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
> #define __NR_kexec_file_load 401
> __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
> /* 402 is unused */
> diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
> index 7fa1ca45e44c..15fda882d07e 100644
> --- a/arch/mips/kernel/syscalls/syscall_n32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
> @@ -239,9 +239,9 @@
> 228 n32 clock_nanosleep sys_clock_nanosleep_time32
> 229 n32 tgkill sys_tgkill
> 230 n32 utimes sys_utimes_time32
> -231 n32 mbind compat_sys_mbind
> -232 n32 get_mempolicy compat_sys_get_mempolicy
> -233 n32 set_mempolicy compat_sys_set_mempolicy
> +231 n32 mbind sys_mbind
> +232 n32 get_mempolicy sys_get_mempolicy
> +233 n32 set_mempolicy sys_set_mempolicy
> 234 n32 mq_open compat_sys_mq_open
> 235 n32 mq_unlink sys_mq_unlink
> 236 n32 mq_timedsend sys_mq_timedsend_time32
> @@ -258,7 +258,7 @@
> 247 n32 inotify_init sys_inotify_init
> 248 n32 inotify_add_watch sys_inotify_add_watch
> 249 n32 inotify_rm_watch sys_inotify_rm_watch
> -250 n32 migrate_pages compat_sys_migrate_pages
> +250 n32 migrate_pages sys_migrate_pages
> 251 n32 openat sys_openat
> 252 n32 mkdirat sys_mkdirat
> 253 n32 mknodat sys_mknodat
> diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
> index 194c7fbeedf7..6591388a9d88 100644
> --- a/arch/mips/kernel/syscalls/syscall_o32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
> @@ -279,9 +279,9 @@
> 265 o32 clock_nanosleep sys_clock_nanosleep_time32
> 266 o32 tgkill sys_tgkill
> 267 o32 utimes sys_utimes_time32
> -268 o32 mbind sys_mbind compat_sys_mbind
> -269 o32 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> -270 o32 set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
> +268 o32 mbind sys_mbind
> +269 o32 get_mempolicy sys_get_mempolicy
> +270 o32 set_mempolicy sys_set_mempolicy
> 271 o32 mq_open sys_mq_open compat_sys_mq_open
> 272 o32 mq_unlink sys_mq_unlink
> 273 o32 mq_timedsend sys_mq_timedsend_time32
> @@ -298,7 +298,7 @@
> 284 o32 inotify_init sys_inotify_init
> 285 o32 inotify_add_watch sys_inotify_add_watch
> 286 o32 inotify_rm_watch sys_inotify_rm_watch
> -287 o32 migrate_pages sys_migrate_pages compat_sys_migrate_pages
> +287 o32 migrate_pages sys_migrate_pages
> 288 o32 openat sys_openat compat_sys_openat
> 289 o32 mkdirat sys_mkdirat
> 290 o32 mknodat sys_mknodat
> diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
> index 5c17edaffe70..30f3c0146abf 100644
> --- a/arch/parisc/kernel/syscalls/syscall.tbl
> +++ b/arch/parisc/kernel/syscalls/syscall.tbl
> @@ -292,9 +292,9 @@
> 258 32 clock_nanosleep sys_clock_nanosleep_time32
> 258 64 clock_nanosleep sys_clock_nanosleep
> 259 common tgkill sys_tgkill
> -260 common mbind sys_mbind compat_sys_mbind
> -261 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> -262 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
> +260 common mbind sys_mbind
> +261 common get_mempolicy sys_get_mempolicy
> +262 common set_mempolicy sys_set_mempolicy
> # 263 was vserver
> 264 common add_key sys_add_key
> 265 common request_key sys_request_key
> diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
> index 04fb42d7b377..4f5216320721 100644
> --- a/arch/powerpc/kernel/syscalls/syscall.tbl
> +++ b/arch/powerpc/kernel/syscalls/syscall.tbl
> @@ -338,10 +338,10 @@
> 256 64 sys_debug_setcontext sys_ni_syscall
> 256 spu sys_debug_setcontext sys_ni_syscall
> # 257 reserved for vserver
> -258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
> -259 nospu mbind sys_mbind compat_sys_mbind
> -260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> -261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
> +258 nospu migrate_pages sys_migrate_pages
> +259 nospu mbind sys_mbind
> +260 nospu get_mempolicy sys_get_mempolicy
> +261 nospu set_mempolicy sys_set_mempolicy
> 262 nospu mq_open sys_mq_open compat_sys_mq_open
> 263 nospu mq_unlink sys_mq_unlink
> 264 32 mq_timedsend sys_mq_timedsend_time32
> diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
> index 3197965d45e9..70c0b830d14f 100644
> --- a/arch/s390/kernel/syscalls/syscall.tbl
> +++ b/arch/s390/kernel/syscalls/syscall.tbl
> @@ -274,9 +274,9 @@
> 265 common statfs64 sys_statfs64 compat_sys_statfs64
> 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
> 267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages
> -268 common mbind sys_mbind compat_sys_mbind
> -269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> -270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
> +268 common mbind sys_mbind sys_mbind
> +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy
> +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy
> 271 common mq_open sys_mq_open compat_sys_mq_open
> 272 common mq_unlink sys_mq_unlink sys_mq_unlink
> 273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32
> @@ -293,7 +293,7 @@
> 284 common inotify_init sys_inotify_init sys_inotify_init
> 285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch
> 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
> -287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
> +287 common migrate_pages sys_migrate_pages sys_migrate_pages
> 288 common openat sys_openat compat_sys_openat
> 289 common mkdirat sys_mkdirat sys_mkdirat
> 290 common mknodat sys_mknodat sys_mknodat
> diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
> index e36ac364e61a..50ff839a2661 100644
> --- a/arch/sparc/kernel/syscalls/syscall.tbl
> +++ b/arch/sparc/kernel/syscalls/syscall.tbl
> @@ -365,10 +365,10 @@
> 299 common unshare sys_unshare
> 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
> 301 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
> -302 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
> -303 common mbind sys_mbind compat_sys_mbind
> -304 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> -305 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
> +302 common migrate_pages sys_migrate_pages
> +303 common mbind sys_mbind
> +304 common get_mempolicy sys_get_mempolicy
> +305 common set_mempolicy sys_set_mempolicy
> 306 common kexec_load sys_kexec_load sys_kexec_load
> 307 common move_pages sys_move_pages
> 308 common getcpu sys_getcpu
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index b3263b8b2eae..d07c3fbd4697 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -286,7 +286,7 @@
> 272 i386 fadvise64_64 sys_ia32_fadvise64_64
> 273 i386 vserver
> 274 i386 mbind sys_mbind
> -275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
> +275 i386 get_mempolicy sys_get_mempolicy
> 276 i386 set_mempolicy sys_set_mempolicy
> 277 i386 mq_open sys_mq_open compat_sys_mq_open
> 278 i386 mq_unlink sys_mq_unlink
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index db1d7ac2c9e0..be06367b336c 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -749,21 +749,6 @@ asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr
> /* mm/fadvise.c: No generic prototype for fadvise64_64 */
>
> /* mm/, CONFIG_MMU only */
> -asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
> - compat_ulong_t mode,
> - compat_ulong_t __user *nmask,
> - compat_ulong_t maxnode, compat_ulong_t flags);
> -asmlinkage long compat_sys_get_mempolicy(int __user *policy,
> - compat_ulong_t __user *nmask,
> - compat_ulong_t maxnode,
> - compat_ulong_t addr,
> - compat_ulong_t flags);
> -asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
> - compat_ulong_t maxnode);
> -asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
> - compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
> - const compat_ulong_t __user *new_nodes);
> -
> asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
> compat_pid_t pid, int sig,
> struct compat_siginfo __user *uinfo);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 4da51702fb21..4e31f9b68a8f 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -673,13 +673,13 @@ __SYSCALL(__NR_madvise, sys_madvise)
> #define __NR_remap_file_pages 234
> __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
> #define __NR_mbind 235
> -__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
> +__SYSCALL(__NR_mbind, sys_mbind)
> #define __NR_get_mempolicy 236
> -__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
> +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
> #define __NR_set_mempolicy 237
> -__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
> +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
> #define __NR_migrate_pages 238
> -__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
> +__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
> #define __NR_move_pages 239
> __SYSCALL(__NR_move_pages, sys_move_pages)
> #endif
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index 1ef7d3dc906f..0fecf2370be1 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -30,11 +30,13 @@ static int copy_user_segment_list(struct kimage *image,
> image->nr_segments = nr_segments;
> segment_bytes = nr_segments * sizeof(*segments);
> if (in_compat_syscall()) {
> - struct compat_kexec_segment __user *cs = (void __user *)segments;
> + struct compat_kexec_segment __user *cs;
> struct compat_kexec_segment segment;
> int i;
> +
> + cs = (struct compat_kexec_segment __user *)segments;
> for (i=0; i< nr_segments; i++) {
> - copy_from_user(&segment, &cs[i], sizeof(segment));
> + ret = copy_from_user(&segment, &cs[i], sizeof(segment));
> if (ret)
> break;
>
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index 783a24ceee88..0850111f888e 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -282,13 +282,9 @@ COND_SYSCALL(mincore);
> COND_SYSCALL(madvise);
> COND_SYSCALL(remap_file_pages);
> COND_SYSCALL(mbind);
> -COND_SYSCALL_COMPAT(mbind);
> COND_SYSCALL(get_mempolicy);
> -COND_SYSCALL_COMPAT(get_mempolicy);
> COND_SYSCALL(set_mempolicy);
> -COND_SYSCALL_COMPAT(set_mempolicy);
> COND_SYSCALL(migrate_pages);
> -COND_SYSCALL_COMPAT(migrate_pages);
> COND_SYSCALL(move_pages);
>
> COND_SYSCALL(perf_event_open);
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index eddbe4e56c73..2e1b90143b2c 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1374,16 +1374,30 @@ static long do_mbind(unsigned long start, unsigned long len,
> /*
> * User space interface with variable sized bitmaps for nodelists.
> */
> +static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask,
> + unsigned long maxnode)
> +{
> + unsigned long nlongs = BITS_TO_LONGS(maxnode);
> + int ret;
> +
> + if (in_compat_syscall())
> + ret = compat_get_bitmap(mask, (void __user *)nmask, maxnode);

I'd either pass void __user all the way, or do an explicit case from
the native to the compat version in the compat handler.

> + else
> + ret = copy_from_user(mask, nmask, nlongs*sizeof(unsigned long));

That whole BITS_TO_LONGS(b) * sizeof(unsigned long) pattern is
duplicated in various places including the checking of compat vs native
and probably want a helper that includes the in_compat_syscall() check.