Re: [PATCH v3] module: replace module_layout with module_memory

From: Christophe Leroy
Date: Sat Jan 28 2023 - 02:43:18 EST




Le 27/01/2023 à 00:36, Song Liu a écrit :
> module_layout manages different types of memory (text, data, rodata, etc.)
> in one allocation, which is problematic for some reasons:
>
> 1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
> 2. It is hard to use huge pages in modules (and not break strict rwx).
> 3. Many archs uses module_layout for arch-specific data, but it is not
> obvious how these data are used (are they RO, RX, or RW?)
>
> Improve the scenario by replacing 2 (or 3) module_layout per module with
> up to 7 module_memory per module:
>
> MOD_MEM_TYPE_TEXT,
> MOD_MEM_TYPE_DATA,
> MOD_MEM_TYPE_RODATA,
> MOD_MEM_TYPE_RO_AFTER_INIT,
> MOD_MEM_TYPE_INIT_TEXT,
> MOD_MEM_TYPE_INIT_DATA,
> MOD_MEM_TYPE_INIT_RODATA,
>
> and allocating them separately. This adds slightly more entries to
> mod_tree (from up to 3 entries per module, to up to 7 entries per
> module). However, this at most adds a small constant overhead to
> __module_address(), which is expected to be fast.
>
> Various archs use module_layout for different data. These data are put
> into different module_memory based on their location in module_layout.
> IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
> data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.
>
> module_memory simplifies quite some of the module code. For example,
> ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
> different allocator for the data. kernel/module/strict_rwx.c is also
> much cleaner with module_memory.
>
> Signed-off-by: Song Liu <song@xxxxxxxxxx>
> Cc: Luis Chamberlain <mcgrof@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Guenter Roeck <linux@xxxxxxxxxxxx>
>
> ---
>
> This is the preparation work for the type aware module_alloc() discussed
> in [1]. While this work is not covered much in the discussion, it is a
> critical step of the effort.
>
> As this part grows pretty big (~1000 lines, + and -), I would like get
> some feedback on it, so that I know it is on the right track.
>
> Please share your comments. Thanks!
>
> Test coverage: Tested on x86_64.
> Build tested by kernel test bot in [2]. The only regression in [2] was a
> typo in parisc, which is also fixed.
>
> [1] https://lore.kernel.org/linux-mm/20221107223921.3451913-1-song@xxxxxxxxxx/T/#u
> [2] https://lore.kernel.org/linux-raid/63b8827e.clJQX2wg+I+tiX7m%25lkp@xxxxxxxxx/T/#u
>
> Changes v1 => v2:
> 1. Add data_addr_[min|max] for CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> case.
>
> Changes v2 => v3:
> 1. Fix and remove the last use of INIT_OFFSET_MASK.
> 2. Add more information in the commit log. (Luis Chamberlain)
> 3. Rebase and fix issues in x86/calltrunks.
> 4. Minor cleanup.
> ---
> arch/alpha/kernel/module.c | 3 +-
> arch/arc/kernel/unwind.c | 9 +-
> arch/arm/kernel/module-plts.c | 5 +-
> arch/arm64/kernel/module-plts.c | 5 +-
> arch/ia64/kernel/module.c | 31 ++-
> arch/mips/kernel/vpe.c | 11 +-
> arch/parisc/kernel/module.c | 53 +++--
> arch/powerpc/kernel/module_32.c | 10 +-
> arch/s390/kernel/module.c | 30 +--
> arch/x86/kernel/callthunks.c | 5 +-
> arch/x86/kernel/module.c | 4 +-
> include/linux/module.h | 65 +++---
> kernel/module/internal.h | 47 ++--
> kernel/module/kallsyms.c | 60 ++---
> kernel/module/kdb.c | 17 +-
> kernel/module/main.c | 378 +++++++++++++++++++-------------
> kernel/module/procfs.c | 17 +-
> kernel/module/strict_rwx.c | 113 +++-------
> kernel/module/tree_lookup.c | 43 ++--
> 19 files changed, 497 insertions(+), 409 deletions(-)
>

> diff --git a/include/linux/module.h b/include/linux/module.h
> index 8c5909c0076c..c5e78cb47e13 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -320,17 +320,22 @@ struct mod_tree_node {
> struct latch_tree_node node;
> };
>
> -struct module_layout {
> - /* The actual code + data. */
> +enum mod_mem_type {
> + MOD_MEM_TYPE_TEXT,
> + MOD_MEM_TYPE_DATA,
> + MOD_MEM_TYPE_RODATA,
> + MOD_MEM_TYPE_RO_AFTER_INIT,
> + MOD_MEM_TYPE_INIT_TEXT,
> + MOD_MEM_TYPE_INIT_DATA,
> + MOD_MEM_TYPE_INIT_RODATA,
> +
> + MOD_MEM_NUM_TYPES,
> + MOD_MEM_TYPE_INVALID = -1,
> +};

Ok, so we agreed to keep it as a table with enums. Fair enough.

However, can we try to make it less ugly and more readable ?

I don't thing the enums needs to be prefixed by MOD_MEM_TYPE_
Would be enough with MOD_TEXT, MOD_DATA, MOD_RODATA, MOD_RO_AFTER_INIT,
MOD_INIT_TEXT, MOD_INIT_DATA, MOD_INIT_RODATA, MOD_INVALID.

> +
> +struct module_memory {
> void *base;
> - /* Total size. */
> unsigned int size;
> - /* The size of the executable code. */
> - unsigned int text_size;
> - /* Size of RO section of the module (text+rodata) */
> - unsigned int ro_size;
> - /* Size of RO after init section */
> - unsigned int ro_after_init_size;
>
> #ifdef CONFIG_MODULES_TREE_LOOKUP
> struct mod_tree_node mtn;
> @@ -339,9 +344,9 @@ struct module_layout {
>
> #ifdef CONFIG_MODULES_TREE_LOOKUP
> /* Only touch one cacheline for common rbtree-for-core-layout case. */
> -#define __module_layout_align ____cacheline_aligned
> +#define __module_memory_align ____cacheline_aligned
> #else
> -#define __module_layout_align
> +#define __module_memory_align
> #endif
>
> struct mod_kallsyms {
> @@ -418,12 +423,8 @@ struct module {
> /* Startup function. */
> int (*init)(void);
>
> - /* Core layout: rbtree is accessed frequently, so keep together. */
> - struct module_layout core_layout __module_layout_align;
> - struct module_layout init_layout;
> -#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> - struct module_layout data_layout;
> -#endif
> + /* rbtree is accessed frequently, so keep together. */
> + struct module_memory mod_mem[MOD_MEM_NUM_TYPES] __module_memory_align;

We are already in a struct called module, so the module_memory struct
could be called mem[MOD_MEM_NUM_TYPES]

>
> /* Arch-specific module values */
> struct mod_arch_specific arch;
> @@ -573,23 +574,35 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
> bool is_module_percpu_address(unsigned long addr);
> bool is_module_text_address(unsigned long addr);
>
> +static inline bool within_module_mem_type(unsigned long addr,
> + const struct module *mod,
> + enum mod_mem_type type)
> +{
> + const struct module_memory *mod_mem;
> +
> + if (WARN_ON_ONCE(type < MOD_MEM_TYPE_TEXT || type >= MOD_MEM_NUM_TYPES))

Here I would rather use 0 instead of MOD_MEM_TYPE_TEXT because
MOD_MEM_TYPE_TEXT may change in the future.

> + return false;
> +
> + mod_mem = &mod->mod_mem[type];

I can't see the added value of the mod_ prefix.

Would read better as

mem = &mod->mem[type];

return (unsigned long)mem->base <= addr && addr < (unsigned
long)mem->base + mem->size;

And could be even more readable as:

unsigned long base, size;

base = (unsigned long)mod->mod_mem[type].base;
size = mod->mod_mem[type].size;

return base <= addr && addr < base + size;


> + return (unsigned long)mod_mem->base <= addr &&
> + addr < (unsigned long)mod_mem->base + mod_mem->size;
> +}
> +
> static inline bool within_module_core(unsigned long addr,
> const struct module *mod)
> {
> -#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> - if ((unsigned long)mod->data_layout.base <= addr &&
> - addr < (unsigned long)mod->data_layout.base + mod->data_layout.size)
> - return true;
> -#endif
> - return (unsigned long)mod->core_layout.base <= addr &&
> - addr < (unsigned long)mod->core_layout.base + mod->core_layout.size;
> + return within_module_mem_type(addr, mod, MOD_MEM_TYPE_TEXT) ||
> + within_module_mem_type(addr, mod, MOD_MEM_TYPE_DATA) ||
> + within_module_mem_type(addr, mod, MOD_MEM_TYPE_RODATA) ||
> + within_module_mem_type(addr, mod, MOD_MEM_TYPE_RO_AFTER_INIT);
> }
>
> static inline bool within_module_init(unsigned long addr,
> const struct module *mod)
> {
> - return (unsigned long)mod->init_layout.base <= addr &&
> - addr < (unsigned long)mod->init_layout.base + mod->init_layout.size;
> + return within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_TEXT) ||
> + within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_DATA) ||
> + within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_RODATA);
> }
>
> static inline bool within_module(unsigned long addr, const struct module *mod)

> diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
> index 4523f99b0358..9ed233de312a 100644
> --- a/kernel/module/kallsyms.c
> +++ b/kernel/module/kallsyms.c
> @@ -113,11 +117,13 @@ void layout_symtab(struct module *mod, struct load_info *info)
> Elf_Shdr *strsect = info->sechdrs + info->index.str;
> const Elf_Sym *src;
> unsigned int i, nsrc, ndst, strtab_size = 0;
> + struct module_memory *mod_mem_data = &mod->mod_mem[MOD_MEM_TYPE_DATA];
> + struct module_memory *mod_mem_init_data = &mod->mod_mem[MOD_MEM_TYPE_INIT_DATA];

One more exemple of something that would be more readable as:

struct module_memory *mod_data = &mod->mem[MOD_DATA];
struct module_memory *mod_init_data = &mod->mod[MOD_INIT_DATA];
>
> /* Put symbol section at end of init part of module. */
> symsect->sh_flags |= SHF_ALLOC;
> - symsect->sh_entsize = module_get_offset(mod, &mod->init_layout.size, symsect,
> - info->index.sym) | INIT_OFFSET_MASK;
> + symsect->sh_entsize = module_get_offset_and_type(mod, MOD_MEM_TYPE_INIT_DATA,
> + symsect, info->index.sym);
> pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
>
> src = (void *)info->hdr + symsect->sh_offset;


> diff --git a/kernel/module/main.c b/kernel/module/main.c
> index d3be89de706d..32f63c6eaa61 100644
> --- a/kernel/module/main.c
> +++ b/kernel/module/main.c
> @@ -923,11 +930,30 @@ static ssize_t store_uevent(struct module_attribute *mattr,
> struct module_attribute module_uevent =
> __ATTR(uevent, 0200, NULL, store_uevent);
>
> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> +
> +static ssize_t show_coresize(struct module_attribute *mattr,
> + struct module_kobject *mk, char *buffer)
> +{
> + unsigned int size = 0;

size is not used

> +
> + return sprintf(buffer, "%u\n",
> + mk->mod->mod_mem[MOD_MEM_TYPE_TEXT].size);

Wouldn't it read better as:

return sprintf(buffer, "%u\n", mk->mod->mem[MOD_TEXT].size);

> +}
> +
> +#else
> +
> static ssize_t show_coresize(struct module_attribute *mattr,
> struct module_kobject *mk, char *buffer)
> {
> - return sprintf(buffer, "%u\n", mk->mod->core_layout.size);
> + enum mod_mem_type type;
> + unsigned int size = 0;
> +
> + for_core_mod_mem_type(type)
> + size += mk->mod->mod_mem[type].size;
> + return sprintf(buffer, "%u\n", size);
> }
> +#endif
>
> static struct module_attribute modinfo_coresize =
> __ATTR(coresize, 0444, show_coresize, NULL);


Christophe