Re: [Bug 111541] Race between cat /proc/kallsyms and rmmod
From: Rusty Russell
Date: Wed Feb 03 2016 - 18:56:12 EST
bugzilla-daemon@xxxxxxxxxxxxxxxxxxx writes:
> https://bugzilla.kernel.org/show_bug.cgi?id=111541
>
> --- Comment #8 from Weilong Chen <chenweilong@xxxxxxxxxx> ---
> (In reply to Rusty Russell from comment #7)
>> Rusty Russell <rusty@xxxxxxxxxxxxxxx> writes:
>> > And there are other places with the same issue. This is a more
>> > complex, but I think worth it (actually two patches, rolled into one
>> > for testing):
>>
>> And this one actually works...
>>
>> diff --git a/include/linux/module.h b/include/linux/module.h
>> index 4560d8f1545d..2bb0c3085706 100644
>> --- a/include/linux/module.h
>> +++ b/include/linux/module.h
>> @@ -324,6 +324,12 @@ struct module_layout {
>> #define __module_layout_align
>> #endif
>>
>> +struct mod_kallsyms {
>> + Elf_Sym *symtab;
>> + unsigned int num_symtab;
>> + char *strtab;
>> +};
>> +
>> struct module {
>> enum module_state state;
>>
>> @@ -405,15 +411,10 @@ struct module {
>> #endif
>>
>> #ifdef CONFIG_KALLSYMS
>> - /*
>> - * We keep the symbol and string tables for kallsyms.
>> - * The core_* fields below are temporary, loader-only (they
>> - * could really be discarded after module init).
>> - */
>> - Elf_Sym *symtab, *core_symtab;
>> - unsigned int num_symtab, core_num_syms;
>> - char *strtab, *core_strtab;
>> -
>> + /* Protected by RCU and/or module_mutex: use rcu_dereference() */
>> + struct mod_kallsyms *kallsyms;
>> + struct mod_kallsyms core_kallsyms;
>> +
>> /* Section attributes */
>> struct module_sect_attrs *sect_attrs;
>>
>> diff --git a/kernel/module.c b/kernel/module.c
>> index 2149f7003e49..4e4d567139f4 100644
>> --- a/kernel/module.c
>> +++ b/kernel/module.c
>> @@ -303,6 +303,9 @@ struct load_info {
>> struct _ddebug *debug;
>> unsigned int num_debug;
>> bool sig_ok;
>> +#ifdef CONFIG_KALLSYMS
>> + unsigned long mod_kallsyms_init_off;
>> +#endif
>> struct {
>> unsigned int sym, str, mod, vers, info, pcpu;
>> } index;
>> @@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct
>> load_info *info)
>> strsect->sh_flags |= SHF_ALLOC;
>> strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
>> info->index.str) | INIT_OFFSET_MASK;
>> - mod->init_layout.size = debug_align(mod->init_layout.size);
>> pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
>> +
>> + /* We'll tack temporary mod_kallsyms on the end. */
>> + mod->init_layout.size = ALIGN(mod->init_layout.size,
>> + __alignof__(struct mod_kallsyms));
>> + info->mod_kallsyms_init_off = mod->init_layout.size;
>> + mod->init_layout.size += sizeof(struct mod_kallsyms);
>> + mod->init_layout.size = debug_align(mod->init_layout.size);
>> }
>>
>> +/*
>> + * We use the full symtab and strtab which layout_symtab arranged to
>> + * be appended to the init section Later we switch to the cut-down
>> + * core-only one.
>> + */
>> static void add_kallsyms(struct module *mod, const struct load_info *info)
>> {
>> unsigned int i, ndst;
>> @@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const
>> struct load_info *info)
>> char *s;
>> Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
>>
>> - mod->symtab = (void *)symsec->sh_addr;
>> - mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
>> + /* Set up to point into init section. */
>> + mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
>> +
>> + mod->kallsyms->symtab = (void *)symsec->sh_addr;
>> + mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
>> /* Make sure we get permanent strtab: don't use info->strtab. */
>> - mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>> + mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>>
>> /* Set types up while we still have access to sections. */
>> - for (i = 0; i < mod->num_symtab; i++)
>> - mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
>> -
>> - mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
>> - mod->core_strtab = s = mod->core_layout.base + info->stroffs;
>> - src = mod->symtab;
>> - for (ndst = i = 0; i < mod->num_symtab; i++) {
>> + for (i = 0; i < mod->kallsyms->num_symtab; i++)
>> + mod->kallsyms->symtab[i].st_info
>> + = elf_type(&mod->kallsyms->symtab[i], info);
>> +
>> + /* Now populate the cut down core kallsyms for after init. */
>> + mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
>> + mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
>> + src = mod->kallsyms->symtab;
>> + for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
>> if (i == 0 ||
>> is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
>> info->index.pcpu)) {
>> dst[ndst] = src[i];
>> - dst[ndst++].st_name = s - mod->core_strtab;
>> - s += strlcpy(s, &mod->strtab[src[i].st_name],
>> + dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
>> + s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
>> KSYM_NAME_LEN) + 1;
>> }
>> }
>> - mod->core_num_syms = ndst;
>> + mod->core_kallsyms.num_symtab = ndst;
>> }
>> #else
>> static inline void layout_symtab(struct module *mod, struct load_info *info)
>> @@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
>> module_put(mod);
>> trim_init_extable(mod);
>> #ifdef CONFIG_KALLSYMS
>> - mod->num_symtab = mod->core_num_syms;
>> - mod->symtab = mod->core_symtab;
>> - mod->strtab = mod->core_strtab;
>> + /* Switch to core kallsyms now init is done. */
>> + rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
>> #endif
>> mod_tree_remove_init(mod);
>> disable_ro_nx(&mod->init_layout);
>> @@ -3627,6 +3645,11 @@ static inline int is_arm_mapping_symbol(const char
>> *str)
>> && (str[2] == '\0' || str[2] == '.');
>> }
>>
>> +static const char *symname(struct mod_kallsyms *kallsyms, unsigned int
>> symnum)
>> +{
>> + return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
>> +}
>> +
>> static const char *get_ksymbol(struct module *mod,
>> unsigned long addr,
>> unsigned long *size,
>> @@ -3634,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
>> {
>> unsigned int i, best = 0;
>> unsigned long nextval;
>> + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>>
>> /* At worse, next value is at end of module */
>> if (within_module_init(addr, mod))
>> @@ -3643,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
>>
>> /* Scan for closest preceding symbol, and next symbol. (ELF
>> starts real symbols at 1). */
>> - for (i = 1; i < mod->num_symtab; i++) {
>> - if (mod->symtab[i].st_shndx == SHN_UNDEF)
>> + for (i = 1; i < kallsyms->num_symtab; i++) {
>> + if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
>> continue;
>>
>> /* We ignore unnamed symbols: they're uninformative
>> * and inserted at a whim. */
>> - if (mod->symtab[i].st_value <= addr
>> - && mod->symtab[i].st_value > mod->symtab[best].st_value
>> - && *(mod->strtab + mod->symtab[i].st_name) != '\0'
>> - && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
>> + if (*symname(kallsyms, i) == '\0'
>> + || is_arm_mapping_symbol(symname(kallsyms, i)))
>> + continue;
>> +
>> + if (kallsyms->symtab[i].st_value <= addr
>> + && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
>> best = i;
>> - if (mod->symtab[i].st_value > addr
>> - && mod->symtab[i].st_value < nextval
>> - && *(mod->strtab + mod->symtab[i].st_name) != '\0'
>> - && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
>> - nextval = mod->symtab[i].st_value;
>> + if (kallsyms->symtab[i].st_value > addr
>> + && kallsyms->symtab[i].st_value < nextval)
>> + nextval = kallsyms->symtab[i].st_value;
>> }
>>
>> if (!best)
>> return NULL;
>>
>> if (size)
>> - *size = nextval - mod->symtab[best].st_value;
>> + *size = nextval - kallsyms->symtab[best].st_value;
>> if (offset)
>> - *offset = addr - mod->symtab[best].st_value;
>> - return mod->strtab + mod->symtab[best].st_name;
>> + *offset = addr - kallsyms->symtab[best].st_value;
>> + return symname(kallsyms, best);
>> }
>>
>> /* For kallsyms to ask for address resolution. NULL means not found.
>> Careful
>> @@ -3758,19 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned
>> long *value, char *type,
>>
>> preempt_disable();
>> list_for_each_entry_rcu(mod, &modules, list) {
>> + struct mod_kallsyms *kallsyms;
>> +
>> if (mod->state == MODULE_STATE_UNFORMED)
>> continue;
>> - if (symnum < mod->num_symtab) {
>> - *value = mod->symtab[symnum].st_value;
>> - *type = mod->symtab[symnum].st_info;
>> - strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
>> - KSYM_NAME_LEN);
>> + kallsyms = rcu_dereference_sched(mod->kallsyms);
>> + if (symnum < kallsyms->num_symtab) {
>> + *value = kallsyms->symtab[symnum].st_value;
>> + *type = kallsyms->symtab[symnum].st_info;
>> + strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
>> strlcpy(module_name, mod->name, MODULE_NAME_LEN);
>> *exported = is_exported(name, *value, mod);
>> preempt_enable();
>> return 0;
>> }
>> - symnum -= mod->num_symtab;
>> + symnum -= kallsyms->num_symtab;
>> }
>> preempt_enable();
>> return -ERANGE;
>> @@ -3779,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned
>> long *value, char *type,
>> static unsigned long mod_find_symname(struct module *mod, const char *name)
>> {
>> unsigned int i;
>> + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>>
>> - for (i = 0; i < mod->num_symtab; i++)
>> - if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
>> - mod->symtab[i].st_info != 'U')
>> - return mod->symtab[i].st_value;
>> + for (i = 0; i < kallsyms->num_symtab; i++)
>> + if (strcmp(name, symname(kallsyms, i)) == 0 &&
>> + kallsyms->symtab[i].st_info != 'U')
>> + return kallsyms->symtab[i].st_value;
>> return 0;
>> }
>>
>> @@ -3822,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *,
>> const char *,
>> module_assert_mutex();
>>
>> list_for_each_entry(mod, &modules, list) {
>> + /* We hold module_mutex: no need for rcu_dereference_sched */
>> + struct mod_kallsyms *kallsyms = mod->kallsyms;
>> +
>> if (mod->state == MODULE_STATE_UNFORMED)
>> continue;
>> - for (i = 0; i < mod->num_symtab; i++) {
>> - ret = fn(data, mod->strtab + mod->symtab[i].st_name,
>> - mod, mod->symtab[i].st_value);
>> + for (i = 0; i < kallsyms->num_symtab; i++) {
>> + ret = fn(data, symname(kallsyms, i),
>> + mod, kallsyms->symtab[i].st_value);
>> if (ret != 0)
>> return ret;
>> }
>
> Test ok.
> But its really a big patch.
> The lts kernels such as 3.10 also have the problem, this patch seems not work
> for them, as some code are different.
Yes :(
Thankyou for testing! I will push this with CC: stable, and do the
backports myself as required.
Thanks,
Rusty.