Re: rcu-refcount stacker performance

From: Paul E. McKenney
Date: Thu Jul 14 2005 - 10:24:17 EST


On Thu, Jul 14, 2005 at 09:21:07AM -0500, serue@xxxxxxxxxx wrote:
> On July 8 I sent out a patch which re-implemented the rcu-refcounting
> of the LSM list in stacker for the sake of supporting safe security
> module unloading. (patch reattached here for convenience) Here are
> some performance results with and without that patch. Tests were run
> on a 16-way ppc64 machine. Dbench was run 50 times, and kernbench
> and reaim were run 10 times, and intervals are 95% confidence half-
> intervals.
>
> These results seem pretty poor. I'm now wondering whether this is
> really necessary. David Wheeler's original stacker had an option
> of simply waiting a while after a module was taken out of the list
> of active modules before freeing the modules. Something like that
> is of course one option. I'm hoping we can also take advantage of
> some already known module state info to be a little less coarse
> about it. For instance, sys_delete_module() sets m->state to
> MODULE_STATE_GOING before calling mod->exit(). If in place of
> doing atomic_inc(&m->use), stacker skipped the m->hook() if
> m->state!=MODULE_STATE_LIVE, then it may be safe to assume that
> any m->hook() should be finished before sys_delete_module() gets
> to free_module(mod). This seems to require adding a struct
> module argument to security/security:mod_reg_security() so an LSM
> can pass itself along.
>
> So I'll try that next. Hopefully by avoiding the potential cache
> line bounces which atomic_inc(&m->use) bring, this should provide
> far better performance.

My guess is that the reference count is indeed costing you quite a
bit. I glanced quickly at the patch, and most of the uses seem to
be of the form:

increment ref count
rcu_read_lock()
do something
rcu_read_unlock()
decrement ref count

Can't these cases rely solely on rcu_read_lock()? Why do you also
need to increment the reference count in these cases?

Thanx, Paul

> thanks,
> -serge
>
> Dbench (throughput, larger is better)
> --------------------------------------------
> plain stacker: 1531.448400 +/- 15.791116
> stacker with rcu: 1408.056200 +/- 12.597277
>
> Kernbench (runtime, smaller is better)
> --------------------------------------------
> plain stacker: 52.341000 +/- 0.184995
> stacker with rcu: 53.722000 +/- 0.161473
>
> Reaim (numjobs, larger is better) (gnuplot-friendly format)
> plain stacker:
> ----------------------------------------------------------
> Numforked jobs/minute 95% CI
> 1 106662.857000 5354.267865
> 3 301628.571000 6297.121934
> 5 488142.858000 16031.685536
> 7 673200.000000 23994.030784
> 9 852428.570000 31485.607271
> 11 961714.290000 0.000000
> 13 1108157.144000 27287.525982
> 15 1171178.571000 49790.796869
>
> Reaim (numjobs, larger is better) (gnuplot-friendly format)
> stacker with rcu:
> ----------------------------------------------------------
> Numforked jobs/minute 95% CI
> 1 100542.857000 2099.040645
> 3 266657.139000 6297.121934
> 5 398892.858000 12023.765252
> 7 467670.000000 14911.383385
> 9 418648.352000 11665.751441
> 11 396825.000000 8700.115252
> 13 357480.912000 7567.947838
> 15 337571.428000 2332.267703
>
> Patch:
>
> Index: linux-2.6.12/security/stacker.c
> ===================================================================
> --- linux-2.6.12.orig/security/stacker.c 2005-07-08 13:43:15.000000000 -0500
> +++ linux-2.6.12/security/stacker.c 2005-07-08 16:21:54.000000000 -0500
> @@ -33,13 +33,13 @@
>
> struct module_entry {
> struct list_head lsm_list; /* list of active lsms */
> - struct list_head all_lsms; /* list of all lsms */
> char *module_name;
> int namelen;
> struct security_operations module_operations;
> + struct rcu_head m_rcu;
> + atomic_t use;
> };
> static struct list_head stacked_modules; /* list of stacked modules */
> -static struct list_head all_modules; /* list of all modules, including freed */
>
> static short sysfsfiles_registered;
>
> @@ -84,6 +84,32 @@ MODULE_PARM_DESC(debug, "Debug enabled o
> * We return as soon as an error is returned.
> */
>
> +static inline void stacker_free_module(struct module_entry *m)
> +{
> + kfree(m->module_name);
> + kfree(m);
> +}
> +
> +/*
> + * Version of stacker_free_module called from call_rcu
> + */
> +static void free_mod_fromrcu(struct rcu_head *head)
> +{
> + struct module_entry *m;
> +
> + m = container_of(head, struct module_entry, m_rcu);
> + stacker_free_module(m);
> +}
> +
> +static void stacker_del_module(struct rcu_head *head)
> +{
> + struct module_entry *m;
> +
> + m = container_of(head, struct module_entry, m_rcu);
> + if (atomic_dec_and_test(&m->use))
> + stacker_free_module(m);
> +}
> +
> #define stack_for_each_entry(pos, head, member) \
> for (pos = list_entry((head)->next, typeof(*pos), member); \
> &pos->member != (head); \
> @@ -93,16 +119,27 @@ MODULE_PARM_DESC(debug, "Debug enabled o
> /* to make this safe for module deletion, we would need to
> * add a reference count to m as we had before
> */
> +/*
> + * XXX We can't quite do this - we delete the module before we grab
> + * m->next?
> + * We could just do a call_rcu. Then the call_rcu happens in same
> + * rcu cycle has dereference, so module won't be deleted until the
> + * next cycle.
> + * That's what I'm going to do.
> + */
> #define RETURN_ERROR_IF_ANY_ERROR(BASE_FUNC, FUNC_WITH_ARGS) do { \
> int result = 0; \
> struct module_entry *m; \
> rcu_read_lock(); \
> stack_for_each_entry(m, &stacked_modules, lsm_list) { \
> - if (!m->module_operations.BASE_FUNC) \
> - continue; \
> - rcu_read_unlock(); \
> - result = m->module_operations.FUNC_WITH_ARGS; \
> - rcu_read_lock(); \
> + if (m->module_operations.BASE_FUNC) { \
> + atomic_inc(&m->use); \
> + rcu_read_unlock(); \
> + result = m->module_operations.FUNC_WITH_ARGS; \
> + rcu_read_lock(); \
> + if (unlikely(atomic_dec_and_test(&m->use))) \
> + call_rcu(&m->m_rcu, free_mod_fromrcu); \
> + } \
> if (result) \
> break; \
> } \
> @@ -116,9 +153,12 @@ MODULE_PARM_DESC(debug, "Debug enabled o
> rcu_read_lock(); \
> stack_for_each_entry(m, &stacked_modules, lsm_list) { \
> if (m->module_operations.BASE_FUNC) { \
> + atomic_inc(&m->use); \
> rcu_read_unlock(); \
> m->module_operations.FUNC_WITH_ARGS; \
> rcu_read_lock(); \
> + if (unlikely(atomic_dec_and_test(&m->use))) \
> + call_rcu(&m->m_rcu, free_mod_fromrcu); \
> } \
> } \
> rcu_read_unlock(); \
> @@ -129,38 +169,47 @@ MODULE_PARM_DESC(debug, "Debug enabled o
> rcu_read_lock(); \
> stack_for_each_entry(m, &stacked_modules, lsm_list ) { \
> if (m->module_operations.BASE_FREE) { \
> + atomic_inc(&m->use); \
> rcu_read_unlock(); \
> m->module_operations.FREE_WITH_ARGS; \
> rcu_read_lock(); \
> + if (unlikely(atomic_dec_and_test(&m->use))) \
> + call_rcu(&m->m_rcu, free_mod_fromrcu); \
> } \
> } \
> rcu_read_unlock(); \
> } while (0)
>
> #define ALLOC_SECURITY(BASE_FUNC,FUNC_WITH_ARGS,BASE_FREE,FREE_WITH_ARGS) do { \
> - int result; \
> + int result = 0; \
> struct module_entry *m, *m2; \
> rcu_read_lock(); \
> stack_for_each_entry(m, &stacked_modules, lsm_list) { \
> - if (!m->module_operations.BASE_FUNC) \
> - continue; \
> - rcu_read_unlock(); \
> - result = m->module_operations.FUNC_WITH_ARGS; \
> - rcu_read_lock(); \
> + if (m->module_operations.BASE_FUNC) { \
> + atomic_inc(&m->use); \
> + rcu_read_unlock(); \
> + result = m->module_operations.FUNC_WITH_ARGS; \
> + rcu_read_lock(); \
> + if (unlikely(atomic_dec_and_test(&m->use))) \
> + call_rcu(&m->m_rcu, free_mod_fromrcu); \
> + } \
> if (result) \
> goto bad; \
> } \
> rcu_read_unlock(); \
> return 0; \
> bad: \
> - stack_for_each_entry(m2, &all_modules, all_lsms) { \
> + stack_for_each_entry(m2, &stacked_modules, lsm_list) { \
> if (m == m2) \
> break; \
> if (!m2->module_operations.BASE_FREE) \
> continue; \
> + atomic_inc(&m2->use); \
> rcu_read_unlock(); \
> m2->module_operations.FREE_WITH_ARGS; \
> rcu_read_lock(); \
> + if (unlikely(atomic_dec_and_test(&m2->use))) \
> + call_rcu(&m2->m_rcu, free_mod_fromrcu); \
> } \
> rcu_read_unlock(); \
> return result; \
> @@ -251,10 +300,16 @@ static int stacker_vm_enough_memory(long
>
> rcu_read_lock();
> stack_for_each_entry(m, &stacked_modules, lsm_list) {
> - if (!m->module_operations.vm_enough_memory)
> + if (!m->module_operations.vm_enough_memory) {
> continue;
> + }
> + atomic_inc(&m->use);
> rcu_read_unlock();
> result = m->module_operations.vm_enough_memory(pages);
> + rcu_read_lock();
> + if (unlikely(atomic_dec_and_test(&m->use)))
> + stacker_free_module(m);
> + rcu_read_unlock();
> return result;
> }
> rcu_read_unlock();
> @@ -281,9 +336,12 @@ static int stacker_netlink_send (struct
> if (!m->module_operations.netlink_send)
> continue;
> NETLINK_CB(skb).eff_cap = ~0;
> + atomic_inc(&m->use);
> rcu_read_unlock();
> result = m->module_operations.netlink_send(sk, skb);
> rcu_read_lock();
> + if (unlikely(atomic_dec_and_test(&m->use)))
> + call_rcu(&m->m_rcu, free_mod_fromrcu);
> tmpcap &= NETLINK_CB(skb).eff_cap;
> if (result)
> break;
> @@ -987,33 +1045,42 @@ stacker_getprocattr(struct task_struct *
> stack_for_each_entry(m, &stacked_modules, lsm_list) {
> if (!m->module_operations.getprocattr)
> continue;
> + atomic_inc(&m->use);
> rcu_read_unlock();
> ret = m->module_operations.getprocattr(p, name,
> value+len, size-len);
> rcu_read_lock();
> - if (ret == -EINVAL)
> - continue;
> - found_noneinval = 1;
> - if (ret < 0) {
> + if (ret > 0) {
> + found_noneinval = 1;
> + len += ret;
> + if (len+m->namelen+4 < size) {
> + char *v = value;
> + if (v[len-1]=='\n')
> + len--;
> + len += sprintf(value+len, " (%s)\n",
> + m->module_name);
> + }
> + } else if (ret != -EINVAL) {
> + found_noneinval = 1;
> memset(value, 0, len);
> len = ret;
> + } else
> + ret = 0;
> +
> + if (unlikely(atomic_dec_and_test(&m->use)))
> + call_rcu(&m->m_rcu, free_mod_fromrcu);
> +
> + if (ret < 0)
> break;
> - }
> - if (ret == 0)
> - continue;
> - len += ret;
> - if (len+m->namelen+4 < size) {
> - char *v = value;
> - if (v[len-1]=='\n')
> - len--;
> - len += sprintf(value+len, " (%s)\n", m->module_name);
> - }
> }
> rcu_read_unlock();
>
> return found_noneinval ? len : -EINVAL;
> }
>
> +/*
> + * find an lsm by name. If found, increment its use count and return it
> + */
> static struct module_entry *
> find_active_lsm(const char *name, int len)
> {
> @@ -1022,6 +1089,7 @@ find_active_lsm(const char *name, int le
> rcu_read_lock();
> stack_for_each_entry(m, &stacked_modules, lsm_list) {
> if (m->namelen == len && !strncmp(m->module_name, name, len)) {
> + atomic_inc(&m->use);
> ret = m;
> break;
> }
> @@ -1043,6 +1111,7 @@ stacker_setprocattr(struct task_struct *
> char *realv = (char *)value;
> size_t dsize = size;
> int loc = 0, end_data = size;
> + int ret, free_module = 0;
>
> if (list_empty(&stacked_modules))
> return -EINVAL;
> @@ -1063,7 +1132,7 @@ stacker_setprocattr(struct task_struct *
> callm = find_active_lsm(realv+loc+1, dsize-loc-1);
> if (!callm)
> goto call;
> -
> + free_module = 1;
>
> loc--;
> while (loc && realv[loc]==' ')
> @@ -1074,8 +1143,14 @@ call:
> if (!callm || !callm->module_operations.setprocattr)
> return -EINVAL;
>
> - return callm->module_operations.setprocattr(p, name, value, end_data) +
> + ret = callm->module_operations.setprocattr(p, name, value, end_data) +
> (size-end_data);
> +
> + if (free_module && atomic_dec_and_test(&callm->use))
> + stacker_free_module(callm);
> +
> + return ret;
> +
> }
>
> /*
> @@ -1116,15 +1191,15 @@ static int stacker_register (const char
> new_module_entry->module_name = new_module_name;
> new_module_entry->namelen = namelen;
>
> + atomic_set(&new_module_entry->use, 1);
> +
> INIT_LIST_HEAD(&new_module_entry->lsm_list);
> - INIT_LIST_HEAD(&new_module_entry->all_lsms);
>
> rcu_read_lock();
> if (!modules_registered) {
> modules_registered++;
> list_del_rcu(&default_module.lsm_list);
> }
> - list_add_tail_rcu(&new_module_entry->all_lsms, &all_modules);
> list_add_tail_rcu(&new_module_entry->lsm_list, &stacked_modules);
> if (strcmp(name, "selinux") == 0)
> selinux_module = new_module_entry;
> @@ -1141,16 +1216,60 @@ out:
> }
>
> /*
> - * Currently this version of stacker does not allow for module
> - * unregistering.
> - * Easy way to allow for this is using rcu ref counting like an older
> - * version of stacker did.
> - * Another way would be to force stacker_unregister to sleep between
> - * removing the module from all_modules and free_modules and unloading it.
> + * find_lsm_module_by_name:
> + * Find a module by name. Used by stacker_unregister. Called with
> + * stacker spinlock held.
> + */
> +static struct module_entry *
> +find_lsm_with_namelen(const char *name, int len)
> +{
> + struct module_entry *m, *ret = NULL;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(m, &stacked_modules, lsm_list) {
> + atomic_inc(&m->use);
> + rcu_read_unlock();
> + if (m->namelen == len && !strncmp(m->module_name, name, len))
> + ret = m;
> + rcu_read_lock();
> + if (unlikely(atomic_dec_and_test(&m->use)))
> + call_rcu(&m->m_rcu, free_mod_fromrcu);
> + if (ret)
> + break;
> + }
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
> +/*
> */
> static int stacker_unregister (const char *name, struct security_operations *ops)
> {
> - return -EPERM;
> + struct module_entry *m;
> + int len = strnlen(name, MAX_MODULE_NAME_LEN);
> + int ret = 0;
> +
> + spin_lock(&stacker_lock);
> + m = find_lsm_with_namelen(name, len);
> +
> + if (!m) {
> + printk(KERN_INFO "%s: could not find module %s.\n",
> + __FUNCTION__, name);
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + list_del_rcu(&m->lsm_list);
> +
> + if (strcmp(m->module_name, "selinux") == 0)
> + selinux_module = NULL;
> + call_rcu(&m->m_rcu, stacker_del_module);
> +
> +out:
> + spin_unlock(&stacker_lock);
> +
> + return ret;
> }
>
> static struct security_operations stacker_ops = {
> @@ -1407,57 +1526,6 @@ static struct stacker_attribute stacker_
> .show = listmodules_read,
> };
>
> -/* respond to a request to unload a module */
> -static ssize_t stacker_unload_write (struct stacker_kobj *obj, const char *name,
> - size_t count)
> -{
> - struct module_entry *m;
> - int len = strnlen(name, MAX_MODULE_NAME_LEN);
> - int ret = count;
> -
> - if (!capable(CAP_SYS_ADMIN))
> - return -EPERM;
> -
> - if (count <= 0)
> - return -EINVAL;
> -
> - if (!modules_registered)
> - return -EINVAL;
> -
> - spin_lock(&stacker_lock);
> - m = find_active_lsm(name, len);
> -
> - if (!m) {
> - printk(KERN_INFO "%s: could not find module %s.\n",
> - __FUNCTION__, name);
> - ret = -ENOENT;
> - goto out;
> - }
> -
> - if (strcmp(m->module_name, "selinux") == 0)
> - selinux_module = NULL;
> -
> - rcu_read_lock();
> - list_del_rcu(&m->lsm_list);
> - if (list_empty(&stacked_modules)) {
> - INIT_LIST_HEAD(&default_module.lsm_list);
> - list_add_tail_rcu(&default_module.lsm_list, &stacked_modules);
> - modules_registered = 0;
> - }
> - rcu_read_unlock();
> -
> -out:
> - spin_unlock(&stacker_lock);
> -
> - return ret;
> -}
> -
> -static struct stacker_attribute stacker_attr_unload = {
> - .attr = {.name = "unload", .mode = S_IFREG | S_IRUGO | S_IWUSR},
> - .store = stacker_unload_write,
> -};
> -
> -
> /* stop responding to sysfs */
> static ssize_t stop_responding_write (struct stacker_kobj *obj,
> const char *buff, size_t count)
> @@ -1483,7 +1551,6 @@ static void unregister_sysfs_files(void)
> sysfs_remove_file(kobj, &stacker_attr_lockdown.attr);
> sysfs_remove_file(kobj, &stacker_attr_listmodules.attr);
> sysfs_remove_file(kobj, &stacker_attr_stop_responding.attr);
> - sysfs_remove_file(kobj, &stacker_attr_unload.attr);
>
> sysfsfiles_registered = 0;
> }
> @@ -1506,8 +1573,6 @@ static int register_sysfs_files(void)
> &stacker_attr_listmodules.attr);
> sysfs_create_file(&stacker_subsys.kset.kobj,
> &stacker_attr_stop_responding.attr);
> - sysfs_create_file(&stacker_subsys.kset.kobj,
> - &stacker_attr_unload.attr);
> sysfsfiles_registered = 1;
> stacker_dbg("sysfs files registered\n");
> return 0;
> @@ -1524,13 +1589,13 @@ static int __init stacker_init (void)
> sysfsfiles_registered = 0;
>
> INIT_LIST_HEAD(&stacked_modules);
> - INIT_LIST_HEAD(&all_modules);
> spin_lock_init(&stacker_lock);
> default_module.module_name = DEFAULT_MODULE_NAME;
> default_module.namelen = strlen(DEFAULT_MODULE_NAME);
> memcpy(&default_module.module_operations, &dummy_security_ops,
> sizeof(struct security_operations));
> INIT_LIST_HEAD(&default_module.lsm_list);
> + atomic_set(&default_module.use, 1);
> list_add_tail(&default_module.lsm_list, &stacked_modules);
>
> if (register_security (&stacker_ops)) {
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/