Re: [PATCHSET] mempool, percpu, blkcg: fix percpu stat allocation and remove stats_lock

From: Vivek Goyal
Date: Thu Mar 08 2012 - 12:57:18 EST


On Wed, Mar 07, 2012 at 03:05:49PM -0800, Andrew Morton wrote:

[..]
> > > btw, speaking of uniprocessor: please do perform a uniprocessor build
> > > and see what impact the patch has upon the size(1) output for the .o
> > > files. We should try to minimize the pointless bloat for the UP
> > > kernel.
> >
> > But this logic is required both for UP and SMP kernels. So bloat on UP
> > is not unnecessary?
>
> UP doesn't need a per-cpu variable, hence it doesn't need to run
> alloc_per_cpu() at all. For UP all we need to do is to aggregate a
> `struct blkio_group_stats' within `struct blkg_policy_data'?
>
> This could still be done with suitable abstraction and wrappers.
> Whether that's desirable depends on how fat the API ends up, I guess.
>

Ok, here is the patch which gets rid of allocating per cpu stats in the UP
case. Here are the size stats with and without the patch.

Without patch (UP kernel)
-------------------------
# size block/blk-cgroup.o
text data bss dec hex filename
13377 5504 58 18939 49fb block/blk-cgroup.o

With patch (UP kernel)
----------------------
# size block/blk-cgroup.o
text data bss dec hex filename
12678 5312 49 18039 4677 block/blk-cgroup.o

Do you think it is worth introducing these ifdefs?

Anyway, I am assuming that the UP optimization issue is not blocking the
acceptance of the other alloc-per-cpu-stats patch. I have replaced msleep()
with queue_delayed_work() (rough sketch of the pattern below). Hopefully it
is a little less miserable now.
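
For reference, the rescheduling pattern now looks roughly like the sketch
below. This is an illustration only, not the actual hunk from that patch;
the 10ms retry delay is made up here. The point is that the worker defers
itself via queue_delayed_work() on system_nrt_wq instead of msleep()-looping:

    static void blkio_stat_alloc_fn(struct work_struct *work)
    {
            void *stats = alloc_percpu(struct blkio_group_stats_cpu);

            if (!stats) {
                    /* Retry later by re-queueing instead of msleep() + loop */
                    queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work,
                                       msecs_to_jiffies(10));
                    return;
            }
            /* ... hand stats to a waiting blkg; re-queue if alloc_list is not empty ... */
    }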

Tejun, I noticed that in the UP case, cgroup removal hangs once in a while.
It looks like it is stuck somewhere in cgroup_rmdir(). I will debug further
to find out what is happening; it may be a blkcg->refcount issue.

Thanks
Vivek

---
block/blk-cgroup.c | 65 +++++++++++++++++++++++++++++++++++++++++++++--------
block/blk-cgroup.h | 6 ++++
2 files changed, 61 insertions(+), 10 deletions(-)

Index: tejun-misc/block/blk-cgroup.h
===================================================================
--- tejun-misc.orig/block/blk-cgroup.h 2012-03-08 23:28:17.000000000 -0500
+++ tejun-misc/block/blk-cgroup.h 2012-03-08 23:37:25.310887020 -0500
@@ -168,9 +168,13 @@ struct blkg_policy_data {
struct blkio_group_conf conf;

struct blkio_group_stats stats;
+
+#ifdef CONFIG_SMP
/* Per cpu stats pointer */
struct blkio_group_stats_cpu __percpu *stats_cpu;
-
+#else
+ struct blkio_group_stats_cpu stats_cpu;
+#endif
/* pol->pdata_size bytes of private data used by policy impl */
char pdata[] __aligned(__alignof__(unsigned long long));
};
Index: tejun-misc/block/blk-cgroup.c
===================================================================
--- tejun-misc.orig/block/blk-cgroup.c 2012-03-08 23:37:14.079886676 -0500
+++ tejun-misc/block/blk-cgroup.c 2012-03-08 23:37:55.104888008 -0500
@@ -35,9 +35,11 @@ static DEFINE_SPINLOCK(alloc_list_lock);
static LIST_HEAD(alloc_list);

/* Array of per cpu stat pointers allocated for blk groups */
+#ifdef CONFIG_SMP
static void *pcpu_stats[BLKIO_NR_POLICIES];
static void blkio_stat_alloc_fn(struct work_struct *);
static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
+#endif

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
@@ -73,6 +75,42 @@ struct cgroup_subsys blkio_subsys = {
};
EXPORT_SYMBOL_GPL(blkio_subsys);

+#ifdef CONFIG_SMP
+static inline struct blkio_group_stats_cpu *
+pd_this_cpu_stat_ptr(struct blkg_policy_data *pd)
+{
+ return this_cpu_ptr(pd->stats_cpu);
+}
+
+static inline struct blkio_group_stats_cpu *
+pd_cpu_stat_ptr(struct blkg_policy_data *pd, int cpu)
+{
+ return per_cpu_ptr(pd->stats_cpu, cpu);
+}
+
+static inline bool pd_stat_cpu_valid(struct blkg_policy_data *pd)
+{
+ return pd->stats_cpu ? true : false;
+}
+#else /* UP */
+static inline struct blkio_group_stats_cpu *
+pd_this_cpu_stat_ptr(struct blkg_policy_data *pd)
+{
+ return &pd->stats_cpu;
+}
+
+static inline struct blkio_group_stats_cpu *
+pd_cpu_stat_ptr(struct blkg_policy_data *pd, int cpu)
+{
+ return &pd->stats_cpu;
+}
+
+static inline bool pd_stat_cpu_valid(struct blkg_policy_data *pd)
+{
+ return true;
+}
+#endif /* CONFIG_SMP */
+
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
@@ -401,7 +439,7 @@ void blkiocg_update_dispatch_stats(struc
unsigned long flags;

/* If per cpu stats are not allocated yet, don't do any accounting. */
- if (pd->stats_cpu == NULL)
+ if (!pd_stat_cpu_valid(pd))
return;

/*
@@ -411,7 +449,7 @@ void blkiocg_update_dispatch_stats(struc
*/
local_irq_save(flags);

- stats_cpu = this_cpu_ptr(pd->stats_cpu);
+ stats_cpu = pd_this_cpu_stat_ptr(pd);

u64_stats_update_begin(&stats_cpu->syncp);
stats_cpu->sectors += bytes >> 9;
@@ -457,7 +495,7 @@ void blkiocg_update_io_merged_stats(stru
unsigned long flags;

/* If per cpu stats are not allocated yet, don't do any accounting. */
- if (pd->stats_cpu == NULL)
+ if (!pd_stat_cpu_valid(pd))
return;

/*
@@ -467,7 +505,7 @@ void blkiocg_update_io_merged_stats(stru
*/
local_irq_save(flags);

- stats_cpu = this_cpu_ptr(pd->stats_cpu);
+ stats_cpu = pd_this_cpu_stat_ptr(pd);

u64_stats_update_begin(&stats_cpu->syncp);
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
@@ -481,6 +519,7 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_merg
* Worker for allocating per cpu stat for blk groups. This is scheduled
* once there are some groups on the alloc_list waiting for allocation
*/
+#ifdef CONFIG_SMP
static void blkio_stat_alloc_fn(struct work_struct *work)
{

@@ -530,6 +569,7 @@ alloc_stats:
if (!empty)
goto alloc_stats;
}
+#endif

/**
* blkg_free - free a blkg
@@ -548,7 +588,9 @@ static void blkg_free(struct blkio_group
struct blkg_policy_data *pd = blkg->pd[i];

if (pd) {
+#ifdef CONFIG_SMP
free_percpu(pd->stats_cpu);
+#endif
kfree(pd);
}
}
@@ -657,11 +699,15 @@ struct blkio_group *blkg_lookup_create(s
list_add(&blkg->q_node, &q->blkg_list);
spin_unlock(&blkcg->lock);

+ /* In case of UP, stats are not per cpu */
+#ifdef CONFIG_SMP
spin_lock(&alloc_list_lock);
list_add(&blkg->alloc_node, &alloc_list);
/* Queue per cpu stat allocation from worker thread. */
queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
spin_unlock(&alloc_list_lock);
+#endif
+
out:
return blkg;
}
@@ -730,9 +776,10 @@ void update_root_blkg_pd(struct request_
pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
WARN_ON_ONCE(!pd);

+#ifdef CONFIG_SMP
pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
WARN_ON_ONCE(!pd->stats_cpu);
-
+#endif
blkg->pd[plid] = pd;
pd->blkg = blkg;
pol->ops.blkio_init_group_fn(blkg);
@@ -798,7 +845,7 @@ static void blkio_reset_stats_cpu(struct
struct blkio_group_stats_cpu *stats_cpu;
int i, j, k;

- if (pd->stats_cpu == NULL)
+ if (!pd_stat_cpu_valid(pd))
return;
/*
* Note: On 64 bit arch this should not be an issue. This has the
@@ -812,7 +859,7 @@ static void blkio_reset_stats_cpu(struct
* unless this becomes a real issue.
*/
for_each_possible_cpu(i) {
- stats_cpu = per_cpu_ptr(pd->stats_cpu, i);
+ stats_cpu = pd_cpu_stat_ptr(pd, i);
stats_cpu->sectors = 0;
for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
for (k = 0; k < BLKIO_STAT_TOTAL; k++)
@@ -931,12 +978,12 @@ static uint64_t blkio_read_stat_cpu(stru
struct blkio_group_stats_cpu *stats_cpu;
u64 val = 0, tval;

- if (pd->stats_cpu == NULL)
+ if (!pd_stat_cpu_valid(pd))
return val;

for_each_possible_cpu(cpu) {
unsigned int start;
- stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
+ stats_cpu = pd_cpu_stat_ptr(pd, cpu);

do {
start = u64_stats_fetch_begin(&stats_cpu->syncp);

