Re: Overview of concurrency managed workqueue
From: Christoph Lameter
Date: Tue Jun 15 2010 - 14:46:30 EST
On Tue, 15 Jun 2010, Tejun Heo wrote:
> == Benefits
>
> * Less to worry about causing deadlocks around execution resources.
>
> * Far fewer kthreads.
>
> * More flexibility without runtime overhead.
>
> * As concurrency is no longer a problem, workloads which needed
> separate mechanisms can now use generic workqueue instead. This
> easy access to concurrency also allows stuff which wasn't worth
> implementing a dedicated mechanism for but still needed flexible
> concurrency.
Why not start the whole document with the above? Otherwise people get tired
of reading before finding out what the point of the exercise is.
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/completion.h>
#define MAX_TEST_SECS 300
/*
 * Parameters of one synthetic workload, consumed by perf_wq_func() and
 * param_set_trigger().
 */
struct workload_spec {
/* short tag used as the prefix of each test run's name */
const char *name;
/* busy-wait time per cycle, passed to udelay() */
unsigned int burn_usecs;
/* base sleep per cycle in msecs; scaled by (random32() % 200) / 100 */
unsigned int mean_sleep_msecs;
/* base requeue delay in msecs; scaled by (random32() % 200) / 100 */
unsigned int mean_resched_msecs;
/* divisor applied to the computed cycle count; ignored when zero */
unsigned int factor;
};
/*
 * One row of the test matrix: which workload to run, on which workqueue,
 * and how many concurrent work items to create for it.
 * test_specs[] initializes this struct positionally, so field order matters.
 */
struct test_spec {
/* workload parameters shared by every work item of this row */
const struct workload_spec *workload;
/* index into the wqs[] array */
unsigned int wq_id;
/* number of test_run instances created for this row */
unsigned int nr_works;
};
/*
 * Runtime state of a single self-requeueing work item.
 */
struct test_run {
/* "<workload>-<spec index>:<work index>@<wq_id>", built in perf_wq_init() */
char name[64];
/* the delayed work item; its handler is perf_wq_func() */
struct delayed_work dwork;
/* workqueue this run is queued on */
struct workqueue_struct *wq;
/* workload parameters for this run */
const struct workload_spec *spec;
/* remaining executions; decremented once per perf_wq_func() call */
unsigned int cycles_left;
/* jiffies when the run was started by param_set_trigger() */
unsigned long start;
/* jiffies when the last cycle finished */
unsigned long end;
/* completed by perf_wq_func() when cycles_left reaches zero */
struct completion done;
};
/*
 * Short workload: 1 msec base sleep and 10 msec base requeue delay, with
 * the cycle count divided by 3.
 */
static const struct workload_spec workload_short = {
.name = "sht",
.burn_usecs = 50,
.mean_sleep_msecs = 1,
.mean_resched_msecs = 10,
.factor = 3,
};
/*
 * Medium workload: 10 msec base sleep and 50 msec base requeue delay, with
 * the cycle count divided by 2.
 */
static const struct workload_spec workload_medium = {
.name = "med",
.burn_usecs = 50,
.mean_sleep_msecs = 10,
.mean_resched_msecs = 50,
.factor = 2,
};
/*
 * Long workload: 100 msec base sleep and 250 msec base requeue delay, with
 * the cycle count left unscaled (factor 1).
 */
static const struct workload_spec workload_long = {
.name = "lng",
.burn_usecs = 50,
.mean_sleep_msecs = 100,
.mean_resched_msecs = 250,
.factor = 1,
};
/*
 * Test matrix. Each row creates nr_works work items running the given
 * workload on workqueue number wq_id. Rows sharing a wq_id place different
 * workloads on the same workqueue. perf_wq_init() derives the number of
 * workqueues from the largest wq_id and the number of test runs from the
 * sum of nr_works.
 */
static const struct test_spec test_specs[] = {
/* workload wq_id nr_works */
{ &workload_short, 0, 4 },
{ &workload_short, 1, 4 },
{ &workload_short, 2, 4 },
{ &workload_short, 3, 4 },
{ &workload_short, 4, 2 },
{ &workload_medium, 4, 2 },
{ &workload_short, 5, 2 },
{ &workload_medium, 5, 2 },
{ &workload_medium, 6, 2 },
{ &workload_long, 6, 1 },
{ &workload_medium, 7, 2 },
{ &workload_long, 7, 1 },
{ &workload_medium, 8, 2 },
{ &workload_long, 8, 1 },
{ &workload_medium, 9, 2 },
{ &workload_long, 9, 1 },
{ &workload_long, 10, 1 },
{ &workload_long, 11, 1 },
{ &workload_long, 12, 1 },
{ &workload_long, 13, 1 },
{ &workload_long, 14, 1 },
{ &workload_long, 15, 1 },
{ &workload_long, 16, 1 },
{ &workload_long, 17, 1 },
{ &workload_short, 18, 4 },
{ &workload_short, 19, 4 },
{ &workload_short, 20, 4 },
{ &workload_short, 21, 4 },
{ &workload_short, 22, 4 },
{ &workload_short, 23, 4 },
{ &workload_short, 24, 4 },
{ &workload_short, 25, 4 },
};
/* Number of rows in test_specs[]. */
static const int nr_test_specs = ARRAY_SIZE(test_specs);
/* Number of workqueues: largest wq_id in test_specs[] plus one. */
static unsigned int nr_wqs;
/* Total number of test runs: sum of nr_works over test_specs[]. */
static unsigned int nr_test_runs;
/* Array of nr_wqs workqueues, indexed by wq_id; allocated in perf_wq_init(). */
static struct workqueue_struct **wqs;
/* Array of nr_test_runs test runs; allocated in perf_wq_init(). */
static struct test_run *test_runs;
/*
 * perf_wq_func - work handler executing one cycle of a test run.
 * @work: the work_struct embedded in the run's delayed_work
 *
 * One cycle sleeps for a randomly chosen total time, split into two
 * msleep() calls with a udelay() busy-wait of burn_usecs in between.
 * The total is mean_sleep_msecs scaled by (random32() % 200) / 100, and
 * the split point is a random percentage of that total.
 *
 * Afterwards the cycle counter is decremented. While cycles remain, the
 * work requeues itself after a random delay derived from
 * mean_resched_msecs in the same way; on the last cycle it records the
 * end time and signals the run's completion.
 */
static void perf_wq_func(struct work_struct *work)
{
	struct test_run *run =
		container_of(to_delayed_work(work), struct test_run, dwork);
	const struct workload_spec *wl = run->spec;
	unsigned int total_ms, first_ms, requeue_ms;

	/* Pick the total sleep first, then how much of it precedes the burn. */
	total_ms = (wl->mean_sleep_msecs * (random32() % 200)) / 100;
	first_ms = total_ms * (random32() % 100) / 100;

	msleep(first_ms);
	udelay(wl->burn_usecs);
	msleep(total_ms - first_ms);

	if (--run->cycles_left == 0) {
		/* Last cycle: timestamp the end and wake the waiter. */
		run->end = jiffies;
		complete(&run->done);
		return;
	}

	requeue_ms = (wl->mean_resched_msecs * (random32() % 200)) / 100;
	queue_delayed_work(run->wq, &run->dwork, msecs_to_jiffies(requeue_ms));
}
/*
 * param_set_trigger - "set" handler of the "trigger" module parameter.
 * @val: string written to the parameter, parsed as a duration in seconds
 * @kp: kernel parameter descriptor (unused)
 *
 * Starts every test run and blocks until all of them have completed,
 * logging how long each one took. Only one invocation may be in flight
 * at a time; a concurrent write fails immediately.
 *
 * For each run the number of cycles is
 *   dur * 1000 / (mean_sleep_msecs + mean_resched_msecs)
 * further divided by the workload's factor when that is non-zero.
 *
 * Returns 0 on success, -EBUSY if a test is already running, or -EINVAL
 * if the duration is not in the range 1..MAX_TEST_SECS.
 */
static int param_set_trigger(const char *val, struct kernel_param *kp)
{
	static DEFINE_MUTEX(mutex);
	unsigned int i;
	int dur;
	int ret = 0;

	if (!mutex_trylock(&mutex))
		return -EBUSY;

	dur = simple_strtoul(val, NULL, 0);
	if (dur <= 0 || dur > MAX_TEST_SECS) {
		pr_err("perf-wq: invalid duration %s\n", val);
		/* Must drop the mutex, or every later write sees -EBUSY. */
		ret = -EINVAL;
		goto out_unlock;
	}

	pr_info("perf-wq: duration %d\n", dur);

	for (i = 0; i < nr_test_runs; i++) {
		struct test_run *run = &test_runs[i];
		const struct workload_spec *spec = run->spec;
		unsigned int cycle_msec =
			spec->mean_sleep_msecs + spec->mean_resched_msecs;

		run->start = jiffies;
		run->cycles_left = dur * 1000 / cycle_msec;
		if (spec->factor)
			run->cycles_left /= spec->factor;
		/*
		 * perf_wq_func() pre-decrements cycles_left, so a count of
		 * zero would wrap around instead of finishing; always run
		 * at least one cycle.
		 */
		if (!run->cycles_left)
			run->cycles_left = 1;

		INIT_COMPLETION(run->done);
		queue_delayed_work(run->wq, &run->dwork, 0);
	}

	for (i = 0; i < nr_test_runs; i++) {
		struct test_run *run = &test_runs[i];

		wait_for_completion(&run->done);
		pr_info("perf-wq: test %s ran for %u msecs\n",
			run->name, jiffies_to_msecs(run->end - run->start));
	}

out_unlock:
	mutex_unlock(&mutex);
	return ret;
}
/* Write-only (mode 0600) parameter: writing a duration starts a test. */
module_param_call(trigger, param_set_trigger, NULL, NULL, 0600);
/*
 * perf_wq_init - module init: size, allocate and set up all test state.
 *
 * Scans test_specs[] to determine how many workqueues and test runs are
 * needed, allocates both arrays, creates one workqueue per wq_id and
 * initializes one test_run per requested work item.
 *
 * Returns 0 on success. On any failure everything created so far is torn
 * down and -ENOMEM is returned.
 */
static int __init perf_wq_init(void)
{
	struct test_run *run;
	int i, j;

	for (i = 0; i < nr_test_specs; i++) {
		nr_wqs = max(nr_wqs, test_specs[i].wq_id + 1);
		nr_test_runs += test_specs[i].nr_works;
	}

	/* kcalloc() zeroes the arrays and checks count * size for overflow. */
	wqs = kcalloc(nr_wqs, sizeof(wqs[0]), GFP_KERNEL);
	test_runs = kcalloc(nr_test_runs, sizeof(test_runs[0]), GFP_KERNEL);
	if (!wqs || !test_runs) {
		pr_err("perf-wq: allocation failed\n");
		goto fail;
	}

	for (i = 0; i < nr_wqs; i++) {
		/*
		 * NOTE(review): buf lives on the stack; confirm that
		 * create_workqueue() copies the name rather than keeping
		 * the pointer.
		 */
		char buf[32];

		snprintf(buf, sizeof(buf), "pwq-%02d", i);
		wqs[i] = create_workqueue(buf);
		if (!wqs[i]) {
			pr_err("perf-wq: failed to create workqueue %s\n", buf);
			goto fail;
		}
	}

	/* test_runs[] is filled sequentially, spec by spec. */
	run = test_runs;
	for (i = 0; i < nr_test_specs; i++) {
		const struct test_spec *spec = &test_specs[i];

		for (j = 0; j < spec->nr_works; j++) {
			snprintf(run->name, sizeof(run->name), "%s-%d:%d@%u",
				 spec->workload->name, i, j, spec->wq_id);
			INIT_DELAYED_WORK(&run->dwork, perf_wq_func);
			init_completion(&run->done);
			run->wq = wqs[spec->wq_id];
			run->spec = spec->workload;
			run++;
		}
	}

	pr_info("perf-wq initialized, echo duration in seconds to "
		"/sys/module/perf_wq/parameters/trigger to start test cycles\n");
	return 0;

fail:
	/* wqs[] is zero-initialized, so only created entries are non-NULL. */
	if (wqs)
		for (i = 0; i < nr_wqs; i++)
			if (wqs[i])
				destroy_workqueue(wqs[i]);
	kfree(wqs);
	kfree(test_runs);
	/* Do not leave dangling pointers in the file-scope globals. */
	wqs = NULL;
	test_runs = NULL;
	return -ENOMEM;
}
/*
 * perf_wq_exit - module exit: destroy every workqueue, then release the
 * workqueue and test-run arrays.
 */
static void __exit perf_wq_exit(void)
{
	unsigned int idx = 0;

	while (idx < nr_wqs) {
		destroy_workqueue(wqs[idx]);
		idx++;
	}

	kfree(wqs);
	kfree(test_runs);
}
/* Register the module's init and exit entry points and declare its license. */
module_init(perf_wq_init);
module_exit(perf_wq_exit);
MODULE_LICENSE("GPL");