Re: [dm-devel] dm-writeboost testing

From: Mikulas Patocka
Date: Sat Oct 05 2013 - 20:47:34 EST




On Sat, 5 Oct 2013, Akira Hayakawa wrote:

> Mikulas,
>
> > nvidia binary driver, but it may happen in other parts of the kernel too.
> > The fact that it works in your setup doesn't mean that it is correct.
> You are right. I am convinced.
>
> As far as I looked around the kernel code,
> it seems to be choosing kthread when one needs looping in background.
> There are other examples such as loop_thread in drivers/block/loop.c .
>
> And done. Please git pull.
> commit b6d2d3892e09145cd0b9c5ad69ea2d8c410fdeab is tested in my environment.
> All the looping daemons are running on kthread now.
>
> The diff between the older version (with wq) and the new version (with kthread)
> is shown below. I defined fancy CREATE_DAEMON macro.
>
> Another by-product is that
> you are no longer in need to wait for long time in dmsetup remove
> since kthread_stop() forcefully wakes the thread up.

The change seems ok. Please, also move this piece of code in flush_proc
out of the spinlock:
if (kthread_should_stop())
return 0;

It caused the workqueue warning I reported before and still causes warning
with kthreads:
note: flush_daemon[5145] exited with preempt_count 1

I will send you next email with more bugs that I found in your code.

Mikulas

> diff --git a/Driver/dm-writeboost-daemon.c b/Driver/dm-writeboost-daemon.c
> index 6090661..65974e2 100644
> --- a/Driver/dm-writeboost-daemon.c
> +++ b/Driver/dm-writeboost-daemon.c
> @@ -10,12 +10,11 @@
>
> /*----------------------------------------------------------------*/
>
> -void flush_proc(struct work_struct *work)
> +int flush_proc(void *data)
> {
> unsigned long flags;
>
> - struct wb_cache *cache =
> - container_of(work, struct wb_cache, flush_work);
> + struct wb_cache *cache = data;
>
> while (true) {
> struct flush_job *job;
> @@ -32,8 +31,12 @@ void flush_proc(struct work_struct *work)
> msecs_to_jiffies(100));
> spin_lock_irqsave(&cache->flush_queue_lock, flags);
>
> - if (cache->on_terminate)
> - return;
> + /*
> + * flush daemon can exit
> + * only if no flush job is queued.
> + */
> + if (kthread_should_stop())
> + return 0;
> }
>
> /*
> @@ -84,6 +87,7 @@ void flush_proc(struct work_struct *work)
>
> kfree(job);
> }
> + return 0;
> }
>
> /*----------------------------------------------------------------*/
> @@ -353,19 +357,15 @@ migrate_write:
> }
> }
>
> -void migrate_proc(struct work_struct *work)
> +int migrate_proc(void *data)
> {
> - struct wb_cache *cache =
> - container_of(work, struct wb_cache, migrate_work);
> + struct wb_cache *cache = data;
>
> - while (true) {
> + while (!kthread_should_stop()) {
> bool allow_migrate;
> size_t i, nr_mig_candidates, nr_mig, nr_max_batch;
> struct segment_header *seg, *tmp;
>
> - if (cache->on_terminate)
> - return;
> -
> /*
> * If reserving_id > 0
> * Migration should be immediate.
> @@ -430,6 +430,7 @@ void migrate_proc(struct work_struct *work)
> list_del(&seg->migrate_list);
> }
> }
> + return 0;
> }
>
> /*
> @@ -453,19 +454,16 @@ void wait_for_migration(struct wb_cache *cache, u64 id)
>
> /*----------------------------------------------------------------*/
>
> -void modulator_proc(struct work_struct *work)
> +int modulator_proc(void *data)
> {
> - struct wb_cache *cache =
> - container_of(work, struct wb_cache, modulator_work);
> + struct wb_cache *cache = data;
> struct wb_device *wb = cache->wb;
>
> struct hd_struct *hd = wb->device->bdev->bd_part;
> unsigned long old = 0, new, util;
> unsigned long intvl = 1000;
>
> - while (true) {
> - if (cache->on_terminate)
> - return;
> + while (!kthread_should_stop()) {
>
> new = jiffies_to_msecs(part_stat_read(hd, io_ticks));
>
> @@ -484,6 +482,7 @@ modulator_update:
>
> schedule_timeout_interruptible(msecs_to_jiffies(intvl));
> }
> + return 0;
> }
>
> /*----------------------------------------------------------------*/
> @@ -517,16 +516,12 @@ static void update_superblock_record(struct wb_cache *cache)
> kfree(buf);
> }
>
> -void recorder_proc(struct work_struct *work)
> +int recorder_proc(void *data)
> {
> - struct wb_cache *cache =
> - container_of(work, struct wb_cache, recorder_work);
> + struct wb_cache *cache = data;
> unsigned long intvl;
>
> - while (true) {
> - if (cache->on_terminate)
> - return;
> -
> + while (!kthread_should_stop()) {
> /* sec -> ms */
> intvl = cache->update_record_interval * 1000;
>
> @@ -539,20 +534,17 @@ void recorder_proc(struct work_struct *work)
>
> schedule_timeout_interruptible(msecs_to_jiffies(intvl));
> }
> + return 0;
> }
>
> /*----------------------------------------------------------------*/
>
> -void sync_proc(struct work_struct *work)
> +int sync_proc(void *data)
> {
> - struct wb_cache *cache =
> - container_of(work, struct wb_cache, sync_work);
> + struct wb_cache *cache = data;
> unsigned long intvl;
>
> - while (true) {
> - if (cache->on_terminate)
> - return;
> -
> + while (!kthread_should_stop()) {
> /* sec -> ms */
> intvl = cache->sync_interval * 1000;
>
> @@ -566,4 +558,5 @@ void sync_proc(struct work_struct *work)
>
> schedule_timeout_interruptible(msecs_to_jiffies(intvl));
> }
> + return 0;
> }
> diff --git a/Driver/dm-writeboost-daemon.h b/Driver/dm-writeboost-daemon.h
> index 3bdd862..d21d66c 100644
> --- a/Driver/dm-writeboost-daemon.h
> +++ b/Driver/dm-writeboost-daemon.h
> @@ -9,7 +9,7 @@
>
> /*----------------------------------------------------------------*/
>
> -void flush_proc(struct work_struct *);
> +int flush_proc(void *);
>
> /*----------------------------------------------------------------*/
>
> @@ -19,20 +19,20 @@ void flush_barrier_ios(struct work_struct *);
>
> /*----------------------------------------------------------------*/
>
> -void migrate_proc(struct work_struct *);
> +int migrate_proc(void *);
> void wait_for_migration(struct wb_cache *, u64 id);
>
> /*----------------------------------------------------------------*/
>
> -void modulator_proc(struct work_struct *);
> +int modulator_proc(void *);
>
> /*----------------------------------------------------------------*/
>
> -void sync_proc(struct work_struct *);
> +int sync_proc(void *);
>
> /*----------------------------------------------------------------*/
>
> -void recorder_proc(struct work_struct *);
> +int recorder_proc(void *);
>
> /*----------------------------------------------------------------*/
>
> diff --git a/Driver/dm-writeboost-metadata.c b/Driver/dm-writeboost-metadata.c
> index 1cd9e0c..4f5fa5e 100644
> --- a/Driver/dm-writeboost-metadata.c
> +++ b/Driver/dm-writeboost-metadata.c
> @@ -994,6 +994,17 @@ void free_migration_buffer(struct wb_cache *cache)
>
> /*----------------------------------------------------------------*/
>
> +#define CREATE_DAEMON(name)\
> + cache->name##_daemon = kthread_create(name##_proc, cache,\
> + #name "_daemon");\
> + if (IS_ERR(cache->name##_daemon)) {\
> + r = PTR_ERR(cache->name##_daemon);\
> + cache->name##_daemon = NULL;\
> + WBERR("couldn't spawn" #name "daemon");\
> + goto bad_##name##_daemon;\
> + }\
> + wake_up_process(cache->name##_daemon);
> +
> int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
> {
> int r = 0;
> @@ -1010,8 +1021,6 @@ int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
>
> mutex_init(&cache->io_lock);
>
> - cache->on_terminate = false;
> -
> /*
> * (i) Harmless Initializations
> */
> @@ -1042,12 +1051,6 @@ int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
> * Recovering the cache metadata
> * prerequires the migration daemon working.
> */
> - cache->migrate_wq = create_singlethread_workqueue("migratewq");
> - if (!cache->migrate_wq) {
> - r = -ENOMEM;
> - WBERR();
> - goto bad_migratewq;
> - }
>
> /* Migration Daemon */
> atomic_set(&cache->migrate_fail_count, 0);
> @@ -1070,9 +1073,7 @@ int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
>
> cache->allow_migrate = true;
> cache->reserving_segment_id = 0;
> - INIT_WORK(&cache->migrate_work, migrate_proc);
> - queue_work(cache->migrate_wq, &cache->migrate_work);
> -
> + CREATE_DAEMON(migrate);
>
> r = recover_cache(cache);
> if (r) {
> @@ -1086,19 +1087,12 @@ int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
> * These are only working
> * after the logical device created.
> */
> - cache->flush_wq = create_singlethread_workqueue("flushwq");
> - if (!cache->flush_wq) {
> - r = -ENOMEM;
> - WBERR();
> - goto bad_flushwq;
> - }
>
> /* Flush Daemon */
> - INIT_WORK(&cache->flush_work, flush_proc);
> spin_lock_init(&cache->flush_queue_lock);
> INIT_LIST_HEAD(&cache->flush_queue);
> init_waitqueue_head(&cache->flush_wait_queue);
> - queue_work(cache->flush_wq, &cache->flush_work);
> + CREATE_DAEMON(flush);
>
> /* Deferred ACK for barrier writes */
>
> @@ -1118,29 +1112,30 @@ int __must_check resume_cache(struct wb_cache *cache, struct dm_dev *dev)
>
> /* Migartion Modulator */
> cache->enable_migration_modulator = true;
> - INIT_WORK(&cache->modulator_work, modulator_proc);
> - schedule_work(&cache->modulator_work);
> + CREATE_DAEMON(modulator);
>
> /* Superblock Recorder */
> cache->update_record_interval = 60;
> - INIT_WORK(&cache->recorder_work, recorder_proc);
> - schedule_work(&cache->recorder_work);
> + CREATE_DAEMON(recorder);
>
> /* Dirty Synchronizer */
> cache->sync_interval = 60;
> - INIT_WORK(&cache->sync_work, sync_proc);
> - schedule_work(&cache->sync_work);
> + CREATE_DAEMON(sync);
>
> return 0;
>
> -bad_flushwq:
> +bad_sync_daemon:
> + kthread_stop(cache->recorder_daemon);
> +bad_recorder_daemon:
> + kthread_stop(cache->modulator_daemon);
> +bad_modulator_daemon:
> + kthread_stop(cache->flush_daemon);
> +bad_flush_daemon:
> bad_recover:
> - cache->on_terminate = true;
> - cancel_work_sync(&cache->migrate_work);
> + kthread_stop(cache->migrate_daemon);
> +bad_migrate_daemon:
> free_migration_buffer(cache);
> bad_alloc_migrate_buffer:
> - destroy_workqueue(cache->migrate_wq);
> -bad_migratewq:
> free_ht(cache);
> bad_alloc_ht:
> free_segment_header_array(cache);
> @@ -1153,20 +1148,21 @@ bad_init_rambuf_pool:
>
> void free_cache(struct wb_cache *cache)
> {
> - cache->on_terminate = true;
> + /*
> + * Must clean up all the volatile data
> + * before termination.
> + */
> + flush_current_buffer(cache);
>
> - /* Kill in-kernel daemons */
> - cancel_work_sync(&cache->sync_work);
> - cancel_work_sync(&cache->recorder_work);
> - cancel_work_sync(&cache->modulator_work);
> + kthread_stop(cache->sync_daemon);
> + kthread_stop(cache->recorder_daemon);
> + kthread_stop(cache->modulator_daemon);
>
> - cancel_work_sync(&cache->flush_work);
> - destroy_workqueue(cache->flush_wq);
> + kthread_stop(cache->flush_daemon);
>
> cancel_work_sync(&cache->barrier_deadline_work);
>
> - cancel_work_sync(&cache->migrate_work);
> - destroy_workqueue(cache->migrate_wq);
> + kthread_stop(cache->migrate_daemon);
> free_migration_buffer(cache);
>
> /* Destroy in-core structures */
> diff --git a/Driver/dm-writeboost-target.c b/Driver/dm-writeboost-target.c
> index 6aa4c7c..4b5b7aa 100644
> --- a/Driver/dm-writeboost-target.c
> +++ b/Driver/dm-writeboost-target.c
> @@ -973,12 +973,6 @@ static void writeboost_dtr(struct dm_target *ti)
> struct wb_device *wb = ti->private;
> struct wb_cache *cache = wb->cache;
>
> - /*
> - * Synchronize all the dirty writes
> - * before termination.
> - */
> - cache->sync_interval = 1;
> -
> free_cache(cache);
> kfree(cache);
>
> diff --git a/Driver/dm-writeboost.h b/Driver/dm-writeboost.h
> index 74ff194..092c100 100644
> --- a/Driver/dm-writeboost.h
> +++ b/Driver/dm-writeboost.h
> @@ -16,6 +16,7 @@
> #include <linux/list.h>
> #include <linux/slab.h>
> #include <linux/mutex.h>
> +#include <linux/kthread.h>
> #include <linux/sched.h>
> #include <linux/timer.h>
> #include <linux/device-mapper.h>
> @@ -265,8 +266,7 @@ struct wb_cache {
> * and flush daemon asynchronously
> * flush them to the cache device.
> */
> - struct work_struct flush_work;
> - struct workqueue_struct *flush_wq;
> + struct task_struct *flush_daemon;
> spinlock_t flush_queue_lock;
> struct list_head flush_queue;
> wait_queue_head_t flush_wait_queue;
> @@ -288,8 +288,7 @@ struct wb_cache {
> * migrate daemon goes into migration
> * if they are segments to migrate.
> */
> - struct work_struct migrate_work;
> - struct workqueue_struct *migrate_wq;
> + struct task_struct *migrate_daemon;
> bool allow_migrate; /* param */
>
> /*
> @@ -314,7 +313,7 @@ struct wb_cache {
> * the migration
> * according to the load of backing store.
> */
> - struct work_struct modulator_work;
> + struct task_struct *modulator_daemon;
> bool enable_migration_modulator; /* param */
>
> /*
> @@ -323,7 +322,7 @@ struct wb_cache {
> * Update the superblock record
> * periodically.
> */
> - struct work_struct recorder_work;
> + struct task_struct *recorder_daemon;
> unsigned long update_record_interval; /* param */
>
> /*
> @@ -332,16 +331,9 @@ struct wb_cache {
> * Sync the dirty writes
> * periodically.
> */
> - struct work_struct sync_work;
> + struct task_struct *sync_daemon;
> unsigned long sync_interval; /* param */
>
> - /*
> - * on_terminate is true
> - * to notify all the background daemons to
> - * stop their operations.
> - */
> - bool on_terminate;
> -
> atomic64_t stat[STATLEN];
> };
>
> Akira
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/