[PATCH] [RFC] timerfd: add TFD_NOTIFY_CLOCK_SET to watch for clock changes

From: Alexander Shishkin
Date: Tue Nov 23 2010 - 12:26:10 EST


Certain userspace applications (like "clock" desktop applets or cron or
systemd) might want to be notified when some other application changes
the system time. There are several reasons for this that I know of:
- avoiding periodic wakeups to poll time changes;
- rearming CLOCK_REALTIME timers when said changes happen;
- changing system timekeeping policy for system-wide time management
programs;
- keeping guest applications/operating systems running in emulators
up to date.

This is another attempt to approach notifying userspace about system
clock changes. The other one is using an eventfd and a syscall [1]. In
the course of discussing the necessity of a syscall for this kind of
notification, it was suggested that this functionality can be achieved
via timers [2] (and timerfd in particular [3]). This idea got quite
some support [4], [5], [6] and some vague criticism [7], so I decided
to try and go a bit further with it.

[1] http://marc.info/?l=linux-kernel&m=128950389423614&w=2
[2] http://marc.info/?l=linux-kernel&m=128951020831573&w=2
[3] http://marc.info/?l=linux-kernel&m=128951588006157&w=2
[4] http://marc.info/?l=linux-kernel&m=128951503205111&w=2
[5] http://marc.info/?l=linux-kernel&m=128955890118477&w=2
[6] http://marc.info/?l=linux-kernel&m=129002967031104&w=2
[7] http://marc.info/?l=linux-kernel&m=129002672227263&w=2

Signed-off-by: Alexander Shishkin <virtuoso@xxxxxxxxx>
CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
CC: Greg Kroah-Hartman <gregkh@xxxxxxx>
CC: Feng Tang <feng.tang@xxxxxxxxx>
CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
CC: Michael Tokarev <mjt@xxxxxxxxxx>
CC: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
CC: John Stultz <johnstul@xxxxxxxxxx>
CC: Chris Friesen <chris.friesen@xxxxxxxxxxx>
CC: Kay Sievers <kay.sievers@xxxxxxxx>
CC: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
CC: Artem Bityutskiy <dedekind1@xxxxxxxxx>
CC: Davide Libenzi <davidel@xxxxxxxxxxxxxxx>
CC: linux-fsdevel@xxxxxxxxxxxxxxx
CC: linux-kernel@xxxxxxxxxxxxxxx
---
fs/timerfd.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-
include/linux/hrtimer.h | 6 +++++
include/linux/timerfd.h | 3 +-
kernel/hrtimer.c | 3 ++
4 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc14..7890815 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -30,8 +30,13 @@ struct timerfd_ctx {
u64 ticks;
int expired;
int clockid;
+ struct list_head notifiers_list;
};

+/* TFD_NOTIFY_CLOCK_SET timers go here */
+static DEFINE_SPINLOCK(notifiers_lock);
+static LIST_HEAD(notifiers_list);
+
/*
* This gets called when the timer event triggers. We set the "expired"
* flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +56,21 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
return HRTIMER_NORESTART;
}

+void timerfd_clock_was_set(void) /* notify every timerfd on notifiers_list; called from clock_was_set() */
+{
+ struct timerfd_ctx *ctx;
+ unsigned long flags;
+
+ spin_lock(&notifiers_lock); /* held across the whole walk of the global notifiers_list */
+ list_for_each_entry(ctx, &notifiers_list, notifiers_list) {
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ ctx->ticks++; /* non-zero ticks is the condition timerfd_read() waits on */
+ wake_up_locked(&ctx->wqh); /* _locked variant: ctx->wqh.lock is already held here */
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ }
+ spin_unlock(&notifiers_lock);
+}
+
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
ktime_t remaining;
@@ -72,6 +92,12 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
ctx->expired = 0;
ctx->ticks = 0;
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
+
+ if (flags & TFD_NOTIFY_CLOCK_SET) {
+ list_add(&ctx->notifiers_list, &notifiers_list);
+ return;
+ }
+
hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
hrtimer_set_expires(&ctx->tmr, texp);
ctx->tmr.function = timerfd_tmrproc;
@@ -83,7 +109,12 @@ static int timerfd_release(struct inode *inode, struct file *file)
{
struct timerfd_ctx *ctx = file->private_data;

- hrtimer_cancel(&ctx->tmr);
+ if (!list_empty(&ctx->notifiers_list)) {
+ spin_lock(&notifiers_lock);
+ list_del(&ctx->notifiers_list);
+ spin_unlock(&notifiers_lock);
+ } else
+ hrtimer_cancel(&ctx->tmr);
kfree(ctx);
return 0;
}
@@ -113,6 +144,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,

if (count < sizeof(ticks))
return -EINVAL;
+
spin_lock_irq(&ctx->wqh.lock);
if (file->f_flags & O_NONBLOCK)
res = -EAGAIN;
@@ -120,7 +152,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
if (ctx->ticks) {
ticks = ctx->ticks;
- if (ctx->expired && ctx->tintv.tv64) {
+ if (ctx->expired && ctx->tintv.tv64 &&
+ list_empty(&ctx->notifiers_list)) {
/*
* If tintv.tv64 != 0, this is a periodic timer that
* needs to be re-armed. We avoid doing it in the timer
@@ -218,13 +251,17 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
* it to the new values.
*/
for (;;) {
+ spin_lock(&notifiers_lock);
spin_lock_irq(&ctx->wqh.lock);
if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
break;
spin_unlock_irq(&ctx->wqh.lock);
+ spin_unlock(&notifiers_lock);
cpu_relax();
}

+ INIT_LIST_HEAD(&ctx->notifiers_list);
+
/*
* If the timer is expired and it's periodic, we need to advance it
* because the caller may want to know the previous expiration time.
@@ -243,6 +280,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
timerfd_setup(ctx, flags, &ktmr);

spin_unlock_irq(&ctx->wqh.lock);
+ spin_unlock(&notifiers_lock);
fput(file);
if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
return -EFAULT;
@@ -262,6 +300,14 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
ctx = file->private_data;

spin_lock_irq(&ctx->wqh.lock);
+ if (!list_empty(&ctx->notifiers_list)) {
+ kotmr.it_value = current_kernel_time();
+ kotmr.it_interval.tv_sec = 0;
+ kotmr.it_interval.tv_nsec = 0;
+ spin_unlock_irq(&ctx->wqh.lock);
+ goto out;
+ }
+
if (ctx->expired && ctx->tintv.tv64) {
ctx->expired = 0;
ctx->ticks +=
@@ -273,6 +319,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
spin_unlock_irq(&ctx->wqh.lock);
fput(file);

+out:
return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
}

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0c1b8..eb9d331 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -247,6 +247,12 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
return ktime_sub(timer->_expires, timer->base->get_time());
}

+#ifdef CONFIG_TIMERFD
+extern void timerfd_clock_was_set(void);
+#else
+static inline void timerfd_clock_was_set(void) {}
+#endif
+
#ifdef CONFIG_HIGH_RES_TIMERS
struct clock_event_device;

diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index 2d07929..c3ddad9 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -19,6 +19,7 @@
* shared O_* flags.
*/
#define TFD_TIMER_ABSTIME (1 << 0)
+#define TFD_NOTIFY_CLOCK_SET (1 << 1)
#define TFD_CLOEXEC O_CLOEXEC
#define TFD_NONBLOCK O_NONBLOCK

@@ -26,6 +27,6 @@
/* Flags for timerfd_create. */
#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
/* Flags for timerfd_settime. */
-#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME
+#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_NOTIFY_CLOCK_SET)

#endif /* _LINUX_TIMERFD_H */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf..6f6403a 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -646,6 +646,9 @@ void clock_was_set(void)
{
/* Retrigger the CPU local events everywhere */
on_each_cpu(retrigger_next_event, NULL, 1);
+
+ /* Trigger timerfd notifiers */
+ timerfd_clock_was_set();
}

/*
--
1.7.2.1.45.gb66c2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/