[take27 8/8] kevent: Kevent posix timer notifications.

From: Evgeniy Polyakov
Date: Tue Dec 12 2006 - 02:04:25 EST



Kevent posix timer notifications.

A simple extension to POSIX timers that allows
notification of timer expiration to be delivered
through a kevent queue.

An example application, posix_timer.c, can be found
in the archive on the project homepage.

Signed-off-by: Evgeniy Polyakov <johnpol@xxxxxxxxxxx>


diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 8786e01..3768746 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -235,6 +235,7 @@ typedef struct siginfo {
#define SIGEV_NONE 1 /* other notification: meaningless */
#define SIGEV_THREAD 2 /* deliver via thread creation */
#define SIGEV_THREAD_ID 4 /* deliver to thread */
+#define SIGEV_KEVENT 8 /* deliver through kevent queue */

/*
* This works because the alignment is ok on all current architectures
@@ -260,6 +261,8 @@ typedef struct sigevent {
void (*_function)(sigval_t);
void *_attribute; /* really pthread_attr_t */
} _sigev_thread;
+
+ int kevent_fd;
} _sigev_un;
} sigevent_t;

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f..4b9deb4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/kevent_storage.h>

union cpu_time_count {
cputime_t cpu;
@@ -49,6 +50,9 @@ struct k_itimer {
sigval_t it_sigev_value; /* value word of sigevent struct */
struct task_struct *it_process; /* process to send signal to */
struct sigqueue *sigq; /* signal queue entry. */
+#ifdef CONFIG_KEVENT_TIMER
+ struct kevent_storage st;
+#endif
union {
struct {
struct hrtimer timer;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e5ebcc1..8d0e7a3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -48,6 +48,8 @@
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/module.h>
+#include <linux/kevent.h>
+#include <linux/file.h>

/*
* Management arrays for POSIX timers. Timers are kept in slab memory
@@ -224,6 +226,99 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
return 0;
}

+#ifdef CONFIG_KEVENT_TIMER
+static int posix_kevent_enqueue(struct kevent *k)
+{
+ /*
+ * The raw 64-bit id is cast back to the k_itimer pointer. The id union
+ * has no pointer member, but 64 bits are enough for any known pointer size.
+ */
+ struct k_itimer *tmr = (struct k_itimer *)(unsigned long)k->event.id.raw_u64;
+ return kevent_storage_enqueue(&tmr->st, k); /* result is returned unchanged */
+}
+static int posix_kevent_dequeue(struct kevent *k)
+{
+ struct k_itimer *tmr = (struct k_itimer *)(unsigned long)k->event.id.raw_u64;
+ kevent_storage_dequeue(&tmr->st, k); /* tmr is the raw 64-bit id cast back to a pointer */
+ return 0; /* unconditional: no failure is reported from here */
+}
+static int posix_kevent_callback(struct kevent *k)
+{
+ return 1; /* k is not inspected. NOTE(review): meaning of 1 is set by the kevent core - confirm */
+}
+static int posix_kevent_init(void)
+{
+ struct kevent_callbacks tc = { /* on-stack. NOTE(review): assumes kevent_add_callbacks() copies it - confirm */
+ .callback = &posix_kevent_callback,
+ .enqueue = &posix_kevent_enqueue,
+ .dequeue = &posix_kevent_dequeue};
+ /* Pass the handlers to kevent for KEVENT_POSIX_TIMER; init_posix_timers() BUG()s on non-zero. */
+ return kevent_add_callbacks(&tc, KEVENT_POSIX_TIMER);
+}
+
+extern struct file_operations kevent_user_fops;
+/* Add a KEVENT_POSIX_TIMER ukevent for tmr to the kevent user behind descriptor fd. */
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+ struct ukevent uk;
+ struct file *file;
+ struct kevent_user *u;
+ int err;
+
+ file = fget(fd); /* reference is dropped with fput() on every path below */
+ if (!file) {
+ err = -EBADF; /* fget() found no file for fd */
+ goto err_out;
+ }
+ /* Only files whose f_op is kevent_user_fops are accepted. */
+ if (file->f_op != &kevent_user_fops) {
+ err = -EINVAL;
+ goto err_out_fput;
+ }
+
+ u = file->private_data;
+
+ memset(&uk, 0, sizeof(struct ukevent));
+ /* Fill in the request; fields not set below stay zero from the memset. */
+ uk.event = KEVENT_MASK_ALL;
+ uk.type = KEVENT_POSIX_TIMER;
+ uk.id.raw_u64 = (unsigned long)(tmr); /* timer address is the unique id; the enqueue/dequeue handlers cast it back */
+ uk.req_flags = KEVENT_REQ_ONESHOT | KEVENT_REQ_ALWAYS_QUEUE;
+ uk.ptr = tmr->it_sigev_value.sival_ptr; /* value word from the user's sigevent */
+
+ err = kevent_user_add_ukevent(&uk, u);
+ if (err)
+ goto err_out_fput; /* err is returned to the caller unchanged */
+ /* Success: the file reference taken by fget() is dropped here too. */
+ fput(file);
+
+ return 0;
+
+err_out_fput:
+ fput(file);
+err_out:
+ return err;
+}
+/* Called from release_posix_timer() right before the timer is returned to its cache. */
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{
+ kevent_storage_fini(&tmr->st); /* st is the kevent storage embedded in k_itimer */
+}
+#else
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+ return -ENOSYS; /* built without CONFIG_KEVENT_TIMER: sys_timer_create() fails SIGEV_KEVENT with this */
+}
+static int posix_kevent_init(void)
+{
+ return 0; /* nothing to register without CONFIG_KEVENT_TIMER; 0 keeps init_posix_timers() off its BUG() path */
+}
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{ /* intentionally empty: k_itimer has no kevent storage member without CONFIG_KEVENT_TIMER */
+}
+#endif
+
+
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
*/
@@ -241,6 +336,11 @@ static __init int init_posix_timers(void)
register_posix_clock(CLOCK_REALTIME, &clock_realtime);
register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);

+ if (posix_kevent_init()) {
+ printk(KERN_ERR "Failed to initialize kevent posix timers.\n");
+ BUG();
+ }
+
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0, NULL, NULL);
idr_init(&posix_timers_id);
@@ -343,23 +443,27 @@ static int posix_timer_fn(struct hrtimer *timer)

timr = container_of(timer, struct k_itimer, it.real.timer);
spin_lock_irqsave(&timr->it_lock, flags);
+
+ if (timr->it_sigev_notify == SIGEV_KEVENT) {
+ kevent_storage_ready(&timr->st, NULL, KEVENT_MASK_ALL);
+ } else {
+ if (timr->it.real.interval.tv64 != 0)
+ si_private = ++timr->it_requeue_pending;

- if (timr->it.real.interval.tv64 != 0)
- si_private = ++timr->it_requeue_pending;
-
- if (posix_timer_event(timr, si_private)) {
- /*
- * signal was not sent because of sig_ignor
- * we will not get a call back to restart it AND
- * it should be restarted.
- */
- if (timr->it.real.interval.tv64 != 0) {
- timr->it_overrun +=
- hrtimer_forward(timer,
- timer->base->softirq_time,
- timr->it.real.interval);
- ret = HRTIMER_RESTART;
- ++timr->it_requeue_pending;
+ if (posix_timer_event(timr, si_private)) {
+ /*
+ * signal was not sent because of sig_ignor
+ * we will not get a call back to restart it AND
+ * it should be restarted.
+ */
+ if (timr->it.real.interval.tv64 != 0) {
+ timr->it_overrun +=
+ hrtimer_forward(timer,
+ timer->base->softirq_time,
+ timr->it.real.interval);
+ ret = HRTIMER_RESTART;
+ ++timr->it_requeue_pending;
+ }
}
}

@@ -407,6 +511,15 @@ static struct k_itimer * alloc_posix_timer(void)
kmem_cache_free(posix_timers_cache, tmr);
tmr = NULL;
}
+#ifdef CONFIG_KEVENT_TIMER
+	/*
+	 * The error path just above frees the timer and sets tmr to NULL,
+	 * so the embedded kevent storage may only be set up for a live timer;
+	 * otherwise &tmr->st would be derived from a NULL pointer.
+	 */
+	if (tmr)
+		kevent_storage_init(tmr, &tmr->st);
+#endif
return tmr;
}

@@ -424,6 +531,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
if (unlikely(tmr->it_process) &&
tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
put_task_struct(tmr->it_process);
+ posix_kevent_fini_timer(tmr);
kmem_cache_free(posix_timers_cache, tmr);
}

@@ -496,40 +604,52 @@ sys_timer_create(const clockid_t which_clock,
new_timer->it_sigev_signo = event.sigev_signo;
new_timer->it_sigev_value = event.sigev_value;

- read_lock(&tasklist_lock);
- if ((process = good_sigevent(&event))) {
- /*
- * We may be setting up this process for another
- * thread. It may be exiting. To catch this
- * case the we check the PF_EXITING flag. If
- * the flag is not set, the siglock will catch
- * him before it is too late (in exit_itimers).
- *
- * The exec case is a bit more invloved but easy
- * to code. If the process is in our thread
- * group (and it must be or we would not allow
- * it here) and is doing an exec, it will cause
- * us to be killed. In this case it will wait
- * for us to die which means we can finish this
- * linkage with our last gasp. I.e. no code :)
- */
+ if (event.sigev_notify == SIGEV_KEVENT) {
+ error = posix_kevent_init_timer(new_timer, event._sigev_un.kevent_fd);
+ if (error)
+ goto out;
+
+ process = current->group_leader;
spin_lock_irqsave(&process->sighand->siglock, flags);
- if (!(process->flags & PF_EXITING)) {
- new_timer->it_process = process;
- list_add(&new_timer->list,
- &process->signal->posix_timers);
- spin_unlock_irqrestore(&process->sighand->siglock, flags);
- if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- get_task_struct(process);
- } else {
- spin_unlock_irqrestore(&process->sighand->siglock, flags);
- process = NULL;
+ new_timer->it_process = process;
+ list_add(&new_timer->list, &process->signal->posix_timers);
+ spin_unlock_irqrestore(&process->sighand->siglock, flags);
+ } else {
+ read_lock(&tasklist_lock);
+ if ((process = good_sigevent(&event))) {
+ /*
+ * We may be setting up this process for another
+ * thread. It may be exiting. To catch this
+ * case the we check the PF_EXITING flag. If
+ * the flag is not set, the siglock will catch
+ * him before it is too late (in exit_itimers).
+ *
+ * The exec case is a bit more invloved but easy
+ * to code. If the process is in our thread
+ * group (and it must be or we would not allow
+ * it here) and is doing an exec, it will cause
+ * us to be killed. In this case it will wait
+ * for us to die which means we can finish this
+ * linkage with our last gasp. I.e. no code :)
+ */
+ spin_lock_irqsave(&process->sighand->siglock, flags);
+ if (!(process->flags & PF_EXITING)) {
+ new_timer->it_process = process;
+ list_add(&new_timer->list,
+ &process->signal->posix_timers);
+ spin_unlock_irqrestore(&process->sighand->siglock, flags);
+ if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+ get_task_struct(process);
+ } else {
+ spin_unlock_irqrestore(&process->sighand->siglock, flags);
+ process = NULL;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ if (!process) {
+ error = -EINVAL;
+ goto out;
}
- }
- read_unlock(&tasklist_lock);
- if (!process) {
- error = -EINVAL;
- goto out;
}
} else {
new_timer->it_sigev_notify = SIGEV_SIGNAL;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/