[PATCH v2] epoll: add nsec timeout support
From: Willem de Bruijn
Date: Mon Nov 16 2020 - 11:10:13 EST
From: Willem de Bruijn <willemb@xxxxxxxxxx>
Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
interpretation of argument timeout in epoll_wait from msec to nsec.
Use cases such as datacenter networking operate on timescales well
below milliseconds. Shorter timeouts bound their tail latency.
The underlying hrtimer is already programmed with nsec resolution.
Changes (v2):
- cast to s64: avoid overflow on 32-bit platforms (Shuo Chen)
- minor commit message rewording
Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx>
---
Applies cleanly both to 5.10-rc4 and next-20201116.
In next, with the new field refs, nstimeout no longer fills padding.
The selftest is on GitHub for now. I can follow up with a kselftest.
https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
---
fs/eventpoll.c | 26 +++++++++++++++++++-------
include/uapi/linux/eventpoll.h | 1 +
2 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4df61129566d..817d9cc5b8b8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -225,6 +225,9 @@ struct eventpoll {
unsigned int napi_id;
#endif
+ /* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
+ unsigned int nstimeout:1;
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* tracks wakeup nests for lockdep validation */
u8 nests;
@@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
return esed.res;
}
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static inline struct timespec64 ep_set_nstimeout(s64 ns)
{
- struct timespec64 now, ts = {
- .tv_sec = ms / MSEC_PER_SEC,
- .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
- };
+ struct timespec64 now, ts;
+ ts = ns_to_timespec64(ns);
ktime_get_ts64(&now);
return timespec64_add_safe(now, ts);
}
+static inline struct timespec64 ep_set_mstimeout(long ms)
+{
+ return ep_set_nstimeout(ms * (s64)NSEC_PER_MSEC);
+}
+
/**
* ep_poll - Retrieves ready events, and delivers them to the caller supplied
* event buffer.
@@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
lockdep_assert_irqs_enabled();
if (timeout > 0) {
- struct timespec64 end_time = ep_set_mstimeout(timeout);
+ struct timespec64 end_time;
+
+ end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
+ ep_set_mstimeout(timeout);
slack = select_estimate_accuracy(&end_time);
to = &expires;
@@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
- if (flags & ~EPOLL_CLOEXEC)
+ if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
return -EINVAL;
/*
* Create the internal data structure ("struct eventpoll").
@@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
error = ep_alloc(&ep);
if (error < 0)
return error;
+
+ ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
+
/*
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 8a3432d0f0dc..f6ef9c9f8ac2 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -21,6 +21,7 @@
/* Flags for epoll_create1. */
#define EPOLL_CLOEXEC O_CLOEXEC
+#define EPOLL_NSTIMEO 0x1
/* Valid opcodes to issue to sys_epoll_ctl() */
#define EPOLL_CTL_ADD 1
--
2.29.2.299.gdc1121823c-goog