[RFC 03/15] kernel: time: Add macros and functions to support 64 bit time

From: Deepa Dinamani
Date: Thu Jan 07 2016 - 00:39:40 EST


The current_fs_time function is not y2038 safe because
of the use of struct timespec.

The macros CURRENT_TIME and CURRENT_TIME_SEC do not represent
file system times correctly as they cannot perform range checks
or truncations. These are also not y2038 safe. Add 64 bit versions
of the above macros.

Provide a new set of FS_TIME macros which will return time in
timespec or timespec64 based on CONFIG_FS_USES_64BIT_TIME.
These are meant to be used only within file systems because they
are tied to the above config. Once the config is enabled,
the timespec version can be deleted and the 64 bit time version
can be used elsewhere also.

Add struct timespec64 version for current_fs_time().
Current version of current_fs_time() can be dropped after enabling
CONFIG_FS_USES_64BIT_TIME.

Provide an alternative to timespec_trunc(): fs_time_trunc().
This function takes super block as an argument in addition to
timestamp so that it can include range and precision checks.
Additionally, the function uses y2038 safe timespec64 instead of
timespec for timestamp representation.

Add function: current_fs_time_sec() to obtain only the seconds
portion of the current time (equivalent to CURRENT_TIME_SEC).
This function has two versions selected by the config
CONFIG_FS_USES_64BIT_TIME.
The 32 bit version support can be dropped after the above config
is enabled globally.

All calls to timespec_trunc() will eventually be replaced by
fs_time_trunc(), at which point timespec_trunc() can be deleted.

All the above function calls use fs_time_range_check() to clamp
the timestamps.

Inodes that are saved in memory and on disk always have valid
timestamps. But, the accessors can detect a clamped timestamp
while saving the timestamps into inodes.
The clamped timestamp handling is split into two separate cases:
a. Mounting a fs that has exceeded its current timestamp needs.
b. A mounted fs exceeds its timestamp needs.
Both the above cases are handled using separate callbacks:
superblock bad_timestamp_mount and bad_timestamp operations.

The motivation for the above callbacks is that the Linux kernel does
not internally use timestamps and it cannot decide how catastrophic
these timestamp clamps can be for the on disk file system or
user space applications that use it.

Signed-off-by: Deepa Dinamani <deepa.kernel@xxxxxxxxx>
---
fs/libfs.c | 5 ++
fs/super.c | 10 ++++
include/linux/fs.h | 50 ++++++++++++++--
include/linux/time64.h | 4 ++
kernel/time/time.c | 156 +++++++++++++++++++++++++++++++++++++++++--------
5 files changed, 196 insertions(+), 29 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 8dc37fc..4fa2002 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -227,6 +227,9 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
s->s_magic = magic;
s->s_op = ops ? ops : &simple_super_operations;
s->s_time_gran = 1;
+ s->s_time_min = FS_DEFAULT_MIN_TIMESTAMP;
+ s->s_time_max = FS_DEFAULT_MAX_TIMESTAMP;
+
root = new_inode(s);
if (!root)
goto Enomem;
@@ -482,6 +485,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
s->s_magic = magic;
s->s_op = &simple_super_operations;
s->s_time_gran = 1;
+ s->s_time_min = FS_DEFAULT_MIN_TIMESTAMP;
+ s->s_time_max = FS_DEFAULT_MAX_TIMESTAMP;

inode = new_inode(s);
if (!inode)
diff --git a/fs/super.c b/fs/super.c
index 7ea56de..3f53def 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -239,6 +239,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
+ s->s_time_min = FS_DEFAULT_MIN_TIMESTAMP;
+ s->s_time_max = FS_DEFAULT_MAX_TIMESTAMP;
s->cleancache_poolid = CLEANCACHE_NO_POOL;

s->s_shrink.seeks = DEFAULT_SEEKS;
@@ -1143,6 +1145,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, sb->s_maxbytes);

+ /* check timestamp range */
+ if (unlikely(is_fs_timestamp_bad(current_fs_time(sb))) &&
+ (sb->s_op->bad_timestamp_mount)) {
+ error = sb->s_op->bad_timestamp_mount(sb);
+ if (error)
+ goto out_sb;
+ }
+
up_write(&sb->s_umount);
free_secdata(secdata);
return root;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b9f3cee..5112bc2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -693,8 +693,14 @@ struct inode {
#define VFS_INODE_SET_XTIME(xtime, inode, ts64) \
do { \
struct inode_timespec __ts = (ts64); \
+ struct super_block *__sb = (inode)->i_sb; \
(inode)->xtime##_sec = __ts.tv_sec; \
(inode)->xtime##_nsec = __ts.tv_nsec; \
+ if (unlikely(is_fs_timestamp_bad(__ts))) { /* ts64 evaluated once */ \
+ (inode)->xtime##_nsec = 0; /* drop the clamp marker */ \
+ if (__sb->s_op->bad_timestamp) \
+ __sb->s_op->bad_timestamp(__sb); \
+ } \
} while (0)

#define VFS_INODE_GET_XTIME(xtime, inode) \
@@ -703,8 +709,16 @@ struct inode {

#else

-#define VFS_INODE_SET_XTIME(xtime, inode, ts) \
- ((inode)->xtime = (ts))
+#define VFS_INODE_SET_XTIME(xtime, inode, ts) \
+ do { \
+ struct super_block *__sb = (inode)->i_sb; \
+ (inode)->xtime = (ts); \
+ if (unlikely(is_fs_timestamp_bad((inode)->xtime))) { \
+ (inode)->xtime.tv_nsec = 0; /* drop the clamp marker */ \
+ if (__sb->s_op->bad_timestamp) \
+ __sb->s_op->bad_timestamp(__sb); \
+ } \
+ } while (0)

#define VFS_INODE_GET_XTIME(xtime, inode) \
((inode)->xtime)
@@ -1355,6 +1369,9 @@ struct super_block {
unsigned int s_max_links;
fmode_t s_mode;

+ /* Max and min values of c/m/atime in UNIX time. */
+ time64_t s_time_max;
+ time64_t s_time_min;
/* Granularity of c/m/atime in ns.
Cannot be worse than a second */
u32 s_time_gran;
@@ -1416,7 +1433,26 @@ struct super_block {
struct list_head s_inodes; /* all inodes */
};

-extern struct timespec current_fs_time(struct super_block *sb);
+/* Temporary macros for fs code to obtain current times. They select the
+ * timespec or timespec64 variant, to aid moving all of fs code to timespec64.
+ */
+#ifdef CONFIG_FS_USES_64BIT_TIME
+
+#define FS_TIME CURRENT_TIME64
+#define FS_TIME_SEC CURRENT_TIME64_SEC
+
+#else
+
+#define FS_TIME CURRENT_TIME
+#define FS_TIME_SEC CURRENT_TIME_SEC
+
+#endif
+
+extern int is_fs_timestamp_bad(struct inode_timespec ts);
+extern struct inode_timespec current_fs_time(struct super_block *sb);
+extern struct inode_timespec current_fs_time_sec(struct super_block *sb);
+extern struct inode_timespec
+fs_time_trunc(struct inode_timespec ts, struct super_block *sb);

/*
* Snapshotting support.
@@ -1635,6 +1671,11 @@ struct block_device_operations;
#define NOMMU_VMFLAGS \
(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)

+#define FS_TIMESTAMP_NSEC_NOT_VALID INT_MAX /* tv_nsec of a clamped time */
+/* Max timestamp is set to (2038-01-19 03:14:07 UTC) */
+#define FS_DEFAULT_MAX_TIMESTAMP INT_MAX
+/* Min timestamp is set to Epoch (1970-01-01 UTC). */
+#define FS_DEFAULT_MIN_TIMESTAMP 0

struct iov_iter;

@@ -1732,8 +1773,9 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,

struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
+ int (*bad_timestamp_mount)(struct super_block *);
+ void (*bad_timestamp)(struct super_block *);
void (*destroy_inode)(struct inode *);
-
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
int (*drop_inode) (struct inode *);
diff --git a/include/linux/time64.h b/include/linux/time64.h
index be98201..eb3cdc0 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -47,6 +47,10 @@ struct itimerspec64 {

#endif

+#define CURRENT_TIME64 (current_kernel_time64()) /* 64 bit CURRENT_TIME */
+#define CURRENT_TIME64_SEC \
+ ((struct timespec64) { ktime_get_real_seconds(), 0 }) /* seconds only */
+
/* Parameters used to convert the timespec values: */
#define MSEC_PER_SEC 1000L
#define USEC_PER_MSEC 1000L
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 86751c6..24ca258 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -230,6 +230,103 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}

+/* fs_time_range_check:
+ * Check whether a given timestamp is in the range allowed for a
+ * filesystem, modifying it in place through @ts when it is not.
+ * Assume input timespec is normalized.
+ * If tv_sec is outside [sb->s_time_min, sb->s_time_max], clamp it into
+ * that range and set ts->tv_nsec to FS_TIMESTAMP_NSEC_NOT_VALID.
+ * Otherwise, set ts->tv_nsec to 0 if it is outside [0, NSEC_PER_SEC).
+ * A clamped timestamp can be detected with is_fs_timestamp_bad().
+ */
+static void
+fs_time_range_check(struct super_block *sb, struct inode_timespec *ts)
+{
+ if (unlikely(sb->s_time_max < ts->tv_sec ||
+ sb->s_time_min > ts->tv_sec)) {
+ ts->tv_sec = clamp_val(ts->tv_sec, sb->s_time_min, sb->s_time_max);
+ ts->tv_nsec = FS_TIMESTAMP_NSEC_NOT_VALID;
+ return;
+ }
+
+ if (unlikely(ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC))
+ ts->tv_nsec = 0;
+}
+
+/* Tell whether fs_time_range_check() clamped a timestamp, which it flags
+ * through tv_nsec: returns -1 if the timestamp is bad/clamped,
+ * returns 0 otherwise.
+ */
+int is_fs_timestamp_bad(struct inode_timespec ts)
+{
+ /* fs_time_range_check() leaves this marker in clamped timestamps. */
+ const int clamped = ts.tv_nsec == FS_TIMESTAMP_NSEC_NOT_VALID;
+
+ return clamped ? -1 : 0;
+}
+EXPORT_SYMBOL(is_fs_timestamp_bad);
+
+/*
+ * fs_time_trunc - Range check and truncate inode_timespec to a granularity
+ * @t: inode_timespec
+ * @sb: Super block.
+ *
+ * Clamp @t to the range of @sb, then round it down to sb->s_time_gran, which
+ * must not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
+ * Returns the result; a clamped timestamp is returned without truncation.
+ */
+struct inode_timespec
+fs_time_trunc(struct inode_timespec t, struct super_block *sb)
+{
+ u32 gran = sb->s_time_gran;
+
+ /* range check for time. */
+ fs_time_range_check(sb, &t);
+ if (unlikely(is_fs_timestamp_bad(t)))
+ return t;
+
+ /* Avoid division in the common cases 1 ns and 1 s. */
+ if (gran == NSEC_PER_SEC) {
+ t.tv_nsec = 0;
+ } else if (gran > 1 && gran < NSEC_PER_SEC) {
+ t.tv_nsec -= t.tv_nsec % gran;
+ } else if (gran != 1) {
+ /* gran == 1 needs no truncation; anything else is illegal. */
+ WARN(1, "illegal file time granularity: %u", gran);
+ }
+
+ return t;
+}
+EXPORT_SYMBOL(fs_time_trunc);
+
+/**
+ * timespec_trunc - Round a timespec down to a granularity
+ * @t: Timespec
+ * @gran: Granularity in ns.
+ *
+ * Drops the part of tv_nsec that is finer than @gran. Always rounds down.
+ * @gran must not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
+ *
+ * Deprecated: filesystems should no longer call this function and should
+ * use fs_time_trunc instead.
+ */
+struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+ /* Handle 1 ns and 1 s up front so the common cases skip the division. */
+ if (gran == 1)
+ return t;
+
+ if (gran == NSEC_PER_SEC)
+ t.tv_nsec = 0;
+ else if (gran > 1 && gran < NSEC_PER_SEC)
+ t.tv_nsec -= t.tv_nsec % gran;
+ else
+ WARN(1, "illegal file time granularity: %u", gran);
+
+ return t;
+}
+EXPORT_SYMBOL(timespec_trunc);
+
/**
* current_fs_time - Return FS time
* @sb: Superblock.
@@ -237,13 +334,46 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
* Return the current time truncated to the time granularity supported by
* the fs.
*/
+#ifdef CONFIG_FS_USES_64BIT_TIME
+struct timespec64 current_fs_time(struct super_block *sb)
+{
+ /* fs_time_trunc() range checks the time before truncating it. */
+ return fs_time_trunc(current_kernel_time64(), sb);
+}
+EXPORT_SYMBOL(current_fs_time);
+
+/* Current time with only the seconds portion, clamped to the fs range. */
+struct timespec64 current_fs_time_sec(struct super_block *sb)
+{
+ struct timespec64 now_sec = { .tv_sec = ktime_get_real_seconds() };
+
+ /* Clamp to the range supported by the fs. */
+ fs_time_range_check(sb, &now_sec);
+
+ return now_sec;
+}
+EXPORT_SYMBOL(current_fs_time_sec);
+#else
struct timespec current_fs_time(struct super_block *sb)
{
struct timespec now = current_kernel_time();
- return timespec_trunc(now, sb->s_time_gran);
+
+ return fs_time_trunc(now, sb);
}
EXPORT_SYMBOL(current_fs_time);

+struct timespec current_fs_time_sec(struct super_block *sb)
+{
+ struct timespec now_sec = { .tv_sec = get_seconds() };
+
+ /* Clamp to the range supported by the fs. */
+ fs_time_range_check(sb, &now_sec);
+
+ return now_sec;
+}
+EXPORT_SYMBOL(current_fs_time_sec);
+#endif
+
/*
* Convert jiffies to milliseconds and back.
*
@@ -286,30 +416,6 @@ unsigned int jiffies_to_usecs(const unsigned long j)
}
EXPORT_SYMBOL(jiffies_to_usecs);

-/**
- * timespec_trunc - Truncate timespec to a granularity
- * @t: Timespec
- * @gran: Granularity in ns.
- *
- * Truncate a timespec to a granularity. Always rounds down. gran must
- * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
- */
-struct timespec timespec_trunc(struct timespec t, unsigned gran)
-{
- /* Avoid division in the common cases 1 ns and 1 s. */
- if (gran == 1) {
- /* nothing */
- } else if (gran == NSEC_PER_SEC) {
- t.tv_nsec = 0;
- } else if (gran > 1 && gran < NSEC_PER_SEC) {
- t.tv_nsec -= t.tv_nsec % gran;
- } else {
- WARN(1, "illegal file time granularity: %u", gran);
- }
- return t;
-}
-EXPORT_SYMBOL(timespec_trunc);
-
/*
* mktime64 - Converts date to seconds.
* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/