[PATCH v3 2/3] fs/proc: apply the time namespace offset to /proc/stat btime

From: Michael Weiß
Date: Sun Oct 11 2020 - 11:00:57 EST


'/proc/stat' provides the field 'btime' which states the time stamp of
system boot in seconds. In case of time namespaces, the offset to the
boot time stamp was not applied earlier. However, in container
runtimes which utilize time namespaces to virtualize boottime of a
container, this leaks information about the host system boot time.

Therefore, we make procfs to virtualize also the btime field by
subtracting the offset of the timens boottime from 'btime' before
printing the stats.

Since start_boottime of processes are seconds since boottime and the
boottime stamp is now shifted according to the timens offset, the
offset of the time namespace also needs to be applied before the
process stats are given to userspace.

This avoids that processes shown, e.g., by 'ps' appear as time
travelers in the corresponding time namespace.

Signed-off-by: Michael Weiß <michael.weiss@xxxxxxxxxxxxxxxxxxx>
---
fs/proc/array.c | 6 ++++--
fs/proc/stat.c | 17 ++++++++++++++++-
2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 65ec2029fa80..277f654f289e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -56,6 +56,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/time.h>
+#include <linux/time_namespace.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/tty.h>
@@ -533,8 +534,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task);
nice = task_nice(task);

- /* convert nsec -> ticks */
- start_time = nsec_to_clock_t(task->start_boottime);
+ /* apply timens offset for boottime and convert nsec -> ticks */
+ start_time =
+ nsec_to_clock_t(timens_add_boottime_ns(task->start_boottime));

seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " (");
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe..5ae59297591a 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -10,6 +10,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
+#include <linux/time_namespace.h>
#include <linux/irqnr.h>
#include <linux/sched/cputime.h>
#include <linux/tick.h>
@@ -79,6 +80,20 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)

#endif

+static void get_boottime(struct timespec64 *ts)
+{
+ ktime_t boottime;
+
+ /* get kernel internal system boot timestamp */
+ getboottime64(ts);
+
+ /* shift boot timestamp according to the timens offset */
+ boottime = timespec64_to_ktime(*ts);
+ boottime = timens_ktime_to_host(CLOCK_BOOTTIME, boottime);
+
+ *ts = ktime_to_timespec64(boottime);
+}
+
static void show_irq_gap(struct seq_file *p, unsigned int gap)
{
static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
@@ -117,7 +132,7 @@ static int show_stat(struct seq_file *p, void *v)
user = nice = system = idle = iowait =
irq = softirq = steal = 0;
guest = guest_nice = 0;
- getboottime64(&boottime);
+ get_boottime(&boottime);

for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
--
2.20.1