[RFC] [PATCH] Add a "nolinks" mount option.

From: Mattias Nissler
Date: Fri Oct 14 2016 - 10:29:06 EST


For mounts that have the new "nolinks" option, don't follow symlinks
and refuse to open files with a hard link count larger than one. The
new option is similar in spirit to the existing "nodev", "noexec", and
"nosuid" options.

Note that symlinks and hard links may still be created on mounts where
the "nolinks" option is present. readlink() remains functional, so
userspace code that is aware of symlinks can still choose to follow
them explicitly. Similarly, hard-linked files can be identified from
userspace using stat() output while the "nolinks" option is set.

Setting the "nolinks" mount option helps prevent privileged writers
from modifying files unintentionally in case there is an unexpected
link along the accessed path. The "nolinks" option is thus useful as a
defensive measure against persistent exploits (i.e. a system getting
re-exploited after a reboot) for systems that employ a read-only or
dm-verity-protected rootfs. These systems prevent unauthorized binaries
from running after reboot. However, legitimate code typically still
reads from and writes to a writable file system that was previously
under full control of the attacker, who can plant symlinks so that
file writes after reboot are redirected to a file of their choice.
"nolinks" fundamentally prevents this.

Signed-off-by: Mattias Nissler <mnissler@xxxxxxxxxxxx>
---
fs/namei.c | 9 ++++++++-
fs/namespace.c | 8 +++++---
fs/proc_namespace.c | 1 +
include/linux/mount.h | 3 ++-
include/uapi/linux/fs.h | 1 +
5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index a7f601c..f152687 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1021,6 +1021,10 @@ const char *get_link(struct nameidata *nd)
touch_atime(&last->link);
}

+ error = -EACCES;
+ if (nd->path.mnt->mnt_flags & MNT_NOLINKS)
+ return ERR_PTR(error);
+
error = security_inode_follow_link(dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
@@ -2919,7 +2923,10 @@ static int may_open(struct path *path, int acc_mode, int flag)
case S_IFIFO:
case S_IFSOCK:
flag &= ~O_TRUNC;
- break;
+ /*FALLTHRU*/
+ default:
+ if ((path->mnt->mnt_flags & MNT_NOLINKS) && inode->i_nlink > 1)
+ return -EACCES;
}

error = inode_permission(inode, MAY_OPEN | acc_mode);
diff --git a/fs/namespace.c b/fs/namespace.c
index 58aca9c..c421fbb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2732,6 +2732,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ if (flags & MS_NOLINKS)
+ mnt_flags |= MNT_NOLINKS;

/* The default atime for remount is preservation */
if ((flags & MS_REMOUNT) &&
@@ -2741,9 +2743,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
}

- flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME | MS_NOREMOTELOCK);
+ flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_NOLINKS | MS_ACTIVE |
+ MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+ MS_KERNMOUNT | MS_STRICTATIME | MS_NOREMOTELOCK);

if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3f1190d..b5d9d35 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -67,6 +67,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_NOLINKS, ",nolinks" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1172cce..df4eb6a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -28,6 +28,7 @@ struct mnt_namespace;
#define MNT_NODIRATIME 0x10
#define MNT_RELATIME 0x20
#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
+#define MNT_NOLINKS 0x80

#define MNT_SHRINKABLE 0x100
#define MNT_WRITE_HOLD 0x200
@@ -44,7 +45,7 @@ struct mnt_namespace;
#define MNT_SHARED_MASK (MNT_UNBINDABLE)
#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
| MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
- | MNT_READONLY)
+ | MNT_READONLY | MNT_NOLINKS)
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )

#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 2473272..6624ece 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -130,6 +130,7 @@ struct inodes_stat_t {
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+#define MS_NOLINKS (1<<26) /* Ignore symbolic and hard links */

/* These sb flags are internal to the kernel */
#define MS_NOREMOTELOCK (1<<27)
--
2.8.0.rc3.226.g39d4020