Use struct page for filename
From: Matthew Wilcox
Date: Sat Apr 07 2018 - 23:16:54 EST
On Fri, Apr 06, 2018 at 03:33:36PM +0200, Dmitry Vyukov wrote:
> On Fri, Apr 6, 2018 at 3:24 PM, syzbot
> <syzbot+75397ee3df5c70164154@xxxxxxxxxxxxxxxxxxxxxxxxx> wrote:
> > cache_from_obj: Wrong slab cache. names_cache but object is from kmalloc-96
>
> Al, do you see how this can happen?
I don't see how it happened, but when looking at this bug, I thought
"This is very complicated, I think there's a simpler way to handle this".
Here's a proposal. It won't apply to any existing tree (depends on a
couple of local commits), but I think you'll get the general flavour
of it. It's mostly compile-tested (build still running, but fs/ and
kernel/ compiled without issue).
fs/dcache.c | 7 ----
fs/namei.c | 102 ++++++++++++-----------------------------------
include/linux/fs.h | 26 ++++++------
include/linux/mm_types.h | 12 +++++-
kernel/auditsc.c | 8 ++--
5 files changed, 51 insertions(+), 104 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 593079176123..749b82b8fa1c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3172,10 +3172,6 @@ static void __init dcache_init(void)
d_hash_shift = 32 - d_hash_shift;
}
-/* SLAB cache for __getname() consumers */
-struct kmem_cache *names_cachep __read_mostly;
-EXPORT_SYMBOL(names_cachep);
-
void __init vfs_caches_init_early(void)
{
int i;
@@ -3189,9 +3185,6 @@ void __init vfs_caches_init_early(void)
void __init vfs_caches_init(void)
{
- names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
-
dcache_init();
inode_init();
files_init();
diff --git a/fs/namei.c b/fs/namei.c
index a09419379f5d..16fb4779d29f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -122,71 +122,46 @@
* PATH_MAX includes the nul terminator --RR.
*/
-#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
+struct filename *alloc_filename(void)
+{
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ page->filename.name = page_to_virt(page);
+ return &page->filename;
+}
+
+void putname(struct filename *name)
+{
+ __free_page(container_of(name, struct page, filename));
+}
+
+void filename_get(struct filename *name)
+{
+ page_ref_inc(container_of(name, struct page, filename));
+}
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
struct filename *result;
- char *kname;
int len;
result = audit_reusename(filename);
if (result)
return result;
- result = __getname();
+ result = alloc_filename();
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
- /*
- * First, try to embed the struct filename inside the names_cache
- * allocation
- */
- kname = (char *)result->iname;
- result->name = kname;
-
- len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
+ len = strncpy_from_user((char *)result->name, filename, PATH_MAX);
+ if (unlikely(len == PATH_MAX))
+ len = -ENAMETOOLONG;
if (unlikely(len < 0)) {
- __putname(result);
+ putname(result);
return ERR_PTR(len);
}
- /*
- * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
- * separate struct filename so we can dedicate the entire
- * names_cache allocation for the pathname, and re-do the copy from
- * userland.
- */
- if (unlikely(len == EMBEDDED_NAME_MAX)) {
- const size_t size = offsetof(struct filename, iname[1]);
- kname = (char *)result;
-
- /*
- * size is chosen that way we to guarantee that
- * result->iname[0] is within the same object and that
- * kname can't be equal to result->iname, no matter what.
- */
- result = kzalloc(size, GFP_KERNEL);
- if (unlikely(!result)) {
- __putname(kname);
- return ERR_PTR(-ENOMEM);
- }
- result->name = kname;
- len = strncpy_from_user(kname, filename, PATH_MAX);
- if (unlikely(len < 0)) {
- __putname(kname);
- kfree(result);
- return ERR_PTR(len);
- }
- if (unlikely(len == PATH_MAX)) {
- __putname(kname);
- kfree(result);
- return ERR_PTR(-ENAMETOOLONG);
- }
- }
-
- result->refcnt = 1;
/* The empty path is special. */
if (unlikely(!len)) {
if (empty)
@@ -215,49 +190,22 @@ getname_kernel(const char * filename)
struct filename *result;
int len = strlen(filename) + 1;
- result = __getname();
+ result = alloc_filename();
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
- if (len <= EMBEDDED_NAME_MAX) {
- result->name = (char *)result->iname;
- } else if (len <= PATH_MAX) {
- struct filename *tmp;
-
- tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
- if (unlikely(!tmp)) {
- __putname(result);
- return ERR_PTR(-ENOMEM);
- }
- tmp->name = (char *)result;
- result = tmp;
- } else {
- __putname(result);
+ if (len > PATH_MAX) {
+ putname(result);
return ERR_PTR(-ENAMETOOLONG);
}
memcpy((char *)result->name, filename, len);
result->uptr = NULL;
result->aname = NULL;
- result->refcnt = 1;
audit_getname(result);
return result;
}
-void putname(struct filename *name)
-{
- BUG_ON(name->refcnt <= 0);
-
- if (--name->refcnt > 0)
- return;
-
- if (name->name != name->iname) {
- __putname(name->name);
- kfree(name);
- } else
- __putname(name);
-}
-
static int check_acl(struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ab44a19f2ddd..5c93ebc519fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2376,15 +2376,6 @@ static inline int break_layout(struct inode *inode, bool wait)
#endif /* CONFIG_FILE_LOCKING */
/* fs/open.c */
-struct audit_names;
-struct filename {
- const char *name; /* pointer to actual string */
- const __user char *uptr; /* original userland pointer */
- struct audit_names *aname;
- int refcnt;
- const char iname[];
-};
-
extern long vfs_truncate(const struct path *, loff_t);
extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
struct file *filp);
@@ -2399,6 +2390,18 @@ extern struct file *file_open_root(struct dentry *, struct vfsmount *,
extern struct file * dentry_open(const struct path *, int, const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
+/* fs/namei.c */
+static inline void *__getname(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL);
+}
+
+static inline void __putname(const void *name)
+{
+ free_page((unsigned long)name);
+}
+
+void filename_get(struct filename *);
extern struct filename *getname_flags(const char __user *, int, int *);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
@@ -2421,11 +2424,6 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp);
extern void __init vfs_caches_init_early(void);
extern void __init vfs_caches_init(void);
-extern struct kmem_cache *names_cachep;
-
-#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
-#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
-
#ifdef CONFIG_BLOCK
extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 97ceec1c6e21..a6ca28ef4277 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -25,6 +25,13 @@
struct address_space;
struct mem_cgroup;
struct hmm;
+struct audit_names;
+
+struct filename {
+ const char *name; /* pointer to actual string */
+ const __user char *uptr; /* original userland pointer */
+ struct audit_names *aname;
+};
/*
* Each physical page in the system has a struct page associated with
@@ -188,6 +195,7 @@ struct page {
spinlock_t ptl;
#endif
};
+ struct filename filename;
};
#ifdef CONFIG_MEMCG
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e80459f7e132..e539550f5983 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1722,7 +1722,7 @@ __audit_reusename(const __user char *uptr)
if (!n->name)
continue;
if (n->name->uptr == uptr) {
- n->name->refcnt++;
+ filename_get(n->name);
return n->name;
}
}
@@ -1751,7 +1751,7 @@ void __audit_getname(struct filename *name)
n->name = name;
n->name_len = AUDIT_NAME_FULL;
name->aname = n;
- name->refcnt++;
+ filename_get(name);
if (!context->pwd.dentry)
get_fs_pwd(current->fs, &context->pwd);
@@ -1825,7 +1825,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
return;
if (name) {
n->name = name;
- name->refcnt++;
+ filename_get(name);
}
out:
@@ -1954,7 +1954,7 @@ void __audit_inode_child(struct inode *parent,
if (found_parent) {
found_child->name = found_parent->name;
found_child->name_len = AUDIT_NAME_FULL;
- found_child->name->refcnt++;
+ filename_get(found_child->name);
}
}