[PATCH] struct file cleanup : the very large file_ra_state is nowallocated only on demand.

From: Eric Dumazet
Date: Wed Aug 17 2005 - 19:41:30 EST


Andi Kleen a écrit :


(because of the insane struct file_ra_state f_ra. I wish this structure were dynamically allocated only for files that really use it)


How about you submit a patch for that instead?

-Andi

OK, could you please comment this patch ?

The problem of dynamically allocating the readahead state data is that the allocation can fail and should not be fatal.
I made some choices that might be not good.

I also chose not to align "file_ra" slab on SLAB_HWCACHE_ALIGN because the object size is 10*sizeof(long), so alignment would loose 6*sizeof(long) bytes for each object.


[PATCH]

* struct file cleanup : the very large file_ra_state is now allocated only on demand, using a dedicated "file_ra" slab.
64bits machines handling lot of sockets can save about 72 bytes per file.
* private_data : The field is moved close to f_count and f_op fields to speedup sockfd_lookups

Thank you

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx>



--- linux-2.6.13-rc6/include/linux/fs.h 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/fs.h 2005-08-18 01:33:04.000000000 +0200
@@ -586,20 +586,19 @@
struct dentry *f_dentry;
struct vfsmount *f_vfsmnt;
struct file_operations *f_op;
+ void *private_data;
atomic_t f_count;
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
- struct file_ra_state f_ra;
+ struct file_ra_state *f_rap;

size_t f_maxcount;
unsigned long f_version;
void *f_security;

- /* needed for tty driver, and maybe others */
- void *private_data;

#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
@@ -1514,8 +1513,7 @@
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
-extern void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+extern struct file_ra_state *file_ra_state_init(struct file *);
extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
@@ -1549,8 +1547,10 @@
read_descriptor_t * desc,
read_actor_t actor)
{
+ if (filp->f_rap == NULL)
+ file_ra_state_init(filp);
do_generic_mapping_read(filp->f_mapping,
- &filp->f_ra,
+ filp->f_rap,
filp,
ppos,
desc,
--- linux-2.6.13-rc6/include/linux/slab.h 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/slab.h 2005-08-18 00:37:59.000000000 +0200
@@ -125,6 +125,7 @@
extern kmem_cache_t *names_cachep;
extern kmem_cache_t *files_cachep;
extern kmem_cache_t *filp_cachep;
+extern kmem_cache_t *ra_cachep;
extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *signal_cachep;
extern kmem_cache_t *sighand_cachep;
--- linux-2.6.13-rc6/mm/readahead.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/readahead.c 2005-08-18 01:14:11.000000000 +0200
@@ -29,14 +29,20 @@
EXPORT_SYMBOL_GPL(default_backing_dev_info);

/*
- * Initialise a struct file's readahead state. Assumes that the caller has
- * memset *ra to zero.
+ * Initialise a struct file's readahead state.
*/
-void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
+struct file_ra_state *
+file_ra_state_init(struct file *file)
{
- ra->ra_pages = mapping->backing_dev_info->ra_pages;
- ra->prev_page = -1;
+ struct file_ra_state *ra = kmem_cache_alloc(ra_cachep, GFP_KERNEL);
+
+ if (ra != NULL) {
+ file->f_rap = ra;
+ memset(ra, 0, sizeof(*ra));
+ ra->ra_pages = file->f_mapping->host->i_mapping->backing_dev_info->ra_pages;
+ ra->prev_page = -1;
+ }
+ return ra;
}

/*
--- linux-2.6.13-rc6/mm/filemap.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/filemap.c 2005-08-18 01:33:58.000000000 +0200
@@ -711,7 +711,7 @@
* NULL.
*/
void do_generic_mapping_read(struct address_space *mapping,
- struct file_ra_state *_ra,
+ struct file_ra_state *_rap,
struct file *filp,
loff_t *ppos,
read_descriptor_t *desc,
@@ -727,8 +727,15 @@
loff_t isize;
struct page *cached_page;
int error;
- struct file_ra_state ra = *_ra;
+ struct file_ra_state ra;

+ if (likely(_rap != NULL))
+ ra = *_rap;
+ else {
+ memset(&ra, 0, sizeof(ra));
+ ra.prev_page = -1;
+ ra.ra_pages = default_backing_dev_info.ra_pages;
+ }
cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
next_index = index;
@@ -908,7 +915,8 @@
}

out:
- *_ra = ra;
+ if (_rap != NULL)
+ *_rap = ra;

*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
if (cached_page)
@@ -1187,12 +1195,15 @@
int error;
struct file *file = area->vm_file;
struct address_space *mapping = file->f_mapping;
- struct file_ra_state *ra = &file->f_ra;
+ struct file_ra_state *ra = file->f_rap;
struct inode *inode = mapping->host;
struct page *page;
unsigned long size, pgoff;
int did_readaround = 0, majmin = VM_FAULT_MINOR;

+ if (ra == NULL)
+ ra = file_ra_state_init(file);
+
pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

retry_all:
@@ -1225,14 +1236,16 @@
handle_ra_miss(mapping, ra, pgoff);
goto no_cached_page;
}
- ra->mmap_miss++;
+ if (ra != NULL) {
+ ra->mmap_miss++;

- /*
- * Do we miss much more than hit in this file? If so,
- * stop bothering with read-ahead. It will only hurt.
- */
- if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
- goto no_cached_page;
+ /*
+ * Do we miss much more than hit in this file? If so,
+ * stop bothering with read-ahead. It will only hurt.
+ */
+ if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+ goto no_cached_page;
+ }

/*
* To keep the pgmajfault counter straight, we need to
@@ -1243,7 +1256,7 @@
inc_page_state(pgmajfault);
}
did_readaround = 1;
- ra_pages = max_sane_readahead(file->f_ra.ra_pages);
+ ra_pages = max_sane_readahead(ra != NULL ? ra->ra_pages : default_backing_dev_info.ra_pages);
if (ra_pages) {
pgoff_t start = 0;

@@ -1256,7 +1269,7 @@
goto no_cached_page;
}

- if (!did_readaround)
+ if (!did_readaround && ra != NULL)
ra->mmap_hit++;

/*
--- linux-2.6.13-rc6/mm/fadvise.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/fadvise.c 2005-08-18 01:05:15.000000000 +0200
@@ -28,6 +28,7 @@
struct file *file = fget(fd);
struct address_space *mapping;
struct backing_dev_info *bdi;
+ struct file_ra_state *ra;
loff_t endbyte;
pgoff_t start_index;
pgoff_t end_index;
@@ -54,15 +55,20 @@

bdi = mapping->backing_dev_info;

+ if ((ra = file->f_rap) == NULL)
+ ra = file_ra_state_init(file);
switch (advice) {
case POSIX_FADV_NORMAL:
- file->f_ra.ra_pages = bdi->ra_pages;
+ if (ra != NULL)
+ ra->ra_pages = bdi->ra_pages;
break;
case POSIX_FADV_RANDOM:
- file->f_ra.ra_pages = 0;
+ if (ra != NULL)
+ ra->ra_pages = 0;
break;
case POSIX_FADV_SEQUENTIAL:
- file->f_ra.ra_pages = bdi->ra_pages * 2;
+ if (ra != NULL)
+ ra->ra_pages = bdi->ra_pages * 2;
break;
case POSIX_FADV_WILLNEED:
case POSIX_FADV_NOREUSE:
--- linux-2.6.13-rc6/fs/open.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/open.c 2005-08-18 01:05:15.000000000 +0200
@@ -804,7 +804,7 @@
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

- file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+ f->f_rap = NULL;

/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
--- linux-2.6.13-rc6/fs/file_table.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/file_table.c 2005-08-18 00:37:59.000000000 +0200
@@ -55,6 +55,8 @@

static inline void file_free(struct file *f)
{
+ if (f->f_rap != NULL)
+ kmem_cache_free(ra_cachep, f->f_rap);
kmem_cache_free(filp_cachep, f);
}

--- linux-2.6.13-rc6/fs/dcache.c 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/dcache.c 2005-08-18 02:22:56.000000000 +0200
@@ -1705,6 +1705,8 @@

/* SLAB cache for file structures */
kmem_cache_t *filp_cachep;
+/* SLAB cache for ra structures */
+kmem_cache_t *ra_cachep;

EXPORT_SYMBOL(d_genocide);

@@ -1733,6 +1735,9 @@
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);

+ ra_cachep = kmem_cache_create("file_ra", sizeof(struct file_ra_state), 0,
+ SLAB_PANIC, NULL, NULL);
+
dcache_init(mempages);
inode_init(mempages);
files_init(mempages);