> Rather than pegging to zero, _I_ would prefer that atime was weakly
> updated (i.e. some "logical updates" are allowed to get lost). More
> specifically, we would update the in-memory atime just as we do currently,
> but either:
>
> 1) not mark the inode dirty if only the atime has changed; or possibly
> 2) mark it "mildly dirty", meaning that it should be written back to
> disk only if cheap to do so (e.g. if there's no other I/O to be
> done and the disk isn't spun down; on some systems this may be almost
> never). Of course the inode can also be written to disk if it is made
> dirty in some other way (e.g. ctime change).
I implemented this ("lazy atime updates"), first for 2.2, then later
for an early 2.3 kernel. I changed the fs code to keep two dirty
inode lists: one really-dirty list, and an atime-dirty list. A sync
will flush both lists to disk, but sync_old_buffers() will not flush
the atime-dirty list unless some other buffers need to be written to
disk.
Just now, I ported my patch up to 2.3.99pre5. However, it seems to
have some new problems that I didn't see before. The lazy atime part
seems to work (you also need to update "mount" to add the new option),
but "umount" complained that the filesystem was always busy. I also
got complaints from ext2_fs about the unrecognized lazyatime mount
option, but I don't understand why the filesystem-specific code is
seeing the option at all.
Here's the patch, in case anyone is interested in fiddling with it
and/or knows what might be causing the problems I'm seeing.
-- Dave Hinds
diff -ur v2.3i/linux/fs/buffer.c v2.3/linux/fs/buffer.c
--- v2.3i/linux/fs/buffer.c Tue Mar 21 11:30:08 2000
+++ v2.3/linux/fs/buffer.c Tue May 2 12:04:18 2000
@@ -279,7 +279,7 @@
void sync_dev(kdev_t dev)
{
sync_supers(dev);
- sync_inodes(dev);
+ sync_inodes(dev, 0);
DQUOT_SYNC(dev);
/* sync all the dirty buffers out to disk only _after_ all the
high level layers finished generated buffer dirty data
@@ -305,7 +305,7 @@
lock_kernel();
sync_supers(dev);
- sync_inodes(dev);
+ sync_inodes(dev, 0);
DQUOT_SYNC(dev);
unlock_kernel();
@@ -2403,14 +2403,18 @@
* and superblocks so that we could write back only the old ones as well
*/
-static int sync_old_buffers(void)
+static int sync_old_buffers(int lazy)
{
+ int atime;
lock_kernel();
sync_supers(0);
- sync_inodes(0);
+ atime = sync_inodes(0, lazy);
unlock_kernel();
- flush_dirty_buffers(1);
+ /* If there are inodes on the atime list, and we had other
+ work to do, then go back and do a non-lazy atime sync */
+ if (flush_dirty_buffers(1) && lazy && atime)
+ return sync_old_buffers(0);
/* must really sync all the active I/O request to disk here */
run_task_queue(&tq_disk);
return 0;
@@ -2441,7 +2445,7 @@
* to and from bdflush.
*/
user_mm = start_lazy_tlb();
- error = sync_old_buffers();
+ error = sync_old_buffers(1);
end_lazy_tlb(user_mm);
return error;
#endif
@@ -2573,7 +2577,7 @@
#ifdef DEBUG
printk("kupdate() activated...\n");
#endif
- sync_old_buffers();
+ sync_old_buffers(1);
}
}
diff -ur v2.3i/linux/fs/inode.c v2.3/linux/fs/inode.c
--- v2.3i/linux/fs/inode.c Sun Apr 2 15:38:54 2000
+++ v2.3/linux/fs/inode.c Tue May 2 12:04:17 2000
@@ -137,6 +137,24 @@
}
}
+static void mark_inode_atime(struct inode *inode)
+{
+ struct super_block * sb = inode->i_sb;
+
+ if (sb) {
+ spin_lock(&inode_lock);
+ if (!(inode->i_state & (I_DIRTY|I_ATIME))) {
+ inode->i_state |= I_ATIME;
+ /* Only add valid (ie hashed) inodes to the atime list */
+ if (!list_empty(&inode->i_hash)) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &sb->s_atime);
+ }
+ }
+ spin_unlock(&inode_lock);
+ }
+}
+
static void __wait_on_inode(struct inode * inode)
{
DECLARE_WAITQUEUE(wait, current);
@@ -190,8 +208,9 @@
list_del(&inode->i_list);
list_add(&inode->i_list,
inode->i_count ? &inode_in_use : &inode_unused);
- /* Set I_LOCK, reset I_DIRTY */
- inode->i_state ^= I_DIRTY | I_LOCK;
+ /* Set I_LOCK, reset I_DIRTY & I_ATIME */
+ inode->i_state |= I_LOCK;
+ inode->i_state &= ~(I_DIRTY|I_ATIME);
spin_unlock(&inode_lock);
write_inode(inode);
@@ -216,11 +235,15 @@
*
* sync_inodes goes through the super block's dirty list,
* writes them out, and puts them back on the normal list.
+ *
+ * If lazy is set, then don't try to flush dirty-atime inodes. The
+ * return value is then non-zero if there were any dirty-atime inodes.
*/
-void sync_inodes(kdev_t dev)
+int sync_inodes(kdev_t dev, int lazy)
{
struct super_block * sb = sb_entry(super_blocks.next);
+ int atime = 0;
/*
* Search the super_blocks array for the device(s) to sync.
@@ -233,11 +256,16 @@
continue;
sync_list(&sb->s_dirty);
+ if (lazy)
+ atime |= (!list_empty(&sb->s_atime));
+ else
+ sync_list(&sb->s_atime);
if (dev)
break;
}
spin_unlock(&inode_lock);
+ return atime;
}
/*
@@ -250,6 +278,7 @@
if (!sb->s_dev)
continue;
sync_list(&sb->s_dirty);
+ sync_list(&sb->s_atime);
}
}
@@ -386,6 +415,7 @@
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&inode_unused, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+ busy |= invalidate_list(&sb->s_atime, sb, &throw_away);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
@@ -880,7 +910,10 @@
if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
if ( IS_RDONLY (inode) ) return;
inode->i_atime = CURRENT_TIME;
- mark_inode_dirty (inode);
+ if ( IS_LAZYATIME (inode) )
+ mark_inode_atime (inode);
+ else
+ mark_inode_dirty (inode);
} /* End Function update_atime */
diff -ur v2.3i/linux/fs/super.c v2.3/linux/fs/super.c
--- v2.3i/linux/fs/super.c Fri Apr 7 13:38:00 2000
+++ v2.3/linux/fs/super.c Tue May 2 11:49:51 2000
@@ -355,6 +355,7 @@
{ MS_MANDLOCK, ",mand" },
{ MS_NOATIME, ",noatime" },
{ MS_NODIRATIME, ",nodiratime" },
+ { MS_LAZYATIME, ",lazyatime" },
#ifdef MS_NOSUB /* Can't find this except in mount.c */
{ MS_NOSUB, ",nosub" },
#endif
@@ -588,6 +589,7 @@
nr_super_blocks++;
memset(s, 0, sizeof(struct super_block));
INIT_LIST_HEAD(&s->s_dirty);
+ INIT_LIST_HEAD(&s->s_atime);
list_add (&s->s_list, super_blocks.prev);
init_waitqueue_head(&s->s_wait);
INIT_LIST_HEAD(&s->s_files);
diff -ur v2.3i/linux/include/linux/fs.h v2.3/linux/include/linux/fs.h
--- v2.3i/linux/include/linux/fs.h Tue Apr 11 17:31:26 2000
+++ v2.3/linux/include/linux/fs.h Tue May 2 12:05:03 2000
@@ -95,6 +95,7 @@
#define S_IMMUTABLE 512 /* Immutable file */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_LAZYATIME 4096 /* Lazy access time updates */
#define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
* as nfs_rename() will be cleaned up
@@ -104,7 +105,8 @@
* Flags that can be altered by MS_REMOUNT
*/
#define MS_RMT_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|\
- MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)
+ MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|\
+ MS_NODIRATIME|MS_LAZYATIME)
/*
* Magic mount flag number. Has to be or-ed to the flag values.
@@ -140,6 +142,7 @@
#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
#define IS_NOATIME(inode) __IS_FLG(inode, MS_NOATIME)
#define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
+#define IS_LAZYATIME(inode) __IS_FLG(inode, MS_LAZYATIME)
/* the read-only stuff doesn't really belong here, but any other place is
@@ -438,6 +441,7 @@
#define I_LOCK 2
#define I_FREEING 4
#define I_CLEAR 8
+#define I_ATIME 16
extern void __mark_inode_dirty(struct inode *);
static inline void mark_inode_dirty(struct inode *inode)
@@ -617,6 +621,7 @@
wait_queue_head_t s_wait;
struct list_head s_dirty; /* dirty inodes */
+ struct list_head s_atime; /* atime inodes */
struct list_head s_files;
struct block_device *s_bdev;
@@ -948,7 +953,7 @@
#define destroy_buffers(dev) __invalidate_buffers((dev), 1)
extern void __invalidate_buffers(kdev_t dev, int);
extern int floppy_is_wp(int);
-extern void sync_inodes(kdev_t);
+extern int sync_inodes(kdev_t, int);
extern void write_inode_now(struct inode *);
extern void sync_dev(kdev_t);
extern int fsync_dev(kdev_t);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sun May 07 2000 - 21:00:11 EST