Re: atime

From: David Hinds (dhinds@valinux.com)
Date: Tue May 02 2000 - 17:01:30 EST


> Rather than pegging to zero, _I_ would prefer that atime was weakly
> updated (i.e. some "logical updates" are allowed to get lost). More
> specifically, we update the in-memory atime just as we do currently, but
> either:
>
> 1) not marking the inode dirty if only the atime is changed; or possibly
> 2) marking it "mildly dirty", meaning that it should be written back to
> disk only if cheap to do so (e.g. if there's no other I/O to be
> done and the disk isn't spun down; on some systems this may be almost
> never). Of course the inode can also be written to disk if it is made
> dirty in some other way (e.g. ctime change).

I implemented this ("lazy atime updates"), first for 2.2, then later
for an early 2.3 kernel. I changed the fs code to keep two dirty
inode lists: one really-dirty list, and an atime-dirty list. A sync
will flush both lists to disk, but sync_old_buffers() will not flush
the atime-dirty list unless some other buffers need to be written to
disk.

Just now, I ported my patch up to 2.3.99pre5. However, it seems to
have some new problems that I didn't see before. The lazy atime part
seems to work (you also need to update "mount" to add the new option),
but "umount" always complains that the filesystem is busy. I also
get complaints from ext2_fs about the unrecognized lazyatime mount
option, but I don't understand why the filesystem-specific code is
seeing the option at all.

Here's the patch, in case anyone is interested in fiddling with it
and/or knows what might be causing the problems I'm seeing.

-- Dave Hinds

diff -ur v2.3i/linux/fs/buffer.c v2.3/linux/fs/buffer.c
--- v2.3i/linux/fs/buffer.c Tue Mar 21 11:30:08 2000
+++ v2.3/linux/fs/buffer.c Tue May 2 12:04:18 2000
@@ -279,7 +279,7 @@
 void sync_dev(kdev_t dev)
 {
         sync_supers(dev);
- sync_inodes(dev);
+ sync_inodes(dev, 0);
         DQUOT_SYNC(dev);
         /* sync all the dirty buffers out to disk only _after_ all the
            high level layers finished generated buffer dirty data
@@ -305,7 +305,7 @@
 
         lock_kernel();
         sync_supers(dev);
- sync_inodes(dev);
+ sync_inodes(dev, 0);
         DQUOT_SYNC(dev);
         unlock_kernel();
 
@@ -2403,14 +2403,18 @@
  * and superblocks so that we could write back only the old ones as well
  */
 
-static int sync_old_buffers(void)
+static int sync_old_buffers(int lazy)
 {
+ int atime;
         lock_kernel();
         sync_supers(0);
- sync_inodes(0);
+ atime = sync_inodes(0, lazy);
         unlock_kernel();
 
- flush_dirty_buffers(1);
+ /* If there are inodes on the atime list, and we had other
+ work to do, then go back and do a non-lazy atime sync */
+ if (flush_dirty_buffers(1) && lazy && atime)
+ return sync_old_buffers(0);
         /* must really sync all the active I/O request to disk here */
         run_task_queue(&tq_disk);
         return 0;
@@ -2441,7 +2445,7 @@
                  * to and from bdflush.
                  */
                 user_mm = start_lazy_tlb();
- error = sync_old_buffers();
+ error = sync_old_buffers(1);
                 end_lazy_tlb(user_mm);
                 return error;
 #endif
@@ -2573,7 +2577,7 @@
 #ifdef DEBUG
                 printk("kupdate() activated...\n");
 #endif
- sync_old_buffers();
+ sync_old_buffers(1);
         }
 }
 
diff -ur v2.3i/linux/fs/inode.c v2.3/linux/fs/inode.c
--- v2.3i/linux/fs/inode.c Sun Apr 2 15:38:54 2000
+++ v2.3/linux/fs/inode.c Tue May 2 12:04:17 2000
@@ -137,6 +137,24 @@
         }
 }
 
+static void mark_inode_atime(struct inode *inode)
+{
+ struct super_block * sb = inode->i_sb;
+
+ if (sb) {
+ spin_lock(&inode_lock);
+ if (!(inode->i_state & (I_DIRTY|I_ATIME))) {
+ inode->i_state |= I_ATIME;
+ /* Only add valid (ie hashed) inodes to the atime list */
+ if (!list_empty(&inode->i_hash)) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &sb->s_atime);
+ }
+ }
+ spin_unlock(&inode_lock);
+ }
+}
+
 static void __wait_on_inode(struct inode * inode)
 {
         DECLARE_WAITQUEUE(wait, current);
@@ -190,8 +208,9 @@
                 list_del(&inode->i_list);
                 list_add(&inode->i_list,
                          inode->i_count ? &inode_in_use : &inode_unused);
- /* Set I_LOCK, reset I_DIRTY */
- inode->i_state ^= I_DIRTY | I_LOCK;
+ /* Set I_LOCK, reset I_DIRTY & I_ATIME */
+ inode->i_state |= I_LOCK;
+ inode->i_state &= ~(I_DIRTY|I_ATIME);
                 spin_unlock(&inode_lock);
 
                 write_inode(inode);
@@ -216,11 +235,15 @@
  *
  * sync_inodes goes through the super block's dirty list,
  * writes them out, and puts them back on the normal list.
+ *
+ * If lazy is set, then don't try to flush dirty-atime inodes. The
+ * return value is then non-zero if there were any dirty-atime inodes.
  */
  
-void sync_inodes(kdev_t dev)
+int sync_inodes(kdev_t dev, int lazy)
 {
         struct super_block * sb = sb_entry(super_blocks.next);
+ int atime = 0;
 
         /*
          * Search the super_blocks array for the device(s) to sync.
@@ -233,11 +256,16 @@
                         continue;
 
                 sync_list(&sb->s_dirty);
+ if (lazy)
+ atime |= (!list_empty(&sb->s_atime));
+ else
+ sync_list(&sb->s_atime);
 
                 if (dev)
                         break;
         }
         spin_unlock(&inode_lock);
+ return atime;
 }
 
 /*
@@ -250,6 +278,7 @@
                 if (!sb->s_dev)
                         continue;
                 sync_list(&sb->s_dirty);
+ sync_list(&sb->s_atime);
         }
 }
 
@@ -386,6 +415,7 @@
         busy = invalidate_list(&inode_in_use, sb, &throw_away);
         busy |= invalidate_list(&inode_unused, sb, &throw_away);
         busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+ busy |= invalidate_list(&sb->s_atime, sb, &throw_away);
         spin_unlock(&inode_lock);
 
         dispose_list(&throw_away);
@@ -880,7 +910,10 @@
         if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
         if ( IS_RDONLY (inode) ) return;
         inode->i_atime = CURRENT_TIME;
- mark_inode_dirty (inode);
+ if ( IS_LAZYATIME (inode) )
+ mark_inode_atime (inode);
+ else
+ mark_inode_dirty (inode);
 } /* End Function update_atime */
 
 
diff -ur v2.3i/linux/fs/super.c v2.3/linux/fs/super.c
--- v2.3i/linux/fs/super.c Fri Apr 7 13:38:00 2000
+++ v2.3/linux/fs/super.c Tue May 2 11:49:51 2000
@@ -355,6 +355,7 @@
         { MS_MANDLOCK, ",mand" },
         { MS_NOATIME, ",noatime" },
         { MS_NODIRATIME, ",nodiratime" },
+ { MS_LAZYATIME, ",lazyatime" },
 #ifdef MS_NOSUB /* Can't find this except in mount.c */
         { MS_NOSUB, ",nosub" },
 #endif
@@ -588,6 +589,7 @@
                 nr_super_blocks++;
                 memset(s, 0, sizeof(struct super_block));
                 INIT_LIST_HEAD(&s->s_dirty);
+ INIT_LIST_HEAD(&s->s_atime);
                 list_add (&s->s_list, super_blocks.prev);
                 init_waitqueue_head(&s->s_wait);
                 INIT_LIST_HEAD(&s->s_files);
diff -ur v2.3i/linux/include/linux/fs.h v2.3/linux/include/linux/fs.h
--- v2.3i/linux/include/linux/fs.h Tue Apr 11 17:31:26 2000
+++ v2.3/linux/include/linux/fs.h Tue May 2 12:05:03 2000
@@ -95,6 +95,7 @@
 #define S_IMMUTABLE 512 /* Immutable file */
 #define MS_NOATIME 1024 /* Do not update access times. */
 #define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_LAZYATIME 4096 /* Lazy access time updates */
 
 #define MS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
                                   * as nfs_rename() will be cleaned up
@@ -104,7 +105,8 @@
  * Flags that can be altered by MS_REMOUNT
  */
 #define MS_RMT_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|\
- MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)
+ MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|\
+ MS_NODIRATIME|MS_LAZYATIME)
 
 /*
  * Magic mount flag number. Has to be or-ed to the flag values.
@@ -140,6 +142,7 @@
 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
 #define IS_NOATIME(inode) __IS_FLG(inode, MS_NOATIME)
 #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
+#define IS_LAZYATIME(inode) __IS_FLG(inode, MS_LAZYATIME)
 
 
 /* the read-only stuff doesn't really belong here, but any other place is
@@ -438,6 +441,7 @@
 #define I_LOCK 2
 #define I_FREEING 4
 #define I_CLEAR 8
+#define I_ATIME 16
 
 extern void __mark_inode_dirty(struct inode *);
 static inline void mark_inode_dirty(struct inode *inode)
@@ -617,6 +621,7 @@
         wait_queue_head_t s_wait;
 
         struct list_head s_dirty; /* dirty inodes */
+ struct list_head s_atime; /* atime inodes */
         struct list_head s_files;
 
         struct block_device *s_bdev;
@@ -948,7 +953,7 @@
 #define destroy_buffers(dev) __invalidate_buffers((dev), 1)
 extern void __invalidate_buffers(kdev_t dev, int);
 extern int floppy_is_wp(int);
-extern void sync_inodes(kdev_t);
+extern int sync_inodes(kdev_t, int);
 extern void write_inode_now(struct inode *);
 extern void sync_dev(kdev_t);
 extern int fsync_dev(kdev_t);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun May 07 2000 - 21:00:11 EST