[PATCH -v5 2/2] Updating ctime and mtime at syncing

From: Anton Salikhmetov
Date: Wed Jan 16 2008 - 19:58:37 EST


http://bugzilla.kernel.org/show_bug.cgi?id=2645

Changes for updating the ctime and mtime fields for memory-mapped files:

1) a new flag triggering update of the inode data;
2) a new field in the address_space structure for saving modification time;
3) a new helper function to update ctime and mtime when needed;
4) updating time stamps for mapped files in sys_msync() and do_fsync();
5) implementing lazy ctime and mtime update.

Signed-off-by: Anton Salikhmetov <salikhmetov@xxxxxxxxx>
---
fs/buffer.c | 3 ++
fs/fs-writeback.c | 2 +
fs/inode.c | 43 +++++++++++++++++++++++----------
fs/sync.c | 2 +
include/linux/fs.h | 13 +++++++++-
include/linux/pagemap.h | 3 +-
mm/msync.c | 61 +++++++++++++++++++++++++++++++++-------------
mm/page-writeback.c | 54 ++++++++++++++++++++++-------------------
8 files changed, 124 insertions(+), 57 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 7249e01..3967aa7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -701,6 +701,9 @@ static int __set_page_dirty(struct page *page,
if (unlikely(!mapping))
return !TestSetPageDirty(page);

+ mapping->mtime = CURRENT_TIME;
+ set_bit(AS_MCTIME, &mapping->flags);
+
if (TestSetPageDirty(page))
return 0;

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 300324b..affd291 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -243,6 +243,8 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)

spin_unlock(&inode_lock);

+ mapping_update_time(mapping);
+
ret = do_writepages(mapping, wbc);

/* Don't write the inode if only I_DIRTY_PAGES was set */
diff --git a/fs/inode.c b/fs/inode.c
index ed35383..edd5bf4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1243,8 +1243,10 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
EXPORT_SYMBOL(touch_atime);

/**
- * file_update_time - update mtime and ctime time
- * @file: file accessed
+ * inode_update_time - update mtime and ctime time
+ * @inode: inode accessed
+ * @ts: time when inode was accessed
+ * @sync: whether to do synchronous update
*
* Update the mtime and ctime members of an inode and mark the inode
* for writeback. Note that this function is meant exclusively for
@@ -1253,11 +1255,8 @@ EXPORT_SYMBOL(touch_atime);
* S_NOCTIME inode flag, e.g. for network filesystem where these
* timestamps are handled by the server.
*/
-
-void file_update_time(struct file *file)
+void inode_update_time(struct inode *inode, struct timespec *ts)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct timespec now;
int sync_it = 0;

if (IS_NOCMTIME(inode))
@@ -1265,22 +1264,41 @@ void file_update_time(struct file *file)
if (IS_RDONLY(inode))
return;

- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now)) {
- inode->i_mtime = now;
+ if (timespec_compare(&inode->i_mtime, ts) < 0) {
+ inode->i_mtime = *ts;
sync_it = 1;
}

- if (!timespec_equal(&inode->i_ctime, &now)) {
- inode->i_ctime = now;
+ if (timespec_compare(&inode->i_ctime, ts) < 0) {
+ inode->i_ctime = *ts;
sync_it = 1;
}

if (sync_it)
mark_inode_dirty_sync(inode);
}
+EXPORT_SYMBOL(inode_update_time);

-EXPORT_SYMBOL(file_update_time);
+/*
+ * Update the ctime and mtime stamps after checking if they are to be updated.
+ */
+void mapping_update_time(struct address_space *mapping)
+{
+ if (test_and_clear_bit(AS_MCTIME, &mapping->flags)) {
+ struct inode *inode = mapping->host;
+ struct timespec *ts = &mapping->mtime;
+
+ if (S_ISBLK(inode->i_mode)) {
+ struct block_device *bdev = inode->i_bdev;
+
+ mutex_lock(&bdev->bd_mutex);
+ list_for_each_entry(inode, &bdev->bd_inodes, i_devices)
+ inode_update_time(inode, ts);
+ mutex_unlock(&bdev->bd_mutex);
+ } else
+ inode_update_time(inode, ts);
+ }
+}

int inode_needs_sync(struct inode *inode)
{
@@ -1290,7 +1308,6 @@ int inode_needs_sync(struct inode *inode)
return 1;
return 0;
}
-
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005e..5561464 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -87,6 +87,8 @@ long do_fsync(struct file *file, int datasync)
goto out;
}

+ mapping_update_time(mapping);
+
ret = filemap_fdatawrite(mapping);

/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3ec4a4..f0d3ced 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -511,6 +511,7 @@ struct address_space {
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+ struct timespec mtime; /* modification time */
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
@@ -1977,7 +1978,17 @@ extern int buffer_migrate_page(struct address_space *,
extern int inode_change_ok(struct inode *, struct iattr *);
extern int __must_check inode_setattr(struct inode *, struct iattr *);

-extern void file_update_time(struct file *file);
+extern void inode_update_time(struct inode *, struct timespec *);
+
+static inline void file_update_time(struct file *file)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct timespec ts = current_fs_time(inode->i_sb);
+
+ inode_update_time(inode, &ts);
+}
+
+extern void mapping_update_time(struct address_space *);

static inline ino_t parent_ino(struct dentry *dentry)
{
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index db8a410..bf0f9e7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -17,8 +17,9 @@
* Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
* allocation mode flags.
*/
-#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
+#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */
+#define AS_MCTIME (__GFP_BITS_SHIFT + 2) /* mtime and ctime to update */

static inline void mapping_set_error(struct address_space *mapping, int error)
{
diff --git a/mm/msync.c b/mm/msync.c
index 44997bf..7657776 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,16 +13,37 @@
#include <linux/syscalls.h>

/*
+ * Scan the PTEs for pages belonging to the VMA and mark them read-only.
+ * It will force a pagefault on the next write access.
+ */
+static void vma_wrprotect(struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ spinlock_t *ptl;
+ pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+ pte_t *pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+
+ if (pte_dirty(*pte) && pte_write(*pte))
+ *pte = pte_wrprotect(*pte);
+ pte_unmap_unlock(pte, ptl);
+ }
+}
+
+/*
* MS_SYNC syncs the entire file - including mappings.
*
- * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
- * Nor does it mark the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * MS_ASYNC does not start I/O. Instead, it marks the relevant pages
+ * read-only by calling vma_wrprotect(). This is needed to catch the next
+ * write reference to the mapped region and update the file times
+ * accordingly.
*
- * The application may now run fsync() to
- * write out the dirty pages and wait on the writeout and check the result.
- * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
- * async writeout immediately.
+ * The application may now run fsync() to write out the dirty pages and
+ * wait on the writeout and check the result. Or the application may run
+ * fadvise(FADV_DONTNEED) against the fd to start async writeout immediately.
* So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/
@@ -80,16 +101,22 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
start = vma->vm_end;

file = vma->vm_file;
- if (file && (vma->vm_flags & VM_SHARED) && (flags & MS_SYNC)) {
- get_file(file);
- up_read(&mm->mmap_sem);
- error = do_fsync(file, 0);
- fput(file);
- if (error)
- goto out;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, start);
- continue;
+ if (file && (vma->vm_flags & VM_SHARED)) {
+ if (flags & MS_ASYNC) {
+ vma_wrprotect(vma);
+ mapping_update_time(file->f_mapping);
+ }
+ if (flags & MS_SYNC) {
+ get_file(file);
+ up_read(&mm->mmap_sem);
+ error = do_fsync(file, 0);
+ fput(file);
+ if (error)
+ goto out;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ continue;
+ }
}

vma = vma->vm_next;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3d3848f..53d0e34 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -997,35 +997,39 @@ int __set_page_dirty_no_writeback(struct page *page)
*/
int __set_page_dirty_nobuffers(struct page *page)
{
- if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
- struct address_space *mapping2;
+ struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping2;

- if (!mapping)
- return 1;
+ if (!mapping)
+ return 1;

- write_lock_irq(&mapping->tree_lock);
- mapping2 = page_mapping(page);
- if (mapping2) { /* Race with truncate? */
- BUG_ON(mapping2 != mapping);
- WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- write_unlock_irq(&mapping->tree_lock);
- if (mapping->host) {
- /* !PageAnon && !swapper_space */
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ mapping->mtime = CURRENT_TIME;
+ set_bit(AS_MCTIME, &mapping->flags);
+
+ if (TestSetPageDirty(page))
+ return 0;
+
+ write_lock_irq(&mapping->tree_lock);
+ mapping2 = page_mapping(page);
+ if (mapping2) {
+ /* Race with truncate? */
+ BUG_ON(mapping2 != mapping);
+ WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ __inc_bdi_stat(mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ task_io_account_write(PAGE_CACHE_SIZE);
}
- return 1;
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
}
- return 0;
+ write_unlock_irq(&mapping->tree_lock);
+
+ if (mapping->host)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

--
1.4.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/