[PATCH v2 05/10] fsdax: Replace mmap entry in case of CoW

From: Shiyang Ruan
Date: Thu Feb 25 2021 - 19:25:51 EST


We replace the existing entry to the newly allocated one in case of CoW.
Also, we mark the entry as PAGECACHE_TAG_TOWRITE so writeback marks this
entry as writeprotected. This helps us snapshots so new write
pagefaults after snapshots trigger a CoW.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx>
---
fs/dax.c | 37 ++++++++++++++++++++++++++-----------
1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 748dfb89fb41..ec4b733e0b59 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -722,6 +722,9 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
return 0;
}

+#define DAX_IF_DIRTY (1 << 0)
+#define DAX_IF_COW (1 << 1)
+
/*
* By this point grab_mapping_entry() has ensured that we have a locked entry
* of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -729,16 +732,19 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
* already in the tree, we will skip the insertion and just dirty the PMD as
* appropriate.
*/
-static void *dax_insert_entry(struct xa_state *xas,
- struct address_space *mapping, struct vm_fault *vmf,
- void *entry, pfn_t pfn, unsigned long flags, bool dirty)
+static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
+ void *entry, pfn_t pfn, unsigned long flags,
+ unsigned int insert_flags)
{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
void *new_entry = dax_make_entry(pfn, flags);
+ bool dirty = insert_flags & DAX_IF_DIRTY;
+ bool cow = insert_flags & DAX_IF_COW;

if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

- if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+ if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
unsigned long index = xas->xa_index;
/* we are replacing a zero page with block mapping */
if (dax_is_pmd_entry(entry))
@@ -750,7 +756,7 @@ static void *dax_insert_entry(struct xa_state *xas,

xas_reset(xas);
xas_lock_irq(xas);
- if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
void *old;

dax_disassociate_entry(entry, mapping, false);
@@ -774,6 +780,9 @@ static void *dax_insert_entry(struct xa_state *xas,
if (dirty)
xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

+ if (cow)
+ xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
xas_unlock_irq(xas);
return entry;
}
@@ -1100,8 +1109,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
vm_fault_t ret;

- *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
- DAX_ZERO_PAGE, false);
+ *entry = dax_insert_entry(xas, vmf, *entry, pfn, DAX_ZERO_PAGE, 0);

ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
trace_dax_load_hole(inode, vmf, ret);
@@ -1403,6 +1411,7 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
loff_t pos = (loff_t)xas->xa_offset << PAGE_SHIFT;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
+ unsigned int insert_flags = 0;
vm_fault_t ret = 0;
int err = 0;
pfn_t pfn;
@@ -1426,8 +1435,14 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
if (err)
goto error_fault;

- entry = dax_insert_entry(xas, mapping, vmf, entry, pfn, 0,
- write && !sync);
+ if (write) {
+ if (!sync)
+ insert_flags |= DAX_IF_DIRTY;
+ if (iomap->flags & IOMAP_F_SHARED)
+ insert_flags |= DAX_IF_COW;
+ }
+
+ entry = dax_insert_entry(xas, vmf, entry, pfn, 0, insert_flags);

if (write && srcmap->addr != iomap->addr) {
err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr, false);
@@ -1571,8 +1586,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
goto fallback;

pfn = page_to_pfn_t(zero_page);
- *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
- DAX_PMD | DAX_ZERO_PAGE, false);
+ *entry = dax_insert_entry(xas, vmf, *entry, pfn,
+ DAX_PMD | DAX_ZERO_PAGE, 0);

if (arch_needs_pgtable_deposit()) {
pgtable = pte_alloc_one(vma->vm_mm);
--
2.30.1