[PATCH] xfs: on memory failure, only shut down fs after scanning all mappings
From: Darrick J. Wong
Date: Thu Aug 18 2022 - 13:01:14 EST
From: Darrick J. Wong <djwong@xxxxxxxxxx>
xfs_dax_failure_fn is used to scan the filesystem during a memory
failure event to look for memory mappings to revoke. Unfortunately, if
it encounters an rmap record for filesystem metadata, it will shut down
the filesystem and the scan immediately. This means that we don't
complete the mapping revocation scan and instead leave live mappings to
failed memory. Fix the function to defer the shutdown until after we've
finished culling mappings.
While we're at it, add the usual "xfs_" prefix to struct failure_info,
and actually initialize mf_flags.
Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
fs/xfs/xfs_notify_failure.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 69d9c83ea4b2..65d5eb20878e 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -23,17 +23,18 @@
#include <linux/mm.h>
#include <linux/dax.h>
-struct failure_info {
+struct xfs_failure_info {
xfs_agblock_t startblock;
xfs_extlen_t blockcount;
int mf_flags;
+ bool want_shutdown;
};
static pgoff_t
xfs_failure_pgoff(
struct xfs_mount *mp,
const struct xfs_rmap_irec *rec,
- const struct failure_info *notify)
+ const struct xfs_failure_info *notify)
{
loff_t pos = XFS_FSB_TO_B(mp, rec->rm_offset);
@@ -47,7 +48,7 @@ static unsigned long
xfs_failure_pgcnt(
struct xfs_mount *mp,
const struct xfs_rmap_irec *rec,
- const struct failure_info *notify)
+ const struct xfs_failure_info *notify)
{
xfs_agblock_t end_rec;
xfs_agblock_t end_notify;
@@ -71,13 +72,13 @@ xfs_dax_failure_fn(
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_inode *ip;
- struct failure_info *notify = data;
+ struct xfs_failure_info *notify = data;
int error = 0;
if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- return -EFSCORRUPTED;
+ notify->want_shutdown = true;
+ return 0;
}
/* Get files that incore, filter out others that are not in use. */
@@ -86,8 +87,10 @@ xfs_dax_failure_fn(
/* Continue the rmap query if the inode isn't incore */
if (error == -ENODATA)
return 0;
- if (error)
- return error;
+ if (error) {
+ notify->want_shutdown = true;
+ return 0;
+ }
error = mf_dax_kill_procs(VFS_I(ip)->i_mapping,
xfs_failure_pgoff(mp, rec, notify),
@@ -104,6 +107,7 @@ xfs_dax_notify_ddev_failure(
xfs_daddr_t bblen,
int mf_flags)
{
+ struct xfs_failure_info notify = { .mf_flags = mf_flags };
struct xfs_trans *tp = NULL;
struct xfs_btree_cur *cur = NULL;
struct xfs_buf *agf_bp = NULL;
@@ -120,7 +124,6 @@ xfs_dax_notify_ddev_failure(
for (; agno <= end_agno; agno++) {
struct xfs_rmap_irec ri_low = { };
struct xfs_rmap_irec ri_high;
- struct failure_info notify;
struct xfs_agf *agf;
xfs_agblock_t agend;
struct xfs_perag *pag;
@@ -161,6 +164,11 @@ xfs_dax_notify_ddev_failure(
}
xfs_trans_cancel(tp);
+ if (error || notify.want_shutdown) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
+ if (!error)
+ error = -EFSCORRUPTED;
+ }
return error;
}