[PATCH 4.4 98/99] xfs: handle dquot buffer readahead in log recovery correctly

From: Greg Kroah-Hartman
Date: Sun Jun 05 2016 - 18:23:55 EST

4.4-stable review patch. If anyone has any objections, please let me know.


From: Dave Chinner <dchinner@xxxxxxxxxx>

commit 7d6a13f023567d573ac362502bb702eda716e654 upstream.

When we do dquot readahead in log recovery, we do not use a verifier
as the underlying buffer may not have dquots in it. e.g. the
allocation operation hasn't yet been replayed. Hence we do not want
to fail recovery because we detect an operation to be replayed has
not been run yet. This problem was addressed for inodes in commit
d891400 ("xfs: inode buffers may not be valid during recovery
readahead") but the problem was not recognised to exist for dquots
and their buffers as the dquot readahead did not have a verifier.

The result of not using a verifier is that when the buffer is then
next read to replay a dquot modification, the dquot buffer verifier
will only be attached to the buffer if *readahead is not complete*.
Hence we can read the buffer, replay the dquot changes and then add
it to the delwri submission list without it having a verifier
attached to it. This then generates warnings in xfs_buf_ioapply(),
which catches and warns about this case.

Fix this and make it handle the same readahead verifier error cases
as for inode buffers by adding a new readahead verifier that has a
write operation as well as a read operation that marks the buffer as
not done if any corruption is detected. Also make sure we don't run
readahead if the dquot buffer has been marked as cancelled by

This will result in readahead either succeeding and the buffer
having a valid write verifier, or readahead failing and the buffer
state requiring the subsequent read to resubmit the IO with the new
verifier. In either case, this will result in the buffer always
ending up with a valid write verifier on it.

Note: we also need to fix the inode buffer readahead error handling
to mark the buffer with EIO. Brian noticed the code I copied from
there wrong during review, so fix it at the same time. Add comments
linking the two functions that handle readahead verifier errors
together so we don't forget this behavioural link in future.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>
Signed-off-by: Dave Chinner <david@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

fs/xfs/libxfs/xfs_dquot_buf.c | 36 ++++++++++++++++++++++++++++++------
fs/xfs/libxfs/xfs_inode_buf.c | 2 ++
fs/xfs/libxfs/xfs_quota_defs.h | 2 +-
fs/xfs/libxfs/xfs_shared.h | 1 +
fs/xfs/xfs_log_recover.c | 9 +++++++--
5 files changed, 41 insertions(+), 9 deletions(-)

--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -54,7 +54,7 @@ xfs_dqcheck(
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
- char *str)
+ const char *str)
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
struct xfs_mount *mp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ int warn)
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
if (i == 0)
id = be32_to_cpu(ddq->d_id);

- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
- "xfs_dquot_buf_verify");
+ error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
if (error)
return false;
@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(

if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dquot_buf_verify(mp, bp))
+ else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
xfs_buf_ioerror(bp, -EFSCORRUPTED);

if (bp->b_error)
@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(

+ * readahead errors are silent and simply leave the buffer as !done so a real
+ * read will then be run with the xfs_dquot_buf_ops verifier. See
+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
+ * reporting the failure.
+ */
+static void
+ struct xfs_buf *bp)
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ if (!xfs_dquot_buf_verify_crc(mp, bp) ||
+ !xfs_dquot_buf_verify(mp, bp, 0)) {
+ xfs_buf_ioerror(bp, -EIO);
+ bp->b_flags &= ~XBF_DONE;
+ }
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
* buffer always has an up-to-date CRC value.
@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;

- if (!xfs_dquot_buf_verify(mp, bp)) {
+ if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
@@ -287,3 +306,8 @@ const struct xfs_buf_ops xfs_dquot_buf_o
.verify_write = xfs_dquot_buf_write_verify,

+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+ .name = "xfs_dquot_ra",
+ .verify_read = xfs_dquot_buf_readahead_verify,
+ .verify_write = xfs_dquot_buf_write_verify,
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -68,6 +68,8 @@ xfs_inobp_check(
* recovery and we don't get unnecssary panics on debug kernels. We use EIO here
* because all we want to do is say readahead failed; there is no-one to report
* the error to, so this will distinguish it from a non-ra verifier failure.
+ * Changes to this readahead error behavour also need to be reflected in
+ * xfs_dquot_buf_readahead_verify().
static void
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;

extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, char *str);
+ xfs_dqid_t id, uint type, uint flags, const char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);

#endif /* __XFS_QUOTA_H__ */
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inob
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2(
struct xfs_disk_dquot *recddq;
struct xfs_dq_logformat *dq_f;
uint type;
+ int len;

if (mp->m_qflags == 0)
@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2(
ASSERT(dq_f->qlf_len == 1);

- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
- XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
+ len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
+ if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
+ return;
+ xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
+ &xfs_dquot_buf_ra_ops);