aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2017-04-04 15:37:44 -0500
committerEric Sandeen <sandeen@redhat.com>2017-04-04 15:37:44 -0500
commit7a868ee88b7405cf15b8736f639512c786f40b27 (patch)
tree6fcc3889cad4202e3fdf2bf303c5ee81a86aae0d
parentf3eda3a5cc5ab08253e548cc4d12bd283f76cb6b (diff)
downloadxfsprogs-dev-7a868ee88b7405cf15b8736f639512c786f40b27.tar.gz
xfs: use iomap new flag for newly allocated delalloc blocks
Source kernel commit: f65e6fad293b3a5793b7fa2044800506490e7a2e Commit fa7f138 ("xfs: clear delalloc and cache on buffered write failure") fixed one regression in the iomap error handling code and exposed another. The fundamental problem is that if a buffered write is a rewrite of preexisting delalloc blocks and the write fails, the failure handling code can punch out preexisting blocks with valid file data. This was reproduced directly by sub-block writes in the LTP kernel/syscalls/write/write03 test. A first 100 byte write allocates a single block in a file. A subsequent 100 byte write fails and punches out the block, including the data successfully written by the previous write. To address this problem, update the ->iomap_begin() handler to distinguish newly allocated delalloc blocks from preexisting delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the ->iomap_end() handler to decide when a failed or short write should punch out delalloc blocks. This introduces the subtle requirement that ->iomap_begin() should never combine newly allocated delalloc blocks with existing blocks in the resulting iomap descriptor. This can occur when a new delalloc reservation merges with a neighboring extent that is part of the current write, for example. Therefore, drop the post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and just return the record inserted into the fork. This ensures only new blocks are returned and thus that preexisting delalloc blocks are always handled as "found" blocks and not punched out on a failed rewrite. Reported-by: Xiong Zhou <xzhou@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
-rw-r--r--libxfs/xfs_bmap.c24
1 files changed, 14 insertions, 10 deletions
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 8ec938c1bb..4b825f5b4d 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -4141,6 +4141,19 @@ xfs_bmapi_read(
return 0;
}
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
@@ -4227,13 +4240,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
- /*
- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
- * might have merged it into one of the neighbouring ones.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
/*
* Tag the inode if blocks were preallocated. Note that COW fork
@@ -4245,10 +4253,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
- ASSERT(got->br_startoff <= aoff);
- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
- ASSERT(isnullstartblock(got->br_startblock));
- ASSERT(got->br_state == XFS_EXT_NORM);
return 0;
out_unreserve_blocks: