aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2023-05-09 11:28:48 +0200
committerCarlos Maiolino <cem@kernel.org>2023-05-09 19:52:50 +0200
commit33f3aac8b28d7d41bd24bf808f7217d21d22d51c (patch)
tree153743836966dbb11e27fa327dfa71aaee7c2ac8
parent755477b4dedd91c5c7b651c55591a974eb10fec5 (diff)
downloadxfsprogs-dev-33f3aac8b28d7d41bd24bf808f7217d21d22d51c.tar.gz
xfs: prefer free inodes at ENOSPC over chunk allocation
Source kernel commit: f08f984c63e9980614ae3a0a574b31eaaef284b2 When an XFS filesystem has free inodes in chunks already allocated on disk, it will still allocate new inode chunks if the target AG has no free inodes in it. Normally, this is a good idea as it preserves locality of all the inodes in a given directory. However, at ENOSPC this can lead to using the last few remaining free filesystem blocks to allocate a new chunk when there are many, many free inodes that could be allocated without consuming free space. This results in speeding up the consumption of the last few blocks and inode create operations then returning ENOSPC when there are free inodes available because we don't have enough blocks left in the filesystem for directory creation reservations to proceed. Hence when we are near ENOSPC, we should be attempting to preserve the remaining blocks for directory block allocation rather than using them for unnecessary inode chunk creation. This particular behaviour is exposed by xfs/294, when it drives to ENOSPC on empty file creation whilst there are still thousands of free inodes available for allocation in other AGs in the filesystem. Hence, when we are within 1% of ENOSPC, change the inode allocation behaviour to prefer to use existing free inodes over allocating new inode chunks, even though it results in poorer locality of the data set. It is more important for the allocations to be space efficient near ENOSPC than to have optimal locality for performance, so let's modify the inode AG selection code to reflect that fact. This allows generic/294 to not only pass with this allocator rework patchset, but to increase the number of post-ENOSPC empty inode allocations from ~600 to ~9080 before we hit ENOSPC on the directory create transaction reservation. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Allison Henderson <allison.henderson@oracle.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Carlos Maiolino <cem@kernel.org>
-rw-r--r--include/xfs_mount.h11
-rw-r--r--libxfs/init.c18
-rw-r--r--libxfs/xfs_ialloc.c17
3 files changed, 46 insertions, 0 deletions
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 24b1d87359..59a66eb71a 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -13,6 +13,16 @@ struct xfs_da_geometry;
typedef void (*buf_writeback_fn)(struct xfs_buf *bp);
+/*
+ * dynamic preallocation free space thresholds, 5% down to 1%
+ *
+ * The constants are zero-based indices (XFS_LOWSP_1_PCNT is 0), not
+ * percentages themselves: they select a slot in the m_low_space[] array of
+ * struct xfs_mount. XFS_LOWSP_MAX is the number of thresholds and is used
+ * as the size of that array.
+ */
+enum {
+ XFS_LOWSP_1_PCNT = 0,
+ XFS_LOWSP_2_PCNT,
+ XFS_LOWSP_3_PCNT,
+ XFS_LOWSP_4_PCNT,
+ XFS_LOWSP_5_PCNT,
+ XFS_LOWSP_MAX,
+};
+
/*
* Define a user-level mount structure with all we need
* in order to make use of the numerous XFS_* macros.
@@ -81,6 +91,7 @@ typedef struct xfs_mount {
uint m_ag_max_usable; /* max space per AG */
struct radix_tree_root m_perag_tree;
uint64_t m_features; /* active filesystem features */
+ uint64_t m_low_space[XFS_LOWSP_MAX];
unsigned long m_opstate; /* dynamic state flags */
bool m_finobt_nores; /* no per-AG finobt resv. */
uint m_qflags; /* quota status flags */
diff --git a/libxfs/init.c b/libxfs/init.c
index 93dc1f1c59..ae9636a8c6 100644
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -744,6 +744,22 @@ libxfs_compute_all_maxlevels(
}
/*
+ * precalculate the low space thresholds for dynamic speculative preallocation.
+ *
+ * Fills every slot of mp->m_low_space[]: slot i receives (i + 1) times one
+ * hundredth of sb_dblocks, so XFS_LOWSP_1_PCNT .. XFS_LOWSP_5_PCNT hold
+ * roughly 1% .. 5% of the filesystem's data block count.
+ *
+ * The division by 100 happens once, before the multiplication, so each
+ * threshold is a multiple of the truncated 1% value.
+ * NOTE(review): this assumes do_div() divides dblocks in place, as its
+ * use here implies - confirm against the libxfs do_div() definition.
+ */
+static void
+xfs_set_low_space_thresholds(
+ struct xfs_mount *mp)
+{
+ uint64_t dblocks = mp->m_sb.sb_dblocks;
+ int i;
+
+ do_div(dblocks, 100);
+
+ for (i = 0; i < XFS_LOWSP_MAX; i++)
+ mp->m_low_space[i] = dblocks * (i + 1);
+}
+
+/*
* Mount structure initialization, provides a filled-in xfs_mount_t
* such that the numerous XFS_* macros can be used. If dev is zero,
* no IO will be performed (no size checks, read root inodes).
@@ -862,6 +878,8 @@ libxfs_mount(
libxfs_buf_relse(bp);
}
+ xfs_set_low_space_thresholds(mp);
+
/* Initialize realtime fields in the mount structure */
if (rtmount_init(mp)) {
fprintf(stderr, _("%s: realtime device init failed\n"),
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index fe7a689a6c..998a7adbca 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -1732,6 +1732,7 @@ xfs_dialloc(
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
bool ok_alloc = true;
+ bool low_space = false;
int flags;
xfs_ino_t ino;
@@ -1763,6 +1764,20 @@ xfs_dialloc(
}
/*
+ * If we are near to ENOSPC, we want to prefer allocation from AGs that
+ * have free inodes in them rather than use up free space allocating new
+ * inode chunks. Hence we turn off allocation for the first non-blocking
+ * pass through the AGs if we are near ENOSPC to consume free inodes
+ * that we can immediately allocate, but then we allow allocation on the
+ * second pass if we fail to find an AG with free inodes in it.
+ */
+ if (percpu_counter_read_positive(&mp->m_fdblocks) <
+ mp->m_low_space[XFS_LOWSP_1_PCNT]) {
+ ok_alloc = false;
+ low_space = true;
+ }
+
+ /*
* Loop until we find an allocation group that either has free inodes
* or in which we can allocate some inodes. Iterate through the
* allocation groups upward, wrapping at the end.
@@ -1790,6 +1805,8 @@ xfs_dialloc(
break;
}
flags = 0;
+ if (low_space)
+ ok_alloc = true;
}
xfs_perag_put(pag);
}