aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2024-02-02 11:29:21 +1030
committerDavid Sterba <dsterba@suse.com>2024-02-08 08:30:37 +0100
commite54514aaeab6be64679787e78679e8eb700740f6 (patch)
treeaeae53a3c77de6b445df45caadfd0384ec09e4a0
parentace7d241ccf2582e832c4c4a75eb6b638a0f543b (diff)
downloadbtrfs-progs-e54514aaeab6be64679787e78679e8eb700740f6.tar.gz
btrfs-progs: fix stray fd close in open_ctree_fs_info()
[BUG] Although commit b2a1be83b85f ("btrfs-progs: mkfs: keep file descriptors open during whole time") is making sure we're only closing the writeable fds after the fs is properly created, there is still a missing fd not following the requirement. And this explains the issue why sometimes after mkfs.btrfs, lsblk still doesn't give a valid uuid. Shown by the strace output (the command is "mkfs.btrfs -f /dev/test/scratch1"): openat(AT_FDCWD, "/dev/test/scratch1", O_RDWR) = 5 <<< Writeable open fadvise64(5, 0, 0, POSIX_FADV_DONTNEED) = 0 sysinfo({uptime=2529, loads=[8704, 6272, 2496], totalram=4104548352, freeram=3376611328, sharedram=9211904, bufferram=43016192, totalswap=3221221376, freeswap=3221221376, procs=190, totalhigh=0, freehigh=0, mem_unit=1}) = 0 lseek(5, 0, SEEK_END) = 10737418240 lseek(5, 0, SEEK_SET) = 0 ...... close(5) = 0 <<< Closed now pwrite64(6, "O\250\22\261\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1163264) = 16384 pwrite64(6, "\201\316\272\342\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1179648) = 16384 pwrite64(6, "K}S\t\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1196032) = 16384 pwrite64(6, "\207j$\265\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1212416) = 16384 pwrite64(6, "q\267;\336\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 5242880) = 16384 fsync(6) <<< But we're still writing into the disk. [CAUSE] After more digging, it turns out we have a very obvious escape in open_ctree_fs_info(): open_ctree_fs_info() |- fp = open(oca->filename, flags); |- info = __open_ctree_fd(); |- close(fp); As later we only do IO using the device fd, this close() seems fine. But the truth is, for mkfs usage, this fs_info is a temporary one, with a special magic number for the disk. And since mkfs is doing writeable operations, this close() would immediately trigger udev scan. And since at this stage, the fs is not yet fully created, udev can race with mkfs, and may get the invalid temporary superblock. [FIX] Introduce a new btrfs_fs_info member, initial_fd, for open_ctree_fs_info() to record the fd. And on close_ctree(), if we find fs_info::initial_fd is a valid fd, then close it. By this, we make sure all writeable fds are only closed after we have written valid super blocks into the disk. Issue: #734 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--kernel-shared/ctree.h9
-rw-r--r--kernel-shared/disk-io.c8
2 files changed, 16 insertions, 1 deletions
diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index bcf11d87..94463222 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -404,6 +404,15 @@ struct btrfs_fs_info {
u32 sectorsize;
u32 stripesize;
u32 leaf_data_size;
+
+ /*
+ * For open_ctree_fs_info() to hold the initial fd until close.
+ *
+ * For writeable open_ctree_fs_info() call, we should not close
+ * the fd until the fs_info is properly closed, or it will trigger
+ * udev scan while our fs is not properly initialized.
+ */
+ int initial_fd;
u16 csum_type;
u16 csum_size;
diff --git a/kernel-shared/disk-io.c b/kernel-shared/disk-io.c
index c0533192..05323b2c 100644
--- a/kernel-shared/disk-io.c
+++ b/kernel-shared/disk-io.c
@@ -913,6 +913,7 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->nr_global_roots = 1;
+ fs_info->initial_fd = -1;
return fs_info;
@@ -1690,7 +1691,10 @@ struct btrfs_fs_info *open_ctree_fs_info(struct open_ctree_args *oca)
return NULL;
}
info = __open_ctree_fd(fp, oca);
- close(fp);
+ if (info)
+ info->initial_fd = fp;
+ else
+ close(fp);
return info;
}
@@ -2297,6 +2301,8 @@ skip_commit:
btrfs_release_all_roots(fs_info);
ret = btrfs_close_devices(fs_info->fs_devices);
+ if (fs_info->initial_fd >= 0)
+ close(fs_info->initial_fd);
btrfs_cleanup_all_caches(fs_info);
btrfs_free_fs_info(fs_info);
if (!err)