aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Gruenbacher <agruenba@redhat.com>2023-08-28 17:14:32 +0200
committerAndreas Gruenbacher <agruenba@redhat.com>2023-09-05 15:58:17 +0200
commitfe4f7940d212440334f06783e06a15d674013bb1 (patch)
tree17d5c8387e7da59ef005e9398629f72dfd0ab710
parentf66af88e33212b57ea86da2c5d66c0d9d5c46344 (diff)
downloadlinux-fe4f7940d212440334f06783e06a15d674013bb1.tar.gz
gfs2: Fix asynchronous thread destruction
The kernel threads are currently stopped and destroyed synchronously by gfs2_make_fs_ro() and gfs2_put_super(), and asynchronously by signal_our_withdraw(), with no synchronization, so the synchronous and asynchronous contexts can race with each other. First, when creating the kernel threads, take an extra task struct reference so that the task struct won't go away immediately when they terminate. This allows those kthreads to terminate immediately when they're done rather than hanging around as zombies until they are reaped by kthread_stop(). When kthread_stop() is called on a terminated kthread, it will return immediately. Second, in signal_our_withdraw(), once the SDF_JOURNAL_LIVE flag has been cleared, wake up the logd and quotad wait queues instead of stopping the logd and quotad kthreads. The kthreads are then expected to terminate automatically within short time, but if they cannot, they will not block the withdraw. For example, if a user process and one of the kthread decide to withdraw at the same time, only one of them will perform the actual withdraw and the other will wait for it to be done. If the kthread ends up being the one to wait, the withdrawing user process won't be able to stop it. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
-rw-r--r--fs/gfs2/ops_fstype.c35
-rw-r--r--fs/gfs2/super.c16
-rw-r--r--fs/gfs2/super.h1
-rw-r--r--fs/gfs2/util.c13
4 files changed, 32 insertions, 33 deletions
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6ea295cee46307..a1a8be56ead582 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1103,29 +1103,49 @@ static int init_threads(struct gfs2_sbd *sdp)
struct task_struct *p;
int error = 0;
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ p = kthread_create(gfs2_logd, sdp, "gfs2_logd");
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
+ fs_err(sdp, "can't create logd thread: %d\n", error);
return error;
}
+ get_task_struct(p);
sdp->sd_logd_process = p;
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad");
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
+ fs_err(sdp, "can't create quotad thread: %d\n", error);
goto fail;
}
+ get_task_struct(p);
sdp->sd_quotad_process = p;
+
+ wake_up_process(sdp->sd_logd_process);
+ wake_up_process(sdp->sd_quotad_process);
return 0;
fail:
kthread_stop(sdp->sd_logd_process);
+ put_task_struct(sdp->sd_logd_process);
sdp->sd_logd_process = NULL;
return error;
}
+void gfs2_destroy_threads(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_logd_process) {
+ kthread_stop(sdp->sd_logd_process);
+ put_task_struct(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ }
+ if (sdp->sd_quotad_process) {
+ kthread_stop(sdp->sd_quotad_process);
+ put_task_struct(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ }
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
@@ -1276,12 +1296,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
if (error) {
gfs2_freeze_unlock(&sdp->sd_freeze_gh);
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_destroy_threads(sdp);
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 119a5b20eb0026..be3f69c15edcd2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -549,17 +549,7 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
if (!test_bit(SDF_KILL, &sdp->sd_flags))
gfs2_flush_delete_work(sdp);
- if (!log_write_allowed && current == sdp->sd_quotad_process)
- fs_warn(sdp, "The quotad daemon is withdrawing.\n");
- else if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
-
- if (!log_write_allowed && current == sdp->sd_logd_process)
- fs_warn(sdp, "The logd daemon is withdrawing.\n");
- else if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_destroy_threads(sdp);
if (log_write_allowed) {
gfs2_quota_sync(sdp->sd_vfs, 0);
@@ -615,8 +605,10 @@ restart:
if (!sb_rdonly(sb)) {
gfs2_make_fs_ro(sdp);
}
- if (gfs2_withdrawn(sdp))
+ if (gfs2_withdrawn(sdp)) {
+ gfs2_destroy_threads(sdp);
gfs2_quota_cleanup(sdp);
+ }
WARN_ON(gfs2_withdrawing(sdp));
/* At this point, we're through modifying the disk */
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index bba58629bc4580..ab9c83106932db 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -36,6 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
+extern void gfs2_destroy_threads(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index b3086a9baf0014..65a3c7b1a51e93 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -151,17 +151,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
if (!sb_rdonly(sdp->sd_vfs)) {
bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
- if (sdp->sd_quotad_process &&
- current != sdp->sd_quotad_process) {
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- }
-
- if (sdp->sd_logd_process &&
- current != sdp->sd_logd_process) {
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
- }
+ wake_up(&sdp->sd_logd_waitq);
+ wake_up(&sdp->sd_quota_wait);
wait_event_timeout(sdp->sd_log_waitq,
gfs2_log_is_empty(sdp),