aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/journal.c
diff options
context:
space:
mode:
authorZhang Yi <yi.zhang@huawei.com>2021-06-10 19:24:37 +0800
committerTheodore Ts'o <tytso@mit.edu>2021-06-24 10:54:49 -0400
commit4ba3fcdde7e36af93610ceb3cc38365b14539865 (patch)
treecfe6b1077ca18cc5614a43446bd015e4f0c93afa /fs/jbd2/journal.c
parent214eb5a4d8a2032fb9f0711d1b202eb88ee02920 (diff)
downloadlinux-4ba3fcdde7e36af93610ceb3cc38365b14539865.tar.gz
jbd2,ext4: add a shrinker to release checkpointed buffers
Current metadata buffer release logic in bdev_try_to_free_page() have a lot of use-after-free issues when umount filesystem concurrently, and it is difficult to fix directly because ext4 is the only user of s_op->bdev_try_to_free_page callback and we may have to add more special refcount or lock that is only used by ext4 into the common vfs layer, which is unacceptable. One better solution is remove the bdev_try_to_free_page callback, but the real problem is we cannot easily release journal_head on the checkpointed buffer, so try_to_free_buffers() cannot release buffers and page under memory pressure, which is more likely to trigger out-of-memory. So we cannot remove the callback directly before we find another way to release journal_head. This patch introduce a shrinker to free journal_head on the checkpointed transaction. After the journal_head got freed, try_to_free_buffers() could free buffer properly. Signed-off-by: Zhang Yi <yi.zhang@huawei.com> Suggested-by: Jan Kara <jack@suse.cz> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20210610112440.3438139-6-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/journal.c')
-rw-r--r--fs/jbd2/journal.c87
1 files changed, 87 insertions, 0 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8b3f5bbd65f95..7c52feb6f753b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2051,6 +2051,91 @@ recovery_error:
}
/**
+ * jbd2_journal_shrink_scan()
+ *
+ * Scan the checkpointed buffer on the checkpoint list and release the
+ * journal_head.
+ */
+static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+ unsigned long nr_to_scan = sc->nr_to_scan;
+ unsigned long nr_shrunk;
+ unsigned long count;
+
+ count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+ trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count);
+
+ nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, &nr_to_scan);
+
+ count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+ trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count);
+
+ return nr_shrunk;
+}
+
+/**
+ * jbd2_journal_shrink_count()
+ *
+ * Count the number of checkpoint buffers on the checkpoint list.
+ */
+static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+ unsigned long count;
+
+ count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+ trace_jbd2_shrink_count(journal, sc->nr_to_scan, count);
+
+ return count;
+}
+
+/**
+ * jbd2_journal_register_shrinker()
+ * @journal: Journal to act on.
+ *
+ * Init a percpu counter to record the checkpointed buffers on the checkpoint
+ * list and register a shrinker to release their journal_head.
+ */
+int jbd2_journal_register_shrinker(journal_t *journal)
+{
+ int err;
+
+ journal->j_shrink_transaction = NULL;
+
+ err = percpu_counter_init(&journal->j_jh_shrink_count, 0, GFP_KERNEL);
+ if (err)
+ return err;
+
+ journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
+ journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
+ journal->j_shrinker.seeks = DEFAULT_SEEKS;
+ journal->j_shrinker.batch = journal->j_max_transaction_buffers;
+
+ err = register_shrinker(&journal->j_shrinker);
+ if (err) {
+ percpu_counter_destroy(&journal->j_jh_shrink_count);
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * jbd2_journal_unregister_shrinker()
+ * @journal: Journal to act on.
+ *
+ * Unregister the checkpointed buffer shrinker and destroy the percpu counter.
+ */
+void jbd2_journal_unregister_shrinker(journal_t *journal)
+{
+ percpu_counter_destroy(&journal->j_jh_shrink_count);
+ unregister_shrinker(&journal->j_shrinker);
+}
+
+/**
* jbd2_journal_destroy() - Release a journal_t structure.
* @journal: Journal to act on.
*
@@ -2122,6 +2207,8 @@ int jbd2_journal_destroy(journal_t *journal)
brelse(journal->j_sb_buffer);
}
+ jbd2_journal_unregister_shrinker(journal);
+
if (journal->j_proc_entry)
jbd2_stats_proc_exit(journal);
iput(journal->j_inode);