about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2024-04-29 16:18:12 +0200
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2024-04-29 16:18:12 +0200
commit 67b0bf9fb9c015afdcb87e775d3303be95028d08 (patch)
tree 1f525a5909954ab42001bc0e101e33aebaf23e4f
parent d7391a558e3ecf86d4501e4440c2b1f537f0426d (diff)
download stable-queue-67b0bf9fb9c015afdcb87e775d3303be95028d08.tar.gz
6.8-stable patches
added patches: mm-turn-folio_test_hugetlb-into-a-pagetype.patch mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch
-rw-r--r-- queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch 239
-rw-r--r-- queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch 85
-rw-r--r-- queue-6.8/series 2
3 files changed, 326 insertions, 0 deletions
diff --git a/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch b/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch
new file mode 100644
index 0000000000..d85186b21f
--- /dev/null
+++ b/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch
@@ -0,0 +1,239 @@
+From d99e3140a4d33e26066183ff727d8f02f56bec64 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Thu, 21 Mar 2024 14:24:43 +0000
+Subject: mm: turn folio_test_hugetlb into a PageType
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit d99e3140a4d33e26066183ff727d8f02f56bec64 upstream.
+
+The current folio_test_hugetlb() can be fooled by a concurrent folio split
+into returning true for a folio which has never belonged to hugetlbfs.
+This can't happen if the caller holds a refcount on it, but we have a few
+places (memory-failure, compaction, procfs) which do not and should not
+take a speculative reference.
+
+Since hugetlb pages do not use individual page mapcounts (they are always
+fully mapped and use the entire_mapcount field to record the number of
+mappings), the PageType field is available now that page_mapcount()
+ignores the value in this field.
+
+In compaction and with CONFIG_DEBUG_VM enabled, the current implementation
+can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b
+("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks
+in the PageHuge() testing path.
+
+[willy@infradead.org: update vmcoreinfo]
+ Link: https://lkml.kernel.org/r/ZgGZUvsdhaT1Va-T@casper.infradead.org
+Link: https://lkml.kernel.org/r/20240321142448.1645400-6-willy@infradead.org
+Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Luis Chamberlain <mcgrof@kernel.org>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/page-flags.h | 70 +++++++++++++++++++----------------------
+ include/trace/events/mmflags.h | 1
+ kernel/crash_core.c | 5 +-
+ mm/hugetlb.c | 22 +-----------
+ 4 files changed, 39 insertions(+), 59 deletions(-)
+
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -190,7 +190,6 @@ enum pageflags {
+
+ /* At least one page in this folio has the hwpoison flag set */
+ PG_has_hwpoisoned = PG_error,
+- PG_hugetlb = PG_active,
+ PG_large_rmappable = PG_workingset, /* anon or file-backed */
+ };
+
+@@ -850,29 +849,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large
+
+ #define PG_head_mask ((1UL << PG_head))
+
+-#ifdef CONFIG_HUGETLB_PAGE
+-int PageHuge(struct page *page);
+-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
+-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
+-
+-/**
+- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
+- * @folio: The folio to test.
+- *
+- * Context: Any context. Caller should have a reference on the folio to
+- * prevent it from being turned into a tail page.
+- * Return: True for hugetlbfs folios, false for anon folios or folios
+- * belonging to other filesystems.
+- */
+-static inline bool folio_test_hugetlb(struct folio *folio)
+-{
+- return folio_test_large(folio) &&
+- test_bit(PG_hugetlb, folio_flags(folio, 1));
+-}
+-#else
+-TESTPAGEFLAG_FALSE(Huge, hugetlb)
+-#endif
+-
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /*
+ * PageHuge() only returns true for hugetlbfs pages, but not for
+@@ -929,18 +905,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpois
+ #endif
+
+ /*
+- * Check if a page is currently marked HWPoisoned. Note that this check is
+- * best effort only and inherently racy: there is no way to synchronize with
+- * failing hardware.
+- */
+-static inline bool is_page_hwpoison(struct page *page)
+-{
+- if (PageHWPoison(page))
+- return true;
+- return PageHuge(page) && PageHWPoison(compound_head(page));
+-}
+-
+-/*
+ * For pages that are never mapped to userspace (and aren't PageSlab),
+ * page_type may be used. Because it is initialised to -1, we invert the
+ * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
+@@ -956,6 +920,7 @@ static inline bool is_page_hwpoison(stru
+ #define PG_offline 0x00000100
+ #define PG_table 0x00000200
+ #define PG_guard 0x00000400
++#define PG_hugetlb 0x00000800
+
+ #define PageType(page, flag) \
+ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+@@ -1050,6 +1015,37 @@ PAGE_TYPE_OPS(Table, table, pgtable)
+ */
+ PAGE_TYPE_OPS(Guard, guard, guard)
+
++#ifdef CONFIG_HUGETLB_PAGE
++FOLIO_TYPE_OPS(hugetlb, hugetlb)
++#else
++FOLIO_TEST_FLAG_FALSE(hugetlb)
++#endif
++
++/**
++ * PageHuge - Determine if the page belongs to hugetlbfs
++ * @page: The page to test.
++ *
++ * Context: Any context.
++ * Return: True for hugetlbfs pages, false for anon pages or pages
++ * belonging to other filesystems.
++ */
++static inline bool PageHuge(const struct page *page)
++{
++ return folio_test_hugetlb(page_folio(page));
++}
++
++/*
++ * Check if a page is currently marked HWPoisoned. Note that this check is
++ * best effort only and inherently racy: there is no way to synchronize with
++ * failing hardware.
++ */
++static inline bool is_page_hwpoison(struct page *page)
++{
++ if (PageHWPoison(page))
++ return true;
++ return PageHuge(page) && PageHWPoison(compound_head(page));
++}
++
+ extern bool is_free_buddy_page(struct page *page);
+
+ PAGEFLAG(Isolated, isolated, PF_ANY);
+@@ -1116,7 +1112,7 @@ static __always_inline void __ClearPageA
+ */
+ #define PAGE_FLAGS_SECOND \
+ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
+- 1UL << PG_hugetlb | 1UL << PG_large_rmappable)
++ 1UL << PG_large_rmappable)
+
+ #define PAGE_FLAGS_PRIVATE \
+ (1UL << PG_private | 1UL << PG_private_2)
+--- a/include/trace/events/mmflags.h
++++ b/include/trace/events/mmflags.h
+@@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3)
+ #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
+
+ #define __def_pagetype_names \
++ DEF_PAGETYPE_NAME(hugetlb), \
+ DEF_PAGETYPE_NAME(offline), \
+ DEF_PAGETYPE_NAME(guard), \
+ DEF_PAGETYPE_NAME(table), \
+--- a/kernel/crash_core.c
++++ b/kernel/crash_core.c
+@@ -814,11 +814,10 @@ static int __init crash_save_vmcoreinfo_
+ VMCOREINFO_NUMBER(PG_head_mask);
+ #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
+ VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+-#ifdef CONFIG_HUGETLB_PAGE
+- VMCOREINFO_NUMBER(PG_hugetlb);
++#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb)
++ VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
+ #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
+ VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
+-#endif
+
+ #ifdef CONFIG_KALLSYMS
+ VMCOREINFO_SYMBOL(kallsyms_names);
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1623,7 +1623,7 @@ static inline void __clear_hugetlb_destr
+ {
+ lockdep_assert_held(&hugetlb_lock);
+
+- folio_clear_hugetlb(folio);
++ __folio_clear_hugetlb(folio);
+ }
+
+ /*
+@@ -1710,7 +1710,7 @@ static void add_hugetlb_folio(struct hst
+ h->surplus_huge_pages_node[nid]++;
+ }
+
+- folio_set_hugetlb(folio);
++ __folio_set_hugetlb(folio);
+ folio_change_private(folio, NULL);
+ /*
+ * We have to set hugetlb_vmemmap_optimized again as above
+@@ -2048,7 +2048,7 @@ static void __prep_account_new_huge_page
+
+ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
+ {
+- folio_set_hugetlb(folio);
++ __folio_set_hugetlb(folio);
+ INIT_LIST_HEAD(&folio->lru);
+ hugetlb_set_folio_subpool(folio, NULL);
+ set_hugetlb_cgroup(folio, NULL);
+@@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio
+ }
+
+ /*
+- * PageHuge() only returns true for hugetlbfs pages, but not for normal or
+- * transparent huge pages. See the PageTransHuge() documentation for more
+- * details.
+- */
+-int PageHuge(struct page *page)
+-{
+- struct folio *folio;
+-
+- if (!PageCompound(page))
+- return 0;
+- folio = page_folio(page);
+- return folio_test_hugetlb(folio);
+-}
+-EXPORT_SYMBOL_GPL(PageHuge);
+-
+-/*
+ * Find and lock address space (mapping) in write mode.
+ *
+ * Upon entry, the page is locked which means that page_mapping() is
diff --git a/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch b/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch
new file mode 100644
index 0000000000..f49c2fb3a9
--- /dev/null
+++ b/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch
@@ -0,0 +1,85 @@
+From 682886ec69d22363819a83ddddd5d66cb5c791e1 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 18 Apr 2024 08:26:28 -0400
+Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 682886ec69d22363819a83ddddd5d66cb5c791e1 upstream.
+
+Christian reports a NULL deref in zswap that he bisected down to the zswap
+shrinker. The issue also cropped up in the bug trackers of libguestfs [1]
+and the Red Hat bugzilla [2].
+
+The problem is that when memcg is disabled with the boot time flag, the
+zswap shrinker might get called with sc->memcg == NULL. This is okay in
+many places, like the lruvec operations. But it crashes in
+memcg_page_state() - which is only used due to the non-node accounting of
+the cgroup's zswap memory to begin with.
+
+Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I
+was then able to reproduce the crash locally as well.
+
+[1] https://github.com/libguestfs/libguestfs/issues/139
+[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252
+
+Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org
+Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org
+Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Christian Heusel <christian@heusel.eu>
+Debugged-by: Nhat Pham <nphamcs@gmail.com>
+Suggested-by: Nhat Pham <nphamcs@gmail.com>
+Tested-by: Christian Heusel <christian@heusel.eu>
+Acked-by: Yosry Ahmed <yosryahmed@google.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: Dan Streetman <ddstreet@ieee.org>
+Cc: Richard W.M. Jones <rjones@redhat.com>
+Cc: Seth Jennings <sjenning@redhat.com>
+Cc: Vitaly Wool <vitaly.wool@konsulko.com>
+Cc: <stable@vger.kernel.org> [v6.8]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/zswap.c | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+Two minor conflicts in the else branch:
+- zswap_pool_total_size was get_zswap_pool_size() in 6.8
+- zswap_nr_stored was pool->nr_stored in 6.8
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -653,15 +653,22 @@ static unsigned long zswap_shrinker_coun
+ if (!gfp_has_io_fs(sc->gfp_mask))
+ return 0;
+
+-#ifdef CONFIG_MEMCG_KMEM
+- mem_cgroup_flush_stats(memcg);
+- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
+- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+-#else
+- /* use pool stats instead of memcg stats */
+- nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
+- nr_stored = atomic_read(&pool->nr_stored);
+-#endif
++ /*
++ * For memcg, use the cgroup-wide ZSWAP stats since we don't
++ * have them per-node and thus per-lruvec. Careful if memcg is
++ * runtime-disabled: we can get sc->memcg == NULL, which is ok
++ * for the lruvec, but not for memcg_page_state().
++ *
++ * Without memcg, use the zswap pool-wide metrics.
++ */
++ if (!mem_cgroup_disabled()) {
++ mem_cgroup_flush_stats(memcg);
++ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
++ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
++ } else {
++ nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
++ nr_stored = atomic_read(&pool->nr_stored);
++ }
+
+ if (!nr_stored)
+ return 0;
diff --git a/queue-6.8/series b/queue-6.8/series
index acb9d05f04..a5d633112e 100644
--- a/queue-6.8/series
+++ b/queue-6.8/series
@@ -195,3 +195,5 @@ mtd-diskonchip-work-around-ubsan-link-failure.patch
firmware-qcom-uefisecapp-fix-memory-related-io-errors-and-crashes.patch
phy-qcom-qmp-combo-fix-register-base-for-qserdes_dp_phy_mode.patch
phy-qcom-qmp-combo-fix-vco-div-offset-on-v3.patch
+mm-turn-folio_test_hugetlb-into-a-pagetype.patch
+mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch