diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-04-29 16:18:12 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-04-29 16:18:12 +0200 |
commit | 67b0bf9fb9c015afdcb87e775d3303be95028d08 (patch) | |
tree | 1f525a5909954ab42001bc0e101e33aebaf23e4f | |
parent | d7391a558e3ecf86d4501e4440c2b1f537f0426d (diff) | |
download | stable-queue-67b0bf9fb9c015afdcb87e775d3303be95028d08.tar.gz |
6.8-stable patches
added patches:
mm-turn-folio_test_hugetlb-into-a-pagetype.patch
mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch
-rw-r--r-- | queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch | 239 | ||||
-rw-r--r-- | queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch | 85 | ||||
-rw-r--r-- | queue-6.8/series | 2 |
3 files changed, 326 insertions, 0 deletions
diff --git a/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch b/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch new file mode 100644 index 0000000000..d85186b21f --- /dev/null +++ b/queue-6.8/mm-turn-folio_test_hugetlb-into-a-pagetype.patch @@ -0,0 +1,239 @@ +From d99e3140a4d33e26066183ff727d8f02f56bec64 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Date: Thu, 21 Mar 2024 14:24:43 +0000 +Subject: mm: turn folio_test_hugetlb into a PageType + +From: Matthew Wilcox (Oracle) <willy@infradead.org> + +commit d99e3140a4d33e26066183ff727d8f02f56bec64 upstream. + +The current folio_test_hugetlb() can be fooled by a concurrent folio split +into returning true for a folio which has never belonged to hugetlbfs. +This can't happen if the caller holds a refcount on it, but we have a few +places (memory-failure, compaction, procfs) which do not and should not +take a speculative reference. + +Since hugetlb pages do not use individual page mapcounts (they are always +fully mapped and use the entire_mapcount field to record the number of +mappings), the PageType field is available now that page_mapcount() +ignores the value in this field. + +In compaction and with CONFIG_DEBUG_VM enabled, the current implementation +can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b +("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks +in the PageHuge() testing path. 
+ +[willy@infradead.org: update vmcoreinfo] + Link: https://lkml.kernel.org/r/ZgGZUvsdhaT1Va-T@casper.infradead.org +Link: https://lkml.kernel.org/r/20240321142448.1645400-6-willy@infradead.org +Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: David Hildenbrand <david@redhat.com> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Reported-by: Luis Chamberlain <mcgrof@kernel.org> +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 +Cc: Miaohe Lin <linmiaohe@huawei.com> +Cc: Muchun Song <muchun.song@linux.dev> +Cc: Oscar Salvador <osalvador@suse.de> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/page-flags.h | 70 +++++++++++++++++++---------------------- + include/trace/events/mmflags.h | 1 + kernel/crash_core.c | 5 +- + mm/hugetlb.c | 22 +----------- + 4 files changed, 39 insertions(+), 59 deletions(-) + +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -190,7 +190,6 @@ enum pageflags { + + /* At least one page in this folio has the hwpoison flag set */ + PG_has_hwpoisoned = PG_error, +- PG_hugetlb = PG_active, + PG_large_rmappable = PG_workingset, /* anon or file-backed */ + }; + +@@ -850,29 +849,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large + + #define PG_head_mask ((1UL << PG_head)) + +-#ifdef CONFIG_HUGETLB_PAGE +-int PageHuge(struct page *page); +-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) +-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) +- +-/** +- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs +- * @folio: The folio to test. +- * +- * Context: Any context. Caller should have a reference on the folio to +- * prevent it from being turned into a tail page. +- * Return: True for hugetlbfs folios, false for anon folios or folios +- * belonging to other filesystems. 
+- */ +-static inline bool folio_test_hugetlb(struct folio *folio) +-{ +- return folio_test_large(folio) && +- test_bit(PG_hugetlb, folio_flags(folio, 1)); +-} +-#else +-TESTPAGEFLAG_FALSE(Huge, hugetlb) +-#endif +- + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* + * PageHuge() only returns true for hugetlbfs pages, but not for +@@ -929,18 +905,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpois + #endif + + /* +- * Check if a page is currently marked HWPoisoned. Note that this check is +- * best effort only and inherently racy: there is no way to synchronize with +- * failing hardware. +- */ +-static inline bool is_page_hwpoison(struct page *page) +-{ +- if (PageHWPoison(page)) +- return true; +- return PageHuge(page) && PageHWPoison(compound_head(page)); +-} +- +-/* + * For pages that are never mapped to userspace (and aren't PageSlab), + * page_type may be used. Because it is initialised to -1, we invert the + * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and +@@ -956,6 +920,7 @@ static inline bool is_page_hwpoison(stru + #define PG_offline 0x00000100 + #define PG_table 0x00000200 + #define PG_guard 0x00000400 ++#define PG_hugetlb 0x00000800 + + #define PageType(page, flag) \ + ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +@@ -1050,6 +1015,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) + */ + PAGE_TYPE_OPS(Guard, guard, guard) + ++#ifdef CONFIG_HUGETLB_PAGE ++FOLIO_TYPE_OPS(hugetlb, hugetlb) ++#else ++FOLIO_TEST_FLAG_FALSE(hugetlb) ++#endif ++ ++/** ++ * PageHuge - Determine if the page belongs to hugetlbfs ++ * @page: The page to test. ++ * ++ * Context: Any context. ++ * Return: True for hugetlbfs pages, false for anon pages or pages ++ * belonging to other filesystems. ++ */ ++static inline bool PageHuge(const struct page *page) ++{ ++ return folio_test_hugetlb(page_folio(page)); ++} ++ ++/* ++ * Check if a page is currently marked HWPoisoned. 
Note that this check is ++ * best effort only and inherently racy: there is no way to synchronize with ++ * failing hardware. ++ */ ++static inline bool is_page_hwpoison(struct page *page) ++{ ++ if (PageHWPoison(page)) ++ return true; ++ return PageHuge(page) && PageHWPoison(compound_head(page)); ++} ++ + extern bool is_free_buddy_page(struct page *page); + + PAGEFLAG(Isolated, isolated, PF_ANY); +@@ -1116,7 +1112,7 @@ static __always_inline void __ClearPageA + */ + #define PAGE_FLAGS_SECOND \ + (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ +- 1UL << PG_hugetlb | 1UL << PG_large_rmappable) ++ 1UL << PG_large_rmappable) + + #define PAGE_FLAGS_PRIVATE \ + (1UL << PG_private | 1UL << PG_private_2) +--- a/include/trace/events/mmflags.h ++++ b/include/trace/events/mmflags.h +@@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) + #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } + + #define __def_pagetype_names \ ++ DEF_PAGETYPE_NAME(hugetlb), \ + DEF_PAGETYPE_NAME(offline), \ + DEF_PAGETYPE_NAME(guard), \ + DEF_PAGETYPE_NAME(table), \ +--- a/kernel/crash_core.c ++++ b/kernel/crash_core.c +@@ -814,11 +814,10 @@ static int __init crash_save_vmcoreinfo_ + VMCOREINFO_NUMBER(PG_head_mask); + #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); +-#ifdef CONFIG_HUGETLB_PAGE +- VMCOREINFO_NUMBER(PG_hugetlb); ++#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) ++ VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); + #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) + VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); +-#endif + + #ifdef CONFIG_KALLSYMS + VMCOREINFO_SYMBOL(kallsyms_names); +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1623,7 +1623,7 @@ static inline void __clear_hugetlb_destr + { + lockdep_assert_held(&hugetlb_lock); + +- folio_clear_hugetlb(folio); ++ __folio_clear_hugetlb(folio); + } + + /* +@@ -1710,7 +1710,7 @@ static void add_hugetlb_folio(struct hst + h->surplus_huge_pages_node[nid]++; + } + +- 
folio_set_hugetlb(folio); ++ __folio_set_hugetlb(folio); + folio_change_private(folio, NULL); + /* + * We have to set hugetlb_vmemmap_optimized again as above +@@ -2048,7 +2048,7 @@ static void __prep_account_new_huge_page + + static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) + { +- folio_set_hugetlb(folio); ++ __folio_set_hugetlb(folio); + INIT_LIST_HEAD(&folio->lru); + hugetlb_set_folio_subpool(folio, NULL); + set_hugetlb_cgroup(folio, NULL); +@@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio + } + + /* +- * PageHuge() only returns true for hugetlbfs pages, but not for normal or +- * transparent huge pages. See the PageTransHuge() documentation for more +- * details. +- */ +-int PageHuge(struct page *page) +-{ +- struct folio *folio; +- +- if (!PageCompound(page)) +- return 0; +- folio = page_folio(page); +- return folio_test_hugetlb(folio); +-} +-EXPORT_SYMBOL_GPL(PageHuge); +- +-/* + * Find and lock address space (mapping) in write mode. + * + * Upon entry, the page is locked which means that page_mapping() is diff --git a/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch b/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch new file mode 100644 index 0000000000..f49c2fb3a9 --- /dev/null +++ b/queue-6.8/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch @@ -0,0 +1,85 @@ +From 682886ec69d22363819a83ddddd5d66cb5c791e1 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner <hannes@cmpxchg.org> +Date: Thu, 18 Apr 2024 08:26:28 -0400 +Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory + +From: Johannes Weiner <hannes@cmpxchg.org> + +commit 682886ec69d22363819a83ddddd5d66cb5c791e1 upstream. + +Christian reports a NULL deref in zswap that he bisected down to the zswap +shrinker. The issue also cropped up in the bug trackers of libguestfs [1] +and the Red Hat bugzilla [2]. 
+ +The problem is that when memcg is disabled with the boot time flag, the +zswap shrinker might get called with sc->memcg == NULL. This is okay in +many places, like the lruvec operations. But it crashes in +memcg_page_state() - which is only used due to the non-node accounting of +the cgroup's zswap memory to begin with. + +Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I +was then able to reproduce the crash locally as well. + +[1] https://github.com/libguestfs/libguestfs/issues/139 +[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252 + +Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org +Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org +Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") +Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> +Reported-by: Christian Heusel <christian@heusel.eu> +Debugged-by: Nhat Pham <nphamcs@gmail.com> +Suggested-by: Nhat Pham <nphamcs@gmail.com> +Tested-by: Christian Heusel <christian@heusel.eu> +Acked-by: Yosry Ahmed <yosryahmed@google.com> +Cc: Chengming Zhou <chengming.zhou@linux.dev> +Cc: Dan Streetman <ddstreet@ieee.org> +Cc: Richard W.M. 
Jones <rjones@redhat.com> +Cc: Seth Jennings <sjenning@redhat.com> +Cc: Vitaly Wool <vitaly.wool@konsulko.com> +Cc: <stable@vger.kernel.org> [v6.8] +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + mm/zswap.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +Two minor conflicts in the else branch: +- zswap_pool_total_size was get_zswap_pool_size() in 6.8 +- zswap_nr_stored was pool->nr_stored in 6.8 + +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@ -653,15 +653,22 @@ static unsigned long zswap_shrinker_coun + if (!gfp_has_io_fs(sc->gfp_mask)) + return 0; + +-#ifdef CONFIG_MEMCG_KMEM +- mem_cgroup_flush_stats(memcg); +- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; +- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); +-#else +- /* use pool stats instead of memcg stats */ +- nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT; +- nr_stored = atomic_read(&pool->nr_stored); +-#endif ++ /* ++ * For memcg, use the cgroup-wide ZSWAP stats since we don't ++ * have them per-node and thus per-lruvec. Careful if memcg is ++ * runtime-disabled: we can get sc->memcg == NULL, which is ok ++ * for the lruvec, but not for memcg_page_state(). ++ * ++ * Without memcg, use the zswap pool-wide metrics. 
++ */ ++ if (!mem_cgroup_disabled()) { ++ mem_cgroup_flush_stats(memcg); ++ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; ++ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); ++ } else { ++ nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT; ++ nr_stored = atomic_read(&pool->nr_stored); ++ } + + if (!nr_stored) + return 0; diff --git a/queue-6.8/series b/queue-6.8/series index acb9d05f04..a5d633112e 100644 --- a/queue-6.8/series +++ b/queue-6.8/series @@ -195,3 +195,5 @@ mtd-diskonchip-work-around-ubsan-link-failure.patch firmware-qcom-uefisecapp-fix-memory-related-io-errors-and-crashes.patch phy-qcom-qmp-combo-fix-register-base-for-qserdes_dp_phy_mode.patch phy-qcom-qmp-combo-fix-vco-div-offset-on-v3.patch +mm-turn-folio_test_hugetlb-into-a-pagetype.patch +mm-zswap-fix-shrinker-null-crash-with-cgroup_disable-memory.patch |