author | Andrea Arcangeli <aarcange@redhat.com> | 2023-06-11 21:44:30 -0400
committer | Andrea Arcangeli <aarcange@redhat.com> | 2023-11-11 22:03:36 -0500
commit | fbaf87de245cf84f6b21e5b0510986c6b0ea5b5e (patch)
tree | 5744abcfe5a32274711cc0640163e7bf75fd0d2e
parent | a7ec17d378d0ec8d33b6d2a0d31ac107718662bb (diff)
Revert "mm: thp: avoid false positive copies in fork() with virtually splitted THPs"
This reverts commit fe106e2a3657ebd80571119578e6fe43f3b74ef1.
This worked, but it's still experimental.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
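
For context: the heuristic this revert restores treats any page whose reference count exceeds 1 as possibly DMA-pinned and therefore copied eagerly at fork(), while the reverted patch compared against the number of mappings counted in the current mm, so a virtually split THP (page_count elevated by its own pte mappings, no GUP pin) was not copied. What follows is a minimal userspace model of those two comparisons, not kernel code; the struct and the "3 mappings" value are illustrative assumptions.

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	int refcount;		/* stand-in for page_count() */
};

/* Restored logic: only refcount == 1 proves there is no GUP pin. */
static bool needs_cow_for_dma(const struct fake_page *page)
{
	return page->refcount != 1;
}

/* Reverted logic: compare against the counted mappings instead. */
static bool needs_cow_for_dma_mappings(const struct fake_page *page,
				       int mappings)
{
	return page->refcount != mappings;
}

int main(void)
{
	/*
	 * A THP split into pte mappings: three ptes of the same mm
	 * still reference it, so the refcount is 3 with no GUP pin.
	 */
	struct fake_page thp = { .refcount = 3 };

	printf("refcount==1 heuristic copies: %d (false positive)\n",
	       needs_cow_for_dma(&thp));
	printf("mappings-aware heuristic copies: %d\n",
	       needs_cow_for_dma_mappings(&thp, 3));
	return 0;
}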
-rw-r--r-- | include/linux/mm.h | 2
-rw-r--r-- | mm/huge_memory.c | 4
-rw-r--r-- | mm/hugetlb.c | 2
-rw-r--r-- | mm/memory.c | 150
-rw-r--r-- | mm/util.c | 14
5 files changed, 19 insertions, 153 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 92729d1da17e7e..74160a09e6886d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1343,7 +1343,7 @@ static inline bool is_cow_mapping(vm_flags_t flags)
 
 extern bool page_needs_cow_for_dma(struct vm_area_struct *vma,
 				   struct page *page,
-				   bool compound, int mappings);
+				   bool compound);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9d8b85831cee86..10101a10747dc2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1105,7 +1105,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * best effort that the pinned pages won't be replaced by another
 	 * random page during the coming copy-on-write.
 	 */
-	if (unlikely(page_needs_cow_for_dma(src_vma, src_page, true, 1))) {
+	if (unlikely(page_needs_cow_for_dma(src_vma, src_page, true))) {
 		pte_free(dst_mm, pgtable);
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
@@ -1219,7 +1219,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	}
 
 	/* Please refer to comments in copy_huge_pmd() */
-	if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud), true, 1))) {
+	if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud), true))) {
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
 		__split_huge_pud(vma, src_pud, addr);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 305908ab04f92c..2552484e9dd46f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4384,7 +4384,7 @@ again:
 		 * sleep during the process.
 		 */
 		if (unlikely(page_needs_cow_for_dma(vma, ptepage,
-						    true, 1))) {
+						    true))) {
 			pte_t src_pte_old = entry;
 			struct page *new;
 
diff --git a/mm/memory.c b/mm/memory.c
index 2e44362739e588..aa3bb5df701b05 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -883,8 +883,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 static inline int
 copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 		  pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
-		  struct page **prealloc, pte_t pte, struct page *page,
-		  int mappings)
+		  struct page **prealloc, pte_t pte, struct page *page)
 {
 	struct page *new_page;
 
@@ -901,7 +900,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 	 * the page count. That might give false positives for
 	 * for pinning, but it will work correctly.
 	 */
-	if (likely(!page_needs_cow_for_dma(src_vma, page, false, mappings)))
+	if (likely(!page_needs_cow_for_dma(src_vma, page, false)))
 		return 1;
 
 	new_page = *prealloc;
@@ -930,125 +929,13 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 }
 
 /*
- * How many times this THP is mapped in this mm. It must hold the
- * mmap_lock for writing to prevent the ptes to change or it will not
- * return a stable value. It's safe to return a value lower than the
- * mappings in the current mm. It's not safe to return a value higher
- * than the mappings.
- *
- * NOTE: unlike fork(), the COW fault cannot call this to avoid the
- * false positive copies on splitted THP mappings.
- */
-static inline int count_thp_mappings(struct mm_struct *mm,
-				     pte_t *src_pte,
-				     unsigned long addr)
-{
-	unsigned long pfn = pte_pfn(*src_pte);
-	unsigned int count, offset;
-	unsigned long start, end, pstart, pend;
-	pte_t *orig_src_pte = src_pte;
-	int found;
-	pmd_t *pmd;
-
-	mmap_assert_write_locked(mm);
-
-	/* go beyond vmas, but stay within the pte */
-	start = addr & HPAGE_PMD_MASK;
-	end = start + HPAGE_PMD_SIZE;
-
-	/* pfn range of the THP */
-	pstart = pfn & ~(HPAGE_PMD_NR-1);
-	pend = pstart + HPAGE_PMD_NR;
-
-	offset = (addr - start) >> PAGE_SHIFT;
-	offset = min(offset, (unsigned int) (pfn - pstart));
-	VM_BUG_ON(offset >= HPAGE_PMD_NR);
-
-	pfn -= offset;
-	src_pte -= offset;
-	found = count = 0;
-	for (addr = addr - (offset << PAGE_SHIFT);
-	     addr < end && pfn < pend;
-	     addr += PAGE_SIZE, pfn++, src_pte++) {
-		VM_BUG_ON(addr < start);
-		VM_BUG_ON(addr >= end);
-		VM_BUG_ON(pfn < pstart);
-		VM_BUG_ON(pfn >= pend);
-
-		if (orig_src_pte == src_pte)
-			found++;
-
-		if (pte_present(*src_pte) && pte_pfn(*src_pte) == pfn)
-			count++;
-	}
-	VM_BUG_ON(found != 1);
-	VM_BUG_ON(count < 1);
-
-	if (pfn == pend) {
-		/* the thp starts before "start" */
-		if (addr != end) {
-			addr -= HPAGE_PMD_SIZE;
-			pfn -= HPAGE_PMD_NR;
-
-			end -= HPAGE_PMD_SIZE;
-			start = HPAGE_PMD_SIZE;
-			pmd = mm_find_pmd(mm, addr);
-			if (!pmd)
-				goto out;
-			src_pte = pte_offset_map(pmd, addr);
-			for (; addr < end && pfn < pend;
-			     addr += PAGE_SIZE, pfn++, src_pte++) {
-				VM_BUG_ON(addr < start);
-				VM_BUG_ON(addr >= end);
-				VM_BUG_ON(pfn < pstart);
-				VM_BUG_ON(pfn >= pend);
-
-				VM_BUG_ON(orig_src_pte == src_pte);
-
-				if (pte_present(*src_pte) &&
-				    pte_pfn(*src_pte) == pfn)
-					count++;
-			}
-			pte_unmap(src_pte);
-		}
-	} else {
-		VM_BUG_ON(addr != end);
-
-		/* the thp ends after "end" */
-		pmd = mm_find_pmd(mm, addr);
-		if (!pmd)
-			goto out;
-		start += HPAGE_PMD_SIZE;
-		end += HPAGE_PMD_SIZE;
-		src_pte = pte_offset_map(pmd, addr);
-		for (; addr < end && pfn < pend;
-		     addr += PAGE_SIZE, pfn++, src_pte++) {
-			VM_BUG_ON(addr < start);
-			VM_BUG_ON(addr >= end);
-			VM_BUG_ON(pfn < pstart);
-			VM_BUG_ON(pfn >= pend);
-
-			VM_BUG_ON(orig_src_pte == src_pte);
-
-			if (pte_present(*src_pte) && pte_pfn(*src_pte) == pfn)
-				count++;
-		}
-		pte_unmap(src_pte);
-	}
-
-out:
-	return count;
-}
-
-/*
  * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
  * is required to copy this pte.
  */
 static inline int
 copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 		 pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
-		 struct page **prealloc,
-		 int *last_thp_mappings, struct page **last_thp)
+		 struct page **prealloc)
 {
 	struct mm_struct *src_mm = src_vma->vm_mm;
 	unsigned long vm_flags = src_vma->vm_flags;
@@ -1057,26 +944,10 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 
 	page = vm_normal_page(src_vma, addr, pte);
 	if (page) {
-		int retval, mappings;
-		struct page *head;
-
-		mappings = 0;
-		head = compound_head(page);
-		if (PageTransHuge(head)) {
-			mappings = *last_thp_mappings;
-			if (!mappings || head != *last_thp) {
-				mappings = count_thp_mappings(src_mm,
-							      src_pte, addr);
-				*last_thp_mappings = mappings;
-				*last_thp = head;
-				VM_BUG_ON_PAGE(page_count(head) < mappings,
-					       page);
-			}
-		}
+		int retval;
 
 		retval = copy_present_page(dst_vma, src_vma, dst_pte, src_pte,
-					   addr, rss, prealloc, pte, page,
-					   mappings ? : 1);
+					   addr, rss, prealloc, pte, page);
 		if (retval <= 0)
 			return retval;
 
@@ -1131,8 +1002,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
 static int
 copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	       pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
-	       unsigned long end,
-	       int *last_thp_mappings, struct page **last_thp)
+	       unsigned long end)
 {
 	struct mm_struct *dst_mm = dst_vma->vm_mm;
 	struct mm_struct *src_mm = src_vma->vm_mm;
@@ -1198,8 +1068,7 @@ again:
 		}
 		/* copy_present_pte() will clear `*prealloc' if consumed */
 		ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
-				       addr, rss, &prealloc,
-				       last_thp_mappings, last_thp);
+				       addr, rss, &prealloc);
 		/*
 		 * If we need a pre-allocated page for this pte, drop the
 		 * locks, allocate, and try again.
@@ -1263,8 +1132,6 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	struct mm_struct *src_mm = src_vma->vm_mm;
 	pmd_t *src_pmd, *dst_pmd;
 	unsigned long next;
-	int last_thp_mappings = 0;
-	struct page *last_thp;
 
 	dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
 	if (!dst_pmd)
@@ -1287,8 +1154,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 		if (pmd_none_or_clear_bad(src_pmd))
 			continue;
 		if (copy_pte_range(dst_vma, src_vma, dst_pmd, src_pmd,
-				   addr, next,
-				   &last_thp_mappings, &last_thp))
+				   addr, next))
 			return -ENOMEM;
 	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
 	return 0;
diff --git a/mm/util.c b/mm/util.c
index 6199a69001b7d8..f819bff52a8c75 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -794,7 +794,7 @@ struct anon_vma *page_anon_vma(struct page *page)
  * should break the cow immediately for a page on the src mm.
  */
 bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
-			    bool compound, int mappings)
+			    bool compound)
 {
 	bool copy;
 	int val;
@@ -816,12 +816,12 @@ bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
 		return false;
 
 	/*
-	 * If page_count is == mappings there cannot be any GUP pin
-	 * and further GUP pins are prevented with write_protect_seq.
+	 * If page_count is == 1 there cannot be any GUP pin and
+	 * further GUP pins are prevented with write_protect_seq.
 	 */
 	val = page_count(page);
-	VM_WARN_ON_ONCE_PAGE(val < mappings, page);
-	if (val == mappings)
+	VM_WARN_ON_ONCE_PAGE(val < 1, page);
+	if (val == 1)
 		return false;
 
 	/*
@@ -842,8 +842,8 @@ bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
 	copy = true;
 	if (PageSwapCache(page) && trylock_page(page)) {
 		val = page_count(page) - PageSwapCache(page);
-		VM_WARN_ON_ONCE_PAGE(val < mappings, page);
-		copy = val != mappings;
+		VM_WARN_ON_ONCE_PAGE(val < 1, page);
+		copy = val != 1;
 		unlock_page(page);
 	}
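
For reference, the removed count_thp_mappings() walked the pte entries covering one THP and counted how many still map pfns belonging to it. Below is a simplified userspace model of the single-page-table case only; the array standing in for a page table and the pfn values are assumptions of the sketch, and the real function additionally walked a neighbouring page table when the THP straddled two of them.

#include <stdio.h>

#define HPAGE_PMD_NR 512

/*
 * Count how many entries of one "page table" (pfn 0 == not present)
 * map into the THP whose head sits at the aligned base of thp_pfn.
 */
static int count_thp_mappings(const unsigned long pte_pfn[HPAGE_PMD_NR],
			      unsigned long thp_pfn)
{
	unsigned long pstart = thp_pfn & ~(HPAGE_PMD_NR - 1UL);
	int i, count = 0;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		if (pte_pfn[i] >= pstart &&
		    pte_pfn[i] < pstart + HPAGE_PMD_NR)
			count++;
	return count;
}

int main(void)
{
	unsigned long ptes[HPAGE_PMD_NR] = { 0 };
	int i;

	/* A THP at pfn 0x1000, virtually split: only half still mapped. */
	for (i = 0; i < HPAGE_PMD_NR / 2; i++)
		ptes[i] = 0x1000 + i;

	printf("mappings = %d\n", count_thp_mappings(ptes, 0x1000));
	return 0;
}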