author    Andrea Arcangeli <aarcange@redhat.com>    2023-06-11 21:44:30 -0400
committer Andrea Arcangeli <aarcange@redhat.com>    2023-11-11 22:03:36 -0500
commit    fbaf87de245cf84f6b21e5b0510986c6b0ea5b5e (patch)
tree      5744abcfe5a32274711cc0640163e7bf75fd0d2e
parent    a7ec17d378d0ec8d33b6d2a0d31ac107718662bb (diff)
download  aa-fbaf87de245cf84f6b21e5b0510986c6b0ea5b5e.tar.gz
Revert "mm: thp: avoid false positive copies in fork() with virtually splitted THPs"
This reverts commit fe106e2a3657ebd80571119578e6fe43f3b74ef1. This worked, but
it's still experimental.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
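For context: the reverted patch threaded a per-mm THP mapping count into
page_needs_cow_for_dma() so that a transparent hugepage mapped by several PTEs
of the same mm was not mistaken for a GUP-pinned page and copied eagerly during
fork(). With the revert, the check falls back to comparing page_count() against
1, as shown in the mm/util.c hunk below. The following is a minimal,
self-contained sketch of that restored heuristic; it uses userspace stand-ins
(sketch_page, sketch_mm, has_pinned) for the kernel helpers and is illustrative
only, not kernel API.

#include <stdbool.h>

/* Illustrative stand-ins for the relevant struct page / struct mm_struct state. */
struct sketch_page { int refcount; };
struct sketch_mm   { bool has_pinned; };

/*
 * Sketch of the post-revert rule: if this mm has ever pinned pages,
 * any reference beyond the single fork()-time one is treated as a
 * possible pin and forces an eager copy instead of write-protection.
 */
static bool needs_cow_for_dma(const struct sketch_mm *mm,
			      const struct sketch_page *page)
{
	if (!mm->has_pinned)
		return false;	/* no fast-GUP pin can exist */
	return page->refcount != 1;
}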
-rw-r--r--  include/linux/mm.h     2
-rw-r--r--  mm/huge_memory.c       4
-rw-r--r--  mm/hugetlb.c           2
-rw-r--r--  mm/memory.c          150
-rw-r--r--  mm/util.c             14
5 files changed, 19 insertions, 153 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 92729d1da17e7e..74160a09e6886d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1343,7 +1343,7 @@ static inline bool is_cow_mapping(vm_flags_t flags)
extern bool page_needs_cow_for_dma(struct vm_area_struct *vma,
struct page *page,
- bool compound, int mappings);
+ bool compound);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9d8b85831cee86..10101a10747dc2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1105,7 +1105,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* best effort that the pinned pages won't be replaced by another
* random page during the coming copy-on-write.
*/
- if (unlikely(page_needs_cow_for_dma(src_vma, src_page, true, 1))) {
+ if (unlikely(page_needs_cow_for_dma(src_vma, src_page, true))) {
pte_free(dst_mm, pgtable);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
@@ -1219,7 +1219,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
}
/* Please refer to comments in copy_huge_pmd() */
- if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud), true, 1))) {
+ if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud), true))) {
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
__split_huge_pud(vma, src_pud, addr);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 305908ab04f92c..2552484e9dd46f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4384,7 +4384,7 @@ again:
* sleep during the process.
*/
if (unlikely(page_needs_cow_for_dma(vma, ptepage,
- true, 1))) {
+ true))) {
pte_t src_pte_old = entry;
struct page *new;
diff --git a/mm/memory.c b/mm/memory.c
index 2e44362739e588..aa3bb5df701b05 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -883,8 +883,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
static inline int
copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
- struct page **prealloc, pte_t pte, struct page *page,
- int mappings)
+ struct page **prealloc, pte_t pte, struct page *page)
{
struct page *new_page;
@@ -901,7 +900,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
* the page count. That might give false positives for
* for pinning, but it will work correctly.
*/
- if (likely(!page_needs_cow_for_dma(src_vma, page, false, mappings)))
+ if (likely(!page_needs_cow_for_dma(src_vma, page, false)))
return 1;
new_page = *prealloc;
@@ -930,125 +929,13 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
}
/*
- * How many times this THP is mapped in this mm. It must hold the
- * mmap_lock for writing to prevent the ptes to change or it will not
- * return a stable value. It's safe to return a value lower than the
- * mappings in the current mm. It's not safe to return a value higher
- * than the mappings.
- *
- * NOTE: unlike fork(), the COW fault cannot call this to avoid the
- * false positive copies on splitted THP mappings.
- */
-static inline int count_thp_mappings(struct mm_struct *mm,
- pte_t *src_pte,
- unsigned long addr)
-{
- unsigned long pfn = pte_pfn(*src_pte);
- unsigned int count, offset;
- unsigned long start, end, pstart, pend;
- pte_t *orig_src_pte = src_pte;
- int found;
- pmd_t *pmd;
-
- mmap_assert_write_locked(mm);
-
- /* go beyond vmas, but stay within the pte */
- start = addr & HPAGE_PMD_MASK;
- end = start + HPAGE_PMD_SIZE;
-
- /* pfn range of the THP */
- pstart = pfn & ~(HPAGE_PMD_NR-1);
- pend = pstart + HPAGE_PMD_NR;
-
- offset = (addr - start) >> PAGE_SHIFT;
- offset = min(offset, (unsigned int) (pfn - pstart));
- VM_BUG_ON(offset >= HPAGE_PMD_NR);
-
- pfn -= offset;
- src_pte -= offset;
- found = count = 0;
- for (addr = addr - (offset << PAGE_SHIFT);
- addr < end && pfn < pend;
- addr += PAGE_SIZE, pfn++, src_pte++) {
- VM_BUG_ON(addr < start);
- VM_BUG_ON(addr >= end);
- VM_BUG_ON(pfn < pstart);
- VM_BUG_ON(pfn >= pend);
-
- if (orig_src_pte == src_pte)
- found++;
-
- if (pte_present(*src_pte) && pte_pfn(*src_pte) == pfn)
- count++;
- }
- VM_BUG_ON(found != 1);
- VM_BUG_ON(count < 1);
-
- if (pfn == pend) {
- /* the thp starts before "start" */
- if (addr != end) {
- addr -= HPAGE_PMD_SIZE;
- pfn -= HPAGE_PMD_NR;
-
- end -= HPAGE_PMD_SIZE;
- start = HPAGE_PMD_SIZE;
- pmd = mm_find_pmd(mm, addr);
- if (!pmd)
- goto out;
- src_pte = pte_offset_map(pmd, addr);
- for (; addr < end && pfn < pend;
- addr += PAGE_SIZE, pfn++, src_pte++) {
- VM_BUG_ON(addr < start);
- VM_BUG_ON(addr >= end);
- VM_BUG_ON(pfn < pstart);
- VM_BUG_ON(pfn >= pend);
-
- VM_BUG_ON(orig_src_pte == src_pte);
-
- if (pte_present(*src_pte) &&
- pte_pfn(*src_pte) == pfn)
- count++;
- }
- pte_unmap(src_pte);
- }
- } else {
- VM_BUG_ON(addr != end);
-
- /* the thp ends after "end" */
- pmd = mm_find_pmd(mm, addr);
- if (!pmd)
- goto out;
- start += HPAGE_PMD_SIZE;
- end += HPAGE_PMD_SIZE;
- src_pte = pte_offset_map(pmd, addr);
- for (; addr < end && pfn < pend;
- addr += PAGE_SIZE, pfn++, src_pte++) {
- VM_BUG_ON(addr < start);
- VM_BUG_ON(addr >= end);
- VM_BUG_ON(pfn < pstart);
- VM_BUG_ON(pfn >= pend);
-
- VM_BUG_ON(orig_src_pte == src_pte);
-
- if (pte_present(*src_pte) && pte_pfn(*src_pte) == pfn)
- count++;
- }
- pte_unmap(src_pte);
- }
-
-out:
- return count;
-}
-
-/*
* Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
* is required to copy this pte.
*/
static inline int
copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
- struct page **prealloc,
- int *last_thp_mappings, struct page **last_thp)
+ struct page **prealloc)
{
struct mm_struct *src_mm = src_vma->vm_mm;
unsigned long vm_flags = src_vma->vm_flags;
@@ -1057,26 +944,10 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
page = vm_normal_page(src_vma, addr, pte);
if (page) {
- int retval, mappings;
- struct page *head;
-
- mappings = 0;
- head = compound_head(page);
- if (PageTransHuge(head)) {
- mappings = *last_thp_mappings;
- if (!mappings || head != *last_thp) {
- mappings = count_thp_mappings(src_mm,
- src_pte, addr);
- *last_thp_mappings = mappings;
- *last_thp = head;
- VM_BUG_ON_PAGE(page_count(head) < mappings,
- page);
- }
- }
+ int retval;
retval = copy_present_page(dst_vma, src_vma, dst_pte, src_pte,
- addr, rss, prealloc, pte, page,
- mappings ? : 1);
+ addr, rss, prealloc, pte, page);
if (retval <= 0)
return retval;
@@ -1131,8 +1002,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
static int
copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
- unsigned long end,
- int *last_thp_mappings, struct page **last_thp)
+ unsigned long end)
{
struct mm_struct *dst_mm = dst_vma->vm_mm;
struct mm_struct *src_mm = src_vma->vm_mm;
@@ -1198,8 +1068,7 @@ again:
}
/* copy_present_pte() will clear `*prealloc' if consumed */
ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
- addr, rss, &prealloc,
- last_thp_mappings, last_thp);
+ addr, rss, &prealloc);
/*
* If we need a pre-allocated page for this pte, drop the
* locks, allocate, and try again.
@@ -1263,8 +1132,6 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
struct mm_struct *src_mm = src_vma->vm_mm;
pmd_t *src_pmd, *dst_pmd;
unsigned long next;
- int last_thp_mappings = 0;
- struct page *last_thp;
dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
if (!dst_pmd)
@@ -1287,8 +1154,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
if (pmd_none_or_clear_bad(src_pmd))
continue;
if (copy_pte_range(dst_vma, src_vma, dst_pmd, src_pmd,
- addr, next,
- &last_thp_mappings, &last_thp))
+ addr, next))
return -ENOMEM;
} while (dst_pmd++, src_pmd++, addr = next, addr != end);
return 0;
diff --git a/mm/util.c b/mm/util.c
index 6199a69001b7d8..f819bff52a8c75 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -794,7 +794,7 @@ struct anon_vma *page_anon_vma(struct page *page)
* should break the cow immediately for a page on the src mm.
*/
bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
- bool compound, int mappings)
+ bool compound)
{
bool copy;
int val;
@@ -816,12 +816,12 @@ bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
return false;
/*
- * If page_count is == mappings there cannot be any GUP pin
- * and further GUP pins are prevented with write_protect_seq.
+ * If page_count is == 1 there cannot be any GUP pin and
+ * further GUP pins are prevented with write_protect_seq.
*/
val = page_count(page);
- VM_WARN_ON_ONCE_PAGE(val < mappings, page);
- if (val == mappings)
+ VM_WARN_ON_ONCE_PAGE(val < 1, page);
+ if (val == 1)
return false;
/*
@@ -842,8 +842,8 @@ bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page,
copy = true;
if (PageSwapCache(page) && trylock_page(page)) {
val = page_count(page) - PageSwapCache(page);
- VM_WARN_ON_ONCE_PAGE(val < mappings, page);
- copy = val != mappings;
+ VM_WARN_ON_ONCE_PAGE(val < 1, page);
+ copy = val != 1;
unlock_page(page);
}
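The final hunk restores the same rule to the swap-cache fallback: a page sitting
in the swap cache carries one extra reference of its own, so that reference is
subtracted before comparing against 1. A hedged extension of the sketch above
(again with illustrative names, not kernel API):

/* Same heuristic, discounting the swap cache's own reference first. */
static bool needs_cow_for_dma_swapcache(const struct sketch_mm *mm,
					const struct sketch_page *page,
					bool in_swap_cache)
{
	int val;

	if (!mm->has_pinned)
		return false;
	val = page->refcount - (in_swap_cache ? 1 : 0);
	return val != 1;
}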