summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Morton <akpm@linux-foundation.org>2024-04-10 14:00:57 -0700
committerAndrew Morton <akpm@linux-foundation.org>2024-04-10 14:00:57 -0700
commit1ff2532493b1490f6e99fed1cd9b9fdc425abcaa (patch)
tree5b5f8202145d4dfc25dfc60a5b2a22e378be3e95
parentf92a3cca4a66258bb53db53ec73b01f6e3a1abef (diff)
download25-new-1ff2532493b1490f6e99fed1cd9b9fdc425abcaa.tar.gz
foo
-rw-r--r--patches/mm-follow_pte-improvements.patch83
-rw-r--r--pc/mm-follow_pte-improvements.pc1
-rw-r--r--txt/mm-follow_pte-improvements.txt36
3 files changed, 120 insertions, 0 deletions
diff --git a/patches/mm-follow_pte-improvements.patch b/patches/mm-follow_pte-improvements.patch
new file mode 100644
index 000000000..d0ff503ea
--- /dev/null
+++ b/patches/mm-follow_pte-improvements.patch
@@ -0,0 +1,83 @@
+From: David Hildenbrand <david@redhat.com>
+Subject: mm: follow_pte() improvements
+Date: Wed, 10 Apr 2024 17:55:27 +0200
+
+follow_pte() is now our main function to look up PTEs in VM_PFNMAP/VM_IO
+VMAs. Let's perform some more sanity checks to make this exported
+function harder to abuse.
+
+Further, extend the doc a bit; it still focuses on the KVM use case with
+MMU notifiers. Drop the KVM+follow_pfn() comment: follow_pfn() is no
+more, and we have other users nowadays.
+
+Also extend the doc regarding refcounted pages and the interaction with
+MMU notifiers.
+
+KVM is one example that uses MMU notifiers and can deal with refcounted
+pages properly. VFIO is one example that doesn't use MMU notifiers and,
+to prevent use-after-free, rejects refcounted pages, i.e., pages with
+pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)). Protection changes are
+less of a concern for users like VFIO: the behavior is similar to
+longterm-pinning a page and getting the PTE protection changed afterwards.
+
+The primary concern with refcounted pages is use-after-free, which callers
+should be aware of.
+
+Link: https://lkml.kernel.org/r/20240410155527.474777-4-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: Alex Williamson <alex.williamson@redhat.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Fei Li <fei1.li@intel.com>
+Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Yonghua Huang <yonghua.huang@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/memory.c | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/mm/memory.c~mm-follow_pte-improvements
++++ a/mm/memory.c
+@@ -5933,15 +5933,21 @@ int __pmd_alloc(struct mm_struct *mm, pu
+ *
+ * On a successful return, the pointer to the PTE is stored in @ptepp;
+ * the corresponding lock is taken and its location is stored in @ptlp.
+- * The contents of the PTE are only stable until @ptlp is released;
+- * any further use, if any, must be protected against invalidation
+- * with MMU notifiers.
++ *
++ * The contents of the PTE are only stable until @ptlp is released using
++ * pte_unmap_unlock(). This function will fail if the PTE is non-present.
++ * Present PTEs may include PTEs that map refcounted pages, such as
++ * anonymous folios in COW mappings.
++ *
++ * Callers must be careful when relying on PTE content after
++ * pte_unmap_unlock(). Especially if the PTE maps a refcounted page,
++ * callers must protect against invalidation with MMU notifiers; otherwise
++ * access to the PFN at a later point in time can trigger use-after-free.
+ *
+ * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
+ * should be taken for read.
+ *
+- * KVM uses this function. While it is arguably less bad than the historic
+- * ``follow_pfn``, it is not a good general-purpose API.
++ * This function must not be used to modify PTE content.
+ *
+ * Return: zero on success, -ve otherwise.
+ */
+@@ -5955,6 +5961,10 @@ int follow_pte(struct vm_area_struct *vm
+ pmd_t *pmd;
+ pte_t *ptep;
+
++ mmap_assert_locked(mm);
++ if (unlikely(address < vma->vm_start || address >= vma->vm_end))
++ goto out;
++
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out;
+
+_
diff --git a/pc/mm-follow_pte-improvements.pc b/pc/mm-follow_pte-improvements.pc
new file mode 100644
index 000000000..cf949a50f
--- /dev/null
+++ b/pc/mm-follow_pte-improvements.pc
@@ -0,0 +1 @@
+mm/memory.c
diff --git a/txt/mm-follow_pte-improvements.txt b/txt/mm-follow_pte-improvements.txt
new file mode 100644
index 000000000..ab5ffc013
--- /dev/null
+++ b/txt/mm-follow_pte-improvements.txt
@@ -0,0 +1,36 @@
+From: David Hildenbrand <david@redhat.com>
+Subject: mm: follow_pte() improvements
+Date: Wed, 10 Apr 2024 17:55:27 +0200
+
+follow_pte() is now our main function to look up PTEs in VM_PFNMAP/VM_IO
+VMAs. Let's perform some more sanity checks to make this exported
+function harder to abuse.
+
+Further, extend the doc a bit; it still focuses on the KVM use case with
+MMU notifiers. Drop the KVM+follow_pfn() comment: follow_pfn() is no
+more, and we have other users nowadays.
+
+Also extend the doc regarding refcounted pages and the interaction with
+MMU notifiers.
+
+KVM is one example that uses MMU notifiers and can deal with refcounted
+pages properly. VFIO is one example that doesn't use MMU notifiers and,
+to prevent use-after-free, rejects refcounted pages, i.e., pages with
+pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)). Protection changes are
+less of a concern for users like VFIO: the behavior is similar to
+longterm-pinning a page and getting the PTE protection changed afterwards.
+
+The primary concern with refcounted pages is use-after-free, which callers
+should be aware of.
+
+Link: https://lkml.kernel.org/r/20240410155527.474777-4-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: Alex Williamson <alex.williamson@redhat.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Fei Li <fei1.li@intel.com>
+Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Yonghua Huang <yonghua.huang@intel.com>