author     Ben Hutchings <ben@decadent.org.uk>    2018-09-28 04:01:44 +0100
committer  Ben Hutchings <ben@decadent.org.uk>    2018-09-28 04:49:54 +0100
commit     ac5ee8cb77fe9723357b26d1b605a5e6907d4607 (patch)
tree       dc46252790d2d3a6d232c417e6d01719d5845fb3
parent     84e721a6cc8601a459500305fc8fe4c01905696d (diff)
download   linux-stable-queue-ac5ee8cb77fe9723357b26d1b605a5e6907d4607.tar.gz
Backport L1TF mitigation
-rw-r--r--  queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch  157
-rw-r--r--  queue-3.16/mm-add-vm_insert_pfn_prot.patch  97
-rw-r--r--  queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch  59
-rw-r--r--  queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch  74
-rw-r--r--  queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch  97
-rw-r--r--  queue-3.16/pagewalk-improve-vma-handling.patch  341
-rw-r--r--  queue-3.16/series  31
-rw-r--r--  queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch  93
-rw-r--r--  queue-3.16/x86-init-fix-build-with-config_swap-n.patch  35
-rw-r--r--  queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch  118
-rw-r--r--  queue-3.16/x86-mm-add-pud-functions.patch  51
-rw-r--r--  queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch  66
-rw-r--r--  queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch  104
-rw-r--r--  queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch  45
-rw-r--r--  queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch  235
-rw-r--r--  queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch  108
-rw-r--r--  queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch  274
-rw-r--r--  queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch  69
-rw-r--r--  queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch  42
-rw-r--r--  queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch  78
-rw-r--r--  queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch  70
-rw-r--r--  queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch  81
-rw-r--r--  queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch  77
-rw-r--r--  queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch  31
-rw-r--r--  queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch  128
-rw-r--r--  queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch  55
-rw-r--r--  queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch  39
-rw-r--r--  queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch  88
-rw-r--r--  queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch  254
-rw-r--r--  queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch  81
-rw-r--r--  queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch  41
-rw-r--r--  queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch  63
32 files changed, 3182 insertions, 0 deletions
diff --git a/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch b/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
new file mode 100644
index 00000000..76c52315
--- /dev/null
+++ b/queue-3.16/drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
@@ -0,0 +1,157 @@
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:37:48 +1000
+Subject: drm/drivers: add support for using the arch wc mapping API.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 7cf321d118a825c1541b43ca45294126fd474efa upstream.
+
+This fixes a regression in all these drivers since the cache
+mode tracking was fixed for mixed mappings. It uses the new
+arch API to add the VRAM range to the PAT mapping tracking
+tables.
+
+Fixes: 87744ab3832 (mm: fix cache mode tracking in vm_insert_mixed())
+Reviewed-by: Christian König <christian.koenig@amd.com>.
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+[bwh: Backported to 3.16:
+ - Drop changes in amdgpu
+ - In nouveau, use struct nouveau_device * and nv_device_resource_{start,len}()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/drivers/gpu/drm/ast/ast_ttm.c
++++ b/drivers/gpu/drm/ast/ast_ttm.c
+@@ -275,6 +275,8 @@ int ast_mm_init(struct ast_private *ast)
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -283,11 +285,15 @@ int ast_mm_init(struct ast_private *ast)
+
+ void ast_mm_fini(struct ast_private *ast)
+ {
++ struct drm_device *dev = ast->dev;
++
+ ttm_bo_device_release(&ast->ttm.bdev);
+
+ ast_ttm_global_release(ast);
+
+ arch_phys_wc_del(ast->fb_mtrr);
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ }
+
+ void ast_ttm_placement(struct ast_bo *bo, int domain)
+--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
++++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
+@@ -275,6 +275,9 @@ int cirrus_mm_init(struct cirrus_device
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
++
+ cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -284,6 +287,8 @@ int cirrus_mm_init(struct cirrus_device
+
+ void cirrus_mm_fini(struct cirrus_device *cirrus)
+ {
++ struct drm_device *dev = cirrus->dev;
++
+ if (!cirrus->mm_inited)
+ return;
+
+@@ -293,6 +298,8 @@ void cirrus_mm_fini(struct cirrus_device
+
+ arch_phys_wc_del(cirrus->fb_mtrr);
+ cirrus->fb_mtrr = 0;
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ }
+
+ void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
+--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
++++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
+@@ -274,6 +274,9 @@ int mgag200_mm_init(struct mga_device *m
+ return ret;
+ }
+
++ arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
++
+ mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+ pci_resource_len(dev->pdev, 0));
+
+@@ -282,10 +285,14 @@ int mgag200_mm_init(struct mga_device *m
+
+ void mgag200_mm_fini(struct mga_device *mdev)
+ {
++ struct drm_device *dev = mdev->dev;
++
+ ttm_bo_device_release(&mdev->ttm.bdev);
+
+ mgag200_ttm_global_release(mdev);
+
++ arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
++ pci_resource_len(dev->pdev, 0));
+ arch_phys_wc_del(mdev->fb_mtrr);
+ mdev->fb_mtrr = 0;
+ }
+--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
+@@ -397,6 +397,9 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ drm->gem.vram_available = nouveau_fb(drm->device)->ram->size;
+ drm->gem.vram_available -= nouveau_instmem(drm->device)->reserved;
+
++ arch_io_reserve_memtype_wc(nv_device_resource_start(device, 1),
++ nv_device_resource_len(device, 1));
++
+ ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM,
+ drm->gem.vram_available >> PAGE_SHIFT);
+ if (ret) {
+@@ -429,6 +432,8 @@ nouveau_ttm_init(struct nouveau_drm *drm
+ void
+ nouveau_ttm_fini(struct nouveau_drm *drm)
+ {
++ struct nouveau_device *device = nv_device(drm->device);
++
+ mutex_lock(&drm->dev->struct_mutex);
+ ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
+ ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
+@@ -440,4 +445,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm
+
+ arch_phys_wc_del(drm->ttm.mtrr);
+ drm->ttm.mtrr = 0;
++ arch_io_free_memtype_wc(nv_device_resource_start(device, 1),
++ nv_device_resource_len(device, 1));
++
+ }
+--- a/drivers/gpu/drm/radeon/radeon_object.c
++++ b/drivers/gpu/drm/radeon/radeon_object.c
+@@ -359,6 +359,10 @@ void radeon_bo_force_delete(struct radeo
+
+ int radeon_bo_init(struct radeon_device *rdev)
+ {
++ /* reserve PAT memory space to WC for VRAM */
++ arch_io_reserve_memtype_wc(rdev->mc.aper_base,
++ rdev->mc.aper_size);
++
+ /* Add an MTRR for the VRAM */
+ if (!rdev->fastfb_working) {
+ rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
+@@ -376,6 +380,7 @@ void radeon_bo_fini(struct radeon_device
+ {
+ radeon_ttm_fini(rdev);
+ arch_phys_wc_del(rdev->mc.vram_mtrr);
++ arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
+ }
+
+ /* Returns how many bytes TTM can move per IB.
diff --git a/queue-3.16/mm-add-vm_insert_pfn_prot.patch b/queue-3.16/mm-add-vm_insert_pfn_prot.patch
new file mode 100644
index 00000000..603d67fe
--- /dev/null
+++ b/queue-3.16/mm-add-vm_insert_pfn_prot.patch
@@ -0,0 +1,97 @@
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 29 Dec 2015 20:12:20 -0800
+Subject: mm: Add vm_insert_pfn_prot()
+
+commit 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 upstream.
+
+The x86 vvar vma contains pages with differing cacheability
+flags. x86 currently implements this by manually inserting all
+the ptes using (io_)remap_pfn_range when the vma is set up.
+
+x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up
+the mappings as needed. The correct API to use to insert a pfn
+in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the
+vma's cache mode, and the HPET page in particular needs to be
+uncached despite the fact that the rest of the VMA is cached.
+
+Add vm_insert_pfn_prot() to support varying cacheability within
+the same non-COW VMA in a more sane manner.
+
+x86 could alternatively use multiple VMAs, but that's messy,
+would break CRIU, and would create unnecessary VMAs that would
+waste memory.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Acked-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h | 2 ++
+ mm/memory.c | 25 +++++++++++++++++++++++--
+ 2 files changed, 25 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1965,6 +1965,8 @@ int remap_pfn_range(struct vm_area_struc
+ int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn);
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++ unsigned long pfn, pgprot_t pgprot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn);
+ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1574,8 +1574,29 @@ out:
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+ {
++ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
++}
++EXPORT_SYMBOL(vm_insert_pfn);
++
++/**
++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
++ * @vma: user vma to map to
++ * @addr: target user address of this page
++ * @pfn: source kernel pfn
++ * @pgprot: pgprot flags for the inserted page
++ *
++ * This is exactly like vm_insert_pfn, except that it allows drivers to
++ * to override pgprot on a per-page basis.
++ *
++ * This only makes sense for IO mappings, and it makes no sense for
++ * cow mappings. In general, using multiple vmas is preferable;
++ * vm_insert_pfn_prot should only be used if using multiple VMAs is
++ * impractical.
++ */
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
++ unsigned long pfn, pgprot_t pgprot)
++{
+ int ret;
+- pgprot_t pgprot = vma->vm_page_prot;
+ /*
+ * Technically, architectures with pte_special can avoid all these
+ * restrictions (same for remap_pfn_range). However we would like
+@@ -1597,7 +1618,7 @@ int vm_insert_pfn(struct vm_area_struct
+
+ return ret;
+ }
+-EXPORT_SYMBOL(vm_insert_pfn);
++EXPORT_SYMBOL(vm_insert_pfn_prot);
+
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
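
The following sketch shows the usage pattern the commit message describes for vm_insert_pfn_prot(): a .fault handler that maps one uncached page inside an otherwise cached, non-COW VMA. It is written against the 3.16-era fault API purely as an illustration; the example_* names and the choice of PFN are hypothetical and do not come from the patch.

/* Illustration only -- not part of the patch. */
#include <linux/mm.h>

static unsigned long example_special_pfn;	/* hypothetical PFN to expose uncached */

static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->virtual_address;

	/* Insert a single uncached page; the rest of the VMA keeps
	 * vma->vm_page_prot (typically cached). */
	if (vm_insert_pfn_prot(vma, addr, example_special_pfn,
			       pgprot_noncached(vma->vm_page_prot)))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct example_vm_ops = {
	.fault	= example_fault,
};

This is roughly the shape the x86 vvar code later takes for the HPET page, which is the use case the changelog cites.
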
diff --git a/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch b/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
new file mode 100644
index 00000000..f74904b8
--- /dev/null
+++ b/queue-3.16/mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
@@ -0,0 +1,59 @@
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 7 Oct 2016 17:00:18 -0700
+Subject: mm: fix cache mode tracking in vm_insert_mixed()
+
+commit 9ac0dc7d949db7afd4116d55fa4fcf6a66d820f0 upstream.
+
+commit 87744ab3832b83ba71b931f86f9cfdb000d07da5 upstream
+
+vm_insert_mixed() unlike vm_insert_pfn_prot() and vmf_insert_pfn_pmd(),
+fails to check the pgprot_t it uses for the mapping against the one
+recorded in the memtype tracking tree. Add the missing call to
+track_pfn_insert() to preclude cases where incompatible aliased mappings
+are established for a given physical address range.
+
+[groeck: Backport to v4.4.y]
+
+Link: http://lkml.kernel.org/r/147328717909.35069.14256589123570653697.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: David Airlie <airlied@linux.ie>
+Cc: Matthew Wilcox <mawilcox@microsoft.com>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ mm/memory.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1623,10 +1623,14 @@ EXPORT_SYMBOL(vm_insert_pfn_prot);
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+ {
++ pgprot_t pgprot = vma->vm_page_prot;
++
+ BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return -EFAULT;
++ if (track_pfn_insert(vma, &pgprot, pfn))
++ return -EINVAL;
+
+ /*
+ * If we don't have pte special, then we have to use the pfn_valid()
+@@ -1639,9 +1643,9 @@ int vm_insert_mixed(struct vm_area_struc
+ struct page *page;
+
+ page = pfn_to_page(pfn);
+- return insert_page(vma, addr, page, vma->vm_page_prot);
++ return insert_page(vma, addr, page, pgprot);
+ }
+- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
++ return insert_pfn(vma, addr, pfn, pgprot);
+ }
+ EXPORT_SYMBOL(vm_insert_mixed);
+
diff --git a/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch b/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
new file mode 100644
index 00000000..9e9400cf
--- /dev/null
+++ b/queue-3.16/mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
@@ -0,0 +1,74 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:27:34 -0800
+Subject: mm/pagewalk: remove pgd_entry() and pud_entry()
+
+commit 0b1fbfe50006c41014cc25660c0e735d21c34939 upstream.
+
+Currently no user of page table walker sets ->pgd_entry() or
+->pud_entry(), so checking their existence in each loop is just wasting
+CPU cycle. So let's remove it to reduce overhead.
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16 as dependency of L1TF mitigation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h | 6 ------
+ mm/pagewalk.c | 9 ++-------
+ 2 files changed, 2 insertions(+), 13 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1114,8 +1114,6 @@ void unmap_vmas(struct mmu_gather *tlb,
+
+ /**
+ * mm_walk - callbacks for walk_page_range
+- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
+- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ * this handler is required to be able to handle
+ * pmd_trans_huge() pmds. They may simply choose to
+@@ -1129,10 +1127,6 @@ void unmap_vmas(struct mmu_gather *tlb,
+ * (see walk_page_range for more details)
+ */
+ struct mm_walk {
+- int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
+- unsigned long next, struct mm_walk *walk);
+- int (*pud_entry)(pud_t *pud, unsigned long addr,
+- unsigned long next, struct mm_walk *walk);
+ int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
+ int (*pte_entry)(pte_t *pte, unsigned long addr,
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -86,9 +86,7 @@ static int walk_pud_range(pgd_t *pgd, un
+ break;
+ continue;
+ }
+- if (walk->pud_entry)
+- err = walk->pud_entry(pud, addr, next, walk);
+- if (!err && (walk->pmd_entry || walk->pte_entry))
++ if (walk->pmd_entry || walk->pte_entry)
+ err = walk_pmd_range(pud, addr, next, walk);
+ if (err)
+ break;
+@@ -237,10 +235,7 @@ int walk_page_range(unsigned long addr,
+ pgd++;
+ continue;
+ }
+- if (walk->pgd_entry)
+- err = walk->pgd_entry(pgd, addr, next, walk);
+- if (!err &&
+- (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
++ if (walk->pmd_entry || walk->pte_entry)
+ err = walk_pud_range(pgd, addr, next, walk);
+ if (err)
+ break;
diff --git a/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch b/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
new file mode 100644
index 00000000..704eb1c8
--- /dev/null
+++ b/queue-3.16/mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
@@ -0,0 +1,97 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Fri, 8 Sep 2017 16:10:46 -0700
+Subject: mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 1
+
+commit eee4818baac0f2b37848fdf90e4b16430dc536ac upstream.
+
+_PAGE_PSE is used to distinguish between a truly non-present
+(_PAGE_PRESENT=0) PMD, and a PMD which is undergoing a THP split and
+should be treated as present.
+
+But _PAGE_SWP_SOFT_DIRTY currently uses the _PAGE_PSE bit, which would
+cause confusion between one of those PMDs undergoing a THP split, and a
+soft-dirty PMD. Dropping _PAGE_PSE check in pmd_present() does not work
+well, because it can hurt optimization of tlb handling in thp split.
+
+Thus, we need to move the bit.
+
+In the current kernel, bits 1-4 are not used in non-present format since
+commit 00839ee3b299 ("x86/mm: Move swap offset/type up in PTE to work
+around erratum"). So let's move _PAGE_SWP_SOFT_DIRTY to bit 1. Bit 7
+is used as reserved (always clear), so please don't use it for other
+purpose.
+
+Link: http://lkml.kernel.org/r/20170717193955.20207-3-zi.yan@sent.com
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+Cc: David Nellans <dnellans@nvidia.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h | 12 +++++++++---
+ arch/x86/include/asm/pgtable_types.h | 10 +++++-----
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -165,15 +165,21 @@ static inline int pgd_large(pgd_t pgd) {
+ /*
+ * Encode and de-code a swap entry
+ *
+- * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
+- * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+- * | OFFSET (15->63) | TYPE (10-14) | 0 |0|X|X|X| X| X|X|X|0| <- swp entry
++ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
++ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
++ * | OFFSET (15->63) | TYPE (10-14) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+ * there. We also need to avoid using A and D because of an
+ * erratum where they can be incorrectly set by hardware on
+ * non-present PTEs.
++ *
++ * SD (1) in swp entry is used to store soft dirty bit, which helps us
++ * remember soft dirty over page migration
++ *
++ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
++ * but also L and G.
+ */
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -94,15 +94,15 @@
+ /*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+- * with swap entry format. On x86 bits 6 and 7 are *not* involved
+- * into swap entry computation, but bit 6 is used for nonlinear
+- * file mapping, so we borrow bit 7 for soft dirty tracking.
++ * with swap entry format. On x86 bits 1-4 are *not* involved
++ * into swap entry computation, but bit 7 is used for thp migration,
++ * so we borrow bit 1 for soft dirty tracking.
+ *
+ * Please note that this bit must be treated as swap dirty page
+- * mark if and only if the PTE has present bit clear!
++ * mark if and only if the PTE/PMD has present bit clear!
+ */
+ #ifdef CONFIG_MEM_SOFT_DIRTY
+-#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
++#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW
+ #else
+ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
+ #endif
diff --git a/queue-3.16/pagewalk-improve-vma-handling.patch b/queue-3.16/pagewalk-improve-vma-handling.patch
new file mode 100644
index 00000000..79f7d2f9
--- /dev/null
+++ b/queue-3.16/pagewalk-improve-vma-handling.patch
@@ -0,0 +1,341 @@
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:27:37 -0800
+Subject: pagewalk: improve vma handling
+
+commit fafaa4264eba49fd10695c193a82760558d093f4 upstream.
+
+Current implementation of page table walker has a fundamental problem in
+vma handling, which started when we tried to handle vma(VM_HUGETLB).
+Because it's done in pgd loop, considering vma boundary makes code
+complicated and bug-prone.
+
+From the users viewpoint, some user checks some vma-related condition to
+determine whether the user really does page walk over the vma.
+
+In order to solve these, this patch moves vma check outside pgd loop and
+introduce a new callback ->test_walk().
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16 as dependency of L1TF mitigation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/mm.h | 15 +++-
+ mm/pagewalk.c | 206 +++++++++++++++++++++++++--------------------
+ 2 files changed, 129 insertions(+), 92 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1121,10 +1121,16 @@ void unmap_vmas(struct mmu_gather *tlb,
+ * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
+ * @pte_hole: if set, called for each hole at all levels
+ * @hugetlb_entry: if set, called for each hugetlb entry
+- * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
+- * is used.
++ * @test_walk: caller specific callback function to determine whether
++ * we walk over the current vma or not. Returning 0
++ * value means "do page table walk over the current vma,"
++ * and a negative one means "abort current page table walk
++ * right now." 1 means "skip the current vma."
++ * @mm: mm_struct representing the target process of page table walk
++ * @vma: vma currently walked (NULL if walking outside vmas)
++ * @private: private data for callbacks' usage
+ *
+- * (see walk_page_range for more details)
++ * (see the comment on walk_page_range() for more details)
+ */
+ struct mm_walk {
+ int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+@@ -1136,7 +1142,10 @@ struct mm_walk {
+ int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long next,
+ struct mm_walk *walk);
++ int (*test_walk)(unsigned long addr, unsigned long next,
++ struct mm_walk *walk);
+ struct mm_struct *mm;
++ struct vm_area_struct *vma;
+ void *private;
+ };
+
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -59,7 +59,7 @@ again:
+ continue;
+
+ split_huge_page_pmd_mm(walk->mm, addr, pmd);
+- if (pmd_none_or_trans_huge_or_clear_bad(pmd))
++ if (pmd_trans_unstable(pmd))
+ goto again;
+ err = walk_pte_range(pmd, addr, next, walk);
+ if (err)
+@@ -95,6 +95,32 @@ static int walk_pud_range(pgd_t *pgd, un
+ return err;
+ }
+
++static int walk_pgd_range(unsigned long addr, unsigned long end,
++ struct mm_walk *walk)
++{
++ pgd_t *pgd;
++ unsigned long next;
++ int err = 0;
++
++ pgd = pgd_offset(walk->mm, addr);
++ do {
++ next = pgd_addr_end(addr, end);
++ if (pgd_none_or_clear_bad(pgd)) {
++ if (walk->pte_hole)
++ err = walk->pte_hole(addr, next, walk);
++ if (err)
++ break;
++ continue;
++ }
++ if (walk->pmd_entry || walk->pte_entry)
++ err = walk_pud_range(pgd, addr, next, walk);
++ if (err)
++ break;
++ } while (pgd++, addr = next, addr != end);
++
++ return err;
++}
++
+ #ifdef CONFIG_HUGETLB_PAGE
+ static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+ unsigned long end)
+@@ -103,10 +129,10 @@ static unsigned long hugetlb_entry_end(s
+ return boundary < end ? boundary : end;
+ }
+
+-static int walk_hugetlb_range(struct vm_area_struct *vma,
+- unsigned long addr, unsigned long end,
++static int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+ {
++ struct vm_area_struct *vma = walk->vma;
+ struct hstate *h = hstate_vma(vma);
+ unsigned long next;
+ unsigned long hmask = huge_page_mask(h);
+@@ -119,15 +145,14 @@ static int walk_hugetlb_range(struct vm_
+ if (pte && walk->hugetlb_entry)
+ err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+ if (err)
+- return err;
++ break;
+ } while (addr = next, addr != end);
+
+- return 0;
++ return err;
+ }
+
+ #else /* CONFIG_HUGETLB_PAGE */
+-static int walk_hugetlb_range(struct vm_area_struct *vma,
+- unsigned long addr, unsigned long end,
++static int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+ {
+ return 0;
+@@ -135,112 +160,115 @@ static int walk_hugetlb_range(struct vm_
+
+ #endif /* CONFIG_HUGETLB_PAGE */
+
++/*
++ * Decide whether we really walk over the current vma on [@start, @end)
++ * or skip it via the returned value. Return 0 if we do walk over the
++ * current vma, and return 1 if we skip the vma. Negative values means
++ * error, where we abort the current walk.
++ *
++ * Default check (only VM_PFNMAP check for now) is used when the caller
++ * doesn't define test_walk() callback.
++ */
++static int walk_page_test(unsigned long start, unsigned long end,
++ struct mm_walk *walk)
++{
++ struct vm_area_struct *vma = walk->vma;
+
++ if (walk->test_walk)
++ return walk->test_walk(start, end, walk);
++
++ /*
++ * Do not walk over vma(VM_PFNMAP), because we have no valid struct
++ * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
++ */
++ if (vma->vm_flags & VM_PFNMAP)
++ return 1;
++ return 0;
++}
++
++static int __walk_page_range(unsigned long start, unsigned long end,
++ struct mm_walk *walk)
++{
++ int err = 0;
++ struct vm_area_struct *vma = walk->vma;
++
++ if (vma && is_vm_hugetlb_page(vma)) {
++ if (walk->hugetlb_entry)
++ err = walk_hugetlb_range(start, end, walk);
++ } else
++ err = walk_pgd_range(start, end, walk);
++
++ return err;
++}
+
+ /**
+- * walk_page_range - walk a memory map's page tables with a callback
+- * @addr: starting address
+- * @end: ending address
+- * @walk: set of callbacks to invoke for each level of the tree
++ * walk_page_range - walk page table with caller specific callbacks
+ *
+- * Recursively walk the page table for the memory area in a VMA,
+- * calling supplied callbacks. Callbacks are called in-order (first
+- * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
+- * etc.). If lower-level callbacks are omitted, walking depth is reduced.
++ * Recursively walk the page table tree of the process represented by @walk->mm
++ * within the virtual address range [@start, @end). During walking, we can do
++ * some caller-specific works for each entry, by setting up pmd_entry(),
++ * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
++ * callbacks, the associated entries/pages are just ignored.
++ * The return values of these callbacks are commonly defined like below:
++ * - 0 : succeeded to handle the current entry, and if you don't reach the
++ * end address yet, continue to walk.
++ * - >0 : succeeded to handle the current entry, and return to the caller
++ * with caller specific value.
++ * - <0 : failed to handle the current entry, and return to the caller
++ * with error code.
+ *
+- * Each callback receives an entry pointer and the start and end of the
+- * associated range, and a copy of the original mm_walk for access to
+- * the ->private or ->mm fields.
++ * Before starting to walk page table, some callers want to check whether
++ * they really want to walk over the current vma, typically by checking
++ * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
++ * purpose.
+ *
+- * Usually no locks are taken, but splitting transparent huge page may
+- * take page table lock. And the bottom level iterator will map PTE
+- * directories from highmem if necessary.
++ * struct mm_walk keeps current values of some common data like vma and pmd,
++ * which are useful for the access from callbacks. If you want to pass some
++ * caller-specific data to callbacks, @walk->private should be helpful.
+ *
+- * If any callback returns a non-zero value, the walk is aborted and
+- * the return value is propagated back to the caller. Otherwise 0 is returned.
+- *
+- * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
+- * is !NULL.
++ * Locking:
++ * Callers of walk_page_range() and walk_page_vma() should hold
++ * @walk->mm->mmap_sem, because these function traverse vma list and/or
++ * access to vma's data.
+ */
+-int walk_page_range(unsigned long addr, unsigned long end,
++int walk_page_range(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+ {
+- pgd_t *pgd;
+- unsigned long next;
+ int err = 0;
++ unsigned long next;
++ struct vm_area_struct *vma;
+
+- if (addr >= end)
+- return err;
++ if (start >= end)
++ return -EINVAL;
+
+ if (!walk->mm)
+ return -EINVAL;
+
+ VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
+- pgd = pgd_offset(walk->mm, addr);
++ vma = find_vma(walk->mm, start);
+ do {
+- struct vm_area_struct *vma = NULL;
+-
+- next = pgd_addr_end(addr, end);
++ if (!vma) { /* after the last vma */
++ walk->vma = NULL;
++ next = end;
++ } else if (start < vma->vm_start) { /* outside vma */
++ walk->vma = NULL;
++ next = min(end, vma->vm_start);
++ } else { /* inside vma */
++ walk->vma = vma;
++ next = min(end, vma->vm_end);
++ vma = vma->vm_next;
+
+- /*
+- * This function was not intended to be vma based.
+- * But there are vma special cases to be handled:
+- * - hugetlb vma's
+- * - VM_PFNMAP vma's
+- */
+- vma = find_vma(walk->mm, addr);
+- if (vma) {
+- /*
+- * There are no page structures backing a VM_PFNMAP
+- * range, so do not allow split_huge_page_pmd().
+- */
+- if ((vma->vm_start <= addr) &&
+- (vma->vm_flags & VM_PFNMAP)) {
+- if (walk->pte_hole)
+- err = walk->pte_hole(addr, next, walk);
+- if (err)
+- break;
+- pgd = pgd_offset(walk->mm, next);
+- continue;
+- }
+- /*
+- * Handle hugetlb vma individually because pagetable
+- * walk for the hugetlb page is dependent on the
+- * architecture and we can't handled it in the same
+- * manner as non-huge pages.
+- */
+- if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+- is_vm_hugetlb_page(vma)) {
+- if (vma->vm_end < next)
+- next = vma->vm_end;
+- /*
+- * Hugepage is very tightly coupled with vma,
+- * so walk through hugetlb entries within a
+- * given vma.
+- */
+- err = walk_hugetlb_range(vma, addr, next, walk);
+- if (err)
+- break;
+- pgd = pgd_offset(walk->mm, next);
++ err = walk_page_test(start, next, walk);
++ if (err > 0)
+ continue;
+- }
+- }
+-
+- if (pgd_none_or_clear_bad(pgd)) {
+- if (walk->pte_hole)
+- err = walk->pte_hole(addr, next, walk);
+- if (err)
++ if (err < 0)
+ break;
+- pgd++;
+- continue;
+ }
+- if (walk->pmd_entry || walk->pte_entry)
+- err = walk_pud_range(pgd, addr, next, walk);
++ if (walk->vma || walk->pte_hole)
++ err = __walk_page_range(start, next, walk);
+ if (err)
+ break;
+- pgd++;
+- } while (addr = next, addr < end);
+-
++ } while (start = next, start < end);
+ return err;
+ }
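
To make the new callback interface concrete, here is a minimal, hypothetical walker using ->test_walk() and ->pte_entry(). None of the example_* symbols come from the patch, and the caller is assumed to hold mmap_sem for read, as the locking comment above requires.

/* Illustration only -- not part of the patch. */
#include <linux/mm.h>

static int example_test_walk(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	/* 1 = skip this vma, 0 = walk it, negative = abort the whole walk. */
	if (walk->vma->vm_file)
		return 1;	/* only look at anonymous mappings */
	return 0;
}

static int example_pte_entry(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	/* Inspect *pte here; returning non-zero stops the walk. */
	return 0;
}

static int example_walk_anon(struct mm_struct *mm, unsigned long start,
			     unsigned long end)
{
	struct mm_walk walk = {
		.pte_entry	= example_pte_entry,
		.test_walk	= example_test_walk,
		.mm		= mm,
	};

	return walk_page_range(start, end, &walk);	/* mmap_sem held by caller */
}
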
diff --git a/queue-3.16/series b/queue-3.16/series
index 5de06fd9..2bad97d0 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -88,3 +88,34 @@ unicore32-drop-pte_file-related-helpers.patch
x86-drop-_page_file-and-pte_file-related-helpers.patch
xtensa-drop-_page_file-and-pte_file-related-helpers.patch
powerpc-drop-_page_file-and-pte_file-related-helpers.patch
+x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
+x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
+mm-x86-move-_page_swp_soft_dirty-from-bit-7-to-bit-1.patch
+x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
+x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
+x86-mm-add-pud-functions.patch
+x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
+x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
+x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
+mm-add-vm_insert_pfn_prot.patch
+mm-fix-cache-mode-tracking-in-vm_insert_mixed.patch
+x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
+drm-drivers-add-support-for-using-the-arch-wc-mapping-api.patch
+mm-pagewalk-remove-pgd_entry-and-pud_entry.patch
+pagewalk-improve-vma-handling.patch
+x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
+x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
+x86-init-fix-build-with-config_swap-n.patch
+x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
+x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
+x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
+x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
+x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
+x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
+x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
+x86-speculation-l1tf-invert-all-not-present-mappings.patch
+x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
+x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
+x86-mm-pat-make-set_memory_np-l1tf-safe.patch
+x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
+x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
diff --git a/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch b/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
new file mode 100644
index 00000000..e8858c70
--- /dev/null
+++ b/queue-3.16/x86-bugs-move-the-l1tf-function-and-define-pr_fmt-properly.patch
@@ -0,0 +1,93 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 20 Jun 2018 16:42:57 -0400
+Subject: x86/bugs: Move the l1tf function and define pr_fmt properly
+
+commit 56563f53d3066afa9e63d6c997bf67e76a8b05c0 upstream.
+
+The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt
+which was defined as "Spectre V2 : ".
+
+Move the function to be past SSBD and also define the pr_fmt.
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++------------------
+ 1 file changed, 29 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -270,32 +270,6 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
+-static void __init l1tf_select_mitigation(void)
+-{
+- u64 half_pa;
+-
+- if (!boot_cpu_has_bug(X86_BUG_L1TF))
+- return;
+-
+-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+- pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+- return;
+-#endif
+-
+- /*
+- * This is extremely unlikely to happen because almost all
+- * systems have far more MAX_PA/2 than RAM can be fit into
+- * DIMM slots.
+- */
+- half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+- if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+- pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+- return;
+- }
+-
+- setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+-}
+-
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -721,6 +695,35 @@ void x86_spec_ctrl_setup_ap(void)
+ x86_amd_ssb_disable();
+ }
+
++#undef pr_fmt
++#define pr_fmt(fmt) "L1TF: " fmt
++static void __init l1tf_select_mitigation(void)
++{
++ u64 half_pa;
++
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return;
++
++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
++ pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++ return;
++#endif
++
++ /*
++ * This is extremely unlikely to happen because almost all
++ * systems have far more MAX_PA/2 than RAM can be fit into
++ * DIMM slots.
++ */
++ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++#undef pr_fmt
++
+ #ifdef CONFIG_SYSFS
+
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
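
For readers who do not have the pr_fmt convention in their head, the effect of the #define added above is purely textual; the expansion below is an illustration, not patch content.

/*
 * With '#define pr_fmt(fmt) "L1TF: " fmt' in effect,
 *
 *     pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
 *
 * expands to roughly
 *
 *     printk(KERN_WARNING "L1TF: " "Kernel not compiled for PAE. ...\n");
 *
 * Before the move, l1tf_select_mitigation() sat under the earlier
 * '#define pr_fmt(fmt) "Spectre V2 : " fmt' in bugs.c, so its warnings
 * carried the wrong prefix. The trailing #undef keeps "L1TF: " from
 * leaking into code added after this block.
 */
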
diff --git a/queue-3.16/x86-init-fix-build-with-config_swap-n.patch b/queue-3.16/x86-init-fix-build-with-config_swap-n.patch
new file mode 100644
index 00000000..8e886556
--- /dev/null
+++ b/queue-3.16/x86-init-fix-build-with-config_swap-n.patch
@@ -0,0 +1,35 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Tue, 14 Aug 2018 20:50:47 +0200
+Subject: x86/init: fix build with CONFIG_SWAP=n
+
+commit 792adb90fa724ce07c0171cbc96b9215af4b1045 upstream.
+
+The introduction of generic_max_swapfile_size and arch-specific versions has
+broken linking on x86 with CONFIG_SWAP=n due to undefined reference to
+'generic_max_swapfile_size'. Fix it by compiling the x86-specific
+max_swapfile_size() only with CONFIG_SWAP=y.
+
+Reported-by: Tomas Pruzina <pruzinat@gmail.com>
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/init.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -701,6 +701,7 @@ void __init zone_sizes_init(void)
+ free_area_init_nodes(max_zone_pfns);
+ }
+
++#ifdef CONFIG_SWAP
+ unsigned long max_swapfile_size(void)
+ {
+ unsigned long pages;
+@@ -713,3 +714,4 @@ unsigned long max_swapfile_size(void)
+ }
+ return pages;
+ }
++#endif
diff --git a/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch b/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
new file mode 100644
index 00000000..50109452
--- /dev/null
+++ b/queue-3.16/x86-io-add-interface-to-reserve-io-memtype-for-a-resource-range.patch
@@ -0,0 +1,118 @@
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 24 Oct 2016 15:27:59 +1000
+Subject: x86/io: add interface to reserve io memtype for a resource range.
+ (v1.1)
+
+commit 8ef4227615e158faa4ee85a1d6466782f7e22f2f upstream.
+
+A recent change to the mm code in:
+87744ab3832b mm: fix cache mode tracking in vm_insert_mixed()
+
+started enforcing checking the memory type against the registered list for
+amixed pfn insertion mappings. It happens that the drm drivers for a number
+of gpus relied on this being broken. Currently the driver only inserted
+VRAM mappings into the tracking table when they came from the kernel,
+and userspace mappings never landed in the table. This led to a regression
+where all the mapping end up as UC instead of WC now.
+
+I've considered a number of solutions but since this needs to be fixed
+in fixes and not next, and some of the solutions were going to introduce
+overhead that hadn't been there before I didn't consider them viable at
+this stage. These mainly concerned hooking into the TTM io reserve APIs,
+but these API have a bunch of fast paths I didn't want to unwind to add
+this to.
+
+The solution I've decided on is to add a new API like the arch_phys_wc
+APIs (these would have worked but wc_del didn't take a range), and
+use them from the drivers to add a WC compatible mapping to the table
+for all VRAM on those GPUs. This means we can then create userspace
+mapping that won't get degraded to UC.
+
+v1.1: use CONFIG_X86_PAT + add some comments in io.h
+
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: x86@kernel.org
+Cc: mcgrof@suse.com
+Cc: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+[bwh: Backported to 3.16: Memory types have type unsigned long, and the
+ constant is named _PAGE_CACHE_WC instead of _PAGE_CACHE_MODE_WC.]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/io.h | 6 ++++++
+ arch/x86/mm/pat.c | 14 ++++++++++++++
+ include/linux/io.h | 22 ++++++++++++++++++++++
+ 3 files changed, 42 insertions(+)
+
+--- a/arch/x86/include/asm/io.h
++++ b/arch/x86/include/asm/io.h
+@@ -340,4 +340,10 @@ extern void arch_phys_wc_del(int handle)
+ #define arch_phys_wc_add arch_phys_wc_add
+ #endif
+
++#ifdef CONFIG_X86_PAT
++extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
++extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
++#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
++#endif
++
+ #endif /* _ASM_X86_IO_H */
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -481,6 +481,20 @@ void io_free_memtype(resource_size_t sta
+ free_memtype(start, end);
+ }
+
++int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
++{
++ unsigned long type = _PAGE_CACHE_WC;
++
++ return io_reserve_memtype(start, start + size, &type);
++}
++EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
++
++void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
++{
++ io_free_memtype(start, start + size);
++}
++EXPORT_SYMBOL(arch_io_free_memtype_wc);
++
+ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot)
+ {
+--- a/include/linux/io.h
++++ b/include/linux/io.h
+@@ -101,4 +101,26 @@ static inline void arch_phys_wc_del(int
+ #define arch_phys_wc_add arch_phys_wc_add
+ #endif
+
++/*
++ * On x86 PAT systems we have memory tracking that keeps track of
++ * the allowed mappings on memory ranges. This tracking works for
++ * all the in-kernel mapping APIs (ioremap*), but where the user
++ * wishes to map a range from a physical device into user memory
++ * the tracking won't be updated. This API is to be used by
++ * drivers which remap physical device pages into userspace,
++ * and wants to make sure they are mapped WC and not UC.
++ */
++#ifndef arch_io_reserve_memtype_wc
++static inline int arch_io_reserve_memtype_wc(resource_size_t base,
++ resource_size_t size)
++{
++ return 0;
++}
++
++static inline void arch_io_free_memtype_wc(resource_size_t base,
++ resource_size_t size)
++{
++}
++#endif
++
+ #endif /* _LINUX_IO_H */
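
The calling pattern the changelog has in mind is easiest to see in miniature. The sketch below pairs the two new calls around a VRAM BAR, mirroring the drm/drivers patch earlier in this queue; the example_* helpers are hypothetical and BAR 0 is only an assumption.

/* Illustration only -- not part of the patch. */
#include <linux/io.h>
#include <linux/pci.h>

static int example_vram_setup(struct pci_dev *pdev)
{
	/* Record the whole VRAM aperture as WC in the PAT memtype tree so
	 * that later userspace mappings of it are not degraded to UC. */
	arch_io_reserve_memtype_wc(pci_resource_start(pdev, 0),
				   pci_resource_len(pdev, 0));
	return 0;
}

static void example_vram_teardown(struct pci_dev *pdev)
{
	arch_io_free_memtype_wc(pci_resource_start(pdev, 0),
				pci_resource_len(pdev, 0));
}
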
diff --git a/queue-3.16/x86-mm-add-pud-functions.patch b/queue-3.16/x86-mm-add-pud-functions.patch
new file mode 100644
index 00000000..c985d260
--- /dev/null
+++ b/queue-3.16/x86-mm-add-pud-functions.patch
@@ -0,0 +1,51 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Fri, 28 Sep 2018 01:15:29 +0100
+Subject: x86: mm: Add PUD functions
+
+These are extracted from commit a00cc7d9dd93 "mm, x86: add support for
+PUD-sized transparent hugepages" and will be used by later patches.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -303,6 +303,25 @@ static inline pmd_t pmd_mknotpresent(pmd
+ return pmd_clear_flags(pmd, _PAGE_PRESENT);
+ }
+
++static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v | set);
++}
++
++static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v & ~clear);
++}
++
++static inline pud_t pud_mkhuge(pud_t pud)
++{
++ return pud_set_flags(pud, _PAGE_PSE);
++}
++
+ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+ static inline int pte_soft_dirty(pte_t pte)
+ {
+@@ -352,6 +371,12 @@ static inline pmd_t pfn_pmd(unsigned lon
+ massage_pgprot(pgprot));
+ }
+
++static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
++{
++ return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
++ massage_pgprot(pgprot));
++}
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+ pteval_t val = pte_val(pte);
diff --git a/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch b/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
new file mode 100644
index 00000000..923a8667
--- /dev/null
+++ b/queue-3.16/x86-mm-kmmio-make-the-tracer-robust-against-l1tf.patch
@@ -0,0 +1,66 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:38 -0700
+Subject: x86/mm/kmmio: Make the tracer robust against L1TF
+
+commit 1063711b57393c1999248cccb57bebfaf16739e7 upstream.
+
+The mmio tracer sets io mapping PTEs and PMDs to non present when enabled
+without inverting the address bits, which makes the PTE entry vulnerable
+for L1TF.
+
+Make it use the right low level macros to actually invert the address bits
+to protect against L1TF.
+
+In principle this could be avoided because MMIO tracing is not likely to be
+enabled on production machines, but the fix is straigt forward and for
+consistency sake it's better to get rid of the open coded PTE manipulation.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/kmmio.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/mm/kmmio.c
++++ b/arch/x86/mm/kmmio.c
+@@ -114,24 +114,29 @@ static struct kmmio_fault_page *get_kmmi
+
+ static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+ {
++ pmd_t new_pmd;
+ pmdval_t v = pmd_val(*pmd);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pmd(pmd, __pmd(v));
++ *old = v;
++ new_pmd = pmd_mknotpresent(*pmd);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ new_pmd = __pmd(*old);
++ }
++ set_pmd(pmd, new_pmd);
+ }
+
+ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
+ {
+ pteval_t v = pte_val(*pte);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pte_atomic(pte, __pte(v));
++ *old = v;
++ /* Nothing should care about address */
++ pte_clear(&init_mm, 0, pte);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ set_pte_atomic(pte, __pte(*old));
++ }
+ }
+
+ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
diff --git a/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch b/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
new file mode 100644
index 00000000..32cdfe4f
--- /dev/null
+++ b/queue-3.16/x86-mm-move-swap-offset-type-up-in-pte-to-work-around-erratum.patch
@@ -0,0 +1,104 @@
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Thu, 7 Jul 2016 17:19:11 -0700
+Subject: x86/mm: Move swap offset/type up in PTE to work around erratum
+
+commit 00839ee3b299303c6a5e26a0a2485427a3afcbbf upstream.
+
+This erratum can result in Accessed/Dirty getting set by the hardware
+when we do not expect them to be (on !Present PTEs).
+
+Instead of trying to fix them up after this happens, we just
+allow the bits to get set and try to ignore them. We do this by
+shifting the layout of the bits we use for swap offset/type in
+our 64-bit PTEs.
+
+It looks like this:
+
+ bitnrs: | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0|
+ names: | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P|
+ before: | OFFSET (9-63) |0|X|X| TYPE(1-5) |0|
+ after: | OFFSET (14-63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0|
+
+Note that D was already a don't care (X) even before. We just
+move TYPE up and turn its old spot (which could be hit by the
+A bit) into all don't cares.
+
+We take 5 bits away from the offset, but that still leaves us
+with 50 bits which lets us index into a 62-bit swapfile (4 EiB).
+I think that's probably fine for the moment. We could
+theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it
+doesn't gain us anything.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave@sr71.net>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: dave.hansen@intel.com
+Cc: linux-mm@kvack.org
+Cc: mhocko@suse.com
+Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA, which
+ no longer exists upstream. Adjust the bit numbers accordingly,
+ incorporating commit ace7fab7a6cd "x86/mm: Fix swap entry comment and
+ macro".]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -162,23 +162,37 @@ static inline int pgd_large(pgd_t pgd) {
+ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) ((void)(pte))/* NOP */
+
+-/* Encode and de-code a swap entry */
+-#define SWP_TYPE_BITS 5
++/*
++ * Encode and de-code a swap entry
++ *
++ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
++ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
++ * | OFFSET (15->63) | TYPE (10-14) | 0 |0|X|X|X| X| X|X|X|0| <- swp entry
++ *
++ * G (8) is aliased and used as a PROT_NONE indicator for
++ * !present ptes. We need to start storing swap entries above
++ * there. We also need to avoid using A and D because of an
++ * erratum where they can be incorrectly set by hardware on
++ * non-present PTEs.
++ */
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
++#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 2)
+ #else
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #endif
++#define SWP_TYPE_BITS 5
++/* Place the offset above the type: */
++#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+-#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
++#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \
+ & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT)
+ #define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((type) << (_PAGE_BIT_PRESENT + 1)) \
+- | ((offset) << SWP_OFFSET_SHIFT) })
++ ((type) << (SWP_TYPE_FIRST_BIT)) \
++ | ((offset) << SWP_OFFSET_FIRST_BIT) })
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
diff --git a/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch b/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch
new file mode 100644
index 00000000..8cedb8d4
--- /dev/null
+++ b/queue-3.16/x86-mm-pat-make-set_memory_np-l1tf-safe.patch
@@ -0,0 +1,45 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:39 -0700
+Subject: x86/mm/pat: Make set_memory_np() L1TF safe
+
+commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream
+
+set_memory_np() is used to mark kernel mappings not present, but it has
+it's own open coded mechanism which does not have the L1TF protection of
+inverting the address bits.
+
+Replace the open coded PTE manipulation with the L1TF protecting low level
+PTE routines.
+
+Passes the CPA self test.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 3.16:
+ - cpa->pfn is actually a physical address here and needs to be shifted to
+ produce a PFN
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -952,7 +952,8 @@ static int populate_pmd(struct cpa_data
+
+ pmd = pmd_offset(pud, start);
+
+- set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
++ set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn >> PAGE_SHIFT,
++ canon_pgprot(pgprot))));
+
+ start += PMD_SIZE;
+ cpa->pfn += PMD_SIZE;
+@@ -1022,7 +1023,8 @@ static int populate_pud(struct cpa_data
+ * Map everything starting from the Gb boundary, possibly with 1G pages
+ */
+ while (end - start >= PUD_SIZE) {
+- set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
++ set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn >> PAGE_SHIFT,
++ canon_pgprot(pgprot))));
+
+ start += PUD_SIZE;
+ cpa->pfn += PUD_SIZE;
diff --git a/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch b/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
new file mode 100644
index 00000000..889f66d7
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-add-sysfs-reporting-for-l1tf.patch
@@ -0,0 +1,235 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:26 -0700
+Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf
+
+commit 17dbca119312b4e8173d4e25ff64262119fcef38 upstream
+
+L1TF core kernel workarounds are cheap and normally always enabled. However,
+they should still be reported in sysfs if the system is vulnerable or
+mitigated. Add the necessary CPU feature/bug bits.
+
+- Extend the existing checks for Meltdown to determine if the system is
+ vulnerable. All CPUs which are not vulnerable to Meltdown are also not
+ vulnerable to L1TF
+
+- Check for 32bit non-PAE and emit a warning, as there is no practical way
+  to mitigate it due to the limited physical address bits
+
+- If the system has more than MAX_PA/2 physical memory the invert page
+ workarounds don't protect the system against the L1TF attack anymore,
+ because an inverted physical address will also point to valid
+ memory. Print a warning in this case and report that the system is
+ vulnerable.
+
+Add a function which returns the PFN limit for the L1TF mitigation, which
+will be used in follow up patches for sanity and range checks.
+
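+As a rough illustration (a standalone userspace sketch, not code from this
+patch; the 46-bit physical address width is only an assumed example), the
+PFN limit and the MAX_PA/2 boundary it implies work out like this:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          /* Assumed example: a CPU reporting 46 physical address bits. */
+          unsigned long long phys_bits = 46;
+          unsigned long long page_shift = 12;  /* 4 KiB pages */
+
+          /* Mirrors the l1tf_pfn_limit() idea: last PFN below MAX_PA/2. */
+          unsigned long long pfn_limit =
+                  (1ULL << (phys_bits - 1 - page_shift)) - 1;
+          unsigned long long half_pa = (pfn_limit + 1) << page_shift;
+
+          printf("pfn limit: %#llx\n", pfn_limit);
+          printf("MAX_PA/2:  %llu GiB\n", half_pa >> 30);  /* = 32 TiB */
+          return 0;
+  }
+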
+[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]
+[ dwmw2: Backport to 4.9 (cpufeatures.h, E820) ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 3.16:
+ - Assign the next available bits from feature word 7 and bug word 0
+ - CONFIG_PGTABLE_LEVELS is not defined; use other config symbols in the
+ condition
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/cpufeature.h | 3 ++-
+ arch/x86/include/asm/processor.h | 5 ++++
+ arch/x86/kernel/cpu/bugs.c | 40 ++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c | 20 +++++++++++++++
+ drivers/base/cpu.c | 8 ++++++
+ include/linux/cpu.h | 2 ++
+ 6 files changed, 77 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -199,6 +199,7 @@
+ #define X86_FEATURE_MSR_SPEC_CTRL (7*32+19) /* "" MSR SPEC_CTRL is implemented */
+ #define X86_FEATURE_SSBD (7*32+20) /* Speculative Store Bypass Disable */
+ #define X86_FEATURE_ZEN (7*32+21) /* "" CPU is AMD family 0x17 (Zen) */
++#define X86_FEATURE_L1TF_PTEINV (7*32+22) /* "" L1TF workaround PTE inversion */
+
+ #define X86_FEATURE_RETPOLINE (7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+@@ -271,6 +272,7 @@
+ #define X86_BUG_SPECTRE_V1 X86_BUG(6) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2 X86_BUG(7) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(8) /* CPU is affected by speculative store bypass attack */
++#define X86_BUG_L1TF X86_BUG(9) /* CPU is affected by L1 Terminal Fault */
+
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -165,6 +165,11 @@ extern const struct seq_operations cpuin
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+ extern void fpu_detect(struct cpuinfo_x86 *c);
+
++static inline unsigned long l1tf_pfn_limit(void)
++{
++ return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++}
++
+ extern void early_cpu_init(void);
+ extern void identify_boot_cpu(void);
+ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -26,9 +26,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
+ #include <asm/intel-family.h>
++#include <asm/e820.h>
+
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
++static void __init l1tf_select_mitigation(void);
+
+ /*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+@@ -138,6 +140,8 @@ void __init check_bugs(void)
+ */
+ ssb_select_mitigation();
+
++ l1tf_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -266,6 +270,32 @@ static void x86_amd_ssb_disable(void)
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+ }
+
++static void __init l1tf_select_mitigation(void)
++{
++ u64 half_pa;
++
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return;
++
++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
++ pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++ return;
++#endif
++
++ /*
++ * This is extremely unlikely to happen because almost all
++ * systems have far more MAX_PA/2 than RAM can be fit into
++ * DIMM slots.
++ */
++ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+
+@@ -718,6 +748,11 @@ static ssize_t cpu_show_common(struct de
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
++ case X86_BUG_L1TF:
++ if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
++ return sprintf(buf, "Mitigation: Page Table Inversion\n");
++ break;
++
+ default:
+ break;
+ }
+@@ -744,4 +779,9 @@ ssize_t cpu_show_spec_store_bypass(struc
+ {
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+ }
++
++ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -842,6 +842,21 @@ static const __initconst struct x86_cpu_
+ {}
+ };
+
++static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
++ /* in addition to cpu_no_speculation */
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
++ {}
++};
++
+ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ {
+ u64 ia32_cap = 0;
+@@ -867,6 +882,11 @@ static void __init cpu_set_bug_bits(stru
+ return;
+
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++
++ if (x86_match_cpu(cpu_no_l1tf))
++ return;
++
++ setup_force_cpu_bug(X86_BUG_L1TF);
+ }
+
+ /*
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -444,16 +444,24 @@ ssize_t __weak cpu_show_spec_store_bypas
+ return sprintf(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_l1tf(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
++static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
++ &dev_attr_l1tf.attr,
+ NULL
+ };
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -47,6 +47,8 @@ extern ssize_t cpu_show_spectre_v2(struc
+ struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_l1tf(struct device *dev,
++ struct device_attribute *attr, char *buf);
+
+ #ifdef CONFIG_HOTPLUG_CPU
+ extern void unregister_cpu(struct cpu *cpu);
diff --git a/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch b/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
new file mode 100644
index 00000000..83bf5c3e
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-change-order-of-offset-type-in-swap-entry.patch
@@ -0,0 +1,108 @@
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:22 -0700
+Subject: x86/speculation/l1tf: Change order of offset/type in swap entry
+
+commit bcd11afa7adad8d720e7ba5ef58bdcd9775cf45f upstream.
+
+If pages are swapped out, the swap entry is stored in the corresponding
+PTE, which has the Present bit cleared. CPUs vulnerable to L1TF speculate
+on PTE entries which have the present bit set and would treat the swap
+entry as a physical address (PFN). To mitigate that, the upper bits of the PTE
+must be set so the PTE points to non existent memory.
+
+The swap entry stores the type and the offset of a swapped out page in the
+PTE. type is stored in bit 9-13 and offset in bit 14-63. The hardware
+ignores the bits beyond the physical address space limit, so to make the
+mitigation effective it is required to start 'offset' at the lowest possible
+bit so that even large swap offsets do not reach into the physical address
+space limit bits.
+
+Move offset to bit 9-58 and type to bit 59-63 which are the bits that
+hardware generally doesn't care about.
+
+That, in turn, means that if you are on a desktop chip with only 40 bits of
+physical addressing, now that the offset starts at bit 9, there needs to be
+30 bits of offset actually *in use* until bit 39 ends up being set, which
+means when inverted it will again point into existing memory.
+
+So that's 4 terabyte of swap space (because the offset is counted in pages,
+so 30 bits of offset is 42 bits of actual coverage). With bigger physical
+addressing, that obviously grows further, until the limit of the offset is
+hit (at 50 bits of offset - 62 bits of actual swap file coverage).
+
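+To make the arithmetic above concrete, here is a standalone sketch (not
+part of the patch; the 40-bit physical limit is just the example from the
+text) that counts how much swap offset fits below the physical-limit bit:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          /* Example from the text: a desktop chip with 40 physical bits. */
+          unsigned int phys_bits = 40;
+          unsigned int offset_first_bit = 9;  /* offset now starts at bit 9 */
+          unsigned int page_shift = 12;       /* offsets are counted in pages */
+
+          /* Offset bits usable before bit (phys_bits - 1) gets set. */
+          unsigned int offset_bits = (phys_bits - 1) - offset_first_bit;
+          unsigned long long swap_bytes = 1ULL << (offset_bits + page_shift);
+
+          printf("%u offset bits -> %llu TiB of swap\n",
+                 offset_bits, swap_bytes >> 40);  /* 30 bits -> 4 TiB */
+          return 0;
+  }
+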
+This is a preparatory change for the actual swap entry inversion to protect
+against L1TF.
+
+[ AK: Updated description and minor tweaks. Split into two parts ]
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h | 31 ++++++++++++++++++++-----------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -167,7 +167,7 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | OFFSET (15->63) | TYPE (10-14) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) | OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+@@ -181,24 +181,33 @@ static inline int pgd_large(pgd_t pgd) {
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
+ */
++#define SWP_TYPE_BITS 5
++
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Automatic NUMA balancing needs to be distinguishable from swap entries */
+-#define SWP_TYPE_FIRST_SHIFT (_PAGE_BIT_PROTNONE + 2)
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 2)
+ #else
+-#define SWP_TYPE_FIRST_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+ #endif
+-#define SWP_TYPE_BITS 5
+-/* Place the offset above the type: */
+-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+-#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \
+- & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT)
+-#define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((type) << (SWP_TYPE_FIRST_BIT)) \
+- | ((offset) << SWP_OFFSET_FIRST_BIT) })
++/* Extract the high bits for type */
++#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
++
++/* Shift up (to get rid of type), then down to get value */
++#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++
++/*
++ * Shift the offset up "too far" by TYPE bits, then down again
++ */
++#define __swp_entry(type, offset) ((swp_entry_t) { \
++ ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
++
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
diff --git a/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch b/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
new file mode 100644
index 00000000..38919fde
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-disallow-non-privileged-high-mmio-prot_none.patch
@@ -0,0 +1,274 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:27 -0700
+Subject: x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE
+ mappings
+
+commit 42e4089c7890725fcd329999252dc489b72f2921 upstream
+
+For L1TF, PROT_NONE mappings are protected by inverting the PFN in the page
+table entry. This sets the high bits in the CPU's address space, thus
+making sure that an unmapped entry does not point to valid cached memory.
+
+Some server system BIOSes put the MMIO mappings high up in the physical
+address space. If such a high mapping were mapped to unprivileged users,
+they could attack low memory by setting such a mapping to PROT_NONE. This
+could happen through a special device driver which is not access
+protected. Normal /dev/mem is of course access protected.
+
+To avoid this, forbid PROT_NONE mappings or mprotect for high MMIO mappings.
+
+Valid page mappings are allowed because the system is then unsafe anyway.
+
+It's not expected that users commonly use PROT_NONE on MMIO. But to
+minimize any impact this is only enforced if the mapping actually refers to
+a high MMIO address (defined as the MAX_PA-1 bit being set); the check is
+also skipped for root.
+
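+For context, the sequence that the check now rejects looks roughly like
+this from userspace (a hypothetical sketch; /dev/some_mmio_device and the
+mapping length are made-up placeholders, not an interface from this patch):
+
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <sys/mman.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          /* Hypothetical driver exposing high MMIO to unprivileged users. */
+          int fd = open("/dev/some_mmio_device", O_RDWR);
+          if (fd < 0) {
+                  perror("open");
+                  return 1;
+          }
+
+          void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                         MAP_SHARED, fd, 0);
+          if (p == MAP_FAILED) {
+                  perror("mmap");
+                  return 1;
+          }
+
+          /* With this patch, turning such a high-MMIO PFN mapping into
+           * PROT_NONE fails for non-root instead of creating an
+           * invertible not-present PTE pointing at valid memory. */
+          if (mprotect(p, 4096, PROT_NONE) != 0)
+                  perror("mprotect");  /* expected: Permission denied */
+
+          munmap(p, 4096);
+          close(fd);
+          return 0;
+  }
+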
+For mmaps this is straightforward and can be handled in vm_insert_pfn and
+in remap_pfn_range().
+
+For mprotect it's a bit trickier. At the point where the actual PTEs are
+accessed a lot of state has been changed and it would be difficult to undo
+on an error. Since this is an uncommon case, use a separate early page
+table walk pass for MMIO PROT_NONE mappings that checks for this condition
+early. For non MMIO and non PROT_NONE there are no changes.
+
+[dwmw2: Backport to 4.9]
+[groeck: Backport to 4.4]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h | 8 ++++++
+ arch/x86/mm/mmap.c | 21 +++++++++++++++
+ include/asm-generic/pgtable.h | 12 +++++++++
+ mm/memory.c | 29 +++++++++++++++-----
+ mm/mprotect.c | 49 ++++++++++++++++++++++++++++++++++
+ 5 files changed, 112 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -940,6 +940,14 @@ static inline pte_t pte_swp_clear_soft_d
+ }
+ #endif
+
++#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
++extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++ return boot_cpu_has_bug(X86_BUG_L1TF);
++}
++
+ #include <asm-generic/pgtable.h>
+ #endif /* __ASSEMBLY__ */
+
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -114,3 +114,24 @@ void arch_pick_mmap_layout(struct mm_str
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+ }
++
++/*
++ * Only allow root to set high MMIO mappings to PROT_NONE.
++ * This prevents an unpriv. user to set them to PROT_NONE and invert
++ * them, then pointing to valid memory for L1TF speculation.
++ *
++ * Note: for locked down kernels may want to disable the root override.
++ */
++bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return true;
++ if (!__pte_needs_invert(pgprot_val(prot)))
++ return true;
++ /* If it's real memory always allow */
++ if (pfn_valid(pfn))
++ return true;
++ if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++ return false;
++ return true;
++}
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -812,4 +812,16 @@ static inline void pmdp_set_numa(struct
+ #define io_remap_pfn_range remap_pfn_range
+ #endif
+
++#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
++static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++ return true;
++}
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++ return false;
++}
++#endif
++
+ #endif /* _ASM_GENERIC_PGTABLE_H */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1614,6 +1614,9 @@ int vm_insert_pfn_prot(struct vm_area_st
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return -EINVAL;
+
++ if (!pfn_modify_allowed(pfn, pgprot))
++ return -EACCES;
++
+ ret = insert_pfn(vma, addr, pfn, pgprot);
+
+ return ret;
+@@ -1632,6 +1635,9 @@ int vm_insert_mixed(struct vm_area_struc
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return -EINVAL;
+
++ if (!pfn_modify_allowed(pfn, pgprot))
++ return -EACCES;
++
+ /*
+ * If we don't have pte special, then we have to use the pfn_valid()
+ * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
+@@ -1660,6 +1666,7 @@ static int remap_pte_range(struct mm_str
+ {
+ pte_t *pte;
+ spinlock_t *ptl;
++ int err = 0;
+
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+@@ -1667,12 +1674,16 @@ static int remap_pte_range(struct mm_str
+ arch_enter_lazy_mmu_mode();
+ do {
+ BUG_ON(!pte_none(*pte));
++ if (!pfn_modify_allowed(pfn, prot)) {
++ err = -EACCES;
++ break;
++ }
+ set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(pte - 1, ptl);
+- return 0;
++ return err;
+ }
+
+ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+@@ -1681,6 +1692,7 @@ static inline int remap_pmd_range(struct
+ {
+ pmd_t *pmd;
+ unsigned long next;
++ int err;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+@@ -1689,9 +1701,10 @@ static inline int remap_pmd_range(struct
+ VM_BUG_ON(pmd_trans_huge(*pmd));
+ do {
+ next = pmd_addr_end(addr, end);
+- if (remap_pte_range(mm, pmd, addr, next,
+- pfn + (addr >> PAGE_SHIFT), prot))
+- return -ENOMEM;
++ err = remap_pte_range(mm, pmd, addr, next,
++ pfn + (addr >> PAGE_SHIFT), prot);
++ if (err)
++ return err;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+ }
+@@ -1702,6 +1715,7 @@ static inline int remap_pud_range(struct
+ {
+ pud_t *pud;
+ unsigned long next;
++ int err;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+@@ -1709,9 +1723,10 @@ static inline int remap_pud_range(struct
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+- if (remap_pmd_range(mm, pud, addr, next,
+- pfn + (addr >> PAGE_SHIFT), prot))
+- return -ENOMEM;
++ err = remap_pmd_range(mm, pud, addr, next,
++ pfn + (addr >> PAGE_SHIFT), prot);
++ if (err)
++ return err;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+ }
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -258,6 +258,42 @@ unsigned long change_protection(struct v
+ return pages;
+ }
+
++static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
++ unsigned long next, struct mm_walk *walk)
++{
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++ 0 : -EACCES;
++}
++
++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
++ unsigned long addr, unsigned long next,
++ struct mm_walk *walk)
++{
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
++ 0 : -EACCES;
++}
++
++static int prot_none_test(unsigned long addr, unsigned long next,
++ struct mm_walk *walk)
++{
++ return 0;
++}
++
++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
++ unsigned long end, unsigned long newflags)
++{
++ pgprot_t new_pgprot = vm_get_page_prot(newflags);
++ struct mm_walk prot_none_walk = {
++ .pte_entry = prot_none_pte_entry,
++ .hugetlb_entry = prot_none_hugetlb_entry,
++ .test_walk = prot_none_test,
++ .mm = current->mm,
++ .private = &new_pgprot,
++ };
++
++ return walk_page_range(start, end, &prot_none_walk);
++}
++
+ int
+ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ unsigned long start, unsigned long end, unsigned long newflags)
+@@ -276,6 +312,19 @@ mprotect_fixup(struct vm_area_struct *vm
+ }
+
+ /*
++ * Do PROT_NONE PFN permission checks here when we can still
++ * bail out without undoing a lot of state. This is a rather
++ * uncommon case, so doesn't need to be very optimized.
++ */
++ if (arch_has_pfn_modify_check() &&
++ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
++ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
++ error = prot_none_walk(vma, start, end, newflags);
++ if (error)
++ return error;
++ }
++
++ /*
+ * If we make a private mapping writable we increase our commit;
+ * but (without finer accounting) cannot reduce our commit if we
+ * make it unwritable again. hugetlb mapping were accounted for
diff --git a/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch b/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
new file mode 100644
index 00000000..0fdd9585
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-exempt-zeroed-ptes-from-inversion.patch
@@ -0,0 +1,69 @@
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Fri, 17 Aug 2018 10:27:36 -0700
+Subject: x86/speculation/l1tf: Exempt zeroed PTEs from inversion
+
+commit f19f5c49bbc3ffcc9126cc245fc1b24cc29f4a37 upstream.
+
+It turns out that we should *not* invert all not-present mappings,
+because the all zeroes case is obviously special.
+
+clear_page() does not undergo the XOR logic to invert the address bits,
+i.e. PTE, PMD and PUD entries that have not been individually written
+will have val=0 and so will trigger __pte_needs_invert(). As a result,
+{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
+(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
+because the page at physical address 0 is reserved early in boot
+specifically to mitigate L1TF, so explicitly exempt them from the
+inversion when reading the PFN.
+
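+A minimal toy model of the change (standalone C, with _PAGE_PRESENT
+stubbed to its x86 value; not the kernel code itself) shows why val == 0
+has to be special-cased:
+
+  #include <stdbool.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define _PAGE_PRESENT 0x001ULL  /* bit 0, as on x86 */
+
+  /* Old behaviour: every not-present value was inverted, including 0. */
+  static bool needs_invert_old(uint64_t val)
+  {
+          return !(val & _PAGE_PRESENT);
+  }
+
+  /* New behaviour: a completely clear entry is exempt. */
+  static bool needs_invert_new(uint64_t val)
+  {
+          return val && !(val & _PAGE_PRESENT);
+  }
+
+  int main(void)
+  {
+          uint64_t cleared = 0;          /* zeroed, never written */
+          uint64_t swapped = 0x1234000;  /* not-present, non-zero entry */
+
+          printf("cleared: old=%d new=%d\n",
+                 needs_invert_old(cleared), needs_invert_new(cleared));
+          printf("swapped: old=%d new=%d\n",
+                 needs_invert_old(swapped), needs_invert_new(swapped));
+          return 0;
+  }
+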
+Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
+on a VMA that has VM_PFNMAP and was mmap'd to as something other than
+PROT_NONE but never used. mprotect() sends the PROT_NONE request down
+prot_none_walk(), which walks the PTEs to check the PFNs.
+prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
+-EACCES because it thinks mprotect() is trying to adjust a high MMIO
+address.
+
+[ This is a very modified version of Sean's original patch, but all
+ credit goes to Sean for doing this and also pointing out that
+ sometimes the __pte_needs_invert() function only gets the protection
+ bits, not the full eventual pte. But zero remains special even in
+ just protection bits, so that's ok. - Linus ]
+
+Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Acked-by: Andi Kleen <ak@linux.intel.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-invert.h | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable-invert.h
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -4,9 +4,18 @@
+
+ #ifndef __ASSEMBLY__
+
++/*
++ * A clear pte value is special, and doesn't get inverted.
++ *
++ * Note that even users that only pass a pgprot_t (rather
++ * than a full pte) won't trigger the special zero case,
++ * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
++ * set. So the all zero case really is limited to just the
++ * cleared page table entry case.
++ */
+ static inline bool __pte_needs_invert(u64 val)
+ {
+- return !(val & _PAGE_PRESENT);
++ return val && !(val & _PAGE_PRESENT);
+ }
+
+ /* Get a mask to xor with the page table entry to get the correct pfn. */
diff --git a/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch b/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
new file mode 100644
index 00000000..96858d10
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-extend-64bit-swap-file-size-limit.patch
@@ -0,0 +1,42 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 21 Jun 2018 12:36:29 +0200
+Subject: x86/speculation/l1tf: Extend 64bit swap file size limit
+
+commit 1a7ed1ba4bba6c075d5ad61bb75e3fbc870840d6 upstream.
+
+The previous patch has limited swap file size so that large offsets cannot
+clear bits above MAX_PA/2 in the pte and interfere with L1TF mitigation.
+
+It assumed that offsets are encoded starting with bit 12, same as pfn. But
+on x86_64, offsets are encoded starting with bit 9.
+
+Thus the limit can be raised by 3 bits. That means 16TB with 42bit MAX_PA
+and 256TB with 46bit MAX_PA.
+
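+The gain can be checked with a standalone back-of-the-envelope sketch (not
+kernel code; the MAX_PA values are the examples quoted above):
+
+  #include <stdio.h>
+
+  /* MAX_PA/2 in bytes, optionally shifted up by the 3 offset bits that
+   * sit below the pfn bits on x86_64. */
+  static unsigned long long limit_bytes(unsigned int max_pa, unsigned int extra)
+  {
+          return (1ULL << (max_pa - 1)) << extra;
+  }
+
+  int main(void)
+  {
+          unsigned int pa[] = { 42, 46 };
+
+          for (int i = 0; i < 2; i++)
+                  printf("MAX_PA=%u: %3llu TiB -> %3llu TiB\n", pa[i],
+                         limit_bytes(pa[i], 0) >> 40,
+                         limit_bytes(pa[i], 3) >> 40);
+          /* Prints 2 -> 16 TiB for 42 bits, 32 -> 256 TiB for 46 bits. */
+          return 0;
+  }
+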
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/mm/init.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,15 @@ unsigned long max_swapfile_size(void)
+
+ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+- pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++ unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++ /*
++ * We encode swap offsets also with 3 bits below those for pfn
++ * which makes the usable limit higher.
++ */
++#ifdef CONFIG_X86_64
++ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
++#endif
++ pages = min_t(unsigned long, l1tf_limit, pages);
+ }
+ return pages;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch b/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
new file mode 100644
index 00000000..0b76a3fb
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-off-by-one-error-when-warning-that-system.patch
@@ -0,0 +1,78 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 23 Aug 2018 15:44:18 +0200
+Subject: x86/speculation/l1tf: Fix off-by-one error when warning that system
+ has too much RAM
+
+commit b0a182f875689647b014bc01d36b340217792852 upstream.
+
+Two users have reported [1] that they have an "extremely unlikely" system
+with more than MAX_PA/2 memory and L1TF mitigation is not effective. In
+fact it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to
+holes in the e820 map, the main region is almost 500MB over the 32GB limit:
+
+[ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable
+
+Suggestions to use 'mem=32G' to enable the L1TF mitigation while losing the
+500MB revealed, that there's an off-by-one error in the check in
+l1tf_select_mitigation().
+
+l1tf_pfn_limit() returns the last usable pfn (inclusive) and the range
+check in the mitigation path does not take this into account.
+
+Instead of amending the range check, make l1tf_pfn_limit() return the first
+PFN which is over the limit, which is less error prone. Adjust the other
+users accordingly.
+
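+A standalone sketch of the e820 check (not kernel code; the 36-bit MAX_PA
+and the mem=32G boundary are taken from the report above) shows the false
+positive the inclusive limit caused:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int phys_bits = 36, page_shift = 12;  /* 64 GB MAX_PA */
+          unsigned long long ram_end = 1ULL << 35;       /* mem=32G */
+
+          /* Old: last safe PFN (inclusive); new: first PFN over the limit. */
+          unsigned long long old_half_pa =
+                  ((1ULL << (phys_bits - 1 - page_shift)) - 1) << page_shift;
+          unsigned long long new_half_pa =
+                  (1ULL << (phys_bits - 1 - page_shift)) << page_shift;
+
+          /* The warning fires when any RAM lies at or above half_pa. */
+          printf("old check warns: %d\n", ram_end > old_half_pa);  /* 1 */
+          printf("new check warns: %d\n", ram_end > new_half_pa);  /* 0 */
+          return 0;
+  }
+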
+[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536
+
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Reported-by: George Anchev <studio@anchev.net>
+Reported-by: Christopher Snowhill <kode54@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Link: https://lkml.kernel.org/r/20180823134418.17008-1-vbabka@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/processor.h | 2 +-
+ arch/x86/mm/init.c | 2 +-
+ arch/x86/mm/mmap.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -167,7 +167,7 @@ extern void fpu_detect(struct cpuinfo_x8
+
+ static inline unsigned long long l1tf_pfn_limit(void)
+ {
+- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++ return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ }
+
+ extern void early_cpu_init(void);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,7 @@ unsigned long max_swapfile_size(void)
+
+ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+- unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
++ unsigned long long l1tf_limit = l1tf_pfn_limit();
+ /*
+ * We encode swap offsets also with 3 bits below those for pfn
+ * which makes the usable limit higher.
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -131,7 +131,7 @@ bool pfn_modify_allowed(unsigned long pf
+ /* If it's real memory always allow */
+ if (pfn_valid(pfn))
+ return true;
+- if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++ if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+ return false;
+ return true;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch b/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
new file mode 100644
index 00000000..9fbc2d17
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-overflow-in-l1tf_pfn_limit-on-32bit.patch
@@ -0,0 +1,70 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 20 Aug 2018 11:58:35 +0200
+Subject: x86/speculation/l1tf: Fix overflow in l1tf_pfn_limit() on 32bit
+
+commit 9df9516940a61d29aedf4d91b483ca6597e7d480 upstream.
+
+On 32bit PAE kernels on 64bit hardware with enough physical bits,
+l1tf_pfn_limit() will overflow unsigned long. This in turn affects
+max_swapfile_size() and can lead to swapon returning -EINVAL. This has been
+observed in a 32bit guest with 42 bits physical address size, where
+max_swapfile_size() overflows exactly to 1 << 32, thus zero, and produces
+the following warning to dmesg:
+
+[ 6.396845] Truncating oversized swap area, only using 0k out of 2047996k
+
+Fix this by using unsigned long long instead.
+
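+The overflow is easy to reproduce in isolation (standalone sketch, not
+kernel code; uint32_t stands in for the 32-bit kernel's unsigned long and
+42 physical bits matches the reported guest):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int phys_bits = 42, page_shift = 12;
+
+          /* 32-bit PAE kernel: unsigned long is only 32 bits wide. */
+          uint32_t limit32 = ((uint32_t)1 << (phys_bits - 1 - page_shift)) - 1;
+          /* l1tf_limit + 1, shifted up by the 3 extra PAE offset bits. */
+          uint32_t pages32 = (limit32 + 1) << 3;
+
+          /* Same computation done in a 64-bit type. */
+          uint64_t limit64 = ((uint64_t)1 << (phys_bits - 1 - page_shift)) - 1;
+          uint64_t pages64 = (limit64 + 1) << 3;
+
+          printf("32-bit: %u pages\n", pages32);  /* 0: wrapped at 1 << 32 */
+          printf("64-bit: %llu pages\n", (unsigned long long)pages64);
+          return 0;
+  }
+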
+Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
+Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
+Reported-by: Dominique Leuenberger <dimstar@suse.de>
+Reported-by: Adrian Schroeter <adrian@suse.de>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Andi Kleen <ak@linux.intel.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Link: https://lkml.kernel.org/r/20180820095835.5298-1-vbabka@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/processor.h | 4 ++--
+ arch/x86/mm/init.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -165,9 +165,9 @@ extern const struct seq_operations cpuin
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+ extern void fpu_detect(struct cpuinfo_x86 *c);
+
+-static inline unsigned long l1tf_pfn_limit(void)
++static inline unsigned long long l1tf_pfn_limit(void)
+ {
+- return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++ return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+ }
+
+ extern void early_cpu_init(void);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -710,7 +710,7 @@ unsigned long max_swapfile_size(void)
+
+ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+- unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++ unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
+ /*
+ * We encode swap offsets also with 3 bits below those for pfn
+ * which makes the usable limit higher.
+@@ -718,7 +718,7 @@ unsigned long max_swapfile_size(void)
+ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+ #endif
+- pages = min_t(unsigned long, l1tf_limit, pages);
++ pages = min_t(unsigned long long, l1tf_limit, pages);
+ }
+ return pages;
+ }
diff --git a/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch b/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
new file mode 100644
index 00000000..96858342
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch
@@ -0,0 +1,81 @@
+From: Michal Hocko <mhocko@suse.cz>
+Date: Wed, 27 Jun 2018 17:46:50 +0200
+Subject: x86/speculation/l1tf: Fix up pte->pfn conversion for PAE
+
+commit e14d7dfb41f5807a0c1c26a13f2b8ef16af24935 upstream
+
+Jan has noticed that pte_pfn and co. resp. pfn_pte are incorrect for
+CONFIG_PAE because phys_addr_t is wider than unsigned long and so the
+pte_val reps. shift left would get truncated. Fix this up by using proper
+types.
+
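+The truncation is easy to see in isolation (a standalone sketch, not
+kernel code; uint32_t models the 32-bit unsigned long and uint64_t models
+phys_addr_t):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int page_shift = 12;
+          uint64_t page_nr = 0x500000;  /* PFN of physical address 20 GB */
+
+          /* Buggy pattern: the shift happens in 32-bit arithmetic. */
+          uint32_t truncated = (uint32_t)page_nr << page_shift;
+          /* Fixed pattern: widen to the physical address type first. */
+          uint64_t widened = (uint64_t)page_nr << page_shift;
+
+          printf("32-bit shift: %#x\n", truncated);  /* 0: high bits lost */
+          printf("64-bit shift: %#llx\n", (unsigned long long)widened);
+          return 0;
+  }
+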
+[dwmw2: Backport to 4.9]
+
+Fixes: 6b28baca9b1f ("x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation")
+Reported-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 3.16: Adjust context. Also restore the fix to pfn_pud().]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -147,21 +147,21 @@ static inline u64 protnone_mask(u64 val)
+
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+- unsigned long pfn = pte_val(pte);
++ phys_addr_t pfn = pte_val(pte);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+- unsigned long pfn = pmd_val(pmd);
++ phys_addr_t pfn = pmd_val(pmd);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+- unsigned long pfn = pud_val(pud);
++ phys_addr_t pfn = pud_val(pud);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+@@ -371,7 +371,7 @@ static inline pgprotval_t massage_pgprot
+
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+- phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PTE_PFN_MASK;
+ return __pte(pfn | massage_pgprot(pgprot));
+@@ -379,7 +379,7 @@ static inline pte_t pfn_pte(unsigned lon
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+- phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PTE_PFN_MASK;
+ return __pmd(pfn | massage_pgprot(pgprot));
+@@ -387,7 +387,7 @@ static inline pmd_t pfn_pmd(unsigned lon
+
+ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+ {
+- phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PTE_PFN_MASK;
+ return __pud(pfn | massage_pgprot(pgprot));
diff --git a/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch b/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
new file mode 100644
index 00000000..ef156edc
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-increase-32bit-pae-__physical_page_shift.patch
@@ -0,0 +1,77 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:21 -0700
+Subject: x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT
+
+commit 50896e180c6aa3a9c61a26ced99e15d602666a4c upstream.
+
+L1 Terminal Fault (L1TF) is a speculation related vulnerability. The CPU
+speculates on PTE entries which do not have the PRESENT bit set, if the
+content of the resulting physical address is available in the L1D cache.
+
+The OS side mitigation makes sure that a !PRESENT PTE entry points to a
+physical address outside the actually existing and cachable memory
+space. This is achieved by inverting the upper bits of the PTE. Due to the
+address space limitations this only works for 64bit and 32bit PAE kernels,
+but not for 32bit non PAE.
+
+This mitigation applies to both host and guest kernels, but in case of a
+64bit host (hypervisor) and a 32bit PAE guest, inverting the upper bits of
+the PAE address space (44bit) is not enough if the host has more than 43
+bits of populated memory address space, because the speculation treats the
+PTE content as a physical host address bypassing EPT.
+
+The host (hypervisor) protects itself against the guest by flushing L1D as
+needed, but pages inside the guest are not protected against attacks from
+other processes inside the same guest.
+
+For the guest the inverted PTE mask has to match the host to provide the
+full protection for all pages the host could possibly map into the
+guest. The host's populated address space is not known to the guest, so the
+mask must cover the possible maximal host address space, i.e. 52 bit.
+
+On 32bit PAE the maximum PTE mask is currently set to 44 bit because that
+is the limit imposed by 32bit unsigned long PFNs in the VMs. This limits
+the mask to be below what the host could possibly use for physical pages.
+
+The L1TF PROT_NONE protection code uses the PTE masks to determine which
+bits to invert to make sure the higher bits are set for unmapped entries to
+prevent L1TF speculation attacks against EPT inside guests.
+
+In order to invert all bits that could be used by the host, increase
+__PHYSICAL_PAGE_SHIFT to 52 to match 64bit.
+
+The real limit for a 32bit PAE kernel is still 44 bits because all Linux
+PTEs are created from unsigned long PFNs, so they cannot be higher than 44
+bits on a 32bit kernel. So these extra PFN bits should never be set. The
+only users of this macro are using it to look at PTEs, so it's safe.
+
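+To see what actually changes, here is a standalone sketch (not kernel
+code; the mask expressions only approximate __PHYSICAL_MASK/PTE_PFN_MASK)
+comparing the PFN mask for both shift values:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int page_shift = 12;
+          /* Roughly __PHYSICAL_MASK with the page-offset bits dropped. */
+          uint64_t mask44 = ((1ULL << 44) - 1) & ~((1ULL << page_shift) - 1);
+          uint64_t mask52 = ((1ULL << 52) - 1) & ~((1ULL << page_shift) - 1);
+
+          printf("44-bit PFN mask: %#llx\n", (unsigned long long)mask44);
+          printf("52-bit PFN mask: %#llx\n", (unsigned long long)mask52);
+          /* With 52 bits, inverting a PROT_NONE entry sets the PTE bits
+           * all the way up to the maximum host physical address. */
+          return 0;
+  }
+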
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/page_32_types.h | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/page_32_types.h
++++ b/arch/x86/include/asm/page_32_types.h
+@@ -27,8 +27,13 @@
+ #define N_EXCEPTION_STACKS 1
+
+ #ifdef CONFIG_X86_PAE
+-/* 44=32+12, the limit we can fit into an unsigned long pfn */
+-#define __PHYSICAL_MASK_SHIFT 44
++/*
++ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
++ * but we need the full mask to make sure inverted PROT_NONE
++ * entries have all the host bits set in a guest.
++ * The real limit is still 44 bits.
++ */
++#define __PHYSICAL_MASK_SHIFT 52
+ #define __VIRTUAL_MASK_SHIFT 32
+
+ #else /* !CONFIG_X86_PAE */
diff --git a/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch b/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch
new file mode 100644
index 00000000..9a5eb67e
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-invert-all-not-present-mappings.patch
@@ -0,0 +1,31 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:36 -0700
+Subject: x86/speculation/l1tf: Invert all not present mappings
+
+commit f22cc87f6c1f771b57c407555cfefd811cdd9507 upstream.
+
+For kernel mappings PAGE_PROTNONE is not necessarily set for a non present
+mapping, but the inversion logic explicitly checks for !PRESENT and
+PROT_NONE.
+
+Remove the PROT_NONE check and make the inversion unconditional for all not
+present mappings.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-invert.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable-invert.h
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -6,7 +6,7 @@
+
+ static inline bool __pte_needs_invert(u64 val)
+ {
+- return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++ return !(val & _PAGE_PRESENT);
+ }
+
+ /* Get a mask to xor with the page table entry to get the correct pfn. */
diff --git a/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch b/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
new file mode 100644
index 00000000..3f61b90a
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-limit-swap-file-size-to-max_pa-2.patch
@@ -0,0 +1,128 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:28 -0700
+Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2
+
+commit 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb upstream.
+
+For the L1TF workaround it's necessary to limit the swap file size to below
+MAX_PA/2, so that the higher bits of the swap offset inverted never point
+to valid memory.
+
+Add a mechanism for the architecture to override the swap file size check
+in swapfile.c and add a x86 specific max swapfile check function that
+enforces that limit.
+
+The check is only enabled if the CPU is vulnerable to L1TF.
+
+In VMs with 42bit MAX_PA the typical limit is 2TB now; on a native system
+with 46bit PA it is 32TB. The limit is only per individual swap file, so
+it's always possible to exceed these limits with multiple swap files or
+partitions.
+
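+The quoted limits follow directly from MAX_PA/2 (a standalone sketch, not
+kernel code; 42 and 46 physical bits are the examples from the text):
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int page_shift = 12;
+          unsigned int pa_bits[] = { 42, 46 };
+
+          for (int i = 0; i < 2; i++) {
+                  /* Pages below MAX_PA/2, i.e. l1tf_pfn_limit() + 1. */
+                  unsigned long long pages =
+                          1ULL << (pa_bits[i] - 1 - page_shift);
+                  printf("MAX_PA=%u bits -> swap limit %llu TiB\n",
+                         pa_bits[i], (pages << page_shift) >> 40);
+          }
+          /* Prints 2 TiB for 42 bits and 32 TiB for 46 bits. */
+          return 0;
+  }
+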
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -4,6 +4,8 @@
+ #include <linux/swap.h>
+ #include <linux/memblock.h>
+ #include <linux/bootmem.h> /* for max_low_pfn */
++#include <linux/swapfile.h>
++#include <linux/swapops.h>
+
+ #include <asm/cacheflush.h>
+ #include <asm/e820.h>
+@@ -699,3 +701,15 @@ void __init zone_sizes_init(void)
+ free_area_init_nodes(max_zone_pfns);
+ }
+
++unsigned long max_swapfile_size(void)
++{
++ unsigned long pages;
++
++ pages = generic_max_swapfile_size();
++
++ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
++ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
++ pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
++ }
++ return pages;
++}
+--- a/include/linux/swapfile.h
++++ b/include/linux/swapfile.h
+@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
+ extern struct plist_head swap_active_head;
+ extern struct swap_info_struct *swap_info[];
+ extern int try_to_unuse(unsigned int, bool, unsigned long);
++extern unsigned long generic_max_swapfile_size(void);
++extern unsigned long max_swapfile_size(void);
+
+ #endif /* _LINUX_SWAPFILE_H */
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -2166,6 +2166,35 @@ static int claim_swapfile(struct swap_in
+ return 0;
+ }
+
++
++/*
++ * Find out how many pages are allowed for a single swap device. There
++ * are two limiting factors:
++ * 1) the number of bits for the swap offset in the swp_entry_t type, and
++ * 2) the number of bits in the swap pte, as defined by the different
++ * architectures.
++ *
++ * In order to find the largest possible bit mask, a swap entry with
++ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
++ * decoded to a swp_entry_t again, and finally the swap offset is
++ * extracted.
++ *
++ * This will mask all the bits from the initial ~0UL mask that can't
++ * be encoded in either the swp_entry_t or the architecture definition
++ * of a swap pte.
++ */
++unsigned long generic_max_swapfile_size(void)
++{
++ return swp_offset(pte_to_swp_entry(
++ swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++}
++
++/* Can be overridden by an architecture for additional checks. */
++__weak unsigned long max_swapfile_size(void)
++{
++ return generic_max_swapfile_size();
++}
++
+ static unsigned long read_swap_header(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ struct inode *inode)
+@@ -2201,22 +2230,7 @@ static unsigned long read_swap_header(st
+ p->cluster_next = 1;
+ p->cluster_nr = 0;
+
+- /*
+- * Find out how many pages are allowed for a single swap
+- * device. There are two limiting factors: 1) the number
+- * of bits for the swap offset in the swp_entry_t type, and
+- * 2) the number of bits in the swap pte as defined by the
+- * different architectures. In order to find the
+- * largest possible bit mask, a swap entry with swap type 0
+- * and swap offset ~0UL is created, encoded to a swap pte,
+- * decoded to a swp_entry_t again, and finally the swap
+- * offset is extracted. This will mask all the bits from
+- * the initial ~0UL mask that can't be encoded in either
+- * the swp_entry_t or the architecture definition of a
+- * swap pte.
+- */
+- maxpages = swp_offset(pte_to_swp_entry(
+- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
++ maxpages = max_swapfile_size();
+ last_page = swap_header->info.last_page;
+ if (last_page > maxpages) {
+ pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
diff --git a/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch b/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
new file mode 100644
index 00000000..744870c4
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-make-pmd-pud_mknotpresent-invert.patch
@@ -0,0 +1,55 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 7 Aug 2018 15:09:37 -0700
+Subject: x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert
+
+commit 0768f91530ff46683e0b372df14fd79fe8d156e5 upstream.
+
+Some cases in THP like:
+ - MADV_FREE
+ - mprotect
+ - split
+
+mark the PMD non-present temporarily to prevent races. The window for
+an L1TF attack in these contexts is very small, but it wants to be fixed
+for correctness' sake.
+
+Use the proper low level functions for pmd/pud_mknotpresent() to address
+this.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 3.16:
+ - Drop change to pud_mknotpresent()
+ - pmd_mknotpresent() does not touch _PAGE_NONE]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable.h | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -308,11 +308,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pm
+ return pmd_set_flags(pmd, _PAGE_RW);
+ }
+
+-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+-{
+- return pmd_clear_flags(pmd, _PAGE_PRESENT);
+-}
+-
+ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+ {
+ pudval_t v = native_pud_val(pud);
+@@ -393,6 +388,12 @@ static inline pud_t pfn_pud(unsigned lon
+ return __pud(pfn | massage_pgprot(pgprot));
+ }
+
++static inline pmd_t pmd_mknotpresent(pmd_t pmd)
++{
++ return pfn_pmd(pmd_pfn(pmd),
++ __pgprot(pmd_flags(pmd) & ~_PAGE_PRESENT));
++}
++
+ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
diff --git a/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch b/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
new file mode 100644
index 00000000..d5f661e6
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-make-sure-the-first-page-is-always-reserved.patch
@@ -0,0 +1,39 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:25 -0700
+Subject: x86/speculation/l1tf: Make sure the first page is always reserved
+
+commit 10a70416e1f067f6c4efda6ffd8ea96002ac4223 upstream.
+
+The L1TF workaround doesn't make any attempt to mitigate speculate accesses
+to the first physical page for zeroed PTEs. Normally it only contains some
+data from the early real mode BIOS.
+
+It's not entirely clear that the first page is reserved in all
+configurations, so add an extra reservation call to make sure it is really
+reserved. In most configurations (e.g. with the standard reservations)
+it's likely a nop.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/setup.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -860,6 +860,12 @@ void __init setup_arch(char **cmdline_p)
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__bss_stop - (unsigned long)_text);
+
++ /*
++ * Make sure page 0 is always reserved because on systems with
++ * L1TF its contents can be leaked to user processes.
++ */
++ memblock_reserve(0, PAGE_SIZE);
++
+ early_reserve_initrd();
+
+ /*
diff --git a/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch b/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
new file mode 100644
index 00000000..580794a8
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-pae-swap-entries-against-l1tf.patch
@@ -0,0 +1,88 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Fri, 22 Jun 2018 17:39:33 +0200
+Subject: x86/speculation/l1tf: Protect PAE swap entries against L1TF
+
+commit 0d0f6249058834ffe1ceaad0bb31464af66f6e7a upstream.
+
+The PAE 3-level paging code currently doesn't mitigate L1TF by flipping the
+offset bits, and uses the high PTE word, thus bits 32-36 for type, 37-63 for
+offset. The lower word is zeroed, thus systems with less than 4GB memory are
+safe. With 4GB to 128GB the swap type selects the memory locations vulnerable
+to L1TF; with even more memory, the swap offset also influences the address.
+This might be a problem with 32bit PAE guests running on large 64bit hosts.
+
+By continuing to keep the whole swap entry in either high or low 32bit word of
+PTE we would limit the swap size too much. Thus this patch uses the whole PAE
+PTE with the same layout as the 64bit version does. The macros just become a
+bit tricky since they assume the arch-dependent swp_entry_t to be 32bit.
+
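+A standalone round-trip of the new encoding (a sketch mirroring the macros
+below, not the kernel code; the type/offset values are arbitrary examples)
+shows that the layout matches the 64bit one and keeps the high bits set:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define SWP_TYPE_BITS         5
+  #define SWP_OFFSET_FIRST_BIT  9   /* _PAGE_BIT_PROTNONE + 1 on x86 */
+  #define SWP_OFFSET_SHIFT      (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
+
+  /* Same shape as __swp_pteval_entry() from this patch. */
+  static uint64_t encode(uint64_t type, uint64_t offset)
+  {
+          return (~offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS)
+               | (type << (64 - SWP_TYPE_BITS));
+  }
+
+  int main(void)
+  {
+          uint64_t pte = encode(1, 0x1000);  /* swap type 1, offset 0x1000 */
+          uint64_t type = pte >> (64 - SWP_TYPE_BITS);
+          uint64_t off = ~pte << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT;
+
+          printf("pte    = %#llx\n", (unsigned long long)pte);
+          printf("type   = %llu\n", (unsigned long long)type);
+          printf("offset = %#llx\n", (unsigned long long)off);
+          /* The unused offset bits come out inverted (set), so the entry
+           * never looks like a cacheable physical address to L1TF. */
+          return 0;
+  }
+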
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+[bwh: Backported to 3.16: CONFIG_PGTABLE_LEVELS is not defined; use other
+ config symbols in the condition.]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-3level.h | 35 +++++++++++++++++++++++++--
+ arch/x86/mm/init.c | 2 +-
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -177,12 +177,43 @@ static inline pmd_t native_pmdp_get_and_
+ #endif
+
+ /* Encode and de-code a swap entry */
++#define SWP_TYPE_BITS 5
++
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
++
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
+ #define __swp_type(x) (((x).val) & 0x1f)
+ #define __swp_offset(x) ((x).val >> 5)
+ #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
+-#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
+-#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
++
++/*
++ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
++ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
++ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
++ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
++ * __swp_entry_to_pte() through the following helper macro based on 64bit
++ * __swp_entry().
++ */
++#define __swp_pteval_entry(type, offset) ((pteval_t) { \
++ (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
++
++#define __swp_entry_to_pte(x) ((pte_t){ .pte = \
++ __swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
++/*
++ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
++ * swp_entry_t, but also has to convert it from 64bit to the 32bit
++ * intermediate representation, using the following macros based on 64bit
++ * __swp_type() and __swp_offset().
++ */
++#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
++#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
++
++#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
++ __pteval_swp_offset(pte)))
+
+ #include <asm/pgtable-invert.h>
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -715,7 +715,7 @@ unsigned long max_swapfile_size(void)
+ * We encode swap offsets also with 3 bits below those for pfn
+ * which makes the usable limit higher.
+ */
+-#ifdef CONFIG_X86_64
++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
+ #endif
+ pages = min_t(unsigned long, l1tf_limit, pages);
diff --git a/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch b/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
new file mode 100644
index 00000000..23a73c64
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-prot_none-ptes-against-speculation.patch
@@ -0,0 +1,254 @@
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 13 Jun 2018 15:48:24 -0700
+Subject: x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation
+
+commit 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c upstream.
+
+When PTEs are set to PROT_NONE the kernel just clears the Present bit and
+preserves the PFN, which creates an attack surface for L1TF speculation
+attacks.
+
+This is important inside guests, because L1TF speculation bypasses physical
+page remapping. While the host has its own mitigations preventing leaking
+data from other VMs into the guest, this would still risk leaking the wrong
+page inside the current guest.
+
+This uses the same technique as Linus' swap entry patch: while an entry
+is in PROTNONE state, invert the complete PFN part of it. This ensures
+that the highest bit will point to non-existing memory.
+
+The inversion is done by pte/pmd_modify and pfn/pmd/pud_pte for PROTNONE,
+and pte/pmd/pud_pfn undo it.
+
+This assumes that no code path touches the PFN part of a PTE directly
+without using these primitives.
+
+This doesn't handle the case where MMIO is at the top of the CPU physical
+memory. If such an MMIO region were exposed by an unprivileged driver for
+mmap, it would be possible to attack some real memory. However, this
+situation is rather unlikely.
+
+For 32bit non PAE the inversion is not done because there are really not
+enough bits to protect anything.
+
+Q: Why does the guest need to be protected when the hypervisor already has
+ L1TF mitigations?
+
+A: Here's an example:
+
+ Physical pages 1 and 2 get mapped into a guest as
+ GPA 1 -> PA 2
+ GPA 2 -> PA 1
+ through EPT.
+
+ The L1TF speculation ignores the EPT remapping.
+
+ Now the guest kernel maps GPA 1 to process A and GPA 2 to process B, and
+ they belong to different users and should be isolated.
+
+ A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping and
+ gets read access to the underlying physical page, which in this case
+ points to PA 2. It can therefore read process B's data if it happened to
+ be in L1, so isolation inside the guest is broken.
+
+ There's nothing the hypervisor can do about this. This mitigation has to
+ be done in the guest itself.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16:
+ - s/check_pgprot/massage_pgprot/
+ - Keep using PTE_PFN_MASK to extract PFN from pmd_pfn() and pud_pfn(),
+ as we don't need to worry about the PAT bit being set here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable-2level.h | 17 +++++++++++
+ arch/x86/include/asm/pgtable-3level.h | 2 ++
+ arch/x86/include/asm/pgtable-invert.h | 32 +++++++++++++++++++
+ arch/x86/include/asm/pgtable.h | 44 +++++++++++++++++++--------
+ arch/x86/include/asm/pgtable_64.h | 2 ++
+ 5 files changed, 84 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/include/asm/pgtable-invert.h
+
+--- a/arch/x86/include/asm/pgtable-2level.h
++++ b/arch/x86/include/asm/pgtable-2level.h
+@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(un
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
++/* No inverted PFNs on 2 level page tables */
++
++static inline u64 protnone_mask(u64 val)
++{
++ return 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ return val;
++}
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return false;
++}
++
+ #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -184,4 +184,6 @@ static inline pmd_t native_pmdp_get_and_
+ #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
+ #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
+
++#include <asm/pgtable-invert.h>
++
+ #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
+--- /dev/null
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_PGTABLE_INVERT_H
++#define _ASM_PGTABLE_INVERT_H 1
++
++#ifndef __ASSEMBLY__
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
++}
++
++/* Get a mask to xor with the page table entry to get the correct pfn. */
++static inline u64 protnone_mask(u64 val)
++{
++ return __pte_needs_invert(val) ? ~0ull : 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ /*
++ * When a PTE transitions from NONE to !NONE or vice-versa
++ * invert the PFN part to stop speculation.
++ * pte_pfn undoes this when needed.
++ */
++ if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
++ val = (val & ~mask) | (~val & mask);
++ return val;
++}
++
++#endif /* __ASSEMBLY__ */
++
++#endif
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -141,19 +141,29 @@ static inline int pte_special(pte_t pte)
+ (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_PROTNONE));
+ }
+
++/* Entries that were set to PROT_NONE are inverted */
++
++static inline u64 protnone_mask(u64 val);
++
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+- return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
++ unsigned long pfn = pte_val(pte);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
++ unsigned long pfn = pmd_val(pmd);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+- return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
++ unsigned long pfn = pud_val(pud);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ #define pte_page(pte) pfn_to_page(pte_pfn(pte))
+@@ -361,25 +371,33 @@ static inline pgprotval_t massage_pgprot
+
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PTE_PFN_MASK;
++ return __pte(pfn | massage_pgprot(pgprot));
+ }
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PTE_PFN_MASK;
++ return __pmd(pfn | massage_pgprot(pgprot));
+ }
+
+ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PTE_PFN_MASK;
++ return __pud(pfn | massage_pgprot(pgprot));
+ }
+
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+- pteval_t val = pte_val(pte);
++ pteval_t val = pte_val(pte), oldval = val;
+
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+@@ -387,17 +405,17 @@ static inline pte_t pte_modify(pte_t pte
+ */
+ val &= _PAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ return __pte(val);
+ }
+
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+- pmdval_t val = pmd_val(pmd);
++ pmdval_t val = pmd_val(pmd), oldval = val;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ return __pmd(val);
+ }
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -239,6 +239,8 @@ extern void cleanup_highmap(void);
+ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
++#include <asm/pgtable-invert.h>
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_X86_PGTABLE_64_H */
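The inversion scheme in pgtable-invert.h can likewise be modelled in user
space. This is a sketch, not kernel code: the flag positions (_PAGE_PRESENT
at bit 0, _PAGE_PROTNONE at bit 8) and the PTE_PFN_MASK value are assumptions
chosen for the demo, and the PROT_NONE transition is simulated by hand rather
than going through pte_modify().

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT  (1ULL << 0)
#define _PAGE_PROTNONE (1ULL << 8)	/* assumed: aliases the Global bit */
#define PTE_PFN_MASK   0x000ffffffffff000ULL
#define PAGE_SHIFT     12

static bool pte_needs_invert(uint64_t val)
{
	return (val & (_PAGE_PRESENT | _PAGE_PROTNONE)) == _PAGE_PROTNONE;
}

/* Mask to XOR with the entry so the stored PFN points at nonexistent memory. */
static uint64_t protnone_mask(uint64_t val)
{
	return pte_needs_invert(val) ? ~0ULL : 0;
}

/* Flip the PFN bits only when the entry crosses the NONE/!NONE boundary. */
static uint64_t flip_protnone_guard(uint64_t oldval, uint64_t val, uint64_t mask)
{
	if (pte_needs_invert(oldval) != pte_needs_invert(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

/* Readers undo the inversion transparently, as pte_pfn() does in the patch. */
static uint64_t pte_pfn(uint64_t pte)
{
	uint64_t v = pte ^ protnone_mask(pte);
	return (v & PTE_PFN_MASK) >> PAGE_SHIFT;
}

int main(void)
{
	uint64_t pfn = 0x1234;
	uint64_t present = (pfn << PAGE_SHIFT) | _PAGE_PRESENT;

	/* mprotect(PROT_NONE): clear Present, set PROTNONE, invert the PFN. */
	uint64_t none = (present & ~_PAGE_PRESENT) | _PAGE_PROTNONE;
	none = flip_protnone_guard(present, none, PTE_PFN_MASK);

	/* The raw PFN field no longer points at the original page ... */
	assert(((none & PTE_PFN_MASK) >> PAGE_SHIFT) != pfn);
	/* ... but pte_pfn() still recovers it. */
	assert(pte_pfn(none) == pfn);
	printf("present=%#llx none=%#llx pfn=%#llx\n",
	       (unsigned long long)present, (unsigned long long)none,
	       (unsigned long long)pte_pfn(none));
	return 0;
}

Only transitions across the NONE/!NONE boundary flip the PFN bits; repeated
modifications that stay on one side leave the stored value alone.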
diff --git a/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch b/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
new file mode 100644
index 00000000..68ea59b8
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-protect-swap-entries-against-l1tf.patch
@@ -0,0 +1,81 @@
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 13 Jun 2018 15:48:23 -0700
+Subject: x86/speculation/l1tf: Protect swap entries against L1TF
+
+commit 2f22b4cd45b67b3496f4aa4c7180a1271c6452f6 upstream.
+
+With L1 terminal fault the CPU speculates into unmapped PTEs, and the
+resulting side effects allow reading the memory the PTE is pointing to, if
+its contents are still in the L1 cache.
+
+For swapped out pages Linux uses unmapped PTEs and stores a swap entry into
+them.
+
+To protect against L1TF it must be ensured that the swap entry is not
+pointing to valid memory, which requires setting higher bits (between bit
+36 and bit 45) that are inside the CPU's physical address space, but outside
+any real memory.
+
+To do this, invert the offset to make sure the higher bits are always set,
+as long as the swap file is not too big.
+
+Note there is no workaround for 32bit !PAE, or on systems which have more
+than MAX_PA/2 worth of memory. The latter case is very unlikely to happen on
+real systems.
+
+[AK: updated description and minor tweaks. Split out from the original
+ patch ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+[bwh: Backported to 3.16: Bit 9 may be reserved for PAGE_BIT_NUMA here]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/include/asm/pgtable_64.h | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -167,7 +167,7 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+- * | TYPE (59-63) | OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
++ * | TYPE (59-63) | ~OFFSET (10-58) | 0 |0|0|X|X| X| X|X|SD|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes. We need to start storing swap entries above
+@@ -180,6 +180,9 @@ static inline int pgd_large(pgd_t pgd) {
+ *
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
++ *
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+ */
+ #define SWP_TYPE_BITS 5
+
+@@ -199,13 +202,15 @@ static inline int pgd_large(pgd_t pgd) {
+ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+
+ /* Shift up (to get rid of type), then down to get value */
+-#define __swp_offset(x) ((x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+
+ /*
+ * Shift the offset up "too far" by TYPE bits, then down again
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
+ */
+ #define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
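The effect of storing ~OFFSET instead of OFFSET is easy to demonstrate with
a user-space model of __swp_entry()/__swp_offset(). This is a sketch under
assumptions: SWP_TYPE_BITS = 5 and SWP_OFFSET_SHIFT = 14 follow the layout in
the comment above (per the backport note, the offset may start one bit higher
when bit 9 is reserved for _PAGE_BIT_NUMA).

#include <stdint.h>
#include <stdio.h>

#define SWP_TYPE_BITS    5
#define SWP_OFFSET_SHIFT 14	/* assumed: offset field starts at bit 9 */

static uint64_t swp_entry(unsigned int type, uint64_t offset)
{
	/* Shift the inverted offset up "too far" by TYPE bits, then down. */
	return (~offset << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) |
	       ((uint64_t)type << (64 - SWP_TYPE_BITS));
}

static uint64_t swp_offset(uint64_t entry)
{
	return ~entry << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT;
}

int main(void)
{
	/* Last slot of a 16 GiB swap device: the offset fits in 22 bits. */
	uint64_t offset = ((16ULL << 30) >> 12) - 1;
	uint64_t entry = swp_entry(1, offset);

	/* After inversion the unused upper offset bits are all ones, so the
	 * value L1TF could speculate on lies far above any real memory. */
	printf("entry  = %#018llx\n", (unsigned long long)entry);
	printf("offset = %#llx (%s)\n", (unsigned long long)swp_offset(entry),
	       swp_offset(entry) == offset ? "round-trip ok" : "BROKEN");
	return 0;
}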
diff --git a/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch b/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
new file mode 100644
index 00000000..34c4327d
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-suggest-what-to-do-on-systems-with-too-much-ram.patch
@@ -0,0 +1,41 @@
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 23 Aug 2018 16:21:29 +0200
+Subject: x86/speculation/l1tf: Suggest what to do on systems with too much RAM
+
+commit 6a012288d6906fee1dbc244050ade1dafe4a9c8d upstream.
+
+Two users have reported [1] that they have an "extremely unlikely" system
+with more than MAX_PA/2 memory, on which the L1TF mitigation is not effective.
+
+Make the warning more helpful by suggesting the proper mem=X kernel boot
+parameter to make the mitigation effective, and by pointing to the L1TF
+document to help decide whether the mitigation is worth the unusable RAM.
+
+[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536
+
+Suggested-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Link: https://lkml.kernel.org/r/966571f0-9d7f-43dc-92c6-a10eec7a1254@suse.cz
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -717,6 +717,10 @@ static void __init l1tf_select_mitigatio
+ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
+ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
++ half_pa);
++ pr_info("However, doing so will make a part of your RAM unusable.\n");
++ pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
+ return;
+ }
+
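For reference, the suggested mem= value is just half of the CPU's physical
address space in bytes: half_pa = l1tf_pfn_limit() << PAGE_SHIFT. The sketch
below models that arithmetic; the l1tf_pfn_limit() formula here is an
assumption based on the rest of the L1TF series, not the real kernel function.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Assumed formula: half of the physical address space, in page frames. */
static uint64_t l1tf_pfn_limit(unsigned int x86_phys_bits)
{
	return 1ULL << (x86_phys_bits - 1 - PAGE_SHIFT);
}

int main(void)
{
	unsigned int phys_bits = 46;	/* a common MAX_PA on recent CPUs */
	uint64_t half_pa = l1tf_pfn_limit(phys_bits) << PAGE_SHIFT;

	/* With MAX_PA = 46 this prints mem=35184372088832, i.e. 32 TiB. */
	printf("mem=%llu\n", (unsigned long long)half_pa);
	return 0;
}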
diff --git a/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch b/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
new file mode 100644
index 00000000..f8995787
--- /dev/null
+++ b/queue-3.16/x86-speculation-l1tf-unbreak-__have_arch_pfn_modify_allowed.patch
@@ -0,0 +1,63 @@
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Sat, 14 Jul 2018 21:56:13 +0200
+Subject: x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED
+ architectures
+
+commit 8f2adf3d2118cc0822b83a7bb43475f9149a1d26 upstream.
+
+commit 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 upstream.
+
+pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the
+!__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses the
+assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g.
+ia64) and breaks the build:
+
+ include/asm-generic/pgtable.h: Assembler messages:
+ include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)'
+ include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true'
+ include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)'
+ include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false'
+ arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle
+
+Move those two static inlines into the !__ASSEMBLY__ section so that they
+don't confuse the asm build pass.
+
+Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings")
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[groeck: Context changes]
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/asm-generic/pgtable.h | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -806,12 +806,6 @@ static inline void pmdp_set_numa(struct
+
+ #endif /* CONFIG_MMU */
+
+-#endif /* !__ASSEMBLY__ */
+-
+-#ifndef io_remap_pfn_range
+-#define io_remap_pfn_range remap_pfn_range
+-#endif
+-
+ #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+ static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+ {
+@@ -822,6 +816,12 @@ static inline bool arch_has_pfn_modify_c
+ {
+ return false;
+ }
++#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
++
++#endif /* !__ASSEMBLY__ */
++
++#ifndef io_remap_pfn_range
++#define io_remap_pfn_range remap_pfn_range
+ #endif
+
+ #endif /* _ASM_GENERIC_PGTABLE_H */
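The underlying rule the fix restores: anything that is C (types, static
inlines) must sit inside the !__ASSEMBLY__ block, while plain macros such as
io_remap_pfn_range may stay visible to assembly includes. A minimal
stand-alone header sketch of that layout, with a hypothetical guard name,
follows.

#ifndef _EXAMPLE_PGTABLE_H
#define _EXAMPLE_PGTABLE_H

#ifndef __ASSEMBLY__

#include <stdbool.h>

typedef unsigned long pgprot_t;	/* stand-in type for the sketch */

/* C-only helpers: the assembler pass never sees these. */
static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
{
	return true;
}

static inline bool arch_has_pfn_modify_check(void)
{
	return false;
}

#endif /* !__ASSEMBLY__ */

/* Plain macros remain usable from both C and assembly includes. */
#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#endif /* _EXAMPLE_PGTABLE_H */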