-rw-r--r--  arch/ppc64/mm/hash_utils.c     |   2
-rw-r--r--  arch/ppc64/mm/hugetlbpage.c    | 187
-rw-r--r--  arch/ppc64/mm/imalloc.c        |   2
-rw-r--r--  arch/ppc64/mm/init.c           |  62
-rw-r--r--  arch/ppc64/mm/slb_low.S        |   2
-rw-r--r--  arch/ppc64/mm/tlb.c            |  95
-rw-r--r--  include/asm-ppc64/imalloc.h    |   2
-rw-r--r--  include/asm-ppc64/mmu.h        |   7
-rw-r--r--  include/asm-ppc64/page.h       |  26
-rw-r--r--  include/asm-ppc64/pgalloc.h    |  93
-rw-r--r--  include/asm-ppc64/pgtable.h    |  90
-rw-r--r--  include/asm-ppc64/processor.h  |   4
12 files changed, 294 insertions, 278 deletions
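Taken together: this patch moves ppc64 from three-level pagetables (9/10/10 index bits over 32-bit entries, 41 bits of user address space) to a four-level layout (9/7/7/9 over 64-bit entries), growing the range covered by the pagetables to 44 bits. A quick userspace check of the new geometry, using the index sizes from the pgtable.h hunk below (illustrative arithmetic only, not kernel code):

    #include <stdio.h>

    /* Index sizes from the new include/asm-ppc64/pgtable.h */
    #define PAGE_SHIFT     12
    #define PTE_INDEX_SIZE  9
    #define PMD_INDEX_SIZE  7
    #define PUD_INDEX_SIZE  7
    #define PGD_INDEX_SIZE  9

    int main(void)
    {
        int bits = PAGE_SHIFT + PTE_INDEX_SIZE + PMD_INDEX_SIZE
                 + PUD_INDEX_SIZE + PGD_INDEX_SIZE;

        /* 12 + 9 + 7 + 7 + 9 = 44 bits -> 16TB per region */
        printf("PGTABLE_EADDR_SIZE = %d bits, PGTABLE_RANGE = %lu TB\n",
               bits, (1UL << bits) >> 40);
        return 0;
    }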
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 623b5d130c319..65d6e85279481 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int local = 0;
cpumask_t tmp;
- if ((ea & ~REGION_MASK) > EADDR_MASK)
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
return 1;
switch (REGION_ID(ea)) {
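The comparison flips from > to >= because the old EADDR_MASK was the highest valid offset within a region (inclusive), while the new PGTABLE_RANGE is the size of the mapped range (exclusive). A minimal sketch of the new test, assuming the usual ppc64 REGION_MASK (top nibble of the effective address; defined outside this patch):

    #include <assert.h>

    #define REGION_MASK   0xf000000000000000UL  /* top nibble selects the region */
    #define PGTABLE_RANGE (1UL << 44)           /* new, exclusive bound */

    static int hash_page_rejects(unsigned long ea)
    {
        return (ea & ~REGION_MASK) >= PGTABLE_RANGE;  /* the new test */
    }

    int main(void)
    {
        unsigned long kernel = 0xc000000000000000UL;

        assert(!hash_page_rejects(kernel | (PGTABLE_RANGE - 1))); /* last mapped */
        assert(hash_page_rejects(kernel | PGTABLE_RANGE));        /* first unmapped */
        return 0;
    }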
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index f9524602818da..a13e44230a6f7 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -27,124 +27,91 @@
#include <linux/sysctl.h>
-#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1))
-
-#define HUGEPTE_INDEX_SIZE 9
-#define HUGEPGD_INDEX_SIZE 10
-
-#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
-{
- return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
-
-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
- int index;
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt;
- if (! mm->context.huge_pgdir)
- return NULL;
+ BUG_ON(! in_hugepage_area(mm->context, addr));
+ addr &= HPAGE_MASK;
+
+ pg = pgd_offset(mm, addr);
+ if (!pgd_none(*pg)) {
+ pu = pud_offset(pg, addr);
+ if (!pud_none(*pu)) {
+ pm = pmd_offset(pu, addr);
+ pt = (pte_t *)pm;
+ BUG_ON(!pmd_none(*pm)
+ && !(pte_present(*pt) && pte_huge(*pt)));
+ return pt;
+ }
+ }
- index = hugepgd_index(addr);
- BUG_ON(index >= PTRS_PER_HUGEPGD);
- return (pud_t *)(mm->context.huge_pgdir + index);
+ return NULL;
}
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
- int index;
-
- if (pud_none(*dir))
- return NULL;
-
- index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
- return (pte_t *)pud_page(*dir) + index;
-}
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt;
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
BUG_ON(! in_hugepage_area(mm->context, addr));
- if (! mm->context.huge_pgdir) {
- pgd_t *new;
- spin_unlock(&mm->page_table_lock);
- /* Don't use pgd_alloc(), because we want __GFP_REPEAT */
- new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
- BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
- spin_lock(&mm->page_table_lock);
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (mm->context.huge_pgdir)
- pgd_free(new);
- else
- mm->context.huge_pgdir = new;
- }
- return hugepgd_offset(mm, addr);
-}
+ addr &= HPAGE_MASK;
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
- if (! pud_present(*dir)) {
- pte_t *new;
+ pg = pgd_offset(mm, addr);
+ pu = pud_alloc(mm, pg, addr);
- spin_unlock(&mm->page_table_lock);
- new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
- BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
- spin_lock(&mm->page_table_lock);
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pud_present(*dir)) {
- if (new)
- kmem_cache_free(zero_cache, new);
- } else {
- struct page *ptepage;
-
- if (! new)
- return NULL;
- ptepage = virt_to_page(new);
- ptepage->mapping = (void *) mm;
- ptepage->index = addr & HUGEPGDIR_MASK;
- pud_populate(mm, dir, new);
+ if (pu) {
+ pm = pmd_alloc(mm, pu, addr);
+ if (pm) {
+ pt = (pte_t *)pm;
+ BUG_ON(!pmd_none(*pm)
+ && !(pte_present(*pt) && pte_huge(*pt)));
+ return pt;
}
}
- return hugepte_offset(dir, addr);
+ return NULL;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
- pud_t *pud;
+#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
- BUG_ON(! in_hugepage_area(mm->context, addr));
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ int i;
- pud = hugepgd_offset(mm, addr);
- if (! pud)
- return NULL;
+ if (pte_present(*ptep)) {
+ pte_clear(mm, addr, ptep);
+ flush_tlb_pending();
+ }
- return hugepte_offset(pud, addr);
+ for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+ ptep++;
+ }
}
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- pud_t *pud;
+ unsigned long old = pte_update(ptep, ~0UL);
+ int i;
- BUG_ON(! in_hugepage_area(mm->context, addr));
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr, old, 0);
- pud = hugepgd_alloc(mm, addr);
- if (! pud)
- return NULL;
+ for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+ ptep[i] = __pte(0);
- return hugepte_alloc(mm, pud, addr);
+ return __pte(old);
}
/*
@@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
}
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
- int i;
- pgd_t *pgdir;
-
- spin_lock(&mm->page_table_lock);
-
- pgdir = mm->context.huge_pgdir;
- if (! pgdir)
- goto out;
-
- mm->context.huge_pgdir = NULL;
-
- /* cleanup any hugepte pages leftover */
- for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
- pud_t *pud = (pud_t *)(pgdir + i);
-
- if (! pud_none(*pud)) {
- pte_t *pte = (pte_t *)pud_page(*pud);
- struct page *ptepage = virt_to_page(pte);
-
- ptepage->mapping = NULL;
-
- BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
- kmem_cache_free(zero_cache, pte);
- }
- pud_clear(pud);
- }
-
- BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
- kmem_cache_free(zero_cache, pgdir);
-
- out:
- spin_unlock(&mm->page_table_lock);
-}
-
int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
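With the hugepage-specific two-level tables gone, huge PTEs now live in the normal pagetables at the PMD level. Since a 16MB huge page spans several 2MB pmd slots, set_huge_pte_at() and huge_ptep_get_and_clear() operate on HUGEPTE_BATCH_SIZE consecutive entries. A sketch of the arithmetic, assuming ppc64's HPAGE_SHIFT of 24 (defined outside this patch):

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define HPAGE_SHIFT 24                       /* 16MB huge pages */
    #define PMD_SHIFT   (PAGE_SHIFT + 9)         /* 2MB per pmd slot */
    #define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
    #define PMD_SIZE    (1UL << PMD_SHIFT)
    #define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)

    int main(void)
    {
        /* One 16MB huge page covers 8 consecutive pmd entries, so the
         * huge-pte helpers set or clear 8 identical entries at a time. */
        printf("HUGEPTE_BATCH_SIZE = %lu\n", HUGEPTE_BATCH_SIZE);
        return 0;
    }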
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index b6e75b891ac0f..c65b87b927567 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
break;
if ((unsigned long)tmp->addr >= ioremap_bot)
addr = tmp->size + (unsigned long) tmp->addr;
- if (addr > IMALLOC_END-size)
+ if (addr >= IMALLOC_END-size)
return 1;
}
*im_addr = addr;
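The same inclusive-to-exclusive shift appears here: IMALLOC_END changes from VMALLOC_START + EADDR_MASK (last valid address) to VMALLOC_START + PGTABLE_RANGE (one past it), so the fit test becomes >=. A hypothetical byte-granularity sketch showing the two forms agree when the exclusive end is one past the inclusive end:

    #include <assert.h>

    static int fits_inclusive(unsigned long addr, unsigned long size,
                              unsigned long end_incl)
    {
        return !(addr > end_incl - size);   /* old test, inclusive END */
    }

    static int fits_exclusive(unsigned long addr, unsigned long size,
                              unsigned long end_excl)
    {
        return !(addr >= end_excl - size);  /* new test, exclusive END */
    }

    int main(void)
    {
        /* Same verdicts for the same region when end_excl = end_incl + 1. */
        assert(fits_inclusive(0x1eff, 0x100, 0x1fff) ==
               fits_exclusive(0x1eff, 0x100, 0x2000));
        assert(fits_inclusive(0x1f00, 0x100, 0x1fff) ==
               fits_exclusive(0x1f00, 0x100, 0x2000));
        return 0;
    }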
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index e58a24d42879d..87f256df8de55 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -66,6 +66,14 @@
#include <asm/vdso.h>
#include <asm/imalloc.h>
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
int mem_init_done;
unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
* Before that, we map using addresses going
* up from ioremap_bot. imalloc will use
* the addresses from ioremap_bot through
- * IMALLOC_END (0xE000001fffffffff)
+ * IMALLOC_END
*
*/
pa = addr & PAGE_MASK;
@@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
int index;
int err;
-#ifdef CONFIG_HUGETLB_PAGE
- /* We leave htlb_segs as it was, but for a fork, we need to
- * clear the huge_pgdir. */
- mm->context.huge_pgdir = NULL;
-#endif
-
again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
return -ENOMEM;
@@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm)
spin_unlock(&mmu_context_lock);
mm->context.id = NO_CONTEXT;
-
- hugetlb_mm_free_pgd(mm);
}
/*
@@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
return virt_addr;
}
-kmem_cache_t *zero_cache;
-
-static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
{
- memset(pte, 0, PAGE_SIZE);
+ memset(addr, 0, kmem_cache_size(cache));
}
+static const int pgtable_cache_size[2] = {
+ PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
void pgtable_cache_init(void)
{
- zero_cache = kmem_cache_create("zero",
- PAGE_SIZE,
- 0,
- SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
- zero_ctor,
- NULL);
- if (!zero_cache)
- panic("pgtable_cache_init(): could not create zero_cache!\n");
+ int i;
+
+ BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+ BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+ BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+ BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+ for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+ int size = pgtable_cache_size[i];
+ const char *name = pgtable_cache_name[i];
+
+ pgtable_cache[i] = kmem_cache_create(name,
+ size, size,
+ SLAB_HWCACHE_ALIGN
+ | SLAB_MUST_HWCACHE_ALIGN,
+ zero_ctor,
+ NULL);
+ if (! pgtable_cache[i])
+ panic("pgtable_cache_init(): could not create %s!\n",
+ name);
+ }
}
pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
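The single page-sized zero_cache becomes two slab caches because the four table types now come in only two distinct sizes, and zero_ctor() clears kmem_cache_size(cache) bytes rather than a fixed PAGE_SIZE. The arithmetic behind the cache sharing (sizes follow from the new pgtable.h; illustrative only):

    #include <stdio.h>

    /* All entry types are 8 bytes after the page.h changes. */
    #define PTE_TABLE_SIZE (8UL << 9)   /* 4096: a full page */
    #define PMD_TABLE_SIZE (8UL << 7)   /* 1024: sub-page    */
    #define PUD_TABLE_SIZE (8UL << 7)   /* 1024: same as PMD */
    #define PGD_TABLE_SIZE (8UL << 9)   /* 4096: same as PTE */

    int main(void)
    {
        /* Only two distinct sizes, so only two caches: PGD shares the
         * PTE cache (index 0) and PUD shares the PMD cache (index 1),
         * matching the 0/1/1/0 *_CACHE_NUM mapping in pgalloc.h. */
        printf("pte/pgd tables: %lu bytes, pmd/pud tables: %lu bytes\n",
               PTE_TABLE_SIZE, PMD_TABLE_SIZE);
        return 0;
    }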
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S
index 8379d678f70f8..f20fc52483a76 100644
--- a/arch/ppc64/mm/slb_low.S
+++ b/arch/ppc64/mm/slb_low.S
@@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
0: /* user address: proto-VSID = context<<15 | ESID */
li r11,SLB_VSID_USER
- srdi. r9,r3,13
+ srdi. r9,r3,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
#ifdef CONFIG_HUGETLB_PAGE
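The hard-coded 13 matched the old 41-bit user space (41 - SID_SHIFT); with USER_ESID_BITS now 16 the same shift-and-test covers the 44-bit range. A userspace rendering of what the srdi. performs, assuming ppc64's SID_SHIFT of 28 (256MB segments, defined outside this patch):

    #include <assert.h>

    #define SID_SHIFT      28
    #define USER_ESID_BITS 16

    static int valid_user_ea(unsigned long ea)
    {
        unsigned long esid = ea >> SID_SHIFT;
        /* Any ESID bits above USER_ESID_BITS mean the address is
         * outside the user range -- exactly what srdi. tests. */
        return (esid >> USER_ESID_BITS) == 0;
    }

    int main(void)
    {
        assert(valid_user_ea((1UL << 44) - 1));  /* top of the 16TB range */
        assert(!valid_user_ea(1UL << 44));       /* first invalid address */
        return 0;
    }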
diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c
index 26f0172c4527b..d8a6593a13f0a 100644
--- a/arch/ppc64/mm/tlb.c
+++ b/arch/ppc64/mm/tlb.c
@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+struct pte_freelist_batch
+{
+ struct rcu_head rcu;
+ unsigned int index;
+ pgtable_free_t tables[0];
+};
+
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+#define PTE_FREELIST_SIZE \
+ ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
+ / sizeof(pgtable_free_t))
+
+#ifdef CONFIG_SMP
+static void pte_free_smp_sync(void *arg)
+{
+ /* Do nothing, just ensure we sync with all CPUs */
+}
+#endif
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+static void pgtable_free_now(pgtable_free_t pgf)
+{
+ pte_freelist_forced_free++;
+
+ smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+ pgtable_free(pgf);
+}
+
+static void pte_free_rcu_callback(struct rcu_head *head)
+{
+ struct pte_freelist_batch *batch =
+ container_of(head, struct pte_freelist_batch, rcu);
+ unsigned int i;
+
+ for (i = 0; i < batch->index; i++)
+ pgtable_free(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+static void pte_free_submit(struct pte_freelist_batch *batch)
+{
+ INIT_RCU_HEAD(&batch->rcu);
+ call_rcu(&batch->rcu, pte_free_rcu_callback);
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
{
/* This is safe as we are holding page_table_lock */
cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
if (atomic_read(&tlb->mm->mm_users) < 2 ||
cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
- pte_free(ptepage);
+ pgtable_free(pgf);
return;
}
if (*batchp == NULL) {
*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
if (*batchp == NULL) {
- pte_free_now(ptepage);
+ pgtable_free_now(pgf);
return;
}
(*batchp)->index = 0;
}
- (*batchp)->pages[(*batchp)->index++] = ptepage;
+ (*batchp)->tables[(*batchp)->index++] = pgf;
if ((*batchp)->index == PTE_FREELIST_SIZE) {
pte_free_submit(*batchp);
*batchp = NULL;
@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
put_cpu();
}
-#ifdef CONFIG_SMP
-static void pte_free_smp_sync(void *arg)
-{
- /* Do nothing, just ensure we sync with all CPUs */
-}
-#endif
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-void pte_free_now(struct page *ptepage)
-{
- pte_freelist_forced_free++;
-
- smp_call_function(pte_free_smp_sync, NULL, 0, 1);
-
- pte_free(ptepage);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
- struct pte_freelist_batch *batch =
- container_of(head, struct pte_freelist_batch, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++)
- pte_free(batch->pages[i]);
- free_page((unsigned long)batch);
-}
-
-void pte_free_submit(struct pte_freelist_batch *batch)
-{
- INIT_RCU_HEAD(&batch->rcu);
- call_rcu(&batch->rcu, pte_free_rcu_callback);
-}
-
void pte_free_finish(void)
{
/* This is safe as we are holding page_table_lock */
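The freelist batch machinery moves up in the file and is generalized from struct page pointers to pgtable_free_t cookies; the struct itself becomes private to tlb.c. A sketch of the batch sizing, assuming 4K pages and a two-pointer stand-in for struct rcu_head:

    #include <stdio.h>

    typedef struct { unsigned long val; } pgtable_free_t;

    struct pte_freelist_batch {
        struct { void *next, *func; } rcu;  /* stand-in for struct rcu_head */
        unsigned int index;
        pgtable_free_t tables[];            /* fills the rest of the page */
    };

    #define PAGE_SIZE 4096UL
    #define PTE_FREELIST_SIZE \
        ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / sizeof(pgtable_free_t))

    int main(void)
    {
        /* One GFP_ATOMIC page holds the header plus as many queued
         * tables as fit; a full batch is handed to call_rcu(). */
        printf("entries per batch page: %lu\n", PTE_FREELIST_SIZE);
        return 0;
    }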
diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h
index e46ff68a6e418..42adf7033a810 100644
--- a/include/asm-ppc64/imalloc.h
+++ b/include/asm-ppc64/imalloc.h
@@ -6,7 +6,7 @@
*/
#define PHBS_IO_BASE VMALLOC_END
#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
-#define IMALLOC_END (VMALLOC_START + EADDR_MASK)
+#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
/* imalloc region types */
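With IMALLOC_END derived from PGTABLE_RANGE, the imalloc window now ends at a round power-of-two offset above VMALLOC_START. Computing the new bound (constants from pgtable.h; illustrative):

    #include <stdio.h>

    #define VMALLOC_START 0xD000000000000000UL
    #define PGTABLE_RANGE (1UL << 44)

    int main(void)
    {
        /* An exclusive bound, unlike the old inclusive EADDR_MASK form. */
        printf("IMALLOC_END = 0x%016lx\n", VMALLOC_START + PGTABLE_RANGE);
        return 0;
    }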
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index 70348a8513139..959a4bfdcd6a6 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -259,8 +259,10 @@ extern void stabs_alloc(void);
#define VSID_BITS 36
#define VSID_MODULUS ((1UL<<VSID_BITS)-1)
-#define CONTEXT_BITS 20
-#define USER_ESID_BITS 15
+#define CONTEXT_BITS 19
+#define USER_ESID_BITS 16
+
+#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT))
/*
* This macro generates asm code to compute the VSID scramble
@@ -302,7 +304,6 @@ typedef unsigned long mm_context_id_t;
typedef struct {
mm_context_id_t id;
#ifdef CONFIG_HUGETLB_PAGE
- pgd_t *huge_pgdir;
u16 htlb_segs; /* bitmask */
#endif
} mm_context_t;
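One bit moves from the context field to the ESID field, so the proto-VSID (context << USER_ESID_BITS | esid) still fits in VSID_BITS: half as many MMU contexts, but each context can now reach 16TB. Checking the arithmetic, assuming ppc64's SID_SHIFT of 28:

    #include <stdio.h>

    #define SID_SHIFT      28
    #define CONTEXT_BITS   19
    #define USER_ESID_BITS 16
    #define VSID_BITS      36

    int main(void)
    {
        unsigned long contexts   = 1UL << CONTEXT_BITS;
        unsigned long vsid_range = 1UL << (USER_ESID_BITS + SID_SHIFT);

        /* 512K contexts instead of 1M, 16TB per context instead of 8TB. */
        printf("contexts: %lu, USER_VSID_RANGE: %lu TB, proto-VSID: %d of %d bits\n",
               contexts, vsid_range >> 40,
               CONTEXT_BITS + USER_ESID_BITS, VSID_BITS);
        return 0;
    }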
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index a5893a305a09b..7e7b18ea986e5 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -46,6 +46,7 @@
#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define touches_hugepage_low_range(mm, addr, len) \
(LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs)
@@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
* Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
*/
typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned int pmd; } pmd_t;
-typedef struct { unsigned int pgd; } pgd_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd)
+#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
-#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
-#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pte(x) ((pte_t) { (x) })
+#define __pmd(x) ((pmd_t) { (x) })
+#define __pud(x) ((pud_t) { (x) })
+#define __pgd(x) ((pgd_t) { (x) })
+#define __pgprot(x) ((pgprot_t) { (x) })
#else
/*
* .. while these make it easier on the compiler
*/
typedef unsigned long pte_t;
-typedef unsigned int pmd_t;
-typedef unsigned int pgd_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
#define pmd_val(x) (x)
+#define pud_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
#define __pmd(x) (x)
+#define __pud(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
@@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */
#define USER_REGION_ID (0UL)
#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE)
-#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)
-
#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
#ifdef CONFIG_DISCONTIGMEM
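pmd_t, pud_t and pgd_t widen from 32 to 64 bits because entries now hold real kernel pointers; the old __bpn_to_ba/__ba_to_bpn packing only worked while every table was page aligned, and the new 1KB pmd/pud tables are not. A hypothetical round-trip through the old encoding, showing the alignment assumption it relied on (KERNELBASE and REGION_MASK as defined elsewhere in the ppc64 headers):

    #include <stdio.h>
    #include <stdint.h>

    #define KERNELBASE  0xC000000000000000UL
    #define REGION_MASK 0xF000000000000000UL
    #define PAGE_SHIFT  12

    static uint32_t ba_to_bpn(unsigned long ba)   /* old 32-bit encoding */
    {
        return (uint32_t)((ba & ~REGION_MASK) >> PAGE_SHIFT);
    }

    static unsigned long bpn_to_ba(uint32_t bpn)  /* old decoding */
    {
        return ((unsigned long)bpn << PAGE_SHIFT) + KERNELBASE;
    }

    int main(void)
    {
        unsigned long table = KERNELBASE + 0x12345000UL;

        /* Round-trips only for page-aligned kernel addresses; sub-page
         * tables break that, so entries become plain unsigned longs. */
        printf("bpn 0x%x -> ba 0x%lx\n", ba_to_bpn(table),
               bpn_to_ba(ba_to_bpn(table)));
        return 0;
    }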
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 4fc4b739b380e..26bc49c1108df 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -6,7 +6,12 @@
#include <linux/cpumask.h>
#include <linux/percpu.h>
-extern kmem_cache_t *zero_cache;
+extern kmem_cache_t *pgtable_cache[];
+
+#define PTE_CACHE_NUM 0
+#define PMD_CACHE_NUM 1
+#define PUD_CACHE_NUM 1
+#define PGD_CACHE_NUM 0
/*
* This program is free software; you can redistribute it and/or
@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache;
* 2 of the License, or (at your option) any later version.
*/
-static inline pgd_t *
-pgd_alloc(struct mm_struct *mm)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(zero_cache, GFP_KERNEL);
+ return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
}
-static inline void
-pgd_free(pgd_t *pgd)
+static inline void pgd_free(pgd_t *pgd)
{
- kmem_cache_free(zero_cache, pgd);
+ kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+}
+
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(pud_t *pud)
+{
+ kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
}
#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
-static inline pmd_t *
-pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+ return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
}
-static inline void
-pmd_free(pmd_t *pmd)
+static inline void pmd_free(pmd_t *pmd)
{
- kmem_cache_free(zero_cache, pmd);
+ kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
}
#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
@@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+ return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
- if (pte)
- return virt_to_page(pte);
- return NULL;
+ return virt_to_page(pte_alloc_one_kernel(mm, address));
}
static inline void pte_free_kernel(pte_t *pte)
{
- kmem_cache_free(zero_cache, pte);
+ kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
}
static inline void pte_free(struct page *ptepage)
{
- kmem_cache_free(zero_cache, page_address(ptepage));
+ pte_free_kernel(page_address(ptepage));
}
-struct pte_freelist_batch
+#define PGF_CACHENUM_MASK 0xf
+
+typedef struct pgtable_free {
+ unsigned long val;
+} pgtable_free_t;
+
+static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
+ unsigned long mask)
{
- struct rcu_head rcu;
- unsigned int index;
- struct page * pages[0];
-};
+ BUG_ON(cachenum > PGF_CACHENUM_MASK);
-#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
- sizeof(struct page *))
+ return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
+}
-extern void pte_free_now(struct page *ptepage);
-extern void pte_free_submit(struct pte_freelist_batch *batch);
+static inline void pgtable_free(pgtable_free_t pgf)
+{
+ void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
+ int cachenum = pgf.val & PGF_CACHENUM_MASK;
-DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+ kmem_cache_free(pgtable_cache[cachenum], p);
+}
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage);
-#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd))
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+
+#define __pte_free_tlb(tlb, ptepage) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
+ PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
+ PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#define __pud_free_tlb(tlb, pud) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
+ PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
#define check_pgt_cache() do { } while (0)
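pgtable_free_t is the trick that lets one RCU freelist serve all four table types: every table comes from a power-of-two-sized, size-aligned cache, so the pointer's low bits are free to carry the cache index. A minimal userspace sketch of the pack/unpack, using aligned_alloc as a stand-in for the slab caches:

    #include <assert.h>
    #include <stdlib.h>

    #define PGF_CACHENUM_MASK 0xf

    typedef struct { unsigned long val; } pgtable_free_t;

    static pgtable_free_t pack(void *p, int cachenum, unsigned long mask)
    {
        assert(cachenum <= PGF_CACHENUM_MASK);
        /* p is mask+1 aligned, so the masked-off bits were already zero. */
        return (pgtable_free_t){ .val = ((unsigned long)p & ~mask) | cachenum };
    }

    static void *unpack(pgtable_free_t pgf, int *cachenum)
    {
        *cachenum = pgf.val & PGF_CACHENUM_MASK;
        return (void *)(pgf.val & ~(unsigned long)PGF_CACHENUM_MASK);
    }

    int main(void)
    {
        void *table = aligned_alloc(1024, 1024);  /* e.g. a 1KB pmd table */
        int num;

        pgtable_free_t pgf = pack(table, 1 /* PMD_CACHE_NUM */, 1024 - 1);
        assert(unpack(pgf, &num) == table && num == 1);

        free(table);
        return 0;
    }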
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 46cf61c2ff69a..5ea952ad7164b 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,19 +15,24 @@
#include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */
-#include <asm-generic/pgtable-nopud.h>
-
/*
* Entries per page directory level. The PTE level must use a 64b record
* for each page table entry. The PMD and PGD level use a 32b record for
* each entry by assuming that each entry is page aligned.
*/
#define PTE_INDEX_SIZE 9
-#define PMD_INDEX_SIZE 10
-#define PGD_INDEX_SIZE 10
+#define PMD_INDEX_SIZE 7
+#define PUD_INDEX_SIZE 7
+#define PGD_INDEX_SIZE 9
+
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/* PMD_SHIFT determines what a second-level page table entry can map */
@@ -35,8 +40,13 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
@@ -45,15 +55,23 @@
/*
* Size of EA range mapped by our pagetables.
*/
-#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
- PGD_INDEX_SIZE + PAGE_SHIFT)
-#define EADDR_MASK ((1UL << EADDR_SIZE) - 1)
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
+
+#if TASK_SIZE_USER64 > PGTABLE_RANGE
+#error TASK_SIZE_USER64 exceeds pagetable range
+#endif
+
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#error TASK_SIZE_USER64 exceeds user VSID range
+#endif
/*
* Define the address range of the vmalloc VM area.
*/
#define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_SIZE (0x10000000000UL)
+#define VMALLOC_SIZE (0x80000000000UL)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
/*
@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#ifndef __ASSEMBLY__
int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local);
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm);
#endif /* __ASSEMBLY__ */
#define HAVE_ARCH_UNMAPPED_AREA
@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm);
#else
#define hash_huge_page(mm,a,ea,vsid,local) -1
-#define hugetlb_mm_free_pgd(mm) do {} while (0)
#endif
@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_set(pmdp, ptep) \
- (pmd_val(*(pmdp)) = __ba_to_bpn(ptep))
+#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (pmd_val(pmd) == 0)
#define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd)))
+#define pmd_page_kernel(pmd) (pmd_val(pmd))
#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
-#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp)))
+#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) ((pud_val(pud)) == 0UL)
-#define pud_present(pud) (pud_val(pud) != 0UL)
-#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-#define pud_page(pud) (__bpn_to_ba(pud_val(pud)))
+#define pud_bad(pud) ((pud_val(pud)) == 0)
+#define pud_present(pud) (pud_val(pud) != 0)
+#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
+#define pud_page(pud) (pud_val(pud))
+
+#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_bad(pgd) (pgd_val(pgd) == 0)
+#define pgd_present(pgd) (pgd_val(pgd) != 0)
+#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
+#define pgd_page(pgd) (pgd_val(pgd))
/*
* Find an entry in a page-table-directory. We combine the address region
* (the high order N bits) and the pgd portion of the address.
*/
/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff)
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-/* Find an entry in the second-level page table.. */
+#define pud_offset(pgdp, addr) \
+ (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
#define pmd_offset(pudp,addr) \
- ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+ (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
-/* Find an entry in the third-level page table.. */
#define pte_offset_kernel(dir,addr) \
- ((pte_t *) pmd_page_kernel(*(dir)) \
- + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+ (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
@@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
#define pmd_ERROR(e) \
- printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e))
+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e))
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
-/*
- * Because the huge pgtables are only 2 level, they can take
- * at most around 4M, much less than one hugepage which the
- * process is presumably entitled to use. So we don't bother
- * freeing up the pagetables on unmap, and wait until
- * destroy_context() to clean up the lot.
- */
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
- do { } while (0)
+ free_pgd_range(tlb, addr, end, floor, ceiling)
/*
* This gets called at the end of handling a page fault, when
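The shifts that fall out of the new index sizes: each pmd entry maps 2MB, each pud entry 256MB, each pgd entry 32GB. Decomposing an address the way the new offset macros do (constants from the pgtable.h hunk above; illustrative):

    #include <stdio.h>

    #define PAGE_SHIFT     12
    #define PTE_INDEX_SIZE  9
    #define PMD_INDEX_SIZE  7
    #define PUD_INDEX_SIZE  7
    #define PGD_INDEX_SIZE  9

    #define PMD_SHIFT   (PAGE_SHIFT + PTE_INDEX_SIZE)  /* 21 */
    #define PUD_SHIFT   (PMD_SHIFT + PMD_INDEX_SIZE)   /* 28 */
    #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)   /* 35 */

    int main(void)
    {
        unsigned long ea = 0x00000ABCDE012345UL;  /* any EA below 2^44 */

        printf("pgd %lu, pud %lu, pmd %lu, pte %lu, offset 0x%lx\n",
               (ea >> PGDIR_SHIFT) & ((1UL << PGD_INDEX_SIZE) - 1),
               (ea >> PUD_SHIFT)   & ((1UL << PUD_INDEX_SIZE) - 1),
               (ea >> PMD_SHIFT)   & ((1UL << PMD_INDEX_SIZE) - 1),
               (ea >> PAGE_SHIFT)  & ((1UL << PTE_INDEX_SIZE) - 1),
               ea & ((1UL << PAGE_SHIFT) - 1));
        return 0;
    }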
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h
index 352306cfb5799..50b14c0ddb870 100644
--- a/include/asm-ppc64/processor.h
+++ b/include/asm-ppc64/processor.h
@@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern struct task_struct *last_task_used_math;
extern struct task_struct *last_task_used_altivec;
-/* 64-bit user address space is 41-bits (2TBs user VM) */
-#define TASK_SIZE_USER64 (0x0000020000000000UL)
+/* 64-bit user address space is 44-bits (16TB user VM) */
+#define TASK_SIZE_USER64 (0x0000100000000000UL)
/*
* 32-bit user address space is 4GB - 1 page
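The new TASK_SIZE_USER64 is exactly the limit the two #if checks in pgtable.h enforce: it equals both the pagetable range and the user VSID range. A trivial consistency check:

    #include <assert.h>

    int main(void)
    {
        /* 44 bits: matches PGTABLE_RANGE, and USER_VSID_RANGE =
         * 2^16 ESIDs x 256MB segments. */
        assert(0x0000100000000000UL == 1UL << 44);
        assert((1UL << 44) == (1UL << 16) * (1UL << 28));
        return 0;
    }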