aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c532
-rw-r--r--arch/powerpc/mm/hugetlbpage.c134
-rw-r--r--arch/powerpc/mm/init_64.c18
-rw-r--r--arch/powerpc/mm/mem.c56
-rw-r--r--arch/powerpc/mm/pgtable_64.c22
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S220
-rw-r--r--arch/powerpc/mm/stab.c30
-rw-r--r--arch/powerpc/mm/tlb_64.c32
12 files changed, 1637 insertions, 514 deletions
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea7a..e0d02c4a2615f8 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
/*
* ppc64 MMU hashtable management routines
*
- * (c) Copyright IBM Corp. 2003
+ * (c) Copyright IBM Corp. 2003, 2005
*
* Maintained by: Benjamin Herrenschmidt
* <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
* described in the kernel's COPYING file.
*/
+#include <linux/config.h>
#include <asm/reg.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -42,14 +43,24 @@
/* Save non-volatile offsets */
#define STK_REG(i) (112 + ((i)-14)*8)
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+/*****************************************************************************
+ * *
+ * 4K SW & 4K HW pages implementation *
+ * *
+ *****************************************************************************/
+
+
/*
- * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local)
+ * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
*
- * Adds a page to the hash table. This is the non-LPAR version for now
+ * Adds a 4K page to the hash table in a segment of 4K pages only
*/
-_GLOBAL(__hash_page)
+_GLOBAL(__hash_page_4K)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
/* If so, just bail out and refault if needed. Someone else
* is changing this PTE anyway and might hash it.
*/
- bne- bail_ok
+ bne- htab_bail_ok
+
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
/* Convert linux PTE bits into HW equivalents */
andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
@@ -158,19 +170,21 @@ htab_insert_pte:
andc r30,r30,r0
ori r30,r30,_PAGE_HASHPTE
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate primary group hash */
and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* no vflags */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert1)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
/* Now try secondary slot */
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate secondary group hash */
andc r0,r27,r28
rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,HPTE_V_SECONDARY@l /* secondary slot */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert2)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
_GLOBAL(htab_call_hpte_remove)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* Try all again */
b htab_insert_pte
-bail_ok:
+htab_bail_ok:
li r3,0
- b bail
+ b htab_bail
htab_pte_insert_ok:
/* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
ld r6,STK_PARM(r6)(r1)
std r30,0(r6)
li r3, 0
-bail:
+htab_bail:
ld r27,STK_REG(r27)(r1)
ld r28,STK_REG(r28)(r1)
ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
/* Call ppc_md.hpte_updatepp */
mr r5,r29 /* va */
- li r6,0 /* large is 0 */
+ li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARM(r8)(r1) /* get "local" param */
_GLOBAL(htab_call_hpte_updatepp)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
/* Bail out clearing reservation */
stdcx. r31,0,r6
li r3,1
- b bail
+ b htab_bail
+
+htab_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b htab_bail
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
+ */
+
+/*
+ * For now, we do NOT implement Admixed pages
+ */
+_GLOBAL(__hash_page_4K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ * r26 is the hidx mask
+ * r25 is the index in combo page
+ */
+ std r25,STK_REG(r25)(r1)
+ std r26,STK_REG(r26)(r1)
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- htab_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- htab_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Load the hidx index */
+ rldicl r25,r3,64-12,60
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
+ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
+ or r29,r3,r29 /* r29 = va
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE. We look for
+ * the bit at (1 >> (index + 32))
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ li r26,0 /* Default hidx */
+ beq htab_insert_pte
+ ld r6,STK_PARM(r6)(r1)
+ ori r26,r6,0x8000 /* Load the hidx mask */
+ ld r26,0(r26)
+ addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
+ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
+ bne htab_modify_pte
+
+htab_insert_pte:
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b htab_insert_pte
+
+htab_bail_ok:
+ li r3,0
+ b htab_bail
+
+htab_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE second half,
+ * clear _PAGE_BUSY and set approriate HPTE slot bit
+ */
+ ld r6,STK_PARM(r6)(r1)
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ /* HPTE SUB bit */
+ li r0,1
+ subfic r5,r25,27 /* Must match bit position in */
+ sld r0,r0,r5 /* pgtable.h */
+ or r30,r30,r0
+ /* hindx */
+ sldi r5,r25,2
+ sld r3,r3,r5
+ li r4,0xf
+ sld r4,r4,r5
+ andc r26,r26,r4
+ or r26,r26,r3
+ ori r5,r6,0x8000
+ std r26,0(r5)
+ lwsync
+ std r30,0(r6)
+ li r3, 0
+htab_bail:
+ ld r25,STK_REG(r25)(r1)
+ ld r26,STK_REG(r26)(r1)
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+htab_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ sldi r5,r25,2
+ srd r3,r26,r5
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r3,0x8 /* page secondary ? */
+ beq 1f
+ not r5,r5
+1: andi. r3,r3,0x7 /* extract idx alone */
+
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_4K /* page size */
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- htab_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3,0
+ b htab_bail
+
+htab_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b htab_bail
htab_pte_insert_failure:
/* Bail out restoring old PTE */
ld r6,STK_PARM(r6)(r1)
std r31,0(r6)
li r3,-1
- b bail
+ b htab_bail
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 64K HW in a 64K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+_GLOBAL(__hash_page_64K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ */
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- ht64_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- ht64_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY,HASHPTE and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28
+ rldicl r3,r3,0,36
+ or r29,r3,r29
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ bne ht64_modify_pte
+
+ht64_insert_pte:
+ /* Clear hpte bits in new pte (we also clear BUSY btw) and
+ * add _PAGE_HASHPTE
+ */
+ lis r0,_PAGE_HPTEFLAGS@h
+ ori r0,r0,_PAGE_HPTEFLAGS@l
+ andc r30,r30,r0
+ ori r30,r30,_PAGE_HASHPTE
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(ht64_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b ht64_insert_pte
+
+ht64_bail_ok:
+ li r3,0
+ b ht64_bail
+
+ht64_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE */
+ rldimi r30,r3,12,63-15
+
+ /* Write out the PTE with a normal write
+ * (maybe add eieio may be good still ?)
+ */
+ht64_write_out_pte:
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3, 0
+ht64_bail:
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ht64_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ rlwinm r3,r31,32-12,29,31
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r31,_PAGE_F_SECOND
+ beq 1f
+ not r5,r5
+1:
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_64K
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(ht64_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- ht64_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ b ht64_write_out_pte
+
+ht64_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b ht64_bail
+
+ht64_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b ht64_bail
+
+
+#endif /* CONFIG_PPC_64K_PAGES */
+/*****************************************************************************
+ * *
+ * Huge pages implementation is in hugetlbpage.c *
+ * *
+ *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c2816..d96bcfe4c6f6c2 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG_LOW
+
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
@@ -22,11 +25,84 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
+static inline void __tlbie(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile("tlbie %0,0" : : "r" (va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile("tlbie %0,1" : : "r" (va) : "memory");
+ break;
+ }
+}
+
+static inline void __tlbiel(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ }
+
+}
+
+static inline void tlbie(unsigned long va, int psize, int local)
+{
+ unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
+ int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+
+ if (use_local)
+ use_local = mmu_psize_defs[psize].tlbiel;
+ if (lock_tlbie && !use_local)
+ spin_lock(&native_tlbie_lock);
+ asm volatile("ptesync": : :"memory");
+ if (use_local) {
+ __tlbiel(va, psize);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(va, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+ spin_unlock(&native_tlbie_lock);
+}
+
static inline void native_lock_hpte(hpte_t *hptep)
{
unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
hpte_t *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
int i;
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ " rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+ }
+
for (i = 0; i < HPTES_PER_GROUP; i++) {
if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- va &= ~(1UL << HPTE_V_AVPN_SHIFT);
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ i, hpte_v, hpte_r);
+ }
hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
int slot_offset;
unsigned long hpte_v;
+ DBG_LOW(" remove(group=%lx)\n", hpte_group);
+
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
-static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
{
- unsigned long old;
- unsigned long *p = &addr->r;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,61\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
+
+ want_v = hpte_encode_v(va, psize);
+
+ DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ va, want_v & HPTE_V_AVPN, slot, newpp);
+
+ native_lock_hpte(hptep);
+
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+ native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
+ native_unlock_hpte(hptep);
+ }
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+
+ return ret;
}
-/*
- * Only works on small pages. Yes its ugly to have to check each slot in
- * the group but we only use this during bootup.
- */
-static long native_hpte_find(unsigned long vpn)
+static long native_hpte_find(unsigned long va, int psize)
{
hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
hptep = htab_address + slot;
hpte_v = hptep->v;
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
return -1;
}
-static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
-{
- hpte_t *hptep = htab_address + slot;
- unsigned long hpte_v;
- unsigned long avpn = va >> 23;
- int ret = 0;
-
- if (large)
- avpn &= ~1;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- ret = -1;
- } else {
- set_pp_bit(newpp, hptep);
- native_unlock_hpte(hptep);
- }
-
- /* Ensure it is out of the tlb too */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
-
- return ret;
-}
-
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
- * Does not work on large pages.
*
* No need to lock here because we should be the only user.
*/
-static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
- unsigned long vsid, va, vpn, flags = 0;
+ unsigned long vsid, va;
long slot;
hpte_t *hptep;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = native_hpte_find(vpn);
+ slot = native_hpte_find(va, psize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
- set_pp_bit(newpp, hptep);
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
- /* Ensure it is out of the tlb too */
- if (lock_tlbie)
- spin_lock_irqsave(&native_tlbie_lock, flags);
- tlbie(va, 0);
- if (lock_tlbie)
- spin_unlock_irqrestore(&native_tlbie_lock, flags);
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
hpte_t *hptep = htab_address + slot;
unsigned long hpte_v;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long flags;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (large)
- avpn &= ~1;
local_irq_save(flags);
- native_lock_hpte(hptep);
+ DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- } else {
+ else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
- }
- /* Invalidate the tlb */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
+ /* Invalidate the TLB */
+ tlbie(va, psize, local);
+
local_irq_restore(flags);
}
/*
+ * XXX This need fixing based on page size. It's only used by
+ * native_hpte_clear() for now which needs fixing too so they
+ * make a good pair...
+ */
+static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+{
+ unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
+ unsigned long va;
+
+ va = avpn << 23;
+
+ if (! (hpte_v & HPTE_V_LARGE)) {
+ unsigned long vpi, pteg;
+
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+ va |= vpi << PAGE_SHIFT;
+ }
+
+ return va;
+}
+
+/*
* clear all mappings on kexec. All cpus are in real mode (or they will
* be when they isi), and we are the only one left. We rely on our kernel
* mapping being 0xC0's and the hardware ignoring those two real bits.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* athough there is the control page available...
+ *
+ * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hptep->v = 0;
- tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
}
}
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
local_irq_restore(flags);
}
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ */
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, vpn, hash, secondary, slot, flags, avpn;
- int i, j;
+ unsigned long va, hash, index, hidx, shift, slot;
hpte_t *hptep;
unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+ real_pte_t pte;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long large = batch->large;
+ unsigned long psize = batch->psize;
+ int i;
local_irq_save(flags);
- j = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[j];
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, large);
- secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
-
- hptep = htab_address + slot;
-
- avpn = va >> 23;
- if (large)
- avpn &= ~0x1UL;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- } else {
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
- }
-
- j++;
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ hptep = htab_address + slot;
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+ if (!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ hptep->v = 0;
+ } pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
+ if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbiel(batch->vaddr[i]);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbiel(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("ptesync":::"memory");
} else {
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
spin_lock(&native_tlbie_lock);
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbie(batch->vaddr[i], large);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbie(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02c84..b2f3dbca695223 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
*/
#undef DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/spinlock.h>
@@ -59,6 +60,15 @@
#define DBG(fmt...)
#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -77,91 +87,290 @@ extern unsigned long dart_tablebase;
hpte_t *htab_address;
unsigned long htab_hash_mask;
-
unsigned long _SDR1;
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+int mmu_linear_psize = MMU_PAGE_4K;
+int mmu_virtual_psize = MMU_PAGE_4K;
+#ifdef CONFIG_HUGETLB_PAGE
+int mmu_huge_psize = MMU_PAGE_16M;
+unsigned int HPAGE_SHIFT;
+#endif
-#define KB (1024)
-#define MB (1024*KB)
-
-static inline void loop_forever(void)
-{
- volatile unsigned long x = 1;
- for(;x;x|=1)
- ;
-}
+/* There are definitions of page sizes arrays to be used when none
+ * is provided by the firmware.
+ */
-static inline void create_pte_mapping(unsigned long start, unsigned long end,
- unsigned long mode, int large)
+/* Pre-POWER4 CPUs (4k pages only)
+ */
+struct mmu_psize_def mmu_psize_defaults_old[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/* POWER4, GPUL, POWER5
+ *
+ * Support for 16Mb large pages
+ */
+struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = 0,
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode, int psize)
{
- unsigned long addr;
- unsigned int step;
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
unsigned long tmp_mode;
- unsigned long vflags;
+ int ret = 0;
- if (large) {
- step = 16*MB;
- vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
- } else {
- step = 4*KB;
- vflags = HPTE_V_BOLTED;
- }
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
- for (addr = start; addr < end; addr += step) {
+ for (vaddr = vstart, paddr = pstart; vaddr < vend;
+ vaddr += step, paddr += step) {
unsigned long vpn, hash, hpteg;
- unsigned long vsid = get_kernel_vsid(addr);
- unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- int ret = -1;
-
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
-
+ unsigned long vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
- if (!in_kernel_text(addr))
- tmp_mode = mode | HW_NO_EXEC;
-
- hash = hpt_hash(vpn, large);
+ if (!in_kernel_text(vaddr))
+ tmp_mode = mode | HPTE_R_N;
+ hash = hpt_hash(va, shift);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ /* The crap below can be cleaned once ppd_md.probe() can
+ * set up the hash callbacks, thus we can just used the
+ * normal insert callback here.
+ */
#ifdef CONFIG_PPC_ISERIES
- if (systemcfg->platform & PLATFORM_ISERIES_LPAR)
- ret = iSeries_hpte_bolt_or_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
+ ret = iSeries_hpte_insert(hpteg, va,
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_PSERIES
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_MULTIPLATFORM
ret = native_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode, HPTE_V_BOLTED,
+ psize);
#endif
+ if (ret < 0)
+ break;
+ }
+ return ret < 0 ? ret : 0;
+}
- if (ret == -1) {
- ppc64_terminate_msg(0x20, "create_pte_mapping");
- loop_forever();
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+ unsigned long size = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node,
+ "ibm,segment-page-sizes", &size);
+ if (prop != NULL) {
+ DBG("Page sizes from device-tree:\n");
+ size /= 4;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+ while(size > 0) {
+ unsigned int shift = prop[0];
+ unsigned int slbenc = prop[1];
+ unsigned int lpnum = prop[2];
+ unsigned int lpenc = 0;
+ struct mmu_psize_def *def;
+ int idx = -1;
+
+ size -= 3; prop += 3;
+ while(size > 0 && lpnum) {
+ if (prop[0] == shift)
+ lpenc = prop[1];
+ prop += 2; size -= 2;
+ lpnum--;
+ }
+ switch(shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x14:
+ idx = MMU_PAGE_1M;
+ break;
+ case 0x18:
+ idx = MMU_PAGE_16M;
+ cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+ break;
+ case 0x22:
+ idx = MMU_PAGE_16G;
+ break;
+ }
+ if (idx < 0)
+ continue;
+ def = &mmu_psize_defs[idx];
+ def->shift = shift;
+ if (shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (shift - 23)) - 1;
+ def->sllp = slbenc;
+ def->penc = lpenc;
+ /* We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+ "tlbiel=%d, penc=%d\n",
+ idx, shift, def->sllp, def->avpnm, def->tlbiel,
+ def->penc);
}
+ return 1;
+ }
+ return 0;
+}
+
+
+static void __init htab_init_page_sizes(void)
+{
+ int rc;
+
+ /* Default to 4K pages only */
+ memcpy(mmu_psize_defs, mmu_psize_defaults_old,
+ sizeof(mmu_psize_defaults_old));
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found */
+ goto found;
+
+ /*
+ * Not in the device-tree, let's fallback on known size
+ * list for 16M capable GP & GR
+ */
+ if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
+ cpu_has_feature(CPU_FTR_16M_PAGE))
+ memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ sizeof(mmu_psize_defaults_gp));
+ found:
+ /*
+ * Pick a size for the linear mapping. Currently, we only support
+ * 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K if cache inhibited large pages are supported by the
+ * processor
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
+ mmu_virtual_psize = MMU_PAGE_64K;
+#endif
+
+ printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Init large page size. Currently, we pick 16M or 1M depending
+ * on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_huge_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_huge_psize = MMU_PAGE_1M;
+
+ /* Calculate HPAGE_SHIFT and sanity check it */
+ if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
+ mmu_psize_defs[mmu_huge_psize].shift < 28)
+ HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
+ else
+ HPAGE_SHIFT = 0; /* No huge pages dude ! */
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static int __init htab_dt_scan_pftsize(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ if (prop != NULL) {
+ /* pft_size[0] is the NUMA CEC cookie */
+ ppc64_pft_size = prop[1];
+ return 1;
}
+ return 0;
}
-static unsigned long get_hashtable_size(void)
+static unsigned long __init htab_get_table_size(void)
{
unsigned long rnd_mem_size, pteg_count;
- /* If hash size wasn't obtained in prom.c, we calculate it now based on
- * the total RAM size
+ /* If hash size isn't already provided by the platform, we try to
+ * retreive it from the device-tree. If it's not there neither, we
+ * calculate it now based on the total RAM size
*/
+ if (ppc64_pft_size == 0)
+ of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
@@ -181,17 +390,21 @@ void __init htab_initialize(void)
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_rw;
- int i, use_largepages = 0;
unsigned long base = 0, size = 0;
+ int i;
+
extern unsigned long tce_alloc_start, tce_alloc_end;
DBG(" -> htab_initialize()\n");
+ /* Initialize page sizes */
+ htab_init_page_sizes();
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
*/
- htab_size_bytes = get_hashtable_size();
+ htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7;
/* For debug, make the HTAB 1/8 as big as it normally would be. */
@@ -211,14 +424,11 @@ void __init htab_initialize(void)
* the absolute address space.
*/
table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ BUG_ON(table == 0);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
- if ( !table ) {
- ppc64_terminate_msg(0x20, "hpt space");
- loop_forever();
- }
htab_address = abs_to_virt(table);
/* htab absolute addr + encoded htabsize */
@@ -234,8 +444,6 @@ void __init htab_initialize(void)
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- use_largepages = 1;
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +454,32 @@ void __init htab_initialize(void)
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and will
- * fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if we
- * only use 2Mb of that space. We will use more of it later for
- * AGP GART. We have to use a full 16Mb large page.
+ * in such a way that it will not cross two lmb regions and
+ * will fit within a single 16Mb page.
+ * The DART space is assumed to be a full 16Mb region even if
+ * we only use 2Mb of that space. We will use more of it later
+ * for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
if (base != dart_tablebase)
- create_pte_mapping(base, dart_tablebase, mode_rw,
- use_largepages);
+ BUG_ON(htab_bolt_mapping(base, dart_tablebase,
+ base, mode_rw,
+ mmu_linear_psize));
if ((base + size) > (dart_tablebase + 16*MB))
- create_pte_mapping(dart_tablebase + 16*MB, base + size,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
+ base + size,
+ dart_tablebase+16*MB,
+ mode_rw,
+ mmu_linear_psize));
continue;
}
#endif /* CONFIG_U3_DART */
- create_pte_mapping(base, base + size, mode_rw, use_largepages);
- }
+ BUG_ON(htab_bolt_mapping(base, base + size, base,
+ mode_rw, mmu_linear_psize));
+ }
/*
* If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +495,9 @@ void __init htab_initialize(void)
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
- create_pte_mapping(tce_alloc_start, tce_alloc_end,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ tce_alloc_start, mode_rw,
+ mmu_linear_psize));
}
DBG(" <- htab_initialize()\n");
@@ -298,9 +512,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
struct page *page;
- if (!pfn_valid(pte_pfn(pte)))
- return pp;
-
page = pte_page(pte);
/* page is dirty */
@@ -309,7 +520,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= HW_NO_EXEC;
+ pp |= HPTE_R_N;
}
return pp;
}
@@ -325,94 +536,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- int ret;
- int user_region = 0;
- int local = 0;
cpumask_t tmp;
+ int rc, user_region = 0, local = 0;
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 1;
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
+ DBG_LOW(" out of pgtable range !\n");
+ return 1;
+ }
+
+ /* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (! mm)
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
return 1;
-
+ }
vsid = get_vsid(mm->context.id, ea);
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
-#if 0
- case KERNEL_REGION_ID:
- /*
- * Should never get here - entire 0xC0... region is bolted.
- * Send the problem up to do_page_fault
- */
-#endif
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
- break;
}
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Get pgdir */
pgdir = mm->pgd;
-
if (pgdir == NULL)
return 1;
+ /* Check CPU locality */
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
- /* Is this a huge page ? */
- if (unlikely(in_hugepage_area(mm->context, ea)))
- ret = hash_huge_page(mm, access, ea, vsid, local);
- else {
- ptep = find_linux_pte(pgdir, ea);
- if (ptep == NULL)
- return 1;
- ret = __hash_page(ea, access, vsid, ptep, trap, local);
+ /* Handle hugepage regions */
+ if (unlikely(in_hugepage_area(mm->context, ea))) {
+ DBG_LOW(" -> huge page !\n");
+ return hash_huge_page(mm, access, ea, vsid, local);
+ }
+
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ return 1;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
}
- return ret;
+ /* Do actual hashing */
+#ifndef CONFIG_PPC_64K_PAGES
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+ return rc;
}
-void flush_hash_page(unsigned long va, pte_t pte, int local)
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
{
- unsigned long vpn, hash, secondary, slot;
- unsigned long huge = pte_huge(pte);
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+ cpumask_t mask;
+ unsigned long flags;
+ int local = 0;
+
+ /* We don't want huge pages prefaulted for now
+ */
+ if (unlikely(in_hugepage_area(mm->context, ea)))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
- if (huge)
- vpn = va >> HPAGE_SHIFT;
+ /* Get PTE, VSID, access mask */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+ ptep = find_linux_pte(pgdir, ea);
+ if (!ptep)
+ return;
+ vsid = get_vsid(mm->context.id, ea);
+
+ /* Hash it in */
+ local_irq_save(flags);
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (cpus_equal(mm->cpu_vm_mask, mask))
+ local = 1;
+#ifndef CONFIG_PPC_64K_PAGES
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, huge);
- secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-
- ppc_md.hpte_invalidate(slot, va, huge, local);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+ local_irq_restore(flags);
+}
+
+void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
+{
+ unsigned long hash, index, shift, hidx, slot;
+
+ DBG_LOW("flush_hash_page(va=%016x)\n", va);
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
+ ppc_md.hpte_invalidate(slot, va, psize, local);
+ } pte_iterate_hashed_end();
}
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range) {
+ if (ppc_md.flush_hash_range)
ppc_md.flush_hash_range(number, local);
- } else {
+ else {
int i;
struct ppc64_tlb_batch *batch =
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
}
}
@@ -452,6 +738,18 @@ void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
+#ifdef CONFIG_PPC_64K_PAGES
+ extern unsigned int *ht64_call_hpte_insert1;
+ extern unsigned int *ht64_call_hpte_insert2;
+ extern unsigned int *ht64_call_hpte_remove;
+ extern unsigned int *ht64_call_hpte_updatepp;
+
+ make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
+ make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+#endif /* CONFIG_PPC_64K_PAGES */
+
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974e0..0073a04047e48b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mecanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
-#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- int i;
-
if (pte_present(*ptep)) {
- pte_clear(mm, addr, ptep);
+ /* We open-code pte_clear because we need to pass the right
+ * argument to hpte_update (huge / !huge)
+ */
+ unsigned long old = pte_update(ptep, ~0UL);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
-
- for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- ptep++;
- }
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
- int i;
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
-
- for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
- ptep[i] = __pte(0);
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+ *ptep = __pte(0);
return __pte(old);
}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
+ if (HPAGE_SHIFT == 0)
+ return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
- unsigned long va, vpn;
- pte_t old_pte, new_pte;
- unsigned long rflags, prpn;
+ unsigned long old_pte, new_pte;
+ unsigned long va, rflags, pa;
long slot;
int err = 1;
- spin_lock(&mm->page_table_lock);
-
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
-/* BUG_ON(pte_bad(*ptep)); */
-
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
- old_pte = *ptep;
- new_pte = old_pte;
-
- rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ do {
+ old_pte = pte_val(*ptep);
+ if (old_pte & _PAGE_BUSY)
+ goto out;
+ new_pte = old_pte | _PAGE_BUSY |
+ _PAGE_ACCESSED | _PAGE_HASHPTE;
+ } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
+
+ rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(vpn, 1);
- if (pte_val(old_pte) & _PAGE_SECONDARY)
+ hash = hpt_hash(va, HPAGE_SHIFT);
+ if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, 1);
+ if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
- prpn = pte_pfn(old_pte);
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- /* Update the linux pte with the HPTE slot */
- pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= _PAGE_HASHPTE;
+ /* clear HPTE slot informations in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
+ /* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE, rflags);
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- pte_val(new_pte) |= _PAGE_SECONDARY;
+ new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE |
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
- rflags);
+ mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
- /*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
- */
- *ptep = new_pte;
+ new_pte |= (slot << 12) & _PAGE_F_GIX;
}
+ /*
+ * No need to use ldarx/stdcx here because all who
+ * might be updating the pte will hold the
+ * page_table_lock
+ */
+ *ptep = __pte(new_pte & ~_PAGE_BUSY);
+
err = 0;
out:
- spin_unlock(&mm->page_table_lock);
-
return err;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29f5b..dfe7fa37b41a16 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
+#ifdef CONFIG_PPC_64K_PAGES
+static const int pgtable_cache_size[2] = {
+ PTE_TABLE_SIZE, PGD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pte_pmd_cache", "pgd_cache",
+};
+#else
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
+#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
@@ -201,19 +210,14 @@ void pgtable_cache_init(void)
{
int i;
- BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e144b..7faa46b71f21ee 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
- /* handle i-cache coherency */
- unsigned long pfn = pte_pfn(pte);
-#ifdef CONFIG_PPC32
- pmd_t *pmd;
-#else
- unsigned long vsid;
- void *pgdir;
- pte_t *ptep;
- int local = 0;
- cpumask_t tmp;
- unsigned long flags;
+#ifdef CONFIG_PPC_STD_MMU
+ unsigned long access = 0, trap;
#endif
+ unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
-#ifdef CONFIG_PPC32
- if (Hash == 0)
- return;
- pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
- if (!pmd_none(*pmd))
- add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
-#else
- pgdir = vma->vm_mm->pgd;
- if (pgdir == NULL)
- return;
- ptep = find_linux_pte(pgdir, address);
- if (!ptep)
+ /* We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot
+ *
+ * We also avoid filling the hash if not coming from a fault
+ */
+ if (current->thread.regs == NULL)
return;
-
- vsid = get_vsid(vma->vm_mm->context.id, address);
-
- local_irq_save(flags);
- tmp = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
- local = 1;
-
- __hash_page(address, 0, vsid, ptep, 0x300, local);
- local_irq_restore(flags);
-#endif
-#endif
+ trap = TRAP(current->thread.regs);
+ if (trap == 0x400)
+ access |= _PAGE_EXEC;
+ else if (trap != 0x300)
+ return;
+ hash_preload(vma->vm_mm, address, access, trap);
+#endif /* CONFIG_PPC_STD_MMU */
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a7820613558..51b7869409715d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
@@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
- unsigned long va, vpn, hash, hpteg;
-
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
+ *
*/
- vsid = get_kernel_vsid(ea);
- va = (vsid << 28) | (ea & 0xFFFFFFF);
- vpn = va >> PAGE_SHIFT;
-
- hash = hpt_hash(vpn, 0);
-
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- /* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
- HPTE_V_BOLTED,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
- == -1) {
- panic("map_io_page: could not insert mapping");
- }
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_virtual_psize))
+ panic("Can't map bolted IO mapping");
}
return 0;
}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9e9..d137abd241ff09 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
}
/*
+ * Preload a translation in the hash table
+ */
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
+{
+ pmd_t *pmd;
+
+ if (Hash == 0)
+ return;
+ pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
+ if (!pmd_none(*pmd))
+ add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a3771..60e852f2f8e59d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
-extern void slb_allocate(unsigned long ea);
+extern void slb_allocate_realmode(unsigned long ea);
+extern void slb_allocate_user(unsigned long ea);
+
+static void slb_allocate(unsigned long ea)
+{
+ /* Currently, we do real mode for all SLBs including user, but
+ * that will change if we bring back dynamic VSIDs
+ */
+ slb_allocate_realmode(ea);
+}
static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
- unsigned long ksp_flags = SLB_VSID_KERNEL;
+ unsigned long linear_llp, virtual_llp, lflags, vflags;
unsigned long ksp_esid_data;
WARN_ON(!irqs_disabled());
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- ksp_flags |= SLB_VSID_L;
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
- :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
+ :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
"r"(mk_esid_data(VMALLOCBASE, 1)),
- "r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
+ "r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
get_paca()->slb_cache_ptr = 0;
get_paca()->context = mm->context;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
/*
* preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
slb_allocate(unmapped_base);
}
+static inline void patch_slb_encoding(unsigned int *insn_addr,
+ unsigned int immed)
+{
+ /* Assume the instruction had a "0" immediate value, just
+ * "or" in the new value
+ */
+ *insn_addr |= immed;
+ flush_icache_range((unsigned long)insn_addr, 4+
+ (unsigned long)insn_addr);
+}
+
void slb_initialize(void)
{
+ unsigned long linear_llp, virtual_llp;
+ static int slb_encoding_inited;
+ extern unsigned int *slb_miss_kernel_load_linear;
+ extern unsigned int *slb_miss_kernel_load_virtual;
+ extern unsigned int *slb_miss_user_load_normal;
+#ifdef CONFIG_HUGETLB_PAGE
+ extern unsigned int *slb_miss_user_load_huge;
+ unsigned long huge_llp;
+
+ huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
+#endif
+
+ /* Prepare our SLB miss handler based on our page size */
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ if (!slb_encoding_inited) {
+ slb_encoding_inited = 1;
+ patch_slb_encoding(slb_miss_kernel_load_linear,
+ SLB_VSID_KERNEL | linear_llp);
+ patch_slb_encoding(slb_miss_kernel_load_virtual,
+ SLB_VSID_KERNEL | virtual_llp);
+ patch_slb_encoding(slb_miss_user_load_normal,
+ SLB_VSID_USER | virtual_llp);
+
+ DBG("SLB: linear LLP = %04x\n", linear_llp);
+ DBG("SLB: virtual LLP = %04x\n", virtual_llp);
+#ifdef CONFIG_HUGETLB_PAGE
+ patch_slb_encoding(slb_miss_user_load_huge,
+ SLB_VSID_USER | huge_llp);
+ DBG("SLB: huge LLP = %04x\n", huge_llp);
+#endif
+ }
+
/* On iSeries the bolted entries have already been set up by
* the hypervisor from the lparMap data in head.S */
#ifndef CONFIG_PPC_ISERIES
- unsigned long flags = SLB_VSID_KERNEL;
+ {
+ unsigned long lflags, vflags;
- /* Invalidate the entire SLB (even slot 0) & all the ERATS */
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- flags |= SLB_VSID_L;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
- asm volatile("isync":::"memory");
- asm volatile("slbmte %0,%0"::"r" (0) : "memory");
+ /* Invalidate the entire SLB (even slot 0) & all the ERATS */
+ asm volatile("isync":::"memory");
+ asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
- create_slbe(KERNELBASE, flags, 0);
- create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
+ create_slbe(KERNELBASE, lflags, 0);
+
+ /* VMALLOC space has 4K pages always for now */
+ create_slbe(VMALLOCBASE, vflags, 1);
+
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
* elsewhere, we'll call _switch() which will bolt in the new
* one. */
asm volatile("isync":::"memory");
-#endif
+ }
+#endif /* CONFIG_PPC_ISERIES */
get_paca()->stab_rr = SLB_NUM_BOLTED;
}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503bcbd..3e18241b6f3521 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
#include <linux/config.h>
#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
-/* void slb_allocate(unsigned long ea);
+/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate)
- /*
- * First find a slot, round robin. Previously we tried to find
- * a free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-#ifdef CONFIG_PPC_ISERIES
- /*
- * On iSeries, the "bolted" stack segment can be cast out on
- * shared processor switch so we need to check for a miss on
- * it and restore it to the right slot.
- */
- ld r9,PACAKSAVE(r13)
- clrrdi r9,r9,28
- clrrdi r11,r3,28
- li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
- cmpld r9,r11
- beq 3f
-#endif /* CONFIG_PPC_ISERIES */
-
- ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-3:
- /* r3 = faulting address, r10 = entry */
+_GLOBAL(slb_allocate_realmode)
+ /* r3 = faulting address */
srdi r9,r3,60 /* get region */
- srdi r3,r3,28 /* get esid */
+ srdi r10,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
- rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
- oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
-
- /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
-
+ /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
* top segment. That's ok, the scramble below will translate
* it to VSID 0, which is reserved as a bad VSID - one which
* will never have any pages in it. */
- li r11,SLB_VSID_KERNEL
-BEGIN_FTR_SECTION
- bne cr7,9f
- li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
- b 9f
-0: /* user address: proto-VSID = context<<15 | ESID */
- srdi. r9,r3,USER_ESID_BITS
+ /* Check if hitting the linear mapping of the vmalloc/ioremap
+ * kernel space
+ */
+ bne cr7,1f
+
+ /* Linear mapping encoding bits, the "li" instruction below will
+ * be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_linear)
+ li r11,0
+ b slb_finish_load
+
+1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
+ * will be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_virtual)
+ li r11,0
+ b slb_finish_load
+
+
+0: /* user address: proto-VSID = context << 15 | ESID. First check
+ * if the address is within the boundaries of the user region
+ */
+ srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
+ /* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
+ b 1f
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
+ srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
srd r9,r9,r11
lhz r11,PACALOWHTLBAREAS(r13)
- srd r11,r11,r3
- or r9,r9,r11
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+ srd r11,r11,r10
+ or. r9,r9,r11
+ beq 1f
+_GLOBAL(slb_miss_user_load_huge)
+ li r11,0
+ b 2f
+1:
#endif /* CONFIG_HUGETLB_PAGE */
- li r11,SLB_VSID_USER
+_GLOBAL(slb_miss_user_load_normal)
+ li r11,0
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-#endif /* CONFIG_HUGETLB_PAGE */
+2:
+ ld r9,PACACONTEXTID(r13)
+ rldimi r10,r9,USER_ESID_BITS,0
+ b slb_finish_load
+
+8: /* invalid EA */
+ li r10,0 /* BAD_VSID */
+ li r11,SLB_VSID_USER /* flags don't much matter */
+ b slb_finish_load
+
+#ifdef __DISABLED__
+
+/* void slb_allocate_user(unsigned long ea);
+ *
+ * Create an SLB entry for the given EA (user or kernel).
+ * r3 = faulting address, r13 = PACA
+ * r9, r10, r11 are clobbered by this function
+ * No other registers are examined or changed.
+ *
+ * It is called with translation enabled in order to be able to walk the
+ * page tables. This is not currently used.
+ */
+_GLOBAL(slb_allocate_user)
+ /* r3 = faulting address */
+ srdi r10,r3,28 /* get esid */
+
+ crset 4*cr7+lt /* set "user" flag for later */
+
+ /* check if we fit in the range covered by the pagetables*/
+ srdi. r9,r3,PGTABLE_EADDR_SIZE
+ crnot 4*cr0+eq,4*cr0+eq
+ beqlr
+ /* now we need to get to the page tables in order to get the page
+ * size encoding from the PMD. In the future, we'll be able to deal
+ * with 1T segments too by getting the encoding from the PGD instead
+ */
+ ld r9,PACAPGDIR(r13)
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,8,25,28
+ ldx r9,r9,r11 /* get pgd_t */
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,3,17,28
+ ldx r9,r9,r11 /* get pmd_t */
+ cmpldi cr0,r9,0
+ beqlr
+
+ /* build vsid flags */
+ andi. r11,r9,SLB_VSID_LLP
+ ori r11,r11,SLB_VSID_USER
+
+ /* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
- rldimi r3,r9,USER_ESID_BITS,0
+ rldimi r10,r9,USER_ESID_BITS,0
+
+ /* fall through slb_finish_load */
+
+#endif /* __DISABLED__ */
-9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
- ASM_VSID_SCRAMBLE(r3,r9)
- rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+/*
+ * Finish loading of an SLB entry and return
+ *
+ * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
+ */
+slb_finish_load:
+ ASM_VSID_SCRAMBLE(r10,r9)
+ rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+
+ /* r3 = EA, r11 = VSID data */
+ /*
+ * Find a slot, round robin. Previously we tried to find a
+ * free slot first but that took too long. Unfortunately we
+ * dont have any LRU information to help us choose a slot.
+ */
+#ifdef CONFIG_PPC_ISERIES
+ /*
+ * On iSeries, the "bolted" stack segment can be cast out on
+ * shared processor switch so we need to check for a miss on
+ * it and restore it to the right slot.
+ */
+ ld r9,PACAKSAVE(r13)
+ clrrdi r9,r9,28
+ clrrdi r3,r3,28
+ li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
+ cmpld r9,r3
+ beq 3f
+#endif /* CONFIG_PPC_ISERIES */
+
+ ld r10,PACASTABRR(r13)
+ addi r10,r10,1
+ /* use a cpu feature mask if we ever change our slb size */
+ cmpldi r10,SLB_NUM_ENTRIES
+
+ blt+ 4f
+ li r10,SLB_NUM_BOLTED
+
+4:
+ std r10,PACASTABRR(r13)
+
+3:
+ rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
+ oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
+
+ /* r3 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
*/
slbmte r11,r10
- bgelr cr7 /* we're done for kernel addresses */
+ /* we're done for kernel addresses */
+ crclr 4*cr0+eq /* set result to "success" */
+ bgelr cr7
/* Update the slb cache */
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
li r3,SLB_CACHE_ENTRIES+1
2:
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ crclr 4*cr0+eq /* set result to "success" */
blr
-8: /* invalid EA */
- li r3,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf27f4..fa325dbf98fc2d 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
unsigned long vsid_data;
};
-/* Both the segment table and SLB code uses the following cache */
#define NR_STAB_CACHE_ENTRIES 8
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Never flush the first entry. */
ste += 1;
for (entry = 1;
- entry < (PAGE_SIZE / sizeof(struct stab_entry));
+ entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
__get_cpu_var(stab_cache_ptr) = 0;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
+
/* Now preload some entries for the new task */
if (test_tsk_thread_flag(tsk, TIF_32BIT))
unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("sync" : : : "memory");
}
-extern void slb_initialize(void);
-
/*
* Allocate segment tables for secondary CPUs. These must all go in
* the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
- newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
+ newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+ 1<<SID_SHIFT);
if (! newstab)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
newstab += KERNELBASE;
- memset((void *)newstab, 0, PAGE_SIZE);
+ memset((void *)newstab, 0, HW_PAGE_SIZE);
paca[cpu].stab_addr = newstab;
paca[cpu].stab_real = virt_to_abs(newstab);
- printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
+ printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
+ "virtual, 0x%lx absolute\n",
+ cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
}
}
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
{
unsigned long vsid = get_kernel_vsid(KERNELBASE);
- if (cpu_has_feature(CPU_FTR_SLB)) {
- slb_initialize();
- } else {
- asm volatile("isync; slbia; isync":::"memory");
- make_ste(stab, GET_ESID(KERNELBASE), vsid);
+ asm volatile("isync; slbia; isync":::"memory");
+ make_ste(stab, GET_ESID(KERNELBASE), vsid);
- /* Order update */
- asm volatile("sync":::"memory");
- }
+ /* Order update */
+ asm volatile("sync":::"memory");
}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4f05..53e31b834ace00 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -30,7 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include <linux/highmem.h>
+#include <asm/bug.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*/
void hpte_update(struct mm_struct *mm, unsigned long addr,
- unsigned long pte, int wrprot)
+ pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
unsigned long vsid;
+ unsigned int psize = mmu_virtual_psize;
int i;
i = batch->index;
+ /* We mask the address for the base page size. Huge pages will
+ * have applied their own masking already
+ */
+ addr &= PAGE_MASK;
+
+ /* Get page size (maybe move back to caller) */
+ if (huge) {
+#ifdef CONFIG_HUGETLB_PAGE
+ psize = mmu_huge_psize;
+#else
+ BUG();
+#endif
+ }
+
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
+ *
+ * We also need to ensure only one page size is present in a given
+ * batch
*/
- if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) {
+ if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
batch->mm = mm;
- batch->large = pte_huge(pte);
+ batch->psize = psize;
}
if (addr < KERNELBASE) {
vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
vsid = get_kernel_vsid(addr);
batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
- batch->pte[i] = __pte(pte);
+ batch->pte[i] = __real_pte(__pte(pte), ptep);
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
local = 1;
if (i == 1)
- flush_hash_page(batch->vaddr[0], batch->pte[0], local);
+ flush_hash_page(batch->vaddr[0], batch->pte[0],
+ batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;