From: Anton Blanchard The current SLB handling code has a number of problems: - We loop trying to find an empty SLB entry before deciding to cast one out. On large working sets this really hurts since the SLB is always full and we end up looping through all 64 entries unnecessarily. - During castout we currently invalidate the entry we are replacing. This is to avoid a nasty race where the entry is in the ERAT but not the SLB and another cpu does a tlbie that removes the ERAT at a critical point. If this race is fixed the SLB invalidate can be removed. - The SLB prefault code doesn't work properly The following patch addresses all the above concerns and adds some more optimisations: - feature nop out some segment table only code - slb invalidate the kernel segment on context switch (avoids us having to slb invalidate at each cast out) - optimise flush on context switch, the lazy tlb stuff avoids it being called when going from userspace to kernel thread, but it gets called when going from kernel thread to userspace. In many cases we are returning to the same userspace task; we now check for this and avoid the flush - use the optimised POWER4 mtcrf where possible --- arch/ppc64/kernel/head.S | 157 ++++++++------ arch/ppc64/kernel/pacaData.c | 1 arch/ppc64/kernel/process.c | 24 ++ arch/ppc64/kernel/stab.c | 432 ++++++++++++++++++++++------------------ include/asm-ppc64/cputable.h | 9 include/asm-ppc64/mmu.h | 8 include/asm-ppc64/mmu_context.h | 11 - include/asm-ppc64/paca.h | 23 -- 8 files changed, 384 insertions(+), 281 deletions(-) diff -puN arch/ppc64/kernel/head.S~ppc64-slb_rewrite arch/ppc64/kernel/head.S --- 25/arch/ppc64/kernel/head.S~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/arch/ppc64/kernel/head.S 2004-01-26 19:44:52.000000000 -0800 @@ -646,12 +646,14 @@ fast_exception_return: */ .globl DataAccess_common DataAccess_common: +BEGIN_FTR_SECTION mfspr r22,DAR srdi r22,r22,60 cmpi 0,r22,0xc /* Segment fault on a bolted segment. 
Go off and map that segment. */ beq- .do_stab_bolted +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) stab_bolted_user_return: EXCEPTION_PROLOG_COMMON ld r3,_DSISR(r1) @@ -661,10 +663,12 @@ stab_bolted_user_return: rlwinm r4,r3,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */ ld r3,_DAR(r1) /* into the hash table */ +BEGIN_FTR_SECTION beq+ 2f /* If so handle it */ li r4,0x300 /* Trap number */ bl .do_stab_SI b 1f +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) 2: li r5,0x300 bl .do_hash_page_DSI /* Try to handle as hpte fault */ @@ -690,7 +694,7 @@ DataAccessSLB_common: EXCEPTION_PROLOG_COMMON ld r3,_DAR(r1) li r4,0x380 /* Exception vector */ - bl .ste_allocate + bl .slb_allocate or. r3,r3,r3 /* Check return code */ beq fast_exception_return /* Return if we succeeded */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -705,12 +709,14 @@ DataAccessSLB_common: InstructionAccess_common: EXCEPTION_PROLOG_COMMON +BEGIN_FTR_SECTION andis. r0,r23,0x0020 /* no ste found? */ beq+ 2f mr r3,r22 /* SRR0 at interrupt */ li r4,0x400 /* Trap number */ bl .do_stab_SI b 1f +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) 2: mr r3,r22 li r5,0x400 @@ -730,7 +736,7 @@ InstructionAccessSLB_common: EXCEPTION_PROLOG_COMMON mr r3,r22 /* SRR0 = NIA */ li r4,0x480 /* Exception vector */ - bl .ste_allocate + bl .slb_allocate or. r3,r3,r3 /* Check return code */ beq+ fast_exception_return /* Return if we succeeded */ @@ -1006,48 +1012,27 @@ _GLOBAL(do_stab_bolted) * r20 - r23, SRR0 and SRR1 are saved in the exception frame. * We assume we aren't going to take any exceptions during this procedure. */ +/* XXX note fix masking in get_kernel_vsid to match */ _GLOBAL(do_slb_bolted) - stw r23,EX_CCR(r21) /* save CR in exc. frame */ + stw r23,EX_CCR(r21) /* save CR in exc. 
frame */ - /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ - mfspr r21,DAR - rldicl r20,r21,36,32 /* Permits a full 32b of ESID */ - rldicr r20,r20,15,48 - rldicl r21,r21,4,60 - or r20,r20,r21 - - li r21,9 /* VSID_RANDOMIZER */ - sldi r21,r21,32 - oris r21,r21,58231 - ori r21,r21,39831 - - mulld r20,r20,r21 - clrldi r20,r20,28 /* r20 = vsid */ - - /* Search the SLB for a free entry */ - li r22,1 -1: - slbmfee r23,r22 - rldicl r23,r23,37,63 - cmpwi r23,0 - beq 4f /* Found an invalid entry */ - - addi r22,r22,1 - cmpldi r22,64 - blt 1b + /* + * We take the next entry, round robin. Previously we tried + * to find a free slot first but that took too long. Unfortunately + * we dont have any LRU information to help us choose a slot. + */ - /* No free entry - just take the next entry, round-robin */ - /* XXX we should get the number of SLB entries from the naca */ + /* r20 = paca */ + /* use a cpu feature mask if we ever change our slb size */ SLB_NUM_ENTRIES = 64 -2: mfspr r21,SPRG3 - ld r22,PACASTABRR(r21) - addi r23,r22,1 - cmpdi r23,SLB_NUM_ENTRIES - blt 3f - li r23,1 -3: std r23,PACASTABRR(r21) +1: ld r22,PACASTABRR(r20) + addi r21,r22,1 + cmpdi r21,SLB_NUM_ENTRIES + blt+ 2f + li r21,1 /* dont touch bolted slot 0 */ +2: std r21,PACASTABRR(r20) - /* r20 = vsid, r22 = entry */ + /* r20 = paca, r22 = entry */ /* * Never cast out the segment for our kernel stack. Since we @@ -1056,48 +1041,86 @@ SLB_NUM_ENTRIES = 64 * which gets invalidated due to a tlbie from another cpu at a * non recoverable point (after setting srr0/1) - Anton */ - slbmfee r23,r22 - srdi r23,r23,28 + slbmfee r21,r22 + srdi r21,r21,27 /* * This is incorrect (r1 is not the kernel stack) if we entered * from userspace but there is no critical window from userspace * so this should be OK. Also if we cast out the userspace stack * segment while in userspace we will fault it straight back in. */ - srdi r21,r1,28 - cmpd r21,r23 - beq- 2b - - /* Put together the vsid portion of the entry. 
*/ -4: li r21,0 - rldimi r21,r20,12,0 - ori r20,r21,1024 - ori r20,r20,128 /* set class bit for kernel region */ -#ifndef CONFIG_PPC_ISERIES - ori r20,r20,256 /* map kernel region with large ptes */ -#endif + srdi r23,r1,27 + ori r23,r23,1 + cmpd r23,r21 + beq- 1b + + /* r20 = paca, r22 = entry */ + + /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ + mfspr r21,DAR + rldicl r23,r21,36,51 + sldi r23,r23,15 + srdi r21,r21,60 + or r23,r23,r21 + + /* VSID_RANDOMIZER */ + li r21,9 + sldi r21,r21,32 + oris r21,r21,58231 + ori r21,r21,39831 + + /* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */ + mulld r23,r23,r21 + clrldi r23,r23,28 + + /* r20 = paca, r22 = entry, r23 = vsid */ + + /* Put together slb word1 */ + sldi r23,r23,12 + +BEGIN_FTR_SECTION + /* set kp and c bits */ + ori r23,r23,0x480 +END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) +BEGIN_FTR_SECTION + /* set kp, l and c bits */ + ori r23,r23,0x580 +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) + + /* r20 = paca, r22 = entry, r23 = slb word1 */ - /* Put together the esid portion of the entry. */ - mfspr r21,DAR /* Get the new esid */ - rldicl r21,r21,36,28 /* Permits a full 36b of ESID */ - li r23,0 - rldimi r23,r21,28,0 /* Insert esid */ - oris r21,r23,2048 /* valid bit */ - rldimi r21,r22,0,52 /* Insert entry */ + /* Put together slb word0 */ + mfspr r21,DAR + rldicr r21,r21,0,35 /* get the new esid */ + oris r21,r21,2048 /* set valid bit */ + rldimi r21,r22,0,52 /* insert entry */ + + /* r20 = paca, r21 = slb word0, r23 = slb word1 */ /* * No need for an isync before or after this slbmte. The exception * we enter with and the rfid we exit with are context synchronizing . */ - slbmte r20,r21 + slbmte r23,r21 /* All done -- return from exception. 
*/ - mfsprg r20,3 /* Load the PACA pointer */ - ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ - addi r21,r21,EXC_FRAME_SIZE + ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ + addi r21,r21,EXC_FRAME_SIZE lwz r23,EX_CCR(r21) /* get saved CR */ /* note that this is almost identical to maskable_exception_exit */ - mtcr r23 /* restore CR */ + + /* + * Until everyone updates binutils hardwire the POWER4 optimised + * single field mtcrf + */ +#if 0 + .machine push + .machine "power4" + mtcrf 0x80,r23 + .machine pop +#else + .long 0x7ef80120 +#endif mfmsr r22 li r23, MSR_RI @@ -1107,10 +1130,10 @@ SLB_NUM_ENTRIES = 64 ld r22,EX_SRR0(r21) /* Get SRR0 from exc. frame */ ld r23,EX_SRR1(r21) /* Get SRR1 from exc. frame */ mtspr SRR0,r22 - mtspr SRR1,r23 + mtspr SRR1,r23 ld r22,EX_R22(r21) /* restore r22 and r23 */ ld r23,EX_R23(r21) - mfspr r20,SPRG2 + ld r20,EX_R20(r21) mfspr r21,SPRG1 rfid diff -puN arch/ppc64/kernel/pacaData.c~ppc64-slb_rewrite arch/ppc64/kernel/pacaData.c --- 25/arch/ppc64/kernel/pacaData.c~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/arch/ppc64/kernel/pacaData.c 2004-01-26 19:44:52.000000000 -0800 @@ -41,7 +41,6 @@ struct systemcfg *systemcfg; .xStab_data = { \ .real = (asrr), /* Real pointer to segment table */ \ .virt = (asrv), /* Virt pointer to segment table */ \ - .next_round_robin = 1 /* Round robin index */ \ }, \ .lpQueuePtr = (lpq), /* &xItLpQueue, */ \ /* .xRtas = { \ diff -puN arch/ppc64/kernel/process.c~ppc64-slb_rewrite arch/ppc64/kernel/process.c --- 25/arch/ppc64/kernel/process.c~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/arch/ppc64/kernel/process.c 2004-01-26 19:44:52.000000000 -0800 @@ -151,7 +151,31 @@ struct task_struct *__switch_to(struct t local_irq_save(flags); last = _switch(old_thread, new_thread); + + /* + * force our kernel stack out of the ERAT and SLB, this is to + * avoid the race where we it hangs around in the ERAT but not the + * SLB and the ERAT gets 
invalidated at just the wrong moment by + * another CPU doing a tlbie. + * + * We definitely dont want to flush our bolted segment, so check + * for that first. + */ + if ((cur_cpu_spec->cpu_features & CPU_FTR_SLB) && + GET_ESID((unsigned long)_get_SP()) != GET_ESID(PAGE_OFFSET)) { + union { + unsigned long word0; + slb_dword0 data; + } esid_data; + + esid_data.word0 = 0; + /* class bit is in valid field for slbie instruction */ + esid_data.data.v = 1; + esid_data.data.esid = GET_ESID((unsigned long)_get_SP()); + asm volatile("isync; slbie %0; isync" : : "r" (esid_data)); + } local_irq_restore(flags); + return last; } diff -puN arch/ppc64/kernel/stab.c~ppc64-slb_rewrite arch/ppc64/kernel/stab.c --- 25/arch/ppc64/kernel/stab.c~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/arch/ppc64/kernel/stab.c 2004-01-26 19:44:52.000000000 -0800 @@ -12,8 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -/* XXX Note: Changes for bolted region have not been merged - Anton */ - #include #include #include @@ -59,6 +57,15 @@ void stab_initialize(unsigned long stab) } } +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Segment table stuff + */ + /* * Create a segment table entry for the given esid/vsid pair. */ @@ -91,14 +98,8 @@ int make_ste(unsigned long stab, unsigne /* * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. Note that the first time - * we get here, we start with entry 1 so the initializer - * can be common with the SLB castout code. + * Search all entries in the two groups. */ - - /* This assumes we never castout when initializing the stab. 
*/ - PMC_SW_PROCESSOR(stab_capacity_castouts); - castout_entry = get_paca()->xStab_data.next_round_robin; for (i = 0; i < 16; i++) { if (castout_entry < 8) { @@ -123,23 +124,169 @@ int make_ste(unsigned long stab, unsigne /* Modify the old entry to the new value. */ /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory" ); + asm volatile("isync" : : : "memory"); castout_ste->dw0.dw0.v = 0; - asm volatile("sync" : : : "memory" ); /* Order update */ + asm volatile("sync" : : : "memory"); /* Order update */ castout_ste->dw1.dw1.vsid = vsid; old_esid = castout_ste->dw0.dw0.esid; castout_ste->dw0.dw0.esid = esid; castout_ste->dw0.dw0.kp = 1; - asm volatile("eieio" : : : "memory" ); /* Order update */ + asm volatile("eieio" : : : "memory"); /* Order update */ castout_ste->dw0.dw0.v = 1; asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory" ); + asm volatile("sync" : : : "memory"); return (global_entry | (castout_entry & 0x7)); } +static inline void __ste_allocate(unsigned long esid, unsigned long vsid, + mm_context_t context) +{ + unsigned char stab_entry; + unsigned long *offset; + int region_id = REGION_ID(esid << SID_SHIFT); + + stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid); + + if (region_id != USER_REGION_ID) + return; + + offset = &__get_cpu_var(stab_cache_ptr); + if (*offset < NR_STAB_CACHE_ENTRIES) { + __get_cpu_var(stab_cache[*offset]) = stab_entry; + } + (*offset)++; +} + +/* + * Allocate a segment table entry for the given ea. + */ +int ste_allocate(unsigned long ea) +{ + unsigned long vsid, esid; + mm_context_t context; + + /* Check for invalid effective addresses. */ + if (!IS_VALID_EA(ea)) + return 1; + + /* Kernel or user address? 
*/ + if (REGION_ID(ea) >= KERNEL_REGION_ID) { + vsid = get_kernel_vsid(ea); + context = REGION_ID(ea); + } else { + if (!current->mm) + return 1; + + context = current->mm->context; + vsid = get_vsid(context, ea); + } + + esid = GET_ESID(ea); + __ste_allocate(esid, vsid, context); + /* Order update */ + asm volatile("sync":::"memory"); + + return 0; +} + +/* + * preload some userspace segments into the segment table. + */ +static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + unsigned long pc_esid = GET_ESID(pc); + unsigned long stack_esid = GET_ESID(stack); + unsigned long unmapped_base_esid; + unsigned long vsid; + + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + unmapped_base_esid = GET_ESID(unmapped_base); + + if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, pc); + __ste_allocate(pc_esid, vsid, mm->context); + + if (pc_esid == stack_esid) + return; + + if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, stack); + __ste_allocate(stack_esid, vsid, mm->context); + + if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) + return; + + if (!IS_VALID_EA(unmapped_base) || + (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, unmapped_base); + __ste_allocate(unmapped_base_esid, vsid, mm->context); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. */ +void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + STE *stab = (STE *) get_paca()->xStab_data.virt; + STE *ste; + unsigned long *offset = &__get_cpu_var(stab_cache_ptr); + + /* Force previous translations to complete. 
DRENG */ + asm volatile("isync" : : : "memory"); + + if (*offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < *offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->dw0.dw0.v = 0; + } + + asm volatile("sync; slbia; sync":::"memory"); + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(STE)); + entry++, ste++) { + unsigned long ea; + ea = ste->dw0.dw0.esid << SID_SHIFT; + if (ea < KERNELBASE) { + ste->dw0.dw0.v = 0; + } + } + + asm volatile("sync; slbia; sync":::"memory"); + } + + *offset = 0; + + preload_stab(tsk, mm); +} + +/* + * SLB stuff + */ + /* * Create a segment buffer entry for the given esid/vsid pair. * @@ -160,22 +307,11 @@ void make_slbe(unsigned long esid, unsig } vsid_data; /* - * Find an empty entry, if one exists. Must start at 0 because - * we use this code to load SLB entry 0 at boot. - */ - for (entry = 0; entry < naca->slb_size; entry++) { - asm volatile("slbmfee %0,%1" - : "=r" (esid_data) : "r" (entry)); - if (!esid_data.data.v) - goto write_entry; - } - - /* - * Could not find empty entry, pick one with a round robin selection. + * We take the next entry, round robin. Previously we tried + * to find a free slot first but that took too long. Unfortunately + * we dont have any LRU information to help us choose a slot. */ - PMC_SW_PROCESSOR(stab_capacity_castouts); - /* * Never cast out the segment for our kernel stack. 
Since we * dont invalidate the ERAT we could have a valid translation @@ -190,13 +326,13 @@ void make_slbe(unsigned long esid, unsig if (castout_entry >= naca->slb_size) castout_entry = 1; asm volatile("slbmfee %0,%1" : "=r" (esid_data) : "r" (entry)); - } while (esid_data.data.esid == GET_ESID((unsigned long)_get_SP())); + } while (esid_data.data.v && + esid_data.data.esid == GET_ESID((unsigned long)_get_SP())); get_paca()->xStab_data.next_round_robin = castout_entry; /* slbie not needed as the previous mapping is still valid. */ -write_entry: /* * Write the new SLB entry. */ @@ -220,211 +356,129 @@ write_entry: asm volatile("slbmte %0,%1" : : "r" (vsid_data), "r" (esid_data)); } -static inline void __ste_allocate(unsigned long esid, unsigned long vsid, - int kernel_segment, mm_context_t context) +static inline void __slb_allocate(unsigned long esid, unsigned long vsid, + mm_context_t context) { - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - int large = 0; + int large = 0; + int region_id = REGION_ID(esid << SID_SHIFT); + unsigned long *offset; -#ifndef CONFIG_PPC_ISERIES - if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID) + if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) { + if (region_id == KERNEL_REGION_ID) large = 1; - else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID) + else if (region_id == USER_REGION_ID) large = in_hugepage_area(context, esid << SID_SHIFT); -#endif - make_slbe(esid, vsid, large, kernel_segment); - } else { - unsigned char top_entry, stab_entry, *segments; + } - stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid); - PMC_SW_PROCESSOR_A(stab_entry_use, stab_entry & 0xf); + make_slbe(esid, vsid, large, region_id != USER_REGION_ID); - segments = get_paca()->xSegments; - top_entry = get_paca()->stab_cache_pointer; - if (!kernel_segment && top_entry < STAB_CACHE_SIZE) { - segments[top_entry] = stab_entry; - if (top_entry == STAB_CACHE_SIZE) - top_entry = 0xff; - top_entry++; - get_paca()->stab_cache_pointer = 
top_entry; - } + if (region_id != USER_REGION_ID) + return; + + offset = &__get_cpu_var(stab_cache_ptr); + if (*offset < NR_STAB_CACHE_ENTRIES) { + __get_cpu_var(stab_cache[*offset]) = esid; } + (*offset)++; } /* * Allocate a segment table entry for the given ea. */ -int ste_allocate(unsigned long ea) +int slb_allocate(unsigned long ea) { unsigned long vsid, esid; - int kernel_segment = 0; mm_context_t context; - PMC_SW_PROCESSOR(stab_faults); - /* Check for invalid effective addresses. */ - if (!IS_VALID_EA(ea)) + if (unlikely(!IS_VALID_EA(ea))) return 1; /* Kernel or user address? */ if (REGION_ID(ea) >= KERNEL_REGION_ID) { - kernel_segment = 1; - vsid = get_kernel_vsid(ea); context = REGION_ID(ea); + vsid = get_kernel_vsid(ea); } else { - if (! current->mm) + if (unlikely(!current->mm)) return 1; context = current->mm->context; - vsid = get_vsid(context, ea); } esid = GET_ESID(ea); - __ste_allocate(esid, vsid, kernel_segment, context); - if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) { - /* Order update */ - asm volatile("sync":::"memory"); - } + __slb_allocate(esid, vsid, context); return 0; } -unsigned long ppc64_preload_all_segments; -unsigned long ppc64_stab_preload = 1; -#define STAB_PRESSURE 0 -#define USE_SLBIE_ON_STAB 0 - /* - * preload all 16 segments for a 32 bit process and the PC and SP segments - * for a 64 bit process. + * preload some userspace segments into the SLB. 
*/ -static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) +static void preload_slb(struct task_struct *tsk, struct mm_struct *mm) { - if (ppc64_preload_all_segments && - test_tsk_thread_flag(tsk, TIF_32BIT)) { - unsigned long esid, vsid; - - for (esid = 0; esid < 16; esid++) { - unsigned long ea = esid << SID_SHIFT; - vsid = get_vsid(mm->context, ea); - __ste_allocate(esid, vsid, 0, mm->context); - } - } else { - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long pc_segment = pc & ~SID_MASK; - unsigned long stack_segment = stack & ~SID_MASK; - unsigned long vsid; - - if (pc) { - if (!IS_VALID_EA(pc) || - (REGION_ID(pc) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context, pc); - __ste_allocate(GET_ESID(pc), vsid, 0, mm->context); - } - - if (stack && (pc_segment != stack_segment)) { - if (!IS_VALID_EA(stack) || - (REGION_ID(stack) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context, stack); - __ste_allocate(GET_ESID(stack), vsid, 0, mm->context); - } - } - - if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) { - /* Order update */ - asm volatile("sync" : : : "memory"); - } + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + unsigned long pc_esid = GET_ESID(pc); + unsigned long stack_esid = GET_ESID(stack); + unsigned long unmapped_base_esid; + unsigned long vsid; + + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + unmapped_base_esid = GET_ESID(unmapped_base); + + if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, pc); + __slb_allocate(pc_esid, vsid, mm->context); + + if (pc_esid == stack_esid) + return; + + if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, stack); + __slb_allocate(stack_esid, vsid, mm->context); + + if (pc_esid == 
unmapped_base_esid || stack_esid == unmapped_base_esid) + return; + + if (!IS_VALID_EA(unmapped_base) || + (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context, unmapped_base); + __slb_allocate(unmapped_base_esid, vsid, mm->context); } /* Flush all user entries from the segment table of the current processor. */ -void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +void flush_slb(struct task_struct *tsk, struct mm_struct *mm) { - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - /* - * XXX disable 32bit slb invalidate optimisation until we fix - * the issue where a 32bit app execed out of a 64bit app can - * cause segments above 4GB not to be flushed - Anton - */ - if (0 && !STAB_PRESSURE && test_thread_flag(TIF_32BIT)) { - union { - unsigned long word0; - slb_dword0 data; - } esid_data; - unsigned long esid; - - asm volatile("isync" : : : "memory"); - for (esid = 0; esid < 16; esid++) { - esid_data.word0 = 0; - esid_data.data.esid = esid; - asm volatile("slbie %0" : : "r" (esid_data)); - } - asm volatile("isync" : : : "memory"); - } else { - asm volatile("isync; slbia; isync":::"memory"); - } + unsigned long *offset = &__get_cpu_var(stab_cache_ptr); - PMC_SW_PROCESSOR(stab_invalidations); - } else { - STE *stab = (STE *) get_paca()->xStab_data.virt; - STE *ste; - unsigned long flags; + if (*offset <= NR_STAB_CACHE_ENTRIES) { + int i; + union { + unsigned long word0; + slb_dword0 data; + } esid_data; - /* Force previous translations to complete. 
DRENG */ asm volatile("isync" : : : "memory"); - - local_irq_save(flags); - if (get_paca()->stab_cache_pointer != 0xff && !STAB_PRESSURE) { - int i; - unsigned char *segments = get_paca()->xSegments; - - for (i = 0; i < get_paca()->stab_cache_pointer; i++) { - ste = stab + segments[i]; - ste->dw0.dw0.v = 0; - PMC_SW_PROCESSOR(stab_invalidations); - } - -#if USE_SLBIE_ON_STAB - asm volatile("sync":::"memory"); - for (i = 0; i < get_paca()->stab_cache_pointer; i++) { - ste = stab + segments[i]; - asm volatile("slbie %0" : : - "r" (ste->dw0.dw0.esid << SID_SHIFT)); - } - asm volatile("sync":::"memory"); -#else - asm volatile("sync; slbia; sync":::"memory"); -#endif - - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (PAGE_SIZE / sizeof(STE)); - entry++, ste++) { - unsigned long ea; - ea = ste->dw0.dw0.esid << SID_SHIFT; - if (STAB_PRESSURE || ea < KERNELBASE) { - ste->dw0.dw0.v = 0; - PMC_SW_PROCESSOR(stab_invalidations); - } - } - - asm volatile("sync; slbia; sync":::"memory"); + for (i = 0; i < *offset; i++) { + esid_data.word0 = 0; + esid_data.data.esid = __get_cpu_var(stab_cache[i]); + asm volatile("slbie %0" : : "r" (esid_data)); } - - get_paca()->stab_cache_pointer = 0; - local_irq_restore(flags); + asm volatile("isync" : : : "memory"); + } else { + asm volatile("isync; slbia; isync" : : : "memory"); } - if (ppc64_stab_preload) - preload_stab(tsk, mm); + *offset = 0; + + preload_slb(tsk, mm); } diff -puN include/asm-ppc64/cputable.h~ppc64-slb_rewrite include/asm-ppc64/cputable.h --- 25/include/asm-ppc64/cputable.h~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/include/asm-ppc64/cputable.h 2004-01-26 19:44:52.000000000 -0800 @@ -135,10 +135,17 @@ extern firmware_feature_t firmware_featu #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_SLB | 
CPU_FTR_16M_PAGE | \ +#define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \ CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \ CPU_FTR_NODSISRALIGN) +/* iSeries doesn't support large pages */ +#ifdef CONFIG_PPC_ISERIES +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) +#else +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) +#endif + #define COMMON_PPC64_FW (0) #endif diff -puN include/asm-ppc64/mmu_context.h~ppc64-slb_rewrite include/asm-ppc64/mmu_context.h --- 25/include/asm-ppc64/mmu_context.h~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/include/asm-ppc64/mmu_context.h 2004-01-26 19:44:52.000000000 -0800 @@ -139,6 +139,7 @@ destroy_context(struct mm_struct *mm) } extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm); +extern void flush_slb(struct task_struct *tsk, struct mm_struct *mm); /* * switch_mm is the entry point called from the architecture independent @@ -154,7 +155,15 @@ static inline void switch_mm(struct mm_s END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) : : ); #endif /* CONFIG_ALTIVEC */ - flush_stab(tsk, next); + + /* No need to flush userspace segments if the mm doesnt change */ + if (prev == next) + return; + + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) + flush_slb(tsk, next); + else + flush_stab(tsk, next); cpu_set(smp_processor_id(), next->cpu_vm_mask); } diff -puN include/asm-ppc64/mmu.h~ppc64-slb_rewrite include/asm-ppc64/mmu.h --- 25/include/asm-ppc64/mmu.h~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/include/asm-ppc64/mmu.h 2004-01-26 19:44:52.000000000 -0800 @@ -28,14 +28,6 @@ typedef unsigned long mm_context_t; #endif /* - * Define the size of the cache used for segment table entries. The first - * entry is used as a cache pointer, therefore the actual number of entries - * stored is one less than defined here. Do not change this value without - * considering the impact it will have on the layout of the paca in paca.h. 
- */ -#define STAB_CACHE_SIZE 16 - -/* * Hardware Segment Lookaside Buffer Entry * This structure has been padded out to two 64b doublewords (actual SLBE's are * 94 bits). This padding facilites use by the segment management diff -puN include/asm-ppc64/paca.h~ppc64-slb_rewrite include/asm-ppc64/paca.h --- 25/include/asm-ppc64/paca.h~ppc64-slb_rewrite 2004-01-26 19:44:52.000000000 -0800 +++ 25-akpm/include/asm-ppc64/paca.h 2004-01-26 19:44:52.000000000 -0800 @@ -63,20 +63,15 @@ struct paca_struct { u16 xPacaIndex; /* Logical processor number 0x18 */ u16 xHwProcNum; /* Physical processor number 0x1A */ u32 default_decr; /* Default decrementer value 0x1c */ - u64 unused1; - u64 xKsave; /* Saved Kernel stack addr or zero 0x28 */ - u64 pvr; /* Processor version register 0x30 */ - u8 *exception_sp; /* 0x38 */ - - struct ItLpQueue *lpQueuePtr; /* LpQueue handled by this processor 0x40 */ - u64 xTOC; /* Kernel TOC address 0x48 */ - STAB xStab_data; /* Segment table information 0x50,0x58,0x60 */ - u8 xSegments[STAB_CACHE_SIZE]; /* Cache of used stab entries 0x68,0x70 */ - u8 xProcEnabled; /* 1=soft enabled 0x78 */ - u8 unused2; - u8 prof_enabled; /* 1=iSeries profiling enabled 0x7A */ - u8 stab_cache_pointer; - u8 resv1[4]; /* 0x7B-0x7F */ + u64 xKsave; /* Saved Kernel stack addr or zero 0x20 */ + u64 pvr; /* Processor version register 0x28 */ + struct ItLpQueue *lpQueuePtr; /* LpQueue handled by this processor 0x30 */ + u64 xTOC; /* Kernel TOC address 0x38 */ + STAB xStab_data; /* Segment table information 0x40,0x48,0x50 */ + u8 *exception_sp; /* 0x58 */ + u8 xProcEnabled; /* 0x60 */ + u8 prof_enabled; /* 1=iSeries profiling enabled 0x61 */ + u8 resv1[30]; /* 0x62-0x7F */ /*===================================================================================== * CACHE_LINE_2 0x0080 - 0x00FF _