aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2006-09-27 14:57:44 +0900
committerPaul Mundt <lethal@linux-sh.org>2006-09-27 14:57:44 +0900
commit298476220d1f793ca0ac6c9e5dc817e1ad3e9851 (patch)
tree59cff744ad1837844cb7a5a43a0623d39058fb44
parent749cf486920bf53f16e6a6889d9635a91ffb6c82 (diff)
downloadlinux-298476220d1f793ca0ac6c9e5dc817e1ad3e9851.tar.gz
sh: Add control register barriers.
Currently when making changes to control registers, we typically need some time for changes to take effect (8 nops, generally). However, for sh4a we simply need to do an icbi. This is a simple patch for implementing a general-purpose ctrl_barrier() which functions as a control register write barrier. There's some additional documentation in the patch itself, but it's pretty self-explanatory. There were also some places where we were not doing the barrier, which didn't seem to have any adverse effects on legacy parts, but certainly did on sh4a. It's safer to have the barrier in place for legacy parts as well in these cases, though this does make flush_tlb_all() more expensive (by an order of 8 nops). We can ifdef around the flush_tlb_all() case for now if it's clear that all legacy parts won't have a problem with this. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--arch/sh/mm/cache-sh4.c5
-rw-r--r--arch/sh/mm/fault.c5
-rw-r--r--include/asm-sh/mmu_context.h7
-rw-r--r--include/asm-sh/system.h43
4 files changed, 45 insertions, 15 deletions
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index c036c2b4ac2bd9..2203bd6aadb3b6 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -269,6 +269,11 @@ static inline void flush_icache_all(void)
ccr |= CCR_CACHE_ICI;
ctrl_outl(ccr, CCR);
+ /*
+ * back_to_P1() will take care of the barrier for us, don't add
+ * another one!
+ */
+
back_to_P1();
local_irq_restore(flags);
}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 775f86cd3fe8f5..364181f27b79b9 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -369,12 +369,13 @@ void flush_tlb_all(void)
* Flush all the TLB.
*
* Write to the MMU control register's bit:
- * TF-bit for SH-3, TI-bit for SH-4.
+ * TF-bit for SH-3, TI-bit for SH-4.
* It's same position, bit #2.
*/
local_irq_save(flags);
status = ctrl_inl(MMUCR);
- status |= 0x04;
+ status |= 0x04;
ctrl_outl(status, MMUCR);
+ ctrl_barrier();
local_irq_restore(flags);
}
diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h
index 6760d064bd0233..87678ba8d6b61b 100644
--- a/include/asm-sh/mmu_context.h
+++ b/include/asm-sh/mmu_context.h
@@ -174,9 +174,7 @@ static inline void enable_mmu(void)
{
/* Enable MMU */
ctrl_outl(MMU_CONTROL_INIT, MMUCR);
-
- /* The manual suggests doing some nops after turning on the MMU */
- __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t");
+ ctrl_barrier();
if (mmu_context_cache == NO_CONTEXT)
mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
@@ -191,7 +189,8 @@ static inline void disable_mmu(void)
cr = ctrl_inl(MMUCR);
cr &= ~MMU_CONTROL_INIT;
ctrl_outl(cr, MMUCR);
- __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t");
+
+ ctrl_barrier();
}
#else
/*
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index eb4902ed920a94..1630a5411e5fb8 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -67,8 +67,17 @@ static inline void sched_cacheflush(void)
{
}
-#define nop() __asm__ __volatile__ ("nop")
-
+#ifdef CONFIG_CPU_SH4A
+#define __icbi() /* issue one icbi; statement-like, use as "__icbi();" */ \
+do { /* do/while(0) so the macro plus ';' is exactly one statement */ \
+ unsigned long __addr; \
+ __addr = 0xa8000000; \
+ __asm__ __volatile__( \
+ "icbi %0\n\t" \
+ : /* no output */ \
+ : "m" (__m(__addr))); \
+} while (0)
+#endif
#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
@@ -84,15 +93,31 @@ static __inline__ unsigned long tas(volatile int *m)
extern void __xchg_called_with_bad_pointer(void);
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
#ifdef CONFIG_CPU_SH4A
-#define mb() __asm__ __volatile__ ("synco": : :"memory")
-#define rmb() mb()
-#define wmb() __asm__ __volatile__ ("synco": : :"memory")
+#define mb() __asm__ __volatile__ ("synco": : :"memory")
+#define rmb() mb()
+#define wmb() __asm__ __volatile__ ("synco": : :"memory")
+#define ctrl_barrier() __icbi()
#define read_barrier_depends() do { } while(0)
#else
-#define mb() __asm__ __volatile__ ("": : :"memory")
-#define rmb() mb()
-#define wmb() __asm__ __volatile__ ("": : :"memory")
+#define mb() __asm__ __volatile__ ("": : :"memory")
+#define rmb() mb()
+#define wmb() __asm__ __volatile__ ("": : :"memory")
+#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#define read_barrier_depends() do { } while(0)
#endif
@@ -218,8 +243,8 @@ do { \
#define back_to_P1() \
do { \
unsigned long __dummy; \
+ ctrl_barrier(); \
__asm__ __volatile__( \
- "nop;nop;nop;nop;nop;nop;nop\n\t" \
"mov.l 1f, %0\n\t" \
"jmp @%0\n\t" \
" nop\n\t" \