[SPARC64]: More fully work around Spitfire Errata 51.
authorDavid S. Miller <davem@davemloft.net>
Mon, 29 Aug 2005 19:46:22 +0000 (12:46 -0700)
committerDavid S. Miller <davem@davemloft.net>
Mon, 29 Aug 2005 19:46:22 +0000 (12:46 -0700)
It appears that a memory barrier executed soon after a mispredicted
branch, not just one in the branch's delay slot, can trigger the hang
condition described by this CPU erratum.

So move the memory barriers out-of-line, and explicitly put each one
into the delay slot of a "branch always, predict taken" instruction,
which should fully kill this problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
14 files changed:
arch/sparc64/kernel/pci_iommu.c
arch/sparc64/kernel/process.c
arch/sparc64/kernel/sbus.c
arch/sparc64/kernel/signal32.c
arch/sparc64/kernel/smp.c
arch/sparc64/kernel/sparc64_ksyms.c
arch/sparc64/lib/Makefile
arch/sparc64/lib/debuglocks.c
arch/sparc64/lib/mb.S [new file with mode: 0644]
arch/sparc64/solaris/misc.c
include/asm-sparc64/atomic.h
include/asm-sparc64/bitops.h
include/asm-sparc64/spinlock.h
include/asm-sparc64/system.h

index 2803bc7c2c798af12fc0d57b701991bc51727995..425c60cfea195a2b57f4931ceb42a5cb94b970f1 100644 (file)
@@ -466,7 +466,7 @@ do_flush_sync:
                if (!limit)
                        break;
                udelay(1);
-               membar("#LoadLoad");
+               rmb();
        }
        if (!limit)
                printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
index 07424b075938b7651e53380cddcf712c42b324e0..66255434128a2e6ebf6b144d5d030003fe4ebaca 100644 (file)
@@ -103,7 +103,7 @@ void cpu_idle(void)
                 * other cpus see our increasing idleness for the buddy
                 * redistribution algorithm.  -DaveM
                 */
-               membar("#StoreStore | #StoreLoad");
+               membar_storeload_storestore();
        }
 }
 
index 89f5e019f24c02ade1ed11ab0c6ed721de1b4bfd..e09ddf927655e4f608bdf0cae6e349a26cf6f172 100644 (file)
@@ -147,7 +147,7 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long
                if (!limit)
                        break;
                udelay(1);
-               membar("#LoadLoad");
+               rmb();
        }
        if (!limit)
                printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
index b1ed23091fbb9c4ec6961de6dfd1c3e2857b8262..aecccd0df1d129a6981ae85d86ef87d11bf47638 100644 (file)
@@ -877,11 +877,12 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                        unsigned long page = (unsigned long)
                                page_address(pte_page(*ptep));
 
-                       __asm__ __volatile__(
-                       "       membar  #StoreStore\n"
-                       "       flush   %0 + %1"
-                       : : "r" (page), "r" (address & (PAGE_SIZE - 1))
-                       : "memory");
+                       wmb();
+                       __asm__ __volatile__("flush     %0 + %1"
+                                            : /* no outputs */
+                                            : "r" (page),
+                                              "r" (address & (PAGE_SIZE - 1))
+                                            : "memory");
                }
                pte_unmap(ptep);
                preempt_enable();
@@ -1292,11 +1293,12 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                        unsigned long page = (unsigned long)
                                page_address(pte_page(*ptep));
 
-                       __asm__ __volatile__(
-                       "       membar  #StoreStore\n"
-                       "       flush   %0 + %1"
-                       : : "r" (page), "r" (address & (PAGE_SIZE - 1))
-                       : "memory");
+                       wmb();
+                       __asm__ __volatile__("flush     %0 + %1"
+                                            : /* no outputs */
+                                            : "r" (page),
+                                              "r" (address & (PAGE_SIZE - 1))
+                                            : "memory");
                }
                pte_unmap(ptep);
                preempt_enable();
index b9b42491e118b6e273f40889de677ffb5d98cc1f..b4fc6a5462b2192065052a1693d194e375066910 100644 (file)
@@ -144,7 +144,7 @@ void __init smp_callin(void)
        current->active_mm = &init_mm;
 
        while (!cpu_isset(cpuid, smp_commenced_mask))
-               membar("#LoadLoad");
+               rmb();
 
        cpu_set(cpuid, cpu_online_map);
 }
@@ -184,11 +184,11 @@ static inline long get_delta (long *rt, long *master)
        for (i = 0; i < NUM_ITERS; i++) {
                t0 = tick_ops->get_tick();
                go[MASTER] = 1;
-               membar("#StoreLoad");
+               membar_storeload();
                while (!(tm = go[SLAVE]))
-                       membar("#LoadLoad");
+                       rmb();
                go[SLAVE] = 0;
-               membar("#StoreStore");
+               wmb();
                t1 = tick_ops->get_tick();
 
                if (t1 - t0 < best_t1 - best_t0)
@@ -221,7 +221,7 @@ void smp_synchronize_tick_client(void)
        go[MASTER] = 1;
 
        while (go[MASTER])
-               membar("#LoadLoad");
+               rmb();
 
        local_irq_save(flags);
        {
@@ -273,21 +273,21 @@ static void smp_synchronize_one_tick(int cpu)
 
        /* wait for client to be ready */
        while (!go[MASTER])
-               membar("#LoadLoad");
+               rmb();
 
        /* now let the client proceed into his loop */
        go[MASTER] = 0;
-       membar("#StoreLoad");
+       membar_storeload();
 
        spin_lock_irqsave(&itc_sync_lock, flags);
        {
                for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
                        while (!go[MASTER])
-                               membar("#LoadLoad");
+                               rmb();
                        go[MASTER] = 0;
-                       membar("#StoreStore");
+                       wmb();
                        go[SLAVE] = tick_ops->get_tick();
-                       membar("#StoreLoad");
+                       membar_storeload();
                }
        }
        spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -927,11 +927,11 @@ void smp_capture(void)
                       smp_processor_id());
 #endif
                penguins_are_doing_time = 1;
-               membar("#StoreStore | #LoadStore");
+               membar_storestore_loadstore();
                atomic_inc(&smp_capture_registry);
                smp_cross_call(&xcall_capture, 0, 0, 0);
                while (atomic_read(&smp_capture_registry) != ncpus)
-                       membar("#LoadLoad");
+                       rmb();
 #ifdef CAPTURE_DEBUG
                printk("done\n");
 #endif
@@ -947,7 +947,7 @@ void smp_release(void)
                       smp_processor_id());
 #endif
                penguins_are_doing_time = 0;
-               membar("#StoreStore | #StoreLoad");
+               membar_storeload_storestore();
                atomic_dec(&smp_capture_registry);
        }
 }
@@ -970,9 +970,9 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
        save_alternate_globals(global_save);
        prom_world(1);
        atomic_inc(&smp_capture_registry);
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
        while (penguins_are_doing_time)
-               membar("#LoadLoad");
+               rmb();
        restore_alternate_globals(global_save);
        atomic_dec(&smp_capture_registry);
        prom_world(0);
index 0764b93307845e98e3b7bca2aa8d5e771ff422c2..a3ea697f1adbffb88f7de3ec9d5c4a8fe969da19 100644 (file)
@@ -406,3 +406,12 @@ EXPORT_SYMBOL(xor_vis_4);
 EXPORT_SYMBOL(xor_vis_5);
 
 EXPORT_SYMBOL(prom_palette);
+
+/* memory barriers */
+EXPORT_SYMBOL(mb);
+EXPORT_SYMBOL(rmb);
+EXPORT_SYMBOL(wmb);
+EXPORT_SYMBOL(membar_storeload);
+EXPORT_SYMBOL(membar_storeload_storestore);
+EXPORT_SYMBOL(membar_storeload_loadload);
+EXPORT_SYMBOL(membar_storestore_loadstore);
index 40dbeec7e5d6a8ed75006dd2873e0d40e019b29e..6201f1040982aabde591d28673a4a0d7e2697f95 100644 (file)
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
         U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
         U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
         copy_in_user.o user_fixup.o memmove.o \
-        mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
+        mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
 
 lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
index 7f6ccc4114c50befc33c32e1a48ee63a32f99287..f5f0b5586f01a76ca110d4c04aec77c2be7d2931 100644 (file)
@@ -61,7 +61,7 @@ again:
                             : "=r" (val)
                             : "r" (&(lock->lock))
                             : "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
        if (val) {
                while (lock->lock) {
                        if (!--stuck) {
@@ -69,7 +69,7 @@ again:
                                        show(str, lock, caller);
                                stuck = INIT_STUCK;
                        }
-                       membar("#LoadLoad");
+                       rmb();
                }
                goto again;
        }
@@ -90,7 +90,7 @@ int _do_spin_trylock(spinlock_t *lock, unsigned long caller)
                             : "=r" (val)
                             : "r" (&(lock->lock))
                             : "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
        if (!val) {
                lock->owner_pc = ((unsigned int)caller);
                lock->owner_cpu = cpu;
@@ -107,7 +107,7 @@ void _do_spin_unlock(spinlock_t *lock)
 {
        lock->owner_pc = 0;
        lock->owner_cpu = NO_PROC_ID;
-       membar("#StoreStore | #LoadStore");
+       membar_storestore_loadstore();
        lock->lock = 0;
        current->thread.smp_lock_count--;
 }
@@ -129,7 +129,7 @@ wlock_again:
                                show_read(str, rw, caller);
                        stuck = INIT_STUCK;
                }
-               membar("#LoadLoad");
+               rmb();
        }
        /* Try once to increment the counter.  */
        __asm__ __volatile__(
@@ -142,7 +142,7 @@ wlock_again:
 "2:"   : "=r" (val)
        : "0" (&(rw->lock))
        : "g1", "g7", "memory");
-       membar("#StoreLoad | #StoreStore");
+       membar_storeload_storestore();
        if (val)
                goto wlock_again;
        rw->reader_pc[cpu] = ((unsigned int)caller);
@@ -201,7 +201,7 @@ wlock_again:
                                show_write(str, rw, caller);
                        stuck = INIT_STUCK;
                }
-               membar("#LoadLoad");
+               rmb();
        }
 
        /* Try to acquire the write bit.  */
@@ -256,7 +256,7 @@ wlock_again:
                                        show_write(str, rw, caller);
                                stuck = INIT_STUCK;
                        }
-                       membar("#LoadLoad");
+                       rmb();
                }
                goto wlock_again;
        }
diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S
new file mode 100644 (file)
index 0000000..4004f74
--- /dev/null
@@ -0,0 +1,73 @@
+/* mb.S: Out of line memory barriers.
+ *
+ * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
+ */
+
+       /* These are here in an effort to more fully work around
+        * Spitfire Errata #51.  Essentially, if a memory barrier
+        * occurs soon after a mispredicted branch, the chip can stop
+        * executing instructions until a trap occurs.  Therefore, if
+        * interrupts are disabled, the chip can hang forever.
+        *
+        * It used to be believed that the memory barrier had to be
+        * right in the delay slot, but a case has been traced
+        * recently wherein the memory barrier was one instruction
+        * after the branch delay slot and the chip still hung.  The
+        * offending sequence was the following in sym_wakeup_done()
+        * of the sym53c8xx_2 driver:
+        *
+        *      call    sym_ccb_from_dsa, 0
+        *       movge  %icc, 0, %l0
+        *      brz,pn  %o0, .LL1303
+        *       mov    %o0, %l2
+        *      membar  #LoadLoad
+        *
+        * The branch has to be mispredicted for the bug to occur.
+        * Therefore, we put the memory barrier explicitly into a
+        * "branch always, predicted taken" delay slot to avoid the
+        * problem case.
+        */
+
+       .text
+
+99:    retl                    /* shared return point: every barrier routine below branches here */
+        nop                    /* fills the retl delay slot */
+
+       .globl  mb
+mb:    ba,pt   %xcc, 99b       /* branch always, predicted taken, to the shared retl */
+        membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad       /* delay slot: all four ordering bits */
+       .size   mb, .-mb
+
+       .globl  rmb
+rmb:   ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #LoadLoad       /* delay slot */
+       .size   rmb, .-rmb
+
+       .globl  wmb
+wmb:   ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #StoreStore     /* delay slot */
+       .size   wmb, .-wmb
+
+       .globl  membar_storeload
+membar_storeload:
+       ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #StoreLoad      /* delay slot */
+       .size   membar_storeload, .-membar_storeload
+
+       .globl  membar_storeload_storestore
+membar_storeload_storestore:
+       ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #StoreLoad | #StoreStore        /* delay slot */
+       .size   membar_storeload_storestore, .-membar_storeload_storestore
+
+       .globl  membar_storeload_loadload
+membar_storeload_loadload:
+       ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #StoreLoad | #LoadLoad  /* delay slot */
+       .size   membar_storeload_loadload, .-membar_storeload_loadload
+
+       .globl  membar_storestore_loadstore
+membar_storestore_loadstore:
+       ba,pt   %xcc, 99b       /* same ba,pt + delay-slot shape as mb */
+        membar #StoreStore | #LoadStore        /* delay slot */
+       .size   membar_storestore_loadstore, .-membar_storestore_loadstore
index 15b4cfe075572afbd1c9c2e19a28aef3e574ae70..302efbcba70e85208d1fa8ba7387da963b8f8181 100644 (file)
@@ -737,7 +737,8 @@ MODULE_LICENSE("GPL");
 extern u32 tl0_solaris[8];
 #define update_ttable(x)                                                                               \
        tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000;                       \
-       __asm__ __volatile__ ("membar #StoreStore; flush %0" : : "r" (&tl0_solaris[3]))
+       wmb();          \
+       __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3]))
 #else
 #endif 
 
@@ -761,7 +762,8 @@ int init_module(void)
        entry64_personality_patch |=
                (offsetof(struct task_struct, personality) +
                 (sizeof(unsigned long) - 1));
-       __asm__ __volatile__("membar #StoreStore; flush %0"
+       wmb();
+       __asm__ __volatile__("flush %0"
                             : : "r" (&entry64_personality_patch));
        return 0;
 }
index d80f3379669b4625e1be10b21c7498d9cbfb6080..e175afcf2cdeb852d712a40f51b87678dbb26521 100644 (file)
@@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *);
 
 /* Atomic operations are already serializing */
 #ifdef CONFIG_SMP
-#define smp_mb__before_atomic_dec()    membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_dec()     membar("#StoreLoad | #StoreStore")
-#define smp_mb__before_atomic_inc()    membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_inc()     membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_atomic_dec()    membar_storeload_loadload()
+#define smp_mb__after_atomic_dec()     membar_storeload_storestore()
+#define smp_mb__before_atomic_inc()    membar_storeload_loadload()
+#define smp_mb__after_atomic_inc()     membar_storeload_storestore()
 #else
 #define smp_mb__before_atomic_dec()    barrier()
 #define smp_mb__after_atomic_dec()     barrier()
index 9c5e71970287fbaebb37297315be05dcce55bcb3..6388b8376c50227bc8609d87a40e52451f74a435 100644 (file)
@@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 }
 
 #ifdef CONFIG_SMP
-#define smp_mb__before_clear_bit()     membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_clear_bit()      membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_clear_bit()     membar_storeload_loadload()
+#define smp_mb__after_clear_bit()      membar_storeload_storestore()
 #else
 #define smp_mb__before_clear_bit()     barrier()
 #define smp_mb__after_clear_bit()      barrier()
index d265bf6570fe421bc9f85f0915cd4897dc19d0c5..a02c4370eb42e0d1c8f6043a25f900e0ea03ff2c 100644 (file)
@@ -43,7 +43,7 @@ typedef struct {
 #define spin_is_locked(lp)  ((lp)->lock != 0)
 
 #define spin_unlock_wait(lp)   \
-do {   membar("#LoadLoad");    \
+do {   rmb();                  \
 } while((lp)->lock)
 
 static inline void _raw_spin_lock(spinlock_t *lock)
@@ -129,7 +129,7 @@ typedef struct {
 #define spin_is_locked(__lock) ((__lock)->lock != 0)
 #define spin_unlock_wait(__lock)       \
 do { \
-       membar("#LoadLoad"); \
+       rmb(); \
 } while((__lock)->lock)
 
 extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller);
index ee4bdfc6b88f3a7b047da2361bb117bfbe312581..5e94c05dc2fccf08d6d6fd4440adbb017fd41edf 100644 (file)
@@ -28,6 +28,14 @@ enum sparc_cpu {
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
+extern void mb(void);
+extern void rmb(void);
+extern void wmb(void);
+extern void membar_storeload(void);
+extern void membar_storeload_storestore(void);
+extern void membar_storeload_loadload(void);
+extern void membar_storestore_loadstore(void);
+
 #endif
 
 #define setipl(__new_ipl) \
@@ -78,16 +86,11 @@ enum sparc_cpu {
 
 #define nop()          __asm__ __volatile__ ("nop")
 
-#define membar(type)   __asm__ __volatile__ ("membar " type : : : "memory")
-#define mb()           \
-       membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()          membar("#LoadLoad")
-#define wmb()          membar("#StoreStore")
 #define read_barrier_depends()         do { } while(0)
 #define set_mb(__var, __value) \
-       do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0)
+       do { __var = __value; membar_storeload_storestore(); } while(0)
 #define set_wmb(__var, __value) \
-       do { __var = __value; membar("#StoreStore"); } while(0)
+       do { __var = __value; wmb(); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()       mb()