arch: remove blackfin port
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
deleted file mode 100644
index 1e2989c..0000000
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright 2007-2008 Analog Devices Inc.
- *              Philippe Gerum <rpm@xenomai.org>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/linkage.h>
-#include <asm/blackfin.h>
-#include <asm/cache.h>
-#include <asm/asm-offsets.h>
-#include <asm/rwlock.h>
-#include <asm/cplb.h>
-
-.text
-
-.macro coreslot_loadaddr reg:req
-       \reg\().l = _corelock;
-       \reg\().h = _corelock;
-.endm
-
-.macro safe_testset addr:req, scratch:req
-#if ANOMALY_05000477
-       cli \scratch;
-       testset (\addr);
-       sti \scratch;
-#else
-       testset (\addr);
-#endif
-.endm
-
-/*
- * r0 = address of atomic data to flush and invalidate (32bit).
- *
- * Clear interrupts and return the old mask.
- * We assume that no atomic data can span cachelines.
- *
- * Clobbers: r2:0, p0
- */
-ENTRY(_get_core_lock)
-       r1 = -L1_CACHE_BYTES;
-       r1 = r0 & r1;
-       cli r0;
-       coreslot_loadaddr p0;
-.Lretry_corelock:
-       safe_testset p0, r2;
-       if cc jump .Ldone_corelock;
-       SSYNC(r2);
-       jump .Lretry_corelock
-.Ldone_corelock:
-       p0 = r1;
-       /* flush core internal write buffer before invalidate dcache */
-       CSYNC(r2);
-       flushinv[p0];
-       SSYNC(r2);
-       rts;
-ENDPROC(_get_core_lock)
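
A rough C model of what _get_core_lock provides may help when reading the callers below. This is not part of the original file; _corelock matches the symbol used above, while irq_save() and cacheline_flushinv() are placeholders for the cli/sti and CSYNC/flushinv/SSYNC sequences.

    #include <stdint.h>

    extern uint32_t irq_save(void);                    /* placeholder for "cli r0" */
    extern void cacheline_flushinv(volatile void *p);  /* placeholder for CSYNC/flushinv/SSYNC */

    static volatile unsigned char corelock;            /* the shared testset slot (_corelock) */

    /* Mask interrupts, spin until the shared testset slot is ours, then make the
     * cacheline holding the atomic datum coherent before the caller touches it.
     * Returns the old interrupt mask (the asm hands it back in r0). */
    static uint32_t get_core_lock_model(volatile void *atomic_ptr)
    {
            uint32_t flags = irq_save();

            while (__atomic_test_and_set((void *)&corelock, __ATOMIC_ACQUIRE))
                    ;                                  /* testset retry loop */
            cacheline_flushinv(atomic_ptr);            /* no atomic datum spans a line */
            return flags;
    }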
-
-/*
- * r0 = address of atomic data in uncacheable memory region (32bit).
- *
- * Clear interrupts and return the old mask.
- *
- * Clobbers: r0, p0
- */
-ENTRY(_get_core_lock_noflush)
-       cli r0;
-       coreslot_loadaddr p0;
-.Lretry_corelock_noflush:
-       safe_testset p0, r2;
-       if cc jump .Ldone_corelock_noflush;
-       SSYNC(r2);
-       jump .Lretry_corelock_noflush
-.Ldone_corelock_noflush:
-       /*
-        * SMP kgdb runs into a dead loop without this NOP when one core
-        * single-steps over get_core_lock_noflush while the other executes
-        * get_core_lock as a slave node.
-        */
-       nop;
-       CSYNC(r2);
-       rts;
-ENDPROC(_get_core_lock_noflush)
-
-/*
- * r0 = interrupt mask to restore.
- * r1 = address of atomic data to flush and invalidate (32bit).
- *
- * Interrupts are masked on entry (see _get_core_lock).
- * Clobbers: r2:0, p0
- */
-ENTRY(_put_core_lock)
-       /* Write-through cache assumed, so no flush needed here. */
-       coreslot_loadaddr p0;
-       r1 = 0;
-       [p0] = r1;
-       SSYNC(r2);
-       sti r0;
-       rts;
-ENDPROC(_put_core_lock)
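
Every primitive below follows the same shape: take the core lock with interrupts off, operate on the (now coherent) word, then drop the lock and restore the mask. A hedged C sketch of that pairing, with get_core_lock()/put_core_lock() as illustrative stand-ins for the asm entry points:

    extern unsigned long get_core_lock(volatile void *ptr);              /* models _get_core_lock */
    extern void put_core_lock(unsigned long flags, volatile void *ptr);  /* models _put_core_lock */

    static int locked_read(volatile int *ptr)
    {
            unsigned long flags = get_core_lock(ptr);
            int val = *ptr;                            /* critical section */

            put_core_lock(flags, ptr);
            return val;
    }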
-
-#ifdef __ARCH_SYNC_CORE_DCACHE
-
-ENTRY(___raw_smp_mark_barrier_asm)
-       [--sp] = rets;
-       [--sp] = ( r7:5 );
-       [--sp] = r0;
-       [--sp] = p1;
-       [--sp] = p0;
-       call _get_core_lock_noflush;
-
-       /*
-        * Calculate current core mask
-        */
-       GET_CPUID(p1, r7);
-       r6 = 1;
-       r6 <<= r7;
-
-       /*
-        * Set bit of other cores in barrier mask. Don't change current core bit.
-        */
-       p1.l = _barrier_mask;
-       p1.h = _barrier_mask;
-       r7 = [p1];
-       r5 = r7 & r6;
-       r7 = ~r6;
-       cc = r5 == 0;
-       if cc jump 1f;
-       r7 = r7 | r6;
-1:
-       [p1] = r7;
-       SSYNC(r2);
-
-       call _put_core_lock;
-       p0 = [sp++];
-       p1 = [sp++];
-       r0 = [sp++];
-       ( r7:5 ) = [sp++];
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_smp_mark_barrier_asm)
-
-ENTRY(___raw_smp_check_barrier_asm)
-       [--sp] = rets;
-       [--sp] = ( r7:5 );
-       [--sp] = r0;
-       [--sp] = p1;
-       [--sp] = p0;
-       call _get_core_lock_noflush;
-
-       /*
-        * Calculate current core mask
-        */
-       GET_CPUID(p1, r7);
-       r6 = 1;
-       r6 <<= r7;
-
-       /*
-        * Clear current core bit in barrier mask if it is set.
-        */
-       p1.l = _barrier_mask;
-       p1.h = _barrier_mask;
-       r7 = [p1];
-       r5 = r7 & r6;
-       cc = r5 == 0;
-       if cc jump 1f;
-       r6 = ~r6;
-       r7 = r7 & r6;
-       [p1] = r7;
-       SSYNC(r2);
-
-       call _put_core_lock;
-
-       /*
-        * Invalidate the entire D-cache of current core.
-        */
-       sp += -12;
-       call _resync_core_dcache
-       sp += 12;
-       jump 2f;
-1:
-       call _put_core_lock;
-2:
-       p0 = [sp++];
-       p1 = [sp++];
-       r0 = [sp++];
-       ( r7:5 ) = [sp++];
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_smp_check_barrier_asm)
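
The two routines above restate easily in C. The sketch below is illustrative only; barrier_mask and resync_core_dcache mirror the symbols referenced by the asm, and the lock helpers are placeholders:

    extern unsigned long get_core_lock_noflush(volatile void *ptr);
    extern void put_core_lock(unsigned long flags, volatile void *ptr);
    extern void resync_core_dcache(void);              /* invalidate this core's D-cache */
    extern unsigned int this_cpu_id(void);             /* models GET_CPUID */

    static unsigned long barrier_mask;                 /* one bit per core */

    /* mark: flag every other core as needing a D-cache resync, keep our own bit. */
    static void smp_mark_barrier_model(void)
    {
            unsigned long me = 1UL << this_cpu_id();
            unsigned long flags = get_core_lock_noflush(&barrier_mask);

            barrier_mask = ~me | (barrier_mask & me);
            put_core_lock(flags, &barrier_mask);
    }

    /* check: if another core marked us, clear our bit and resync the D-cache. */
    static void smp_check_barrier_model(void)
    {
            unsigned long me = 1UL << this_cpu_id();
            unsigned long flags = get_core_lock_noflush(&barrier_mask);

            if (barrier_mask & me) {
                    barrier_mask &= ~me;
                    put_core_lock(flags, &barrier_mask);
                    resync_core_dcache();
            } else {
                    put_core_lock(flags, &barrier_mask);
            }
    }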
-
-/*
- * r0 = irqflags
- * r1 = address of atomic data
- *
- * Clobbers: r2:0, p1:0
- */
-_start_lock_coherent:
-
-       [--sp] = rets;
-       [--sp] = ( r7:6 );
-       r7 = r0;
-       p1 = r1;
-
-       /*
-        * Determine whether the atomic data was previously
-        * owned by another CPU (=r6).
-        */
-       GET_CPUID(p0, r2);
-       r1 = 1;
-       r1 <<= r2;
-       r2 = ~r1;
-
-       r1 = [p1];
-       r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
-       r6 = r1 & r2;
-       r1 = [p1];
-       r1 <<= 4;
-       r1 >>= 4;
-       [p1] = r1;
-
-       /*
-        * Release the core lock now, but keep IRQs disabled while we are
-        * performing the remaining housekeeping chores for the current CPU.
-        */
-       coreslot_loadaddr p0;
-       r1 = 0;
-       [p0] = r1;
-
-       /*
-        * If another CPU has owned the same atomic section before us,
-        * then our D-cached copy of the shared data protected by the
-        * current spin/write_lock may be obsolete.
-        */
-       cc = r6 == 0;
-       if cc jump .Lcache_synced
-
-       /*
-        * Invalidate the entire D-cache of the current core.
-        */
-       sp += -12;
-       call _resync_core_dcache
-       sp += 12;
-
-.Lcache_synced:
-       SSYNC(r2);
-       sti r7;
-       ( r7:6 ) = [sp++];
-       rets = [sp++];
-       rts
-
-/*
- * r0 = irqflags
- * r1 = address of atomic data
- *
- * Clobbers: r2:0, p1:0
- */
-_end_lock_coherent:
-
-       p1 = r1;
-       GET_CPUID(p0, r2);
-       r2 += 28;
-       r1 = 1;
-       r1 <<= r2;
-       r2 = [p1];
-       r2 = r1 | r2;
-       [p1] = r2;
-       r1 = p1;
-       jump _put_core_lock;
-
-#endif /* __ARCH_SYNC_CORE_DCACHE */
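
The coherence trick used by _start_lock_coherent/_end_lock_coherent is easier to see in C: the top nibble of the lock word records which core(s) last owned it, and a core that finds another owner's fingerprint there invalidates its own D-cache before trusting cached data guarded by the lock. A sketch under those assumptions (helper names are placeholders):

    #include <stdint.h>

    extern void resync_core_dcache(void);              /* invalidate this core's D-cache */
    extern unsigned int this_cpu_id(void);              /* models GET_CPUID */

    #define OWNER_SHIFT 28                              /* fingerprints live in bits 31:28 */

    /* Called with the lock word already claimed and the core lock still held. */
    static void start_lock_coherent_model(volatile uint32_t *lock)
    {
            uint32_t me = 1u << this_cpu_id();
            uint32_t others = (*lock >> OWNER_SHIFT) & ~me;

            *lock &= 0x0fffffffu;                       /* strip the fingerprint nibble */
            /* ...core lock dropped here, IRQs still masked... */
            if (others)                                 /* someone else owned it before? */
                    resync_core_dcache();               /* then our cached copy may be stale */
    }

    /* On unlock, stamp our fingerprint so the next owner knows who was here. */
    static void end_lock_coherent_model(volatile uint32_t *lock)
    {
            *lock |= (1u << this_cpu_id()) << OWNER_SHIFT;
    }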
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_is_locked_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r3 = [p1];
-       cc = bittst( r3, 0 );
-       r3 = cc;
-       r1 = p1;
-       call _put_core_lock;
-       rets = [sp++];
-       r0 = r3;
-       rts;
-ENDPROC(___raw_spin_is_locked_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_lock_asm)
-       p1 = r0;
-       [--sp] = rets;
-.Lretry_spinlock:
-       call _get_core_lock;
-       r1 = p1;
-       r2 = [p1];
-       cc = bittst( r2, 0 );
-       if cc jump .Lbusy_spinlock
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       r3 = p1;
-       bitset ( r2, 0 ); /* Raise the lock bit. */
-       [p1] = r2;
-       call _start_lock_coherent
-#else
-       r2 = 1;
-       [p1] = r2;
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       rts;
-
-.Lbusy_spinlock:
-       /* We don't touch the atomic area if busy, so that the flush
-          will behave like a nop in _put_core_lock. */
-       call _put_core_lock;
-       SSYNC(r2);
-       r0 = p1;
-       jump .Lretry_spinlock
-ENDPROC(___raw_spin_lock_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_trylock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r1 = p1;
-       r3 = [p1];
-       cc = bittst( r3, 0 );
-       if cc jump .Lfailed_trylock
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       bitset ( r3, 0 ); /* Raise the lock bit. */
-       [p1] = r3;
-       call _start_lock_coherent
-#else
-       r2 = 1;
-       [p1] = r2;
-       call _put_core_lock;
-#endif
-       r0 = 1;
-       rets = [sp++];
-       rts;
-.Lfailed_trylock:
-       call _put_core_lock;
-       r0 = 0;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_spin_trylock_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_spin_unlock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];
-       bitclr ( r2, 0 );
-       [p1] = r2;
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _end_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_spin_unlock_asm)
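
Stripped of the coherence handling, the spinlock above is just a test-and-set of bit 0 performed while the global core lock is held; that serialization across the two cores is what makes it atomic. A minimal C sketch, with the lock helpers as stand-ins and the __ARCH_SYNC_CORE_DCACHE fingerprint path left out:

    #include <stdint.h>

    extern unsigned long get_core_lock(volatile void *p);
    extern void put_core_lock(unsigned long flags, volatile void *p);

    static void spin_lock_model(volatile uint32_t *lock)
    {
            for (;;) {
                    unsigned long flags = get_core_lock(lock);

                    if (!(*lock & 1)) {                 /* bit 0 clear: lock is free */
                            *lock |= 1;
                            put_core_lock(flags, lock);
                            return;
                    }
                    put_core_lock(flags, lock);         /* busy: drop the core lock and retry */
            }
    }

    static void spin_unlock_model(volatile uint32_t *lock)
    {
            unsigned long flags = get_core_lock(lock);

            *lock &= ~1u;
            put_core_lock(flags, lock);
    }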
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_read_lock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-.Lrdlock_try:
-       r1 = [p1];
-       r1 += -1;
-       [p1] = r1;
-       cc = r1 < 0;
-       if cc jump .Lrdlock_failed
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _start_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       rts;
-
-.Lrdlock_failed:
-       r1 += 1;
-       [p1] = r1;
-.Lrdlock_wait:
-       r1 = p1;
-       call _put_core_lock;
-       SSYNC(r2);
-       r0 = p1;
-       call _get_core_lock;
-       r1 = [p1];
-       cc = r1 < 2;
-       if cc jump .Lrdlock_wait;
-       jump .Lrdlock_try
-ENDPROC(___raw_read_lock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_read_trylock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r1 = [p1];
-       cc = r1 <= 0;
-       if cc jump .Lfailed_tryrdlock;
-       r1 += -1;
-       [p1] = r1;
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _start_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       r0 = 1;
-       rts;
-.Lfailed_tryrdlock:
-       r1 = p1;
-       call _put_core_lock;
-       rets = [sp++];
-       r0 = 0;
-       rts;
-ENDPROC(___raw_read_trylock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Note: processing done under a reader lock should have no cache
- * side effects visible to the other core, so we just release the
- * core lock and exit (no _end_lock_coherent).
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_read_unlock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r1 = [p1];
-       r1 += 1;
-       [p1] = r1;
-       r1 = p1;
-       call _put_core_lock;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_read_unlock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_lock_asm)
-       p1 = r0;
-       r3.l = lo(RW_LOCK_BIAS);
-       r3.h = hi(RW_LOCK_BIAS);
-       [--sp] = rets;
-       call _get_core_lock;
-.Lwrlock_try:
-       r1 = [p1];
-       r1 = r1 - r3;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       r2 = r1;
-       r2 <<= 4;
-       r2 >>= 4;
-       cc = r2 == 0;
-#else
-       cc = r1 == 0;
-#endif
-       if !cc jump .Lwrlock_wait
-       [p1] = r1;
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _start_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       rts;
-
-.Lwrlock_wait:
-       r1 = p1;
-       call _put_core_lock;
-       SSYNC(r2);
-       r0 = p1;
-       call _get_core_lock;
-       r1 = [p1];
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       r1 <<= 4;
-       r1 >>= 4;
-#endif
-       cc = r1 == r3;
-       if !cc jump .Lwrlock_wait;
-       jump .Lwrlock_try
-ENDPROC(___raw_write_lock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_trylock_asm)
-       p1 = r0;
-       [--sp] = rets;
-       call _get_core_lock;
-       r1 = [p1];
-       r2.l = lo(RW_LOCK_BIAS);
-       r2.h = hi(RW_LOCK_BIAS);
-       cc = r1 == r2;
-       if !cc jump .Lfailed_trywrlock;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       r1 >>= 28;
-       r1 <<= 28;
-#else
-       r1 = 0;
-#endif
-       [p1] = r1;
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _start_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       r0 = 1;
-       rts;
-
-.Lfailed_trywrlock:
-       r1 = p1;
-       call _put_core_lock;
-       rets = [sp++];
-       r0 = 0;
-       rts;
-ENDPROC(___raw_write_trylock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_unlock_asm)
-       p1 = r0;
-       r3.l = lo(RW_LOCK_BIAS);
-       r3.h = hi(RW_LOCK_BIAS);
-       [--sp] = rets;
-       call _get_core_lock;
-       r1 = [p1];
-       r1 = r1 + r3;
-       [p1] = r1;
-       r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-       call _end_lock_coherent
-#else
-       call _put_core_lock;
-#endif
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_write_unlock_asm)
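
The rwlock routines all revolve around RW_LOCK_BIAS: the counter holds RW_LOCK_BIAS when free, RW_LOCK_BIAS minus the number of readers while readers hold it, and 0 (plus the fingerprint nibble on cache-sync builds) while a writer holds it. A hedged C sketch of the trylock/unlock arithmetic, ignoring the spin-and-retry paths:

    #include <stdint.h>

    extern unsigned long get_core_lock(volatile void *p);
    extern void put_core_lock(unsigned long flags, volatile void *p);

    #define RW_LOCK_BIAS 0x01000000                     /* matches asm/rwlock.h */

    static int read_trylock_model(volatile int32_t *lock)
    {
            unsigned long flags = get_core_lock(lock);
            int got = (*lock > 0);                      /* fails once a writer took the bias */

            if (got)
                    *lock -= 1;
            put_core_lock(flags, lock);
            return got;
    }

    static int write_trylock_model(volatile int32_t *lock)
    {
            unsigned long flags = get_core_lock(lock);
            int got = (*lock == RW_LOCK_BIAS);          /* free: no readers, no writer */

            if (got)
                    *lock = 0;
            put_core_lock(flags, lock);
            return got;
    }

    /* read_unlock adds 1 back; write_unlock adds RW_LOCK_BIAS back. */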
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * ADD a signed value to a 32bit word and return the new value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_add_asm)
-       p1 = r0;
-       r3 = r1;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];
-       r3 = r3 + r2;
-       [p1] = r3;
-       r1 = p1;
-       call _put_core_lock;
-       r0 = r3;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_atomic_add_asm)
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * ADD a signed value to a 32bit word and return the old value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_xadd_asm)
-       p1 = r0;
-       r3 = r1;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];      /* r2 = old value */
-       r3 = r3 + r2;   /* r3 = new value */
-       [p1] = r3;
-       r3 = r2;        /* return the old value */
-       r1 = p1;
-       call _put_core_lock;
-       r0 = r3;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_atomic_xadd_asm)
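
The difference between the two entry points above is only which value comes back: ___raw_atomic_add_asm returns the new value, ___raw_atomic_xadd_asm the old one (fetch-and-add). In C terms, assuming the same placeholder lock helpers as before:

    #include <stdint.h>

    extern unsigned long get_core_lock(volatile void *p);
    extern void put_core_lock(unsigned long flags, volatile void *p);

    static int32_t atomic_add_return_model(volatile int32_t *p, int32_t v)
    {
            unsigned long flags = get_core_lock(p);
            int32_t nv = *p + v;

            *p = nv;
            put_core_lock(flags, p);
            return nv;                                  /* new value */
    }

    static int32_t atomic_fetch_add_model(volatile int32_t *p, int32_t v)
    {
            unsigned long flags = get_core_lock(p);
            int32_t old = *p;

            *p = old + v;
            put_core_lock(flags, p);
            return old;                                 /* old value */
    }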
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * AND the mask bits from a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_and_asm)
-       p1 = r0;
-       r3 = r1;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];      /* r2 = old value */
-       r3 = r3 & r2;   /* r3 = old value & mask */
-       [p1] = r3;
-       r3 = r2;        /* return the old value */
-       r1 = p1;
-       call _put_core_lock;
-       r0 = r3;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_atomic_and_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * OR the mask bits into a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_or_asm)
-       p1 = r0;
-       r3 = r1;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];      /* r2 = old value */
-       r3 = r3 | r2;   /* r3 = old value | mask */
-       [p1] = r3;
-       r3 = r2;        /* return the old value */
-       r1 = p1;
-       call _put_core_lock;
-       r0 = r3;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_atomic_or_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * XOR the mask bits with a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_xor_asm)
-       p1 = r0;
-       r3 = r1;
-       [--sp] = rets;
-       call _get_core_lock;
-       r2 = [p1];      /* r2 = old value */
-       r3 = r3 ^ r2;   /* r3 = old value ^ mask */
-       [p1] = r3;
-       r3 = r2;        /* return the old value */
-       r1 = p1;
-       call _put_core_lock;
-       r0 = r3;
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_atomic_xor_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * Perform a logical AND between the mask bits and a 32bit word, and
- * return the masked value. We need this on this architecture in
- * order to invalidate the local cache before testing.
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_test_asm)
-       p1 = r0;
-       r3 = r1;
-       r1 = -L1_CACHE_BYTES;
-       r1 = r0 & r1;
-       p0 = r1;
-       /* flush core internal write buffer before invalidate dcache */
-       CSYNC(r2);
-       flushinv[p0];
-       SSYNC(r2);
-       r0 = [p1];
-       r0 = r0 & r3;
-       rts;
-ENDPROC(___raw_atomic_test_asm)
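
Even this read-only helper has to work around the lack of cache coherency: the line is written back and invalidated before the load, so the value observed includes the other core's stores. As a sketch (cacheline_flushinv() again stands in for the CSYNC/flushinv/SSYNC sequence):

    #include <stdint.h>

    extern void cacheline_flushinv(volatile void *p);   /* placeholder for CSYNC/flushinv/SSYNC */

    static uint32_t atomic_test_model(volatile uint32_t *p, uint32_t mask)
    {
            cacheline_flushinv(p);                      /* drop the possibly stale local copy */
            return *p & mask;                           /* then read and mask */
    }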
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * Swap *ptr with value and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-#define        __do_xchg(src, dst)             \
-       p1 = r0;                        \
-       r3 = r1;                        \
-       [--sp] = rets;                  \
-       call _get_core_lock;            \
-       r2 = src;                       \
-       dst = r3;                       \
-       r3 = r2;                        \
-       r1 = p1;                        \
-       call _put_core_lock;            \
-       r0 = r3;                        \
-       rets = [sp++];                  \
-       rts;
-
-ENTRY(___raw_xchg_1_asm)
-       __do_xchg(b[p1] (z), b[p1])
-ENDPROC(___raw_xchg_1_asm)
-
-ENTRY(___raw_xchg_2_asm)
-       __do_xchg(w[p1] (z), w[p1])
-ENDPROC(___raw_xchg_2_asm)
-
-ENTRY(___raw_xchg_4_asm)
-       __do_xchg([p1], [p1])
-ENDPROC(___raw_xchg_4_asm)
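
__do_xchg expands to the same sequence for the 8-, 16- and 32-bit entry points; only the load/store width differs. The 32-bit case in C, with the usual placeholder lock helpers:

    #include <stdint.h>

    extern unsigned long get_core_lock(volatile void *p);
    extern void put_core_lock(unsigned long flags, volatile void *p);

    static uint32_t xchg32_model(volatile uint32_t *p, uint32_t nv)
    {
            unsigned long flags = get_core_lock(p);
            uint32_t old = *p;                          /* "r2 = src" */

            *p = nv;                                    /* "dst = r3" */
            put_core_lock(flags, p);
            return old;                                 /* old value, returned in r0 */
    }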
-
-/*
- * r0 = ptr
- * r1 = new
- * r2 = old
- *
- * Swap *ptr with new if *ptr == old and return the previous *ptr
- * value atomically.
- *
- * Clobbers: r3:0, p1:0
- */
-#define        __do_cmpxchg(src, dst)          \
-       [--sp] = rets;                  \
-       [--sp] = r4;                    \
-       p1 = r0;                        \
-       r3 = r1;                        \
-       r4 = r2;                        \
-       call _get_core_lock;            \
-       r2 = src;                       \
-       cc = r2 == r4;                  \
-       if !cc jump 1f;                 \
-       dst = r3;                       \
-     1: r3 = r2;                       \
-       r1 = p1;                        \
-       call _put_core_lock;            \
-       r0 = r3;                        \
-       r4 = [sp++];                    \
-       rets = [sp++];                  \
-       rts;
-
-ENTRY(___raw_cmpxchg_1_asm)
-       __do_cmpxchg(b[p1] (z), b[p1])
-ENDPROC(___raw_cmpxchg_1_asm)
-
-ENTRY(___raw_cmpxchg_2_asm)
-       __do_cmpxchg(w[p1] (z), w[p1])
-ENDPROC(___raw_cmpxchg_2_asm)
-
-ENTRY(___raw_cmpxchg_4_asm)
-       __do_cmpxchg([p1], [p1])
-ENDPROC(___raw_cmpxchg_4_asm)
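
Likewise for __do_cmpxchg: the store only happens when the comparison succeeds, and the previous value is always returned so the caller can tell. A hedged 32-bit C equivalent:

    #include <stdint.h>

    extern unsigned long get_core_lock(volatile void *p);
    extern void put_core_lock(unsigned long flags, volatile void *p);

    static uint32_t cmpxchg32_model(volatile uint32_t *p, uint32_t old, uint32_t nv)
    {
            unsigned long flags = get_core_lock(p);
            uint32_t prev = *p;

            if (prev == old)                            /* compare... */
                    *p = nv;                            /* ...and swap only on a match */
            put_core_lock(flags, p);
            return prev;                                /* caller checks prev == old */
    }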
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Set a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_set_asm)
-       r2 = r1;
-       r1 = 1;
-       r1 <<= r2;
-       jump ___raw_atomic_or_asm
-ENDPROC(___raw_bit_set_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Clear a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_clear_asm)
-       r2 = 1;
-       r2 <<= r1;
-       r1 = ~r2;
-       jump ___raw_atomic_and_asm
-ENDPROC(___raw_bit_clear_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Toggle a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_toggle_asm)
-       r2 = r1;
-       r1 = 1;
-       r1 <<= r2;
-       jump ___raw_atomic_xor_asm
-ENDPROC(___raw_bit_toggle_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-set a bit in a 32bit word and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_set_asm)
-       [--sp] = rets;
-       [--sp] = r1;
-       call ___raw_bit_set_asm
-       r1 = [sp++];
-       r2 = 1;
-       r2 <<= r1;
-       r0 = r0 & r2;
-       cc = r0 == 0;
-       if cc jump 1f
-       r0 = 1;
-1:
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_bit_test_set_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_clear_asm)
-       [--sp] = rets;
-       [--sp] = r1;
-       call ___raw_bit_clear_asm
-       r1 = [sp++];
-       r2 = 1;
-       r2 <<= r1;
-       r0 = r0 & r2;
-       cc = r0 == 0;
-       if cc jump 1f
-       r0 = 1;
-1:
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_bit_test_clear_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-toggle a bit in a 32bit word,
- * and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_toggle_asm)
-       [--sp] = rets;
-       [--sp] = r1;
-       call ___raw_bit_toggle_asm
-       r1 = [sp++];
-       r2 = 1;
-       r2 <<= r1;
-       r0 = r0 & r2;
-       cc = r0 == 0;
-       if cc jump 1f
-       r0 = 1;
-1:
-       rets = [sp++];
-       rts;
-ENDPROC(___raw_bit_test_toggle_asm)
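
The three test-and-modify routines above reuse the fetch-style AND/OR/XOR entry points and then examine the bit in the word that came back. For example, test-and-set in C (atomic_fetch_or_model() is an illustrative stand-in for ___raw_atomic_or_asm):

    #include <stdint.h>

    extern uint32_t atomic_fetch_or_model(volatile uint32_t *p, uint32_t mask);

    static int test_and_set_bit_model(volatile uint32_t *p, unsigned int nr)
    {
            uint32_t mask = 1u << nr;

            /* set the bit, then report whether it was already set before */
            return (atomic_fetch_or_model(p, mask) & mask) != 0;
    }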
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test a bit in a 32bit word and return its value.
- * We need this on this architecture in order to invalidate
- * the local cache before testing.
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_asm)
-       r2 = r1;
-       r1 = 1;
-       r1 <<= r2;
-       jump ___raw_atomic_test_asm
-ENDPROC(___raw_bit_test_asm)
-
-/*
- * r0 = ptr
- *
- * Fetch and return an uncached 32bit value.
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_uncached_fetch_asm)
-       p1 = r0;
-       r1 = -L1_CACHE_BYTES;
-       r1 = r0 & r1;
-       p0 = r1;
-       /* flush core internal write buffer before invalidate dcache */
-       CSYNC(r2);
-       flushinv[p0];
-       SSYNC(r2);
-       r0 = [p1];
-       rts;
-ENDPROC(___raw_uncached_fetch_asm)