arch/arc/include/asm/bitops.h
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _ASM_BITOPS_H
#define _ASM_BITOPS_H

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#ifndef __ASSEMBLY__

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
#ifndef CONFIG_ARC_HAS_LLSC
#include <asm/smp.h>
#endif

#if defined(CONFIG_ARC_HAS_LLSC)

/*
 * Hardware assisted Atomic-R-M-W
 */

#define BIT_OP(op, c_op, asm_op)                                        \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{                                                                       \
        unsigned int temp;                                              \
                                                                        \
        m += nr >> 5;                                                   \
                                                                        \
        /*                                                              \
         * ARC ISA micro-optimization:                                  \
         *                                                              \
         * Instructions dealing with bitpos only consider lower 5 bits  \
         * e.g (x << 33) is handled like (x << 1) by ASL instruction    \
         *  (mem pointer still needs adjustment to point to next word)  \
         *                                                              \
         * Hence the masking to clamp @nr arg can be elided in general. \
         *                                                              \
         * However if @nr is a constant (above assumed in a register),  \
         * and greater than 31, gcc can optimize away (x << 33) to 0,   \
         * as overflow, given the 32-bit ISA. Thus masking needs to be  \
         * done for const @nr, but no code is generated due to gcc      \
         * const prop.                                                  \
         */                                                             \
        nr &= 0x1f;                                                     \
                                                                        \
        __asm__ __volatile__(                                           \
        "1:     llock       %0, [%1]            \n"                     \
        "       " #asm_op " %0, %0, %2  \n"                             \
        "       scond       %0, [%1]            \n"                     \
        "       bnz         1b                  \n"                     \
        : "=&r"(temp)   /* Early clobber, to prevent reg reuse */       \
        : "r"(m),       /* Not "m": llock only supports reg direct addr mode */ \
          "ir"(nr)                                                      \
        : "cc");                                                        \
}
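
/*
 * Illustrative example (not part of the original header, assuming the
 * 32-bit unsigned long of ARC): the word/bit decomposition above means
 * set_bit(33, bitmap) advances the pointer by 33 >> 5 == 1 word and
 * then sets bit 33 & 0x1f == 1 of that word:
 *
 *      unsigned long bitmap[2] = { 0, 0 };     // hypothetical buffer
 *      set_bit(33, bitmap);                    // bitmap[1] becomes 0x2
 */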

/*
 * Semantically:
 *    Test the bit
 *    if clear
 *        set it and return 0 (old value)
 *    else
 *        return 1 (old value).
 *
 * Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
 * and the old value of the bit is returned
 */
#define TEST_N_BIT_OP(op, c_op, asm_op)                                 \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{                                                                       \
        unsigned long old, temp;                                        \
                                                                        \
        m += nr >> 5;                                                   \
                                                                        \
        nr &= 0x1f;                                                     \
                                                                        \
        /*                                                              \
         * Explicit full memory barrier needed before/after as          \
         * LLOCK/SCOND themselves don't provide any such semantics      \
         */                                                             \
        smp_mb();                                                       \
                                                                        \
        __asm__ __volatile__(                                           \
        "1:     llock       %0, [%2]    \n"                             \
        "       " #asm_op " %1, %0, %3  \n"                             \
        "       scond       %1, [%2]    \n"                             \
        "       bnz         1b          \n"                             \
        : "=&r"(old), "=&r"(temp)                                       \
        : "r"(m), "ir"(nr)                                              \
        : "cc");                                                        \
                                                                        \
        smp_mb();                                                       \
                                                                        \
        return (old & (1 << nr)) != 0;                                  \
}
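
/*
 * Illustrative usage sketch (the names below are hypothetical, not part
 * of this header): the returned old value, together with the smp_mb()
 * pairs above, lets callers implement simple "claim a slot" logic:
 *
 *      static unsigned long claimed[1];        // one 32-bit bitmap word
 *
 *      if (!test_and_set_bit(5, claimed)) {
 *              // bit 5 was clear and is now set: this caller owns slot 5
 *      }
 */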

#else   /* !CONFIG_ARC_HAS_LLSC */

/*
 * Non hardware assisted Atomic-R-M-W
 * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
 *
 * There's "significant" micro-optimization in writing our own variants of
 * bitops (over generic variants)
 *
 * (1) The generic APIs have "signed" @nr while we have it "unsigned"
 *     This avoids extra code being generated for pointer arithmetic, since
 *     the compiler is "not sure" that the index is NOT -ve
 * (2) Utilize the fact that ARCompact bit fiddling insns (BSET/BCLR/ASL) etc
 *     only consider bottom 5 bits of @nr, so NO need to mask them off.
 *     (GCC Quirk: however for constant @nr we still need to do the masking
 *             at compile time)
 */
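
/*
 * Worked example of the GCC quirk in (2), illustrative only: with a
 * constant @nr such as 40, the masked form (1UL << (40 & 0x1f)) is
 * constant-folded to (1UL << 8), so the mask generates no extra code,
 * whereas an unmasked (1UL << 40) could be treated as an overflow on
 * the 32-bit ISA and folded to 0.
 */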

#define BIT_OP(op, c_op, asm_op)                                        \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{                                                                       \
        unsigned long temp, flags;                                      \
        m += nr >> 5;                                                   \
                                                                        \
        /*                                                              \
         * spin lock/unlock provide the needed smp_mb() before/after    \
         */                                                             \
        bitops_lock(flags);                                             \
                                                                        \
        temp = *m;                                                      \
        *m = temp c_op (1UL << (nr & 0x1f));                            \
                                                                        \
        bitops_unlock(flags);                                           \
}

#define TEST_N_BIT_OP(op, c_op, asm_op)                                 \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{                                                                       \
        unsigned long old, flags;                                       \
        m += nr >> 5;                                                   \
                                                                        \
        bitops_lock(flags);                                             \
                                                                        \
        old = *m;                                                       \
        *m = old c_op (1UL << (nr & 0x1f));                             \
                                                                        \
        bitops_unlock(flags);                                           \
                                                                        \
        return (old & (1UL << (nr & 0x1f))) != 0;                       \
}

#endif /* CONFIG_ARC_HAS_LLSC */

/***************************************
 * Non atomic variants
 **************************************/

#define __BIT_OP(op, c_op, asm_op)                                      \
static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)    \
{                                                                       \
        unsigned long temp;                                             \
        m += nr >> 5;                                                   \
                                                                        \
        temp = *m;                                                      \
        *m = temp c_op (1UL << (nr & 0x1f));                            \
}

#define __TEST_N_BIT_OP(op, c_op, asm_op)                               \
static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{                                                                       \
        unsigned long old;                                              \
        m += nr >> 5;                                                   \
                                                                        \
        old = *m;                                                       \
        *m = old c_op (1UL << (nr & 0x1f));                             \
                                                                        \
        return (old & (1UL << (nr & 0x1f))) != 0;                       \
}
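
/*
 * Illustrative usage sketch (lock and bitmap names are hypothetical):
 * the __xxx_bit() variants above are not atomic, so they are only safe
 * when exclusion is provided externally, e.g. under a lock that already
 * serializes all updaters of the bitmap:
 *
 *      spin_lock(&map_lock);
 *      __set_bit(idx, bitmap);         // plain load/modify/store suffices
 *      spin_unlock(&map_lock);
 */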

#define BIT_OPS(op, c_op, asm_op)                                       \
                                                                        \
        /* set_bit(), clear_bit(), change_bit() */                      \
        BIT_OP(op, c_op, asm_op)                                        \
                                                                        \
        /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
        TEST_N_BIT_OP(op, c_op, asm_op)                                 \
                                                                        \
        /* __set_bit(), __clear_bit(), __change_bit() */                \
        __BIT_OP(op, c_op, asm_op)                                      \
                                                                        \
        /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
        __TEST_N_BIT_OP(op, c_op, asm_op)

BIT_OPS(set, |, bset)
BIT_OPS(clear, & ~, bclr)
BIT_OPS(change, ^, bxor)

/*
 * This routine doesn't need to be atomic.
 */
static inline int
test_bit(unsigned int nr, const volatile unsigned long *addr)
{
        unsigned long mask;

        addr += nr >> 5;

        mask = 1UL << (nr & 0x1f);

        return ((mask & *addr) != 0);
}
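
/*
 * Illustrative example (buffer and callee names are hypothetical):
 * test_bit() only reads, so it pairs naturally with the atomic setters
 * above:
 *
 *      DECLARE_BITMAP(state, 64);      // from <linux/types.h>
 *
 *      set_bit(40, state);
 *      if (test_bit(40, state))        // bit 40 = word 1, bit position 8
 *              do_something();
 */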

#ifdef CONFIG_ISA_ARCOMPACT

/*
 * Count the number of zeros, starting from MSB
 * Helper for fls( ) friends
 * This is a pure count, so (1-32) or (0-31) doesn't apply
 * It could be 0 to 32, based on the number of leading 0's:
 * clz(0x8000_0000) = 0, clz(0xFFFF_FFFF) = 0, clz(0) = 32, clz(1) = 31
 */
static inline __attribute__ ((const)) int clz(unsigned int x)
{
        unsigned int res;

        __asm__ __volatile__(
        "       norm.f  %0, %1          \n"
        "       mov.n   %0, 0           \n"
        "       add.p   %0, %0, 1       \n"
        : "=r"(res)
        : "r"(x)
        : "cc");

        return res;
}

static inline int constant_fls(int x)
{
        int r = 32;

        if (!x)
                return 0;
        if (!(x & 0xffff0000u)) {
                x <<= 16;
                r -= 16;
        }
        if (!(x & 0xff000000u)) {
                x <<= 8;
                r -= 8;
        }
        if (!(x & 0xf0000000u)) {
                x <<= 4;
                r -= 4;
        }
        if (!(x & 0xc0000000u)) {
                x <<= 2;
                r -= 2;
        }
        if (!(x & 0x80000000u)) {
                x <<= 1;
                r -= 1;
        }
        return r;
}
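
/*
 * Worked example (illustrative): constant_fls(0x00000300)
 *   upper 16 bits clear  ->  x <<= 16 (0x03000000), r = 16
 *   top byte non-zero    ->  unchanged
 *   top nibble clear     ->  x <<= 4  (0x30000000), r = 12
 *   top 2 bits clear     ->  x <<= 2  (0xc0000000), r = 10
 *   MSB now set          ->  done, returns 10
 * i.e. the highest set bit of 0x300 is bit 9, fls() position 10.
 */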

/*
 * fls = Find Last Set in word
 * @result: [1-32]
 * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
 */
static inline __attribute__ ((const)) int fls(unsigned long x)
{
        if (__builtin_constant_p(x))
                return constant_fls(x);

        return 32 - clz(x);
}

/*
 * __fls: Similar to fls, but zero based (0-31)
 */
static inline __attribute__ ((const)) int __fls(unsigned long x)
{
        if (!x)
                return 0;
        else
                return fls(x) - 1;
}

/*
 * ffs = Find First Set in word (LSB to MSB)
 * @result: [1-32], 0 if all 0's
 */
#define ffs(x)  ({ unsigned long __t = (x); fls(__t & -__t); })
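
/*
 * Why the ffs() macro works (illustrative): in two's complement,
 * __t & -__t keeps only the lowest set bit of __t, so fls() of that
 * single-bit value is exactly the 1-based position of the first set
 * bit. E.g. for x = 0x28 (binary 101000):
 *
 *      x & -x   == 0x8         // only the lowest set bit survives
 *      fls(0x8) == 4           // hence ffs(0x28) == 4
 *
 * and ffs(0) == fls(0) == 0, preserving the "0 if all 0's" contract.
 */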

/*
 * __ffs: Similar to ffs, but zero based (0-31)
 */
static inline __attribute__ ((const)) int __ffs(unsigned long word)
{
        if (!word)
                return word;

        return ffs(word) - 1;
}

#else   /* CONFIG_ISA_ARCV2 */

/*
 * fls = Find Last Set in word
 * @result: [1-32]
 * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
 */
static inline __attribute__ ((const)) int fls(unsigned long x)
{
        int n;

        asm volatile(
        "       fls.f   %0, %1          \n"  /* 0:31; 0(Z) if src 0 */
        "       add.nz  %0, %0, 1       \n"  /* 0:31 -> 1:32 */
        : "=r"(n)       /* Early clobber not needed */
        : "r"(x)
        : "cc");

        return n;
}

/*
 * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
 */
static inline __attribute__ ((const)) int __fls(unsigned long x)
{
        /* FLS insn has exactly same semantics as the API */
        return __builtin_arc_fls(x);
}

/*
 * ffs = Find First Set in word (LSB to MSB)
 * @result: [1-32], 0 if all 0's
 */
static inline __attribute__ ((const)) int ffs(unsigned long x)
{
        int n;

        asm volatile(
        "       ffs.f   %0, %1          \n"  /* 0:31; 31(Z) if src 0 */
        "       add.nz  %0, %0, 1       \n"  /* 0:31 -> 1:32 */
        "       mov.z   %0, 0           \n"  /* 31(Z)-> 0 */
        : "=r"(n)       /* Early clobber not needed */
        : "r"(x)
        : "cc");

        return n;
}

/*
 * __ffs: Similar to ffs, but zero based (0-31)
 */
static inline __attribute__ ((const)) int __ffs(unsigned long x)
{
        int n;

        asm volatile(
        "       ffs.f   %0, %1          \n"  /* 0:31; 31(Z) if src 0 */
        "       mov.z   %0, 0           \n"  /* 31(Z)-> 0 */
        : "=r"(n)
        : "r"(x)
        : "cc");

        return n;
}

#endif  /* CONFIG_ISA_ARCOMPACT */

/*
 * ffz = Find First Zero in word.
 * @return:[0-31], 32 if all 1's
 */
#define ffz(x)  __ffs(~(x))
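
/*
 * Illustrative example: ffz(0x000000ff) == __ffs(0xffffff00) == 8,
 * i.e. the first zero bit of 0xff, counting from the LSB, is bit 8.
 */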

#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>

#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* !__ASSEMBLY__ */

#endif