atomic_arm.h

Go to the documentation of this file.
00001 /* 
00002  * Copyright (C) 2006 iptelorg GmbH
00003  *
00004  * Permission to use, copy, modify, and distribute this software for any
00005  * purpose with or without fee is hereby granted, provided that the above
00006  * copyright notice and this permission notice appear in all copies.
00007  *
00008  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
00009  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
00010  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
00011  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
00012  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
00013  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
00014  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00015  */
00016 
00031 /* 
00032  * History:
00033  * --------
00034  *  2006-03-31  created by andrei
00035  *  2007-05-10  added atomic_add and atomic_cmpxchg (andrei)
00036  *  2007-05-29  added membar_depends(), membar_*_atomic_op and
00037  *                membar_*_atomic_setget (andrei)
00038  */
00039 
00040 
00041 #ifndef _atomic_arm_h
00042 #define _atomic_arm_h
00043 
00044 
00045 
00046 
#ifdef NOSMP
/*
 * Uniprocessor build: every barrier below is at most a compiler barrier,
 * no barrier instruction is emitted.
 */
#define HAVE_ASM_INLINE_MEMBAR
/* Empty asm statement with a "memory" clobber: gcc may not keep memory
 * values cached in registers across it.
 * __asm__/__volatile__ are used instead of asm/volatile so that the macro
 * still compiles when the including file is built in an ISO mode
 * (-std=c99, -std=c11, ...), where plain `asm` is not a keyword. */
#define membar() __asm__ __volatile__ ("" : : : "memory")
#define membar_read()  membar()
#define membar_write() membar()
/* really empty, not even a compiler barrier */
#define membar_depends()   do {} while(0)
/* lock barriers: empty, not needed for NOSMP; the lock/unlock should already
 * contain gcc barriers */
#define membar_enter_lock() do {} while(0)
#define membar_leave_lock() do {} while(0)
/* Barriers to be placed right before or after an atomic operation
 * (membar_*_atomic_op) or an atomic set/get (membar_*_atomic_setget).
 * Use these, or the mb_<atomic_op_name>() variants, when a barrier is needed
 * in one of these situations: on architectures where the atomic operations
 * already imply a memory barrier,
 *     atomic_op_x(); membar_atomic_op();
 * is better than
 *     atomic_op_x(); membar();
 */
#define membar_atomic_op()				membar()
#define membar_atomic_setget()			membar()
#define membar_write_atomic_op()		membar_write()
#define membar_write_atomic_setget()	membar_write()
#define membar_read_atomic_op()			membar_read()
#define membar_read_atomic_setget()		membar_read()
#else /* SMP */
#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
/* fall back to default lock based barriers (don't define HAVE_ASM...) */
#endif /* NOSMP */
00072 
00073 
00074 #ifdef __CPU_arm6
00075 
00076 
00077 #define HAVE_ASM_INLINE_ATOMIC_OPS
00078 
00079 /* hack to get some membars */
00080 #ifndef NOSMP
00081 #include "atomic_unknown.h"
00082 #endif
00083 
/*
 * ATOMIC_ASM_OP - asm template of a load-exclusive / modify /
 * store-exclusive retry loop on the word at address %3.
 *
 *   %0  receives the value loaded from [%3]; `insn` reads its input there.
 *       It is afterwards reused as the strex status register.
 *   %1  has to be written by `insn`; it is the value stored back to [%3].
 *
 * The sequence jumps back to label 1 for as long as the status left in %0
 * by strex is not 0.
 */
#define ATOMIC_ASM_OP(insn) \
	"1:   ldrex %0, [%3] \n\t"       /* %0 <- [%3]                    */ \
	"     " insn "\n\t"              /* %1 <- insn(%0, ...)           */ \
	"     strex %0, %1, [%3] \n\t"   /* try [%3] <- %1, status in %0  */ \
	"     cmp %0, #0 \n\t"           /* status != 0 ...               */ \
	"     bne 1b \n\t"               /* ... => start over             */
00092 
/*
 * ATOMIC_ASM_OP2 - variant of the load-exclusive / store-exclusive retry
 * loop that stores %4 (instead of %1) to [%3].
 *
 *   %0  receives the value loaded from [%3] and is left untouched by the
 *       rest of the template, so on exit it holds the previous value.
 *   %1  is used only for the strex status.
 *   %4  is the value written to [%3].
 */
#define ATOMIC_ASM_OP2(insn) \
	"1:   ldrex %0, [%3] \n\t"       /* %0 <- [%3] (previous value)   */ \
	"     " insn "\n\t"              /* optional extra instruction    */ \
	"     strex %1, %4, [%3] \n\t"   /* try [%3] <- %4, status in %1  */ \
	"     cmp %1, #0 \n\t"           /* status != 0 ...               */ \
	"     bne 1b \n\t"               /* ... => start over             */
00101 
/*
 * ATOMIC_FUNC_DECL - generate an atomic read-modify-write function that has
 * no operand besides the variable itself:
 *
 *     static inline RET_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE *var)
 *
 *   OP        asm instruction string handed to ATOMIC_ASM_OP(): it reads the
 *             value loaded from *var in %0 and must put the value to store
 *             in %1.
 *   RET_EXPR  expression returned by the generated function (may be empty
 *             when RET_TYPE is void); `ret` holds the value that was stored.
 *
 * asm operands: %0 = tmp (loaded value, then strex status), %1 = ret,
 *               %2 = *var (memory operand), %3 = var (address).
 * *var is read as well as written by the asm block, so it is declared "+m"
 * (read-write) rather than "=m" (write-only, previous content discarded).
 * __asm__/__volatile__ keep the macro usable in ISO (-std=c99/c11) builds.
 */
#define ATOMIC_FUNC_DECL(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
	static inline RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var) \
	{ \
		P_TYPE ret, tmp; \
		__asm__ __volatile__( \
			ATOMIC_ASM_OP(OP) \
			: "=&r"(tmp), "=&r"(ret), "+m"(*var) \
			: "r"(var) \
			: "cc" \
			); \
		return RET_EXPR; \
	}
00113 
/*
 * ATOMIC_FUNC_DECL1 - generate an atomic read-modify-write function taking
 * one extra operand:
 *
 *     static inline RET_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE *var,
 *                                                    P_TYPE v)
 *
 *   OP        asm instruction string handed to ATOMIC_ASM_OP(): it reads the
 *             value loaded from *var in %0 and the extra operand in %4, and
 *             must put the value to store in %1.
 *   RET_EXPR  expression returned by the generated function (may be empty
 *             when RET_TYPE is void); `ret` holds the value that was stored.
 *
 * asm operands: %0 = tmp (loaded value, then strex status), %1 = ret,
 *               %2 = *var (memory operand), %3 = var (address), %4 = v.
 * *var is read as well as written by the asm block, so it is declared "+m"
 * (read-write) rather than "=m" (write-only, previous content discarded).
 * __asm__/__volatile__ keep the macro usable in ISO (-std=c99/c11) builds.
 */
#define ATOMIC_FUNC_DECL1(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
	static inline RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
														P_TYPE v) \
	{ \
		P_TYPE ret, tmp; \
		__asm__ __volatile__( \
			ATOMIC_ASM_OP(OP) \
			: "=&r"(tmp), "=&r"(ret), "+m"(*var) \
			: "r"(var), "r"(v) \
			: "cc" \
			); \
		return RET_EXPR; \
	}
00126 
00127 
/*
 * ATOMIC_FUNC_DECL2 - like ATOMIC_FUNC_DECL1, but built on ATOMIC_ASM_OP2():
 * the value written to *var is taken from %4 and the value previously found
 * in *var is left in `ret` (%0).
 *
 *     static inline RET_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE *var,
 *                                                    P_TYPE v)
 *
 *   OP        asm instruction string handed to ATOMIC_ASM_OP2() (may be
 *             empty).
 *             NOTE(review): %4 is declared as a plain input operand, so OP
 *             presumably must not modify it - confirm before adding users.
 *   RET_EXPR  expression returned by the generated function; `ret` holds
 *             the previous content of *var.
 *
 * asm operands: %0 = ret (previous value), %1 = tmp (strex status),
 *               %2 = *var (memory operand), %3 = var (address), %4 = v.
 * *var is read as well as written by the asm block, so it is declared "+m"
 * (read-write) rather than "=m" (write-only, previous content discarded).
 * __asm__/__volatile__ keep the macro usable in ISO (-std=c99/c11) builds.
 */
#define ATOMIC_FUNC_DECL2(NAME, OP, P_TYPE, RET_TYPE, RET_EXPR) \
	static inline RET_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
														P_TYPE v) \
	{ \
		P_TYPE ret, tmp; \
		__asm__ __volatile__( \
			ATOMIC_ASM_OP2(OP) \
			: "=&r"(ret), "=&r"(tmp), "+m"(*var) \
			: "r"(var), "r"(v) \
			: "cc" \
			); \
		return RET_EXPR; \
	}
00140 
00141 
/*
 * ATOMIC_XCHG_DECL - generate an exchange function based on the SWP
 * instruction:
 *
 *     static inline P_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE *var,
 *                                                  P_TYPE v)
 *
 * The generated function writes v to *var and returns what SWP loaded from
 * *var.
 *
 * asm operands: %0 = ret, %1 = *var (memory operand), %2 = v,
 *               %3 = var (address).
 * *var is read as well as written by SWP, so it is declared "+m"
 * (read-write) rather than "=m" (write-only, previous content discarded).
 * __asm__/__volatile__ keep the macro usable in ISO (-std=c99/c11) builds.
 *
 * NOTE(review): the ARM architecture manual deprecates SWP from ARMv6 on in
 * favour of ldrex/strex - confirm that the targeted cores still execute it.
 */
#define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
	static inline P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
														P_TYPE v ) \
	{ \
		P_TYPE ret; \
		__asm__ __volatile__( \
			"     swp %0, %2, [%3] \n\t" \
			: "=&r"(ret), "+m"(*var) \
			: "r"(v), "r"(var) \
			); \
		return ret; \
	}
00154 
00155 
/*
 * ATOMIC_CMPXCHG_DECL - generate a compare-and-exchange function:
 *
 *     static inline P_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE *var,
 *                                                  P_TYPE old,
 *                                                  P_TYPE new_v)
 *
 *     if (*var == old) *var = new_v;
 *
 * Returns the value found in *var, which can be used to check whether the
 * exchange took place:
 *     if (old == cmpxchg(var, old, new_v)) -> success
 *
 * asm operands: %0 = ret (value loaded from *var), %1 = tmp (strex status),
 *               %2 = *var (memory operand), %3 = var (address),
 *               %4 = new_v, %5 = old.
 *
 * The status register is cleared on EVERY pass through the loop. strexeq
 * only executes (and only writes %1) when the comparison succeeded; if the
 * status were cleared just once before the loop, a failed strex followed by
 * a pass in which *var no longer equals old would leave the stale non-zero
 * status in %1 and the loop would keep spinning instead of returning the
 * mismatching value.
 *
 * *var is read as well as written by the asm block, so it is declared "+m"
 * (read-write) rather than "=m" (write-only, previous content discarded).
 * __asm__/__volatile__ keep the macro usable in ISO (-std=c99/c11) builds.
 */
#define ATOMIC_CMPXCHG_DECL(NAME, P_TYPE) \
	static inline P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
														P_TYPE old, \
														P_TYPE new_v) \
	{ \
		P_TYPE ret, tmp; \
		__asm__ __volatile__( \
			"1:   ldrex %0, [%3] \n\t" \
			"     mov %1, #0 \n\t"           /* status = 0 for this pass */ \
			"     cmp %0, %5 \n\t" \
			"     strexeq %1, %4, [%3] \n\t" /* only if *var == old */ \
			"     cmp %1, #0 \n\t" \
			"     bne 1b \n\t"               /* strex failed => retry */ \
			: "=&r"(ret), "=&r"(tmp), "+m"(*var) \
			: "r"(var), "r"(new_v), "r"(old) \
			: "cc" \
			); \
		return ret; \
	}
00180 
00181 
00182 
00183 ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", int, void, /* no return */ )
00184 ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", int, void, /* no return */ )
00185 ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", int, void, /* no return */ )
00186 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", int, void, /* no return */ )
00187 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", int, int, ret==0 )
00188 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", int, int, ret==0 )
00189 //ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , int, int,  ret)
00190 ATOMIC_XCHG_DECL(get_and_set, int)
00191 ATOMIC_CMPXCHG_DECL(cmpxchg, int)
00192 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", int, int, ret )
00193 
00194 ATOMIC_FUNC_DECL(inc,      "add  %1, %0, #1", long, void, /* no return */ )
00195 ATOMIC_FUNC_DECL(dec,      "sub  %1, %0, #1", long, void, /* no return */ )
00196 ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", long, void, /* no return */ )
00197 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", long, void, /* no return */ )
00198 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", long, long, ret==0 )
00199 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", long, long, ret==0 )
00200 //ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , long, long,  ret)
00201 ATOMIC_XCHG_DECL(get_and_set, long)
00202 ATOMIC_CMPXCHG_DECL(cmpxchg, long)
00203 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", long, long, ret )
00204 
/*
 * Short forms working on an atomic object: `var` is a pointer to an object
 * whose `val` member is handed to the matching atomic_<op>_int() function.
 */
#define atomic_inc(var)                 atomic_inc_int(&(var)->val)
#define atomic_dec(var)                 atomic_dec_int(&(var)->val)
#define atomic_and(var, mask)           atomic_and_int(&(var)->val, (mask))
#define atomic_or(var, mask)            atomic_or_int(&(var)->val, (mask))
#define atomic_dec_and_test(var)        atomic_dec_and_test_int(&(var)->val)
#define atomic_inc_and_test(var)        atomic_inc_and_test_int(&(var)->val)
#define atomic_get_and_set(var, i)      atomic_get_and_set_int(&(var)->val, (i))
#define atomic_cmpxchg(var, old, new_v) \
	atomic_cmpxchg_int(&(var)->val, (old), (new_v))
#define atomic_add(var, v)              atomic_add_int(&(var)->val, (v))
00215 
00216 
00217 /* with integrated membar */
00218 
/* membar() followed by atomic_set_int(v, i); usable as a single statement */
#define mb_atomic_set_int(v, i) \
	do { \
		membar(); \
		atomic_set_int((v), (i)); \
	} while (0)
00224 
00225 
00226 
/* Calls membar(), then returns the result of atomic_get_int(v). */
static inline int mb_atomic_get_int(volatile int *v)
{
	int val;

	membar();
	val = atomic_get_int(v);
	return val;
}
00232 
00233 
/*
 * membar() followed by the corresponding atomic_<op>_int(); each macro is
 * usable as a single statement and yields no value.
 */
#define mb_atomic_inc_int(v) \
	do { \
		membar(); \
		atomic_inc_int((v)); \
	} while (0)

#define mb_atomic_dec_int(v) \
	do { \
		membar(); \
		atomic_dec_int((v)); \
	} while (0)

#define mb_atomic_or_int(v, m) \
	do { \
		membar(); \
		atomic_or_int((v), (m)); \
	} while (0)

#define mb_atomic_and_int(v, m) \
	do { \
		membar(); \
		atomic_and_int((v), (m)); \
	} while (0)
00257 
/* Calls membar(), then returns the result of atomic_inc_and_test_int(v). */
static inline int mb_atomic_inc_and_test_int(volatile int *v)
{
	int reached_zero;

	membar();
	reached_zero = atomic_inc_and_test_int(v);
	return reached_zero;
}
00263 
/* Calls membar(), then returns the result of atomic_dec_and_test_int(v). */
static inline int mb_atomic_dec_and_test_int(volatile int *v)
{
	int reached_zero;

	membar();
	reached_zero = atomic_dec_and_test_int(v);
	return reached_zero;
}
00269 
00270 
/* Calls membar(), then returns the result of atomic_get_and_set_int(v, i). */
static inline int mb_atomic_get_and_set_int(volatile int *v, int i)
{
	int prev;

	membar();
	prev = atomic_get_and_set_int(v, i);
	return prev;
}
00276 
/* Calls membar(), then returns the result of atomic_cmpxchg_int(v, o, n). */
static inline int mb_atomic_cmpxchg_int(volatile int *v, int o, int n)
{
	int found;

	membar();
	found = atomic_cmpxchg_int(v, o, n);
	return found;
}
00282 
/* Calls membar(), then returns the result of atomic_add_int(v, i). */
static inline int mb_atomic_add_int(volatile int *v, int i)
{
	int sum;

	membar();
	sum = atomic_add_int(v, i);
	return sum;
}
00288 
00289 
00290 
/* membar() followed by atomic_set_long(v, i); usable as a single statement */
#define mb_atomic_set_long(v, i) \
	do { \
		membar(); \
		atomic_set_long((v), (i)); \
	} while (0)
00296 
00297 
00298 
/* Calls membar(), then returns the result of atomic_get_long(v). */
static inline long mb_atomic_get_long(volatile long *v)
{
	long val;

	membar();
	val = atomic_get_long(v);
	return val;
}
00304 
00305 
/*
 * membar() followed by the corresponding atomic_<op>_long(); each macro is
 * usable as a single statement and yields no value.
 */
#define mb_atomic_inc_long(v) \
	do { \
		membar(); \
		atomic_inc_long((v)); \
	} while (0)


#define mb_atomic_dec_long(v) \
	do { \
		membar(); \
		atomic_dec_long((v)); \
	} while (0)

#define mb_atomic_or_long(v, m) \
	do { \
		membar(); \
		atomic_or_long((v), (m)); \
	} while (0)

#define mb_atomic_and_long(v, m) \
	do { \
		membar(); \
		atomic_and_long((v), (m)); \
	} while (0)
00330 
/* Calls membar(), then returns the result of atomic_inc_and_test_long(v). */
static inline long mb_atomic_inc_and_test_long(volatile long *v)
{
	long reached_zero;

	membar();
	reached_zero = atomic_inc_and_test_long(v);
	return reached_zero;
}
00336 
/* Calls membar(), then returns the result of atomic_dec_and_test_long(v). */
static inline long mb_atomic_dec_and_test_long(volatile long *v)
{
	long reached_zero;

	membar();
	reached_zero = atomic_dec_and_test_long(v);
	return reached_zero;
}
00342 
00343 
/* Calls membar(), then returns the result of atomic_get_and_set_long(v, l). */
static inline long mb_atomic_get_and_set_long(volatile long *v, long l)
{
	long prev;

	membar();
	prev = atomic_get_and_set_long(v, l);
	return prev;
}
00349 
/* Calls membar(), then returns the result of atomic_cmpxchg_long(v, o, n). */
static inline long mb_atomic_cmpxchg_long(volatile long *v, long o, long n)
{
	long found;

	membar();
	found = atomic_cmpxchg_long(v, o, n);
	return found;
}
00355 
/* Calls membar(), then returns the result of atomic_add_long(v, i). */
static inline long mb_atomic_add_long(volatile long *v, long i)
{
	long sum;

	membar();
	sum = atomic_add_long(v, i);
	return sum;
}
00361 
/*
 * Short forms of the mb_atomic_<op>_int() family working on an atomic
 * object: `var` is a pointer to an object whose `val` member is handed to
 * the matching mb_atomic_<op>_int().
 */
#define mb_atomic_inc(var)              mb_atomic_inc_int(&(var)->val)
#define mb_atomic_dec(var)              mb_atomic_dec_int(&(var)->val)
#define mb_atomic_and(var, mask)        mb_atomic_and_int(&(var)->val, (mask))
#define mb_atomic_or(var, mask)         mb_atomic_or_int(&(var)->val, (mask))
#define mb_atomic_dec_and_test(var)     mb_atomic_dec_and_test_int(&(var)->val)
#define mb_atomic_inc_and_test(var)     mb_atomic_inc_and_test_int(&(var)->val)
#define mb_atomic_get(var)              mb_atomic_get_int(&(var)->val)
#define mb_atomic_set(var, i)           mb_atomic_set_int(&(var)->val, (i))
#define mb_atomic_get_and_set(var, i) \
	mb_atomic_get_and_set_int(&(var)->val, (i))
#define mb_atomic_cmpxchg(var, o, n) \
	mb_atomic_cmpxchg_int(&(var)->val, (o), (n))
#define mb_atomic_add(var, i)           mb_atomic_add_int(&(var)->val, (i))
00373 
00374 
00375 
00376 #else /* ! __CPU_arm6 => __CPU_arm */
00377 
00378 /* no atomic ops for v <6 , only SWP supported
00379  * Atomic ops could be implemented if one bit is sacrificed and used like
00380  *  a spinlock, e.g:
00381  *          mov %r0, #0x1
00382  *       1: swp %r1, %r0, [&atomic_val]
00383  *          if (%r1 & 0x1) goto 1 # wait if first bit is 1 
00384  *          %r1>>=1  # restore the value (only 31 bits can be used )
00385  *          %r1=op (%r1, ...) 
00386  *          %r1<<=1   # shift back the value, such that the first bit is 0
00387  *          str %r1, [&atomic_val]  # write the value
00388  *
00389  * However only 31 bits could be used (=> atomic_*_int and atomic_*_long
00390  *  would still have to be lock based, since in these cases we guarantee all 
00391  *  the bits)  and I'm not sure there would be a significant performance
00392  *  benefit when compared with the fallback lock based version:
00393  *    lock(atomic_lock);
00394  *    atomic_val=op(*atomic_val, ...)
00395  *    unlock(atomic_lock);
00396  *
00397  *  -- andrei
00398  */
00399 
00400 #endif /* __CPU_arm6 */
00401 
00402 
00403 #endif