atomic_x86.h

Go to the documentation of this file.
00001 /* 
00002  * Copyright (C) 2006 iptelorg GmbH
00003  *
00004  * Permission to use, copy, modify, and distribute this software for any
00005  * purpose with or without fee is hereby granted, provided that the above
00006  * copyright notice and this permission notice appear in all copies.
00007  *
00008  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
00009  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
00010  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
00011  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
00012  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
00013  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
00014  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00015  */
00016 
00034 /* 
00035  * History:
00036  * --------
00037  *  2006-03-08  created by andrei
00038  *  2007-05-07  added cmpxchg (andrei)
00039  *  2007-05-08  added atomic_add (andrei)
00040  *  2007-05-29  added membar_depends(), membar_*_atomic_op and
00041  *                membar_*_atomic_setget (andrei)
00042  */
00043 
00044 #ifndef _atomic_x86_h
00045 #define _atomic_x86_h
00046 
/* markers announcing that this header provides inline-asm memory barriers
 * and inline-asm atomic operations */
#define HAVE_ASM_INLINE_MEMBAR
#define HAVE_ASM_INLINE_ATOMIC_OPS

/* string pasted in front of the read-modify-write instructions below:
 * "lock ;" in the normal case, nothing at all when NOSMP is defined */
#ifndef NOSMP
#define __LOCK_PREF "lock ;"
#else
#define __LOCK_PREF
#endif
00055 
00056 
/* memory barriers
 *
 * __asm__ __volatile__ is used instead of the plain asm keyword so that the
 * header can also be included from files compiled in a strict ISO mode. */

#ifdef NOSMP

/* NOSMP build: every barrier is only a compiler barrier (an empty asm with a
 * "memory" clobber), no fence instruction is emitted */
#define membar()        __asm__ __volatile__ ("" : : : "memory")
#define membar_read()   membar()
#define membar_write()  membar()

#else /* ! NOSMP */

/* although most x86 do stores in order, we're playing it safe and use
 *  oostore ready write barriers */
#define X86_OOSTORE

#ifdef __CPU_x86_64

/* an alternative membar() would be " lock; addq $0, 0(%%rsp) \n\t ";
 * mfence is used instead */
#define membar()        __asm__ __volatile__( " mfence \n\t " : : : "memory" )
#define membar_read()   __asm__ __volatile__( " lfence \n\t " : : : "memory" )
/* NOTE(review): the test below uses X86_64_OOSTORE, not the X86_OOSTORE
 * defined just above, and nothing in this file defines it; unless it is
 * supplied from outside, x86_64 builds get the compiler-only write barrier.
 * Confirm that this is intended. */
#ifdef X86_64_OOSTORE
#define membar_write()  __asm__ __volatile__( " sfence \n\t " : : : "memory" )
#else
#define membar_write()  __asm__ __volatile__ ("" : : : "memory") /* gcc don't cache*/
#endif /* X86_64_OOSTORE */

#else /* ! __CPU_x86_64  => __CPU_i386*/

/* membar: lfence, mfence, sfence available only on newer cpus, so for now
 * stick to lock addl */
#define membar() \
	__asm__ __volatile__(" lock; addl $0, 0(%%esp) \n\t " : : : "memory" )
#define membar_read()   membar()
#ifdef X86_OOSTORE
/* out of order store version */
#define membar_write()  membar()
#else
/* no oostore, most x86 cpus => do nothing, just a gcc do_not_cache barrier*/
#define membar_write()  __asm__ __volatile__ ("" : : : "memory")
#endif /* X86_OOSTORE */

#endif /* __CPU_x86_64 */

#endif /* NOSMP */

/* everything below is defined the same way for NOSMP and SMP builds */

#define membar_depends()  do {} while(0) /* really empty, not even a cc bar. */
/* lock barriers: empty; not needed for NOSMP, and not needed on x86 or
 *  x86_64 either (atomic ops already force the barriers if needed); the
 *  lock/unlock should already contain the gcc do_not_cache barriers */
#define membar_enter_lock() do {} while(0)
#define membar_leave_lock() do {} while(0)
/* membars after or before atomic_ops or atomic_setget -> use these or
 *  mb_<atomic_op_name>() if you need a memory barrier in one of these
 *  situations (on some archs where the atomic operations imply memory
 *   barriers is better to use atomic_op_x(); membar_atomic_op() then
 *    atomic_op_x(); membar()) */
#define membar_atomic_op()              do {} while(0)
#define membar_atomic_setget()          membar()
#define membar_write_atomic_op()        do {} while(0)
#define membar_write_atomic_setget()    membar_write()
#define membar_read_atomic_op()         do {} while(0)
#define membar_read_atomic_setget()     membar_read()
00138 
/* 1 param atomic f: defines
 *   inline static void atomic_<NAME>_<P_TYPE>(volatile P_TYPE* var)
 * whose body is the single instruction OP, prefixed with __LOCK_PREF.
 * Inside OP, %0 refers to *var. The asm declares the flags ("cc") and
 * "memory" as clobbered, so it also acts as a compiler barrier.
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#define ATOMIC_FUNC_DECL1(NAME, OP, P_TYPE) \
	inline static void atomic_##NAME##_##P_TYPE (volatile P_TYPE* var) \
	{ \
		__asm__ __volatile__( \
				__LOCK_PREF " " OP " \n\t" \
				: "=m"(*var) : "m"(*var) : "cc", "memory" \
				); \
	}
00148 
/* constraint for the source operand (v) of the 2 params atomic functions.
 * The 64-bit forms (andq, orq) only accept an immediate that fits a
 * sign-extended 32-bit value, so on x86_64 the immediate alternative must be
 * "e" (32-bit signed constant) instead of "i" (any constant); otherwise an
 * inlined call with a wide constant mask would produce an instruction that
 * cannot be assembled. Wider constants now simply go through a register.
 * i386 keeps the original "ri". */
#ifdef __CPU_x86_64
#define __ATOMIC_SRC_CONSTR "re"
#else
#define __ATOMIC_SRC_CONSTR "ri"
#endif

/* 2 params atomic f: defines
 *   inline static void atomic_<NAME>_<P_TYPE>(volatile P_TYPE* var, P_TYPE v)
 * whose body is the single instruction OP, prefixed with __LOCK_PREF.
 * Inside OP, %0 refers to *var and %1 to v. The asm declares the flags
 * ("cc") and "memory" as clobbered, so it also acts as a compiler barrier.
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#define ATOMIC_FUNC_DECL2(NAME, OP, P_TYPE) \
	inline static void atomic_##NAME##_##P_TYPE (volatile P_TYPE* var, \
							    P_TYPE v) \
	{ \
		__asm__ __volatile__( \
				__LOCK_PREF " " OP " \n\t" \
				: "=m"(*var) \
				: __ATOMIC_SRC_CONSTR (v), "m"(*var) \
				: "cc", "memory" \
				); \
	}
00159 
/* exchange: defines
 *   inline static TYPE atomic_<NAME>_<TYPE>(volatile TYPE* var, TYPE v)
 * whose body is the single instruction OP; inside OP, %0 is the register
 * holding v and %1 is *var. The function returns whatever OP left in that
 * register (for an xchg: the previous content of *var).
 * No __LOCK_PREF is emitted here.
 * Two variants exist only because of the operand syntax: the first branch
 * (gcc < 2.9) spells the read-write register as a "=q" output plus a
 * matching "0" input, the second one uses "+q".
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#if defined __GNUC__ &&  __GNUC__ < 3 && __GNUC_MINOR__ < 9
/* gcc version < 2.9 */
#define ATOMIC_FUNC_XCHG(NAME, OP, TYPE) \
	inline static TYPE atomic_##NAME##_##TYPE(volatile TYPE* var, TYPE v) \
	{ \
		__asm__ __volatile__( \
				OP " \n\t" \
				: "=q"(v), "=m"(*var) :"m"(*var), "0"(v) : "memory" \
				); \
		return v; \
	}
#else
#define ATOMIC_FUNC_XCHG(NAME, OP, TYPE) \
	inline static TYPE atomic_##NAME##_##TYPE(volatile TYPE* var, TYPE v) \
	{ \
		__asm__ __volatile__( \
				OP " \n\t" \
				: "+q"(v), "=m"(*var) : "m"(*var) : "memory" \
				); \
		return v; \
	}
#endif /* gcc & gcc version < 2.9 */
00182 
/* returns a value, 1 param: defines
 *   inline static RET_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE* var)
 * which executes OP (prefixed with __LOCK_PREF; %0 is *var) and then
 * "setz" into a local byte, which is returned: non-zero when OP left the
 * zero flag set (i.e. its result was 0), 0 otherwise.
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#define ATOMIC_FUNC_TEST(NAME, OP, P_TYPE, RET_TYPE) \
	inline static RET_TYPE atomic_##NAME##_##P_TYPE(volatile P_TYPE* var) \
	{ \
		char ret; \
		__asm__ __volatile__( \
				__LOCK_PREF " " OP "\n\t" \
				"setz %1 \n\t" \
				: "=m" (*var), "=qm"(ret) : "m"(*var) : "cc", "memory" \
				); \
		return ret; \
	}
00195 
/* returns a value, 3 params (var, old, new): defines
 *   inline static RET_TYPE atomic_<NAME>_<P_TYPE>(volatile P_TYPE* var,
 *                                                 P_TYPE old, P_TYPE new_v)
 * whose body is OP prefixed with __LOCK_PREF; inside OP, %0 is the "a"
 * register (loaded with old, read back as the result), %1 is *var and %2 is
 * the register holding new_v.
 * The returned value is the value before the xchg:
 *  if ret!=old => cmpxchg failed and ret is var's value
 *  else  => success and new_v is var's new value
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#define ATOMIC_FUNC_CMPXCHG(NAME, OP, P_TYPE, RET_TYPE) \
	inline static RET_TYPE atomic_##NAME##_##P_TYPE(volatile P_TYPE* var, \
							P_TYPE old, P_TYPE new_v)\
	{ \
		P_TYPE ret; \
		__asm__ __volatile__( \
				__LOCK_PREF " " OP "\n\t" \
				: "=a"(ret), "=m" (*var) :\
					"r"(new_v), "m"(*var), "0"(old):\
					"cc", "memory" \
				); \
		return ret; \
	}
00213 
/* similar w/ XCHG but with LOCK prefix, relaxed constraints & diff. return:
 * defines
 *   inline static TYPE atomic_<NAME>_<TYPE>(volatile TYPE* var, TYPE v)
 * whose body is OP prefixed with __LOCK_PREF; inside OP, %0 is a register
 * that enters holding v and %1 is *var. The function returns the value OP
 * left in that register plus v (for an xadd, which leaves the previous
 * content of *var in the register, that is the new content of *var).
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
#define ATOMIC_FUNC_XADD(NAME, OP, TYPE) \
	inline static TYPE atomic_##NAME##_##TYPE(volatile TYPE* var, TYPE v) \
	{ \
		TYPE ret; \
		__asm__ __volatile__( \
				__LOCK_PREF " " OP " \n\t" \
				: "=r"(ret), "=m"(*var) :"m"(*var), "0"(v) : "cc", "memory" \
				); \
		return ret+v; \
	}
00225 
00226 ATOMIC_FUNC_DECL1(inc, "incl %0", int)
00227 ATOMIC_FUNC_DECL1(dec, "decl %0", int)
00228 ATOMIC_FUNC_DECL2(and, "andl %1, %0", int)
00229 ATOMIC_FUNC_DECL2(or,  "orl %1, %0", int)
00230 ATOMIC_FUNC_TEST(inc_and_test, "incl %0", int, int)
00231 ATOMIC_FUNC_TEST(dec_and_test, "decl %0", int, int)
00232 ATOMIC_FUNC_XCHG(get_and_set,  "xchgl %1, %0", int)
00233 ATOMIC_FUNC_CMPXCHG(cmpxchg, "cmpxchgl %2, %1", int , int)
00234 ATOMIC_FUNC_XADD(add, "xaddl %0, %1", int) 
00235 #ifdef __CPU_x86_64
00236 ATOMIC_FUNC_DECL1(inc, "incq %0", long)
00237 ATOMIC_FUNC_DECL1(dec, "decq %0", long)
00238 ATOMIC_FUNC_DECL2(and, "andq %1, %0", long)
00239 ATOMIC_FUNC_DECL2(or,  "orq %1, %0", long)
00240 ATOMIC_FUNC_TEST(inc_and_test, "incq %0", long, int)
00241 ATOMIC_FUNC_TEST(dec_and_test, "decq %0", long, int)
00242 ATOMIC_FUNC_XCHG(get_and_set,  "xchgq %1, %0", long)
00243 ATOMIC_FUNC_CMPXCHG(cmpxchg, "cmpxchgq %2, %1", long , long)
00244 ATOMIC_FUNC_XADD(add, "xaddq %0, %1",long) 
00245 #else
00246 ATOMIC_FUNC_DECL1(inc, "incl %0", long)
00247 ATOMIC_FUNC_DECL1(dec, "decl %0", long)
00248 ATOMIC_FUNC_DECL2(and, "andl %1, %0", long)
00249 ATOMIC_FUNC_DECL2(or,  "orl %1, %0", long)
00250 ATOMIC_FUNC_TEST(inc_and_test, "incl %0", long, int)
00251 ATOMIC_FUNC_TEST(dec_and_test, "decl %0", long, int)
00252 ATOMIC_FUNC_XCHG(get_and_set,  "xchgl %1, %0", long)
00253 ATOMIC_FUNC_CMPXCHG(cmpxchg, "cmpxchgl %2, %1", long , long)
00254 ATOMIC_FUNC_XADD(add, "xaddl %0, %1",long) 
00255 #endif
00256 
/* generic forms: var is a pointer to an object with a `val` member; each
 * macro forwards the address of that member to the matching *_int function */

/* no result */
#define atomic_inc(var)                 atomic_inc_int(&(var)->val)
#define atomic_dec(var)                 atomic_dec_int(&(var)->val)
#define atomic_and(var, mask)           atomic_and_int(&(var)->val, (mask))
#define atomic_or(var, mask)            atomic_or_int(&(var)->val, (mask))

/* with result */
#define atomic_inc_and_test(var)        atomic_inc_and_test_int(&(var)->val)
#define atomic_dec_and_test(var)        atomic_dec_and_test_int(&(var)->val)
#define atomic_get_and_set(var, i)      atomic_get_and_set_int(&(var)->val, (i))
#define atomic_cmpxchg(var, old, newv) \
		atomic_cmpxchg_int(&(var)->val, (old), (newv))
#define atomic_add(var, v)              atomic_add_int(&(var)->val, (v))
00267 
00268 
00269 #ifdef NOSMP
00270 
/* NOSMP: membar() first, then the plain atomic_set_<type>() (which is not
 * defined in this part of the file) */
#define mb_atomic_set_int(v, i) \
	do { \
		membar(); \
		atomic_set_int(v, i); \
	} while (0)

#define mb_atomic_set_long(v, i) \
	do { \
		membar(); \
		atomic_set_long(v, i); \
	} while (0)
00280 
00281 
00282 
/* NOSMP: membar() followed by the plain atomic_get_int() of *v */
static inline int mb_atomic_get_int(volatile int *v)
{
	membar();
	return atomic_get_int(v);
}
00287 
/* NOSMP: membar() followed by the plain atomic_get_long() of *v */
static inline long mb_atomic_get_long(volatile long *v)
{
	membar();
	return atomic_get_long(v);
}
00292 
00293 
00294 #else /* NOSMP */
00295 
00296 
/* Stores i into *v with a single xchgl (register <-> memory exchange); the
 * old content of *v ends up in the register copy of i and is discarded.
 * The asm has a "memory" clobber, so it is also a compiler barrier.
 * No lock prefix is written out: on x86, xchg with a memory operand is
 * locked implicitly.
 * The two branches differ only in how the read-write register operand is
 * spelled (gcc < 2.9: "=q" output + matching "0" input; otherwise "+q").
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
inline static void mb_atomic_set_int(volatile int* v, int i)
{
#if defined __GNUC__ &&  __GNUC__ < 3 && __GNUC_MINOR__ < 9
	__asm__ __volatile__(
			"xchgl %1, %0 \n\t"
			: "=q"(i), "=m"(*v) : "m"(*v), "0"(i) : "memory"
			);
#else
	__asm__ __volatile__(
			"xchgl %1, %0 \n\t"
			: "+q"(i), "=m"(*v) : "m"(*v) : "memory"
			);
#endif
}
00308 
00309 
/* Stores l into *v with a single xchg (xchgq when __CPU_x86_64 is defined,
 * xchgl otherwise); the old content of *v ends up in the register copy of l
 * and is discarded. The asm has a "memory" clobber, so it is also a compiler
 * barrier. No lock prefix is written out: on x86, xchg with a memory operand
 * is locked implicitly.
 * The operand lists differ only in how the read-write register operand is
 * spelled (gcc < 2.9: "=q" output + matching "0" input; otherwise "+q").
 * __asm__ __volatile__ is used (rather than asm volatile) so the header can
 * be included from files compiled in a strict ISO mode. */
inline static void mb_atomic_set_long(volatile long* v, long l)
{
	__asm__ __volatile__(
#ifdef __CPU_x86_64
			"xchgq %1, %0 \n\t"
#else
			"xchgl %1, %0 \n\t"
#endif
#if defined __GNUC__ &&  __GNUC__ < 3 && __GNUC_MINOR__ < 9
			: "=q"(l), "=m"(*v) : "m"(*v), "0"(l) : "memory"
#else
			: "+q"(l), "=m"(*v) : "m"(*v) : "memory"
#endif
			);
}
00325 
00326 
00327 inline static int mb_atomic_get_int(volatile int* var)
00328 {
00329         int ret;
00330         
00331         asm volatile(
00332                         __LOCK_PREF " cmpxchgl %0, %1 \n\t"
00333                         : "=a"(ret)  : "m"(*var) : "cc", "memory"
00334                         );
00335         return ret;
00336 }
00337 
00338 inline static long mb_atomic_get_long(volatile long* var)
00339 {
00340         long ret;
00341         
00342         asm volatile(
00343 #ifdef __CPU_x86_64
00344                         __LOCK_PREF " cmpxchgq %0, %1 \n\t"
00345 #else
00346                         __LOCK_PREF " cmpxchgl %0, %1 \n\t"
00347 #endif
00348                         : "=a"(ret)  : "m"(*var) : "cc", "memory"
00349                         );
00350         return ret;
00351 }
00352 
00353 #endif /* NOSMP */
00354 
00355 
/* on x86 atomic instructions act also as barriers, so each mb_ variant of a
 * read-modify-write operation is just another name for the plain one */

/* int */
#define mb_atomic_inc_int(v)                    atomic_inc_int(v)
#define mb_atomic_dec_int(v)                    atomic_dec_int(v)
#define mb_atomic_or_int(v, m)                  atomic_or_int(v, m)
#define mb_atomic_and_int(v, m)                 atomic_and_int(v, m)
#define mb_atomic_inc_and_test_int(v)           atomic_inc_and_test_int(v)
#define mb_atomic_dec_and_test_int(v)           atomic_dec_and_test_int(v)
#define mb_atomic_get_and_set_int(v, i)         atomic_get_and_set_int(v, i)
#define mb_atomic_cmpxchg_int(v, o, n)          atomic_cmpxchg_int(v, o, n)
#define mb_atomic_add_int(v, a)                 atomic_add_int(v, a)

/* long */
#define mb_atomic_inc_long(v)                   atomic_inc_long(v)
#define mb_atomic_dec_long(v)                   atomic_dec_long(v)
#define mb_atomic_or_long(v, m)                 atomic_or_long(v, m)
#define mb_atomic_and_long(v, m)                atomic_and_long(v, m)
#define mb_atomic_inc_and_test_long(v)          atomic_inc_and_test_long(v)
#define mb_atomic_dec_and_test_long(v)          atomic_dec_and_test_long(v)
#define mb_atomic_get_and_set_long(v, i)        atomic_get_and_set_long(v, i)
#define mb_atomic_cmpxchg_long(v, o, n)         atomic_cmpxchg_long(v, o, n)
#define mb_atomic_add_long(v, a)                atomic_add_long(v, a)

/* generic forms (v points to an object with a `val` member); get and set go
 * to the dedicated mb_atomic_get_int / mb_atomic_set_int, everything else to
 * the generic atomic_* macro, which forwards &(v)->val to the *_int function */
#define mb_atomic_inc(v)                        atomic_inc(v)
#define mb_atomic_dec(v)                        atomic_dec(v)
#define mb_atomic_or(v, m)                      atomic_or(v, m)
#define mb_atomic_and(v, m)                     atomic_and(v, m)
#define mb_atomic_inc_and_test(v)               atomic_inc_and_test(v)
#define mb_atomic_dec_and_test(v)               atomic_dec_and_test(v)
#define mb_atomic_get(v)                        mb_atomic_get_int( &(v)->val)
#define mb_atomic_set(v, i)                     mb_atomic_set_int(&(v)->val, i)
#define mb_atomic_get_and_set(v, i)             atomic_get_and_set(v, i)
#define mb_atomic_cmpxchg(v, o, n)              atomic_cmpxchg(v, o, n)
#define mb_atomic_add(v, a)                     atomic_add(v, a)
00388 
00389 
00390 #endif