#ifndef _ASM_X86_BARRIER_H
#define _ASM_X86_BARRIER_H

#include <asm/alternative.h>
#include <asm/nops.h>

/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're talking
 * to devices.
 */

#ifdef CONFIG_X86_32
/*
 * Some non-Intel clones support out-of-order stores. wmb() ceases to be
 * a nop for these.
 */
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
#define mb()	asm volatile("mfence" ::: "memory")
#define rmb()	asm volatile("lfence" ::: "memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif
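
/*
 * A sketch of the device case the comment above refers to: a driver for
 * a hypothetical DMA-capable device must order its descriptor stores
 * before the MMIO doorbell write ("desc", "dev" and the doorbell layout
 * are illustrative, not part of this header):
 *
 * <programlisting>
 *	desc->addr = buf_dma_addr;
 *	desc->len  = buf_len;
 *	wmb();				// descriptor visible before ...
 *	writel(1, dev->doorbell);	// ... the device is told to fetch it
 * </programlisting>
 */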

/**
 * read_barrier_depends - Flush all pending reads that subsequent reads
 * depend on.
 *
 * No data-dependent reads from memory-like regions are ever reordered
 * over this barrier. All reads preceding this primitive are guaranteed
 * to access memory (but not necessarily other CPUs' caches) before any
 * reads following this primitive that depend on the data returned by
 * any of the preceding reads. This primitive is much lighter weight than
 * rmb() on most CPUs, and is never heavier weight than rmb().
 *
 * These ordering constraints are respected by both the local CPU
 * and the compiler.
 *
 * Ordering is not guaranteed by anything other than these primitives,
 * not even by data dependencies. See the documentation for
 * memory_barrier() for examples and URLs to more information.
 *
 * For example, the following code would force ordering (the initial
 * value of "a" is zero, "b" is one, and "p" is "&a"):
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	b = 2;
 *	memory_barrier();
 *	p = &b;				q = p;
 *					read_barrier_depends();
 *					d = *q;
 * </programlisting>
 *
 * because the read of "*q" depends on the read of "p" and these
 * two reads are separated by a read_barrier_depends(). However,
 * the following code, with the same initial values for "a" and "b":
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	a = 2;
 *	memory_barrier();
 *	b = 3;				y = b;
 *					read_barrier_depends();
 *					x = a;
 * </programlisting>
 *
 * does not enforce ordering, since there is no data dependency between
 * the read of "a" and the read of "b". Therefore, on some CPUs, such
 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
 * in cases like this where there are no data dependencies.
 **/

/* x86 never reorders dependent loads, so this can be a no-op. */
#define read_barrier_depends()	do { } while (0)

#ifdef CONFIG_SMP
#define smp_mb()	mb()
#ifdef CONFIG_X86_PPRO_FENCE
# define smp_rmb()	rmb()
#else
# define smp_rmb()	barrier()
#endif
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
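
/*
 * set_mb() exists for flag-setting patterns where a store must be
 * ordered before a subsequent load. A sketch of the classic
 * sleeper/waker pairing (the "sleep_flag" and "wake_cond" variables
 * are illustrative, not part of this header):
 *
 * <programlisting>
 *	sleeper:			waker:
 *
 *	set_mb(sleep_flag, 1);		wake_cond = 1;
 *	if (!wake_cond)			smp_mb();
 *		sleep();		if (sleep_flag)
 *						wake_sleeper();
 * </programlisting>
 *
 * The full barrier on each side guarantees that at least one of the
 * two loads observes the other CPU's store, so the wakeup cannot be
 * lost.
 */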

#if defined(CONFIG_X86_PPRO_FENCE)

/*
 * With CONFIG_X86_PPRO_FENCE x86 doesn't have a strong TSO memory
 * model and we should fall back to full barriers.
 */

#define smp_store_release(p, v)					\
do {								\
	compiletime_assert_atomic_type(*p);			\
	smp_mb();						\
	ACCESS_ONCE(*p) = (v);					\
} while (0)

#define smp_load_acquire(p)					\
({								\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);			\
	compiletime_assert_atomic_type(*p);			\
	smp_mb();						\
	___p1;							\
})

#else /* regular x86 TSO memory ordering */

#define smp_store_release(p, v)					\
do {								\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	ACCESS_ONCE(*p) = (v);					\
} while (0)

#define smp_load_acquire(p)					\
({								\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);			\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	___p1;							\
})

#endif
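
/*
 * A minimal message-passing sketch of how the two are paired (the
 * "msg" and "ready" variables are illustrative, not part of this
 * header):
 *
 * <programlisting>
 *	producer:			consumer:
 *
 *	msg = 42;			if (smp_load_acquire(&ready))
 *	smp_store_release(&ready, 1);		r = msg;	// r == 42
 * </programlisting>
 *
 * On regular TSO x86 the release/acquire only have to stop compiler
 * reordering; the hardware already provides the required store-store
 * and load-load ordering.
 */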

/*
 * Stop RDTSC speculation. This is needed when you need to use RDTSC
 * (or get_cycles or vread that possibly accesses the TSC) in a defined
 * code region.
 *
 * (Could use a three-way alternative() for this if one existed.)
 */
static __always_inline void rdtsc_barrier(void)
{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}
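
/*
 * A sketch of fencing a timed region ("rdtsc()" and the measured work
 * are illustrative, not defined here):
 *
 * <programlisting>
 *	rdtsc_barrier();
 *	t0 = rdtsc();		// cannot execute before prior work is done
 *	do_measured_work();
 *	rdtsc_barrier();
 *	t1 = rdtsc();		// waits for the measured work to finish
 * </programlisting>
 */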

#endif /* _ASM_X86_BARRIER_H */