Merge branch 'x86-percpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / arch / x86 / include / asm / percpu.h
index 556f84b..04eacef 100644 (file)
 #define PER_CPU_VAR(var)       per_cpu__##var
 #endif /* SMP */
 
+#ifdef CONFIG_X86_64_SMP
+#define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
+#else
+#define INIT_PER_CPU_VAR(var)  per_cpu__##var
+#endif
+
 #else /* ...!ASSEMBLY */
 
+#include <linux/kernel.h>
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_seg_str       "%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset                x86_read_percpu(this_cpu_off)
+#define __percpu_arg(x)                "%%"__stringify(__percpu_seg)":%P" #x
+#define __my_cpu_offset                percpu_read(this_cpu_off)
 #else
-#define __percpu_seg_str
+#define __percpu_arg(x)                "%P" #x
 #endif
 
-#include <asm-generic/percpu.h>
+/*
+ * Initialized pointers to per-cpu variables that are needed for the
+ * boot processor must use these macros to get the proper address
+ * offset from __per_cpu_load on SMP.
+ *
+ * There must also be an entry in vmlinux_64.lds.S.
+ */
+#define DECLARE_INIT_PER_CPU(var) \
+       extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
 
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#ifdef CONFIG_X86_64_SMP
+#define init_per_cpu_var(var)  init_per_cpu__##var
+#else
+#define init_per_cpu_var(var)  per_cpu_var(var)
+#endif
 
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
@@ -63,68 +81,100 @@ do {                                                       \
        }                                               \
        switch (sizeof(var)) {                          \
        case 1:                                         \
-               asm(op "b %1,"__percpu_seg_str"%0"      \
+               asm(op "b %1,"__percpu_arg(0)           \
                    : "+m" (var)                        \
-                   : "ri" ((T__)val));                 \
+                   : "qi" ((T__)(val)));               \
                break;                                  \
        case 2:                                         \
-               asm(op "w %1,"__percpu_seg_str"%0"      \
+               asm(op "w %1,"__percpu_arg(0)           \
                    : "+m" (var)                        \
-                   : "ri" ((T__)val));                 \
+                   : "ri" ((T__)(val)));               \
                break;                                  \
        case 4:                                         \
-               asm(op "l %1,"__percpu_seg_str"%0"      \
+               asm(op "l %1,"__percpu_arg(0)           \
                    : "+m" (var)                        \
-                   : "ri" ((T__)val));                 \
+                   : "ri" ((T__)(val)));               \
                break;                                  \
        case 8:                                         \
-               asm(op "q %1,"__percpu_seg_str"%0"      \
+               asm(op "q %1,"__percpu_arg(0)           \
                    : "+m" (var)                        \
-                   : "r" ((T__)val));                  \
+                   : "re" ((T__)(val)));               \
                break;                                  \
        default: __bad_percpu_size();                   \
        }                                               \
 } while (0)
 
-#define percpu_from_op(op, var)                                \
+#define percpu_from_op(op, var, constraint)            \
 ({                                                     \
        typeof(var) ret__;                              \
        switch (sizeof(var)) {                          \
        case 1:                                         \
-               asm(op "b "__percpu_seg_str"%1,%0"      \
-                   : "=r" (ret__)                      \
-                   : "m" (var));                       \
+               asm(op "b "__percpu_arg(1)",%0"         \
+                   : "=q" (ret__)                      \
+                   : constraint);                      \
                break;                                  \
        case 2:                                         \
-               asm(op "w "__percpu_seg_str"%1,%0"      \
+               asm(op "w "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 4:                                         \
-               asm(op "l "__percpu_seg_str"%1,%0"      \
+               asm(op "l "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 8:                                         \
-               asm(op "q "__percpu_seg_str"%1,%0"      \
+               asm(op "q "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        default: __bad_percpu_size();                   \
        }                                               \
        ret__;                                          \
 })
 
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed, while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across CPUs.  The current users include
+ * get_current() and get_thread_info(), both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var)       percpu_from_op("mov", per_cpu__##var,   \
+                                              "m" (per_cpu__##var))
+#define percpu_read_stable(var)        percpu_from_op("mov", per_cpu__##var,   \
+                                              "p" (&per_cpu__##var))
+#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)   percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)   percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)   percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)    percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)   percpu_to_op("xor", per_cpu__##var, val)
+
+/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+#define x86_test_and_clear_bit_percpu(bit, var)                                \
+({                                                                     \
+       int old__;                                                      \
+       asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"           \
+                    : "=r" (old__), "+m" (per_cpu__##var)              \
+                    : "dIr" (bit));                                    \
+       old__;                                                          \
+})
 
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+void *pcpu_lpage_remapped(void *kaddr);
 #else
-static inline void load_pda_offset(int cpu) { }
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+       return NULL;
+}
 #endif
 
 #endif /* !__ASSEMBLY__ */