x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses
Linus Torvalds [Mon, 3 Aug 2009 05:08:48 +0000 (14:08 +0900)]
This is very useful for some common things like 'get_current()' and
'current_thread_info()', which can be used multiple times in a function,
and where the result is cacheable.

tj: Added the magical undocumented "P" operand modifier to the UP
    (uniprocessor) variant of __percpu_arg() to force gcc to
    dereference the pointer value passed in via the "p" input
    constraint.  Without this, percpu_read_stable() returns the
    address of the percpu variable instead of its value.  Also added
    a comment explaining the difference between percpu_read() and
    percpu_read_stable().

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

arch/x86/include/asm/current.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/thread_info.h

index c68c361..4d447b7 100644 (file)
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
 
 static __always_inline struct task_struct *get_current(void)
 {
-       return percpu_read(current_task);
+       return percpu_read_stable(current_task);
 }
 
 #define current get_current()
index 103f1dd..04eacef 100644 (file)
@@ -49,7 +49,7 @@
 #define __percpu_arg(x)                "%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset                percpu_read(this_cpu_off)
 #else
-#define __percpu_arg(x)                "%" #x
+#define __percpu_arg(x)                "%P" #x
 #endif
 
 /*
@@ -104,36 +104,48 @@ do {                                                      \
        }                                               \
 } while (0)
 
-#define percpu_from_op(op, var)                                \
+#define percpu_from_op(op, var, constraint)            \
 ({                                                     \
        typeof(var) ret__;                              \
        switch (sizeof(var)) {                          \
        case 1:                                         \
                asm(op "b "__percpu_arg(1)",%0"         \
                    : "=q" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 2:                                         \
                asm(op "w "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 4:                                         \
                asm(op "l "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 8:                                         \
                asm(op "q "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        default: __bad_percpu_size();                   \
        }                                               \
        ret__;                                          \
 })
 
-#define percpu_read(var)       percpu_from_op("mov", per_cpu__##var)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across cpus.  The current users include
+ * get_current() and get_thread_info() both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var)       percpu_from_op("mov", per_cpu__##var,   \
+                                              "m" (per_cpu__##var))
+#define percpu_read_stable(var)        percpu_from_op("mov", per_cpu__##var,   \
+                                              "p" (&per_cpu__##var))
 #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
 #define percpu_add(var, val)   percpu_to_op("add", per_cpu__##var, val)
 #define percpu_sub(var, val)   percpu_to_op("sub", per_cpu__##var, val)
index fad7d40..a1bb5a1 100644 (file)
@@ -213,7 +213,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 static inline struct thread_info *current_thread_info(void)
 {
        struct thread_info *ti;
-       ti = (void *)(percpu_read(kernel_stack) +
+       ti = (void *)(percpu_read_stable(kernel_stack) +
                      KERNEL_STACK_OFFSET - THREAD_SIZE);
        return ti;
 }