tracing: Use __this_cpu_inc/dec operation instead of __get_cpu_var

__this_cpu_inc_return() or __this_cpu_dec generates a single instruction,
which is faster than __get_cpu_var operation.

Link: http://lkml.kernel.org/r/50A9C1BD.1060308@gmail.com

Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Shan Wei <davidshan@tencent.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ca9b7df..07888e1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1344,7 +1344,7 @@
 	 */
 	preempt_disable_notrace();
 
-	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
 	/*
 	 * We don't need any atomic variables, just a barrier.
 	 * If an interrupt comes in, we don't care, because it would
@@ -1398,7 +1398,7 @@
  out:
 	/* Again, don't let gcc optimize things here */
 	barrier();
-	__get_cpu_var(ftrace_stack_reserve)--;
+	__this_cpu_dec(ftrace_stack_reserve);
 	preempt_enable_notrace();
 
 }