x86-64: Move kernelstack from PDA to per-cpu.

Also clean up PER_CPU_VAR usage in xen-asm_64.S

tj: * remove now unused stack_thread_info()
    * s/kernelstack/kernel_stack/
    * added FIXME comment in xen-asm_64.S

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e00c31a..6c5f576 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -620,9 +620,9 @@
 	write_pda(oldrsp, next->usersp);
 	percpu_write(current_task, next_p);
 
-	write_pda(kernelstack,
+	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - PDA_STACKOFFSET);
+		  THREAD_SIZE - KERNEL_STACK_OFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	write_pda(stack_canary, next_p->stack_canary);
 	/*