x86-64: make compat_start_thread() match start_thread()

For no real good reason, compat_start_thread() was embedded inline in
<asm/elf.h> whereas the native start_thread() lives in process_*.c.
Move compat_start_thread() to process_64.c, remove gratuitious
differences, and fix a few items which mostly look like bit rot.

In particular, compat_start_thread() didn't do free_thread_xstate(),
which means it was hanging on to the xstate store area even when it
was not needed.  It was also not setting old_rsp, but it looks like
that generally shouldn't matter for a 32-bit process.

Note: compat_start_thread *has* to be a macro, since it is tested with
start_thread_ia32() as the out of line function name.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b6..7cf0a6b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -356,7 +356,7 @@
 	percpu_write(old_rsp, new_sp);
 	regs->cs		= __USER_CS;
 	regs->ss		= __USER_DS;
-	regs->flags		= 0x200;
+	regs->flags		= X86_EFLAGS_IF;
 	set_fs(USER_DS);
 	/*
 	 * Free the old FP and other extended state
@@ -365,6 +365,27 @@
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
+#ifdef CONFIG_IA32_EMULATION
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+{
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
+	load_gs_index(0);
+	regs->ip		= new_ip;
+	regs->sp		= new_sp;
+	percpu_write(old_rsp, new_sp);
+	regs->cs		= __USER32_CS;
+	regs->ss		= __USER32_DS;
+	regs->flags		= X86_EFLAGS_IF;
+	set_fs(USER_DS);
+	/*
+	 * Free the old FP and other extended state
+	 */
+	free_thread_xstate(current);
+}
+#endif
+
 /*
  *	switch_to(x,y) should switch tasks from x to y.
  *