x86 ptrace: unify syscall tracing
Roland McGrath [Wed, 9 Jul 2008 09:38:07 +0000 (02:38 -0700)]
This unifies and cleans up the syscall tracing code on i386 and x86_64.

Using a single function for entry and exit tracing on 32-bit made the
do_syscall_trace() into some terrible spaghetti.  The logic is clear and
simple using separate syscall_trace_enter() and syscall_trace_leave()
functions as on 64-bit.

The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks.  It behaves just like 32-bit.

Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.

Signed-off-by: Roland McGrath <roland@redhat.com>

arch/x86/ia32/ia32entry.S
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/ptrace.c
include/asm-x86/calling.h
include/asm-x86/ptrace-abi.h
include/asm-x86/thread_info.h

index 20371d0..8796d19 100644 (file)
        movq    %rax,R8(%rsp)
        .endm
 
+       /*
+        * Reload arg registers from stack in case ptrace changed them.
+        * We don't reload %eax because syscall_trace_enter() returned
+        * the value it wants us to use in the table lookup.
+        */
        .macro LOAD_ARGS32 offset
        movl \offset(%rsp),%r11d
        movl \offset+8(%rsp),%r10d
@@ -46,7 +51,6 @@
        movl \offset+48(%rsp),%edx
        movl \offset+56(%rsp),%esi
        movl \offset+64(%rsp),%edi
-       movl \offset+72(%rsp),%eax
        .endm
        
        .macro CFI_STARTPROC32 simple
@@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target)
        .previous       
        GET_THREAD_INFO(%r10)
        orl    $TS_COMPAT,TI_status(%r10)
-       testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-                TI_flags(%r10)
+       testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
-sysenter_do_call:      
        cmpl    $(IA32_NR_syscalls-1),%eax
        ja      ia32_badsys
+sysenter_do_call:
        IA32_ARG_FIXUP 1
        call    *ia32_sys_call_table(,%rax,8)
        movq    %rax,RAX-ARGOFFSET(%rsp)
@@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target)
        .previous       
        GET_THREAD_INFO(%r10)
        orl   $TS_COMPAT,TI_status(%r10)
-       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-               TI_flags(%r10)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
 cstar_do_call: 
@@ -336,8 +338,7 @@ ENTRY(ia32_syscall)
        SAVE_ARGS 0,0,1
        GET_THREAD_INFO(%r10)
        orl   $TS_COMPAT,TI_status(%r10)
-       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-               TI_flags(%r10)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
        jnz ia32_tracesys
 ia32_do_syscall:       
        cmpl $(IA32_NR_syscalls-1),%eax
index 0ad987d..cadf73f 100644 (file)
@@ -332,7 +332,7 @@ sysenter_past_esp:
        GET_THREAD_INFO(%ebp)
 
        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-       testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
        GET_THREAD_INFO(%ebp)
                                        # system call tracing in operation / emulation
        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-       testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -510,12 +510,8 @@ END(work_pending)
 syscall_trace_entry:
        movl $-ENOSYS,PT_EAX(%esp)
        movl %esp, %eax
-       xorl %edx,%edx
-       call do_syscall_trace
-       cmpl $0, %eax
-       jne resume_userspace            # ret != 0 -> running under PTRACE_SYSEMU,
-                                       # so must skip actual syscall
-       movl PT_ORIG_EAX(%esp), %eax
+       call syscall_trace_enter
+       /* What it returned is what we'll actually use.  */
        cmpl $(nr_syscalls), %eax
        jnae syscall_call
        jmp syscall_exit
@@ -524,14 +520,13 @@ END(syscall_trace_entry)
        # perform syscall exit tracing
        ALIGN
 syscall_exit_work:
-       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+       testb $_TIF_WORK_SYSCALL_EXIT, %cl
        jz work_pending
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS(CLBR_ANY)     # could let do_syscall_trace() call
+       ENABLE_INTERRUPTS(CLBR_ANY)     # could let syscall_trace_leave() call
                                        # schedule() instead
        movl %esp, %eax
-       movl $1, %edx
-       call do_syscall_trace
+       call syscall_trace_leave
        jmp resume_userspace
 END(syscall_exit_work)
        CFI_ENDPROC
index ae63e58..63001c6 100644 (file)
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
        movq  %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        GET_THREAD_INFO(%rcx)
-       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-               TI_flags(%rcx)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
        jnz tracesys
        cmpq $__NR_syscall_max,%rax
        ja badsys
@@ -430,7 +429,12 @@ tracesys:
        FIXUP_TOP_OF_STACK %rdi
        movq %rsp,%rdi
        call syscall_trace_enter
-       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+       /*
+        * Reload arg registers from stack in case ptrace changed them.
+        * We don't reload %rax because syscall_trace_enter() returned
+        * the value it wants us to use in the table lookup.
+        */
+       LOAD_ARGS ARGOFFSET, 1
        RESTORE_REST
        cmpq $__NR_syscall_max,%rax
        ja   int_ret_from_sys_call      /* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
        ENABLE_INTERRUPTS(CLBR_NONE)
        SAVE_REST
        /* Check for syscall exit trace */      
-       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
+       testl $_TIF_WORK_SYSCALL_EXIT,%edx
        jz int_signal
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
        call syscall_trace_leave
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
-       andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
+       andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
        jmp int_restore_rest
        
 int_signal:
index 77040b6..34e77b1 100644 (file)
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 {
        struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
        force_sig_info(SIGTRAP, &info, tsk);
 }
 
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
-       int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
-       /*
-        * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
-        * interception
-        */
-       int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
-       int ret = 0;
-
-       /* do the secure computing check first */
-       if (!entryexit)
-               secure_computing(regs->orig_ax);
-
-       if (unlikely(current->audit_context)) {
-               if (entryexit)
-                       audit_syscall_exit(AUDITSC_RESULT(regs->ax),
-                                               regs->ax);
-               /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
-                * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
-                * not used, entry.S will call us only on syscall exit, not
-                * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
-                * calling send_sigtrap() on syscall entry.
-                *
-                * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
-                * is_singlestep is false, despite his name, so we will still do
-                * the correct thing.
-                */
-               else if (is_singlestep)
-                       goto out;
-       }
-
-       if (!(current->ptrace & PT_PTRACED))
-               goto out;
-
-       /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
-        * and then is resumed with SYSEMU_SINGLESTEP, it will come in
-        * here. We have to check this and return */
-       if (is_sysemu && entryexit)
-               return 0;
-
-       /* Fake a debug trap */
-       if (is_singlestep)
-               send_sigtrap(current, regs, 0);
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
-               goto out;
-
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       /* Note that the debugger could change the result of test_thread_flag!*/
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-       ret = is_sysemu;
-out:
-       if (unlikely(current->audit_context) && !entryexit)
-               audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
-                                   regs->bx, regs->cx, regs->dx, regs->si);
-       if (ret == 0)
-               return 0;
-
-       regs->orig_ax = -1; /* force skip of syscall restarting */
-       if (unlikely(current->audit_context))
-               audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
-       return 1;
-}
-
-#else  /* CONFIG_X86_64 */
-
 static void syscall_trace(struct pt_regs *regs)
 {
+       if (!(current->ptrace & PT_PTRACED))
+               return;
 
 #if 0
        printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs)
        }
 }
 
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+#ifdef CONFIG_X86_32
+# define IS_IA32       1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32       test_thread_flag(TIF_IA32)
+#else
+# define IS_IA32       0
+#endif
+
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
 {
+       long ret = 0;
+
        /* do the secure computing check first */
        secure_computing(regs->orig_ax);
 
-       if (test_thread_flag(TIF_SYSCALL_TRACE)
-           && (current->ptrace & PT_PTRACED))
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               ret = -1L;
+
+       if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
                syscall_trace(regs);
 
        if (unlikely(current->audit_context)) {
-               if (test_thread_flag(TIF_IA32)) {
+               if (IS_IA32)
                        audit_syscall_entry(AUDIT_ARCH_I386,
                                            regs->orig_ax,
                                            regs->bx, regs->cx,
                                            regs->dx, regs->si);
-               } else {
+#ifdef CONFIG_X86_64
+               else
                        audit_syscall_entry(AUDIT_ARCH_X86_64,
                                            regs->orig_ax,
                                            regs->di, regs->si,
                                            regs->dx, regs->r10);
-               }
+#endif
        }
+
+       return ret ?: regs->orig_ax;
 }
 
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
        if (unlikely(current->audit_context))
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
-       if ((test_thread_flag(TIF_SYSCALL_TRACE)
-            || test_thread_flag(TIF_SINGLESTEP))
-           && (current->ptrace & PT_PTRACED))
+       if (test_thread_flag(TIF_SYSCALL_TRACE))
                syscall_trace(regs);
-}
 
-#endif /* CONFIG_X86_32 */
+       /*
+        * If TIF_SYSCALL_EMU is set, we only get here because of
+        * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+        * We already reported this syscall instruction in
+        * syscall_trace_enter(), so don't do any more now.
+        */
+       if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+               return;
+
+       /*
+        * If we are single-stepping, synthesize a trap to follow the
+        * system call instruction.
+        */
+       if (test_thread_flag(TIF_SINGLESTEP) &&
+           (current->ptrace & PT_PTRACED))
+               send_sigtrap(current, regs, 0);
+}
index f13e62e..2bc162e 100644 (file)
        .endif
        .endm
 
-       .macro LOAD_ARGS offset
+       .macro LOAD_ARGS offset, skiprax=0
        movq \offset(%rsp),    %r11
        movq \offset+8(%rsp),  %r10
        movq \offset+16(%rsp), %r9
        movq \offset+48(%rsp), %rdx
        movq \offset+56(%rsp), %rsi
        movq \offset+64(%rsp), %rdi
+       .if \skiprax
+       .else
        movq \offset+72(%rsp), %rax
+       .endif
        .endm
 
 #define REST_SKIP      6*8
        .macro icebp
        .byte 0xf1
        .endm
-
index f224eb3..72e7b9d 100644 (file)
 
 #ifdef __x86_64__
 # define PTRACE_ARCH_PRCTL       30
-#else
-# define PTRACE_SYSEMU           31
-# define PTRACE_SYSEMU_SINGLESTEP 32
 #endif
 
+#define PTRACE_SYSEMU            31
+#define PTRACE_SYSEMU_SINGLESTEP  32
+
 #define PTRACE_SINGLEBLOCK     33      /* resume execution until next branch */
 
 #ifndef __ASSEMBLY__
index fb8d3cd..b2702a1 100644 (file)
@@ -75,9 +75,7 @@ struct thread_info {
 #define TIF_NEED_RESCHED       3       /* rescheduling necessary */
 #define TIF_SINGLESTEP         4       /* reenable singlestep on user return*/
 #define TIF_IRET               5       /* force IRET */
-#ifdef CONFIG_X86_32
 #define TIF_SYSCALL_EMU                6       /* syscall emulation active */
-#endif
 #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
 #define TIF_SECCOMP            8       /* secure computing */
 #define TIF_MCE_NOTIFY         10      /* notify userspace of an MCE */
@@ -100,11 +98,7 @@ struct thread_info {
 #define _TIF_SINGLESTEP                (1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_IRET              (1 << TIF_IRET)
-#ifdef CONFIG_X86_32
 #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
-#else
-#define _TIF_SYSCALL_EMU       0
-#endif
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY                (1 << TIF_MCE_NOTIFY)
@@ -121,11 +115,20 @@ struct thread_info {
 #define _TIF_DS_AREA_MSR       (1 << TIF_DS_AREA_MSR)
 #define _TIF_BTS_TRACE_TS      (1 << TIF_BTS_TRACE_TS)
 
+/* work to do in syscall_trace_enter() */
+#define _TIF_WORK_SYSCALL_ENTRY        \
+       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+        _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+
+/* work to do in syscall_trace_leave() */
+#define _TIF_WORK_SYSCALL_EXIT \
+       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK                                                 \
        (0x0000FFFF &                                                   \
         ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|                       \
-        _TIF_SECCOMP|_TIF_SYSCALL_EMU))
+          _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)