perf: Optimize perf_output_*() by avoiding local_xchg()
Peter Zijlstra [Tue, 18 May 2010 09:12:48 +0000 (11:12 +0200)]
Since the x86 XCHG instruction implies LOCK, avoid local_xchg() by
using a sequence count instead.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/perf_event.h
kernel/perf_event.c

index ce76676..fe50347 100644 (file)
@@ -804,6 +804,7 @@ struct perf_output_handle {
        struct perf_mmap_data           *data;
        unsigned long                   head;
        unsigned long                   offset;
+       unsigned long                   wakeup;
        int                             nmi;
        int                             sample;
 };
index 1f98c78..7e3bcf1 100644 (file)
@@ -2917,6 +2917,7 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
 
        preempt_disable();
        local_inc(&data->nest);
+       handle->wakeup = local_read(&data->wakeup);
 }
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
@@ -2950,7 +2951,7 @@ again:
                goto again;
        }
 
-       if (local_xchg(&data->wakeup, 0))
+       if (handle->wakeup != local_read(&data->wakeup))
                perf_output_wakeup(handle);
 
        preempt_enable();