Merge branch 'core/percpu' into perfcounters/core
Ingo Molnar [Fri, 23 Jan 2009 09:20:15 +0000 (10:20 +0100)]
Conflicts:
arch/x86/include/asm/hardirq_32.h
arch/x86/include/asm/hardirq_64.h

Semantic merge:
arch/x86/include/asm/hardirq.h
[ added apic_perf_irqs field. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>

12 files changed:
arch/x86/Kconfig
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/mach-default/entry_arch.h
arch/x86/kernel/apic.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/irqinit_32.c
include/linux/sched.h
kernel/exit.c
kernel/fork.c
kernel/sched.c

diff --combined arch/x86/Kconfig
@@@ -391,6 -391,13 +391,13 @@@ config X86_RDC321
          as R-8610-(G).
          If you don't have one of these chips, you should say N here.
  
+ config X86_UV
+       bool "SGI Ultraviolet"
+       depends on X86_64
+       help
+         This option is needed in order to support SGI Ultraviolet systems.
+         If you don't have one of these, you should say N here.
  config SCHED_OMIT_FRAME_POINTER
        def_bool y
        prompt "Single-depth WCHAN output"
@@@ -685,7 -692,6 +692,7 @@@ config X86_UP_IOAPI
  config X86_LOCAL_APIC
        def_bool y
        depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 +      select HAVE_PERF_COUNTERS if (!M386 && !M486)
  
  config X86_IO_APIC
        def_bool y
@@@ -1341,13 -1347,17 +1348,17 @@@ config SECCOM
  
          If unsure, say Y. Only embedded should say N here.
  
+ config CC_STACKPROTECTOR_ALL
+       bool
  config CC_STACKPROTECTOR
        bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-       depends on X86_64 && EXPERIMENTAL && BROKEN
+       depends on X86_64
+       select CC_STACKPROTECTOR_ALL
        help
-          This option turns on the -fstack-protector GCC feature. This
-         feature puts, at the beginning of critical functions, a canary
-         value on the stack just before the return address, and validates
+           This option turns on the -fstack-protector GCC feature. This
+         feature puts, at the beginning of functions, a canary value on
+         the stack just before the return address, and validates
          the value just before actually returning.  Stack based buffer
          overflows (that need to overwrite this return address) now also
          overwrite the canary, which gets detected and the attack is then
          neutralized via a kernel panic.
  
          This feature requires gcc version 4.2 or above, or a distribution
          gcc with the feature backported. Older versions are automatically
-         detected and for those versions, this configuration option is ignored.
- config CC_STACKPROTECTOR_ALL
-       bool "Use stack-protector for all functions"
-       depends on CC_STACKPROTECTOR
-       help
-         Normally, GCC only inserts the canary value protection for
-         functions that use large-ish on-stack buffers. By enabling
-         this option, GCC will be asked to do this for ALL functions.
+         detected and for those versions, this configuration option is
+         ignored. (and a warning is printed during bootup)
  
  source kernel/Kconfig.hz
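
The CC_STACKPROTECTOR help text above is the only prose description of the canary scheme, so a rough illustration may help. This is a hedged sketch of what the compiler-inserted protection amounts to, not the actual implementation: GCC emits the equivalent checks itself, and on x86_64 the kernel keeps the guard value in per-cpu data rather than in a global, but __stack_chk_fail() is the real failure hook the option relies on.

extern void __stack_chk_fail(void);	/* called by GCC-emitted code on a mismatch */
unsigned long __stack_chk_guard;	/* guard word, set to a random value early */

void protected_function(void)
{
	unsigned long canary = __stack_chk_guard;	/* prologue: park a copy of the guard */
	char buf[64];

	/*
	 * ... code that writes into buf: a linear overflow runs past buf and
	 * tramples the canary before it can reach the saved return address ...
	 */

	if (canary != __stack_chk_guard)	/* epilogue: verify before returning */
		__stack_chk_fail();		/* mismatch => attack detected, panic */
}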
  
diff --combined arch/x86/include/asm/hardirq.h
@@@ -1,11 -1,52 +1,53 @@@
- #ifdef CONFIG_X86_32
- # include "hardirq_32.h"
- #else
- # include "hardirq_64.h"
+ #ifndef _ASM_X86_HARDIRQ_H
+ #define _ASM_X86_HARDIRQ_H
+ #include <linux/threads.h>
+ #include <linux/irq.h>
+ typedef struct {
+       unsigned int __softirq_pending;
+       unsigned int __nmi_count;       /* arch dependent */
+       unsigned int irq0_irqs;
+ #ifdef CONFIG_X86_LOCAL_APIC
+       unsigned int apic_timer_irqs;   /* arch dependent */
+       unsigned int irq_spurious_count;
+ #endif
++      unsigned int apic_perf_irqs;
+ #ifdef CONFIG_SMP
+       unsigned int irq_resched_count;
+       unsigned int irq_call_count;
+       unsigned int irq_tlb_count;
+ #endif
+ #ifdef CONFIG_X86_MCE
+       unsigned int irq_thermal_count;
+ # ifdef CONFIG_X86_64
+       unsigned int irq_threshold_count;
+ # endif
  #endif
+ } ____cacheline_aligned irq_cpustat_t;
+ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+ /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+ #define MAX_HARDIRQS_PER_CPU NR_VECTORS
+ #define __ARCH_IRQ_STAT
+ #define inc_irq_stat(member)  percpu_add(irq_stat.member, 1)
+ #define local_softirq_pending()       percpu_read(irq_stat.__softirq_pending)
+ #define __ARCH_SET_SOFTIRQ_PENDING
+ #define set_softirq_pending(x)        percpu_write(irq_stat.__softirq_pending, (x))
+ #define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
+ extern void ack_bad_irq(unsigned int irq);
  
  extern u64 arch_irq_stat_cpu(unsigned int cpu);
  #define arch_irq_stat_cpu     arch_irq_stat_cpu
  
  extern u64 arch_irq_stat(void);
  #define arch_irq_stat         arch_irq_stat
+ #endif /* _ASM_X86_HARDIRQ_H */
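
This header is the semantic merge called out in the commit message: the unified irq_cpustat_t gains the apic_perf_irqs field, and inc_irq_stat() now maps onto the new percpu_add() accessor. Roughly how the x86 perf counter interrupt path (wired up as smp_perf_counter_interrupt in the entry_64.S hunk below) is expected to bump the counter; a sketch only, not the actual handler body:

#include <linux/ptrace.h>	/* struct pt_regs */
#include <asm/hardirq.h>	/* irq_stat, inc_irq_stat() */

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	/* ... ack the local APIC ... */
	inc_irq_stat(apic_perf_irqs);	/* percpu_add(irq_stat.apic_perf_irqs, 1) */
	/* ... then read and handle the overflowed counters ... */
}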
diff --combined arch/x86/include/asm/irq_vectors.h
   *  some of the following vectors are 'rare', they are merged
   *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
   *  TLB, reschedule and local APIC vectors are performance-critical.
-  *
-  *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
   */
  #ifdef CONFIG_X86_32
  
  # define SPURIOUS_APIC_VECTOR         0xff
  # define ERROR_APIC_VECTOR            0xfe
- # define INVALIDATE_TLB_VECTOR                0xfd
- # define RESCHEDULE_VECTOR            0xfc
- # define CALL_FUNCTION_VECTOR         0xfb
- # define CALL_FUNCTION_SINGLE_VECTOR  0xfa
- # define THERMAL_APIC_VECTOR          0xf0
+ # define RESCHEDULE_VECTOR            0xfd
+ # define CALL_FUNCTION_VECTOR         0xfc
+ # define CALL_FUNCTION_SINGLE_VECTOR  0xfb
+ # define THERMAL_APIC_VECTOR          0xfa
+ /* 0xf8 - 0xf9 : free */
+ # define INVALIDATE_TLB_VECTOR_END    0xf7
+ # define INVALIDATE_TLB_VECTOR_START  0xf0    /* f0-f7 used for TLB flush */
+ # define NUM_INVALIDATE_TLB_VECTORS   8
  
  #else
  
- #define SPURIOUS_APIC_VECTOR          0xff
- #define ERROR_APIC_VECTOR             0xfe
- #define RESCHEDULE_VECTOR             0xfd
- #define CALL_FUNCTION_VECTOR          0xfc
- #define CALL_FUNCTION_SINGLE_VECTOR   0xfb
- #define THERMAL_APIC_VECTOR           0xfa
- #define THRESHOLD_APIC_VECTOR         0xf9
- #define UV_BAU_MESSAGE                        0xf8
- #define INVALIDATE_TLB_VECTOR_END     0xf7
- #define INVALIDATE_TLB_VECTOR_START   0xf0    /* f0-f7 used for TLB flush */
+ # define SPURIOUS_APIC_VECTOR         0xff
+ # define ERROR_APIC_VECTOR            0xfe
+ # define RESCHEDULE_VECTOR            0xfd
+ # define CALL_FUNCTION_VECTOR         0xfc
+ # define CALL_FUNCTION_SINGLE_VECTOR  0xfb
+ # define THERMAL_APIC_VECTOR          0xfa
+ # define THRESHOLD_APIC_VECTOR                0xf9
+ # define UV_BAU_MESSAGE                       0xf8
+ # define INVALIDATE_TLB_VECTOR_END    0xf7
+ # define INVALIDATE_TLB_VECTOR_START  0xf0    /* f0-f7 used for TLB flush */
  
  #define NUM_INVALIDATE_TLB_VECTORS    8
  
  #define LOCAL_TIMER_VECTOR    0xef
  
  /*
 + * Performance monitoring interrupt vector:
 + */
 +#define LOCAL_PERF_VECTOR     0xee
 +
 +/*
   * First APIC vector available to drivers: (vectors 0x30-0xee) we
   * start at 0x31(0x41) to spread out vectors evenly between priority
   * levels. (0x80 is the syscall vector)
   */
diff --combined arch/x86/include/asm/mach-default/entry_arch.h
  #ifdef CONFIG_X86_SMP
  BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
  BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
  BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
  BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+ BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
+                smp_invalidate_interrupt)
+ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
+                smp_invalidate_interrupt)
  #endif
  
  /*
   * a much simpler SMP time architecture:
   */
  #ifdef CONFIG_X86_LOCAL_APIC
 +
  BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
  BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
  BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
  
 +#ifdef CONFIG_PERF_COUNTERS
 +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
 +#endif
 +
  #ifdef CONFIG_X86_MCE_P4THERMAL
  BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
  #endif
diff --combined arch/x86/kernel/apic.c
@@@ -35,7 -35,6 +35,7 @@@
  #include <linux/nmi.h>
  #include <linux/timex.h>
  
 +#include <asm/perf_counter.h>
  #include <asm/atomic.h>
  #include <asm/mtrr.h>
  #include <asm/mpspec.h>
@@@ -1132,7 -1131,9 +1132,9 @@@ void __cpuinit setup_local_APIC(void
        int i, j;
  
        if (disable_apic) {
+ #ifdef CONFIG_X86_IO_APIC
                disable_ioapic_setup();
+ #endif
                return;
        }
  
                apic_write(APIC_ESR, 0);
        }
  #endif
 +      perf_counters_lapic_init(0);
  
        preempt_disable();
  
@@@ -1844,6 -1844,11 +1846,11 @@@ void __cpuinit generic_processor_info(i
        num_processors++;
        cpu = cpumask_next_zero(-1, cpu_present_mask);
  
+       if (version != apic_version[boot_cpu_physical_apicid])
+               WARN_ONCE(1,
+                       "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+                       apic_version[boot_cpu_physical_apicid], cpu, version);
        physid_set(apicid, phys_cpu_present_map);
        if (apicid == boot_cpu_physical_apicid) {
                /*
diff --combined arch/x86/kernel/cpu/common.c
@@@ -17,7 -17,6 +17,7 @@@
  #include <asm/mmu_context.h>
  #include <asm/mtrr.h>
  #include <asm/mce.h>
 +#include <asm/perf_counter.h>
  #include <asm/pat.h>
  #include <asm/asm.h>
  #include <asm/numa.h>
@@@ -29,9 -28,9 +29,9 @@@
  #include <asm/apic.h>
  #include <mach_apic.h>
  #include <asm/genapic.h>
+ #include <asm/uv/uv.h>
  #endif
  
- #include <asm/pda.h>
  #include <asm/pgtable.h>
  #include <asm/processor.h>
  #include <asm/desc.h>
@@@ -65,23 -64,23 +65,23 @@@ cpumask_t cpu_sibling_setup_map
  
  static struct cpu_dev *this_cpu __cpuinitdata;
  
+ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
  #ifdef CONFIG_X86_64
- /* We need valid kernel segments for data and code in long mode too
-  * IRET will check the segment types  kkeil 2000/10/28
-  * Also sysret mandates a special GDT layout
-  */
- /* The TLS descriptors are currently at a different place compared to i386.
-    Hopefully nobody expects them at a fixed place (Wine?) */
- DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+       /*
+        * We need valid kernel segments for data and code in long mode too
+        * IRET will check the segment types  kkeil 2000/10/28
+        * Also sysret mandates a special GDT layout
+        *
+        * The TLS descriptors are currently at a different place compared to i386.
+        * Hopefully nobody expects them at a fixed place (Wine?)
+        */
        [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
        [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
        [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
        [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
        [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
        [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
- } };
  #else
- DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
        [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
        [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
        [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
        [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
  
        [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-       [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
- } };
+       [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
  #endif
+ } };
  EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
  
  #ifdef CONFIG_X86_32
@@@ -775,7 -774,6 +775,7 @@@ void __init identify_boot_cpu(void
  #else
        vgetcpu_set_mode();
  #endif
 +      init_hw_perf_counters();
  }
  
  void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@@ -883,12 -881,13 +883,13 @@@ __setup("clearcpuid=", setup_disablecpu
  #ifdef CONFIG_X86_64
  struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
  
- DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+ DEFINE_PER_CPU_FIRST(union irq_stack_union,
+                    irq_stack_union) __aligned(PAGE_SIZE);
  #ifdef CONFIG_SMP
  DEFINE_PER_CPU(char *, irq_stack_ptr);        /* will be set during per cpu init */
  #else
  DEFINE_PER_CPU(char *, irq_stack_ptr) =
-       per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+       per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
  #endif
  
  DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@@ -897,15 -896,6 +898,6 @@@ EXPORT_PER_CPU_SYMBOL(kernel_stack)
  
  DEFINE_PER_CPU(unsigned int, irq_count) = -1;
  
- void __cpuinit pda_init(int cpu)
- {
-       /* Setup up data that may be needed in __get_free_pages early */
-       loadsegment(fs, 0);
-       loadsegment(gs, 0);
-       load_pda_offset(cpu);
- }
  static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
        [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
        __aligned(PAGE_SIZE);
@@@ -969,9 -959,9 +961,9 @@@ void __cpuinit cpu_init(void
        struct task_struct *me;
        int i;
  
-       /* CPU 0 is initialised in head64.c */
-       if (cpu != 0)
-               pda_init(cpu);
+       loadsegment(fs, 0);
+       loadsegment(gs, 0);
+       load_gs_base(cpu);
  
  #ifdef CONFIG_NUMA
        if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --combined arch/x86/kernel/entry_64.S
@@@ -982,8 -982,10 +982,10 @@@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR 
        irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
  #endif
  
+ #ifdef CONFIG_X86_UV
  apicinterrupt UV_BAU_MESSAGE \
        uv_bau_message_intr1 uv_bau_message_interrupt
+ #endif
  apicinterrupt LOCAL_TIMER_VECTOR \
        apic_timer_interrupt smp_apic_timer_interrupt
  
@@@ -1025,11 -1027,6 +1027,11 @@@ apicinterrupt ERROR_APIC_VECTOR 
  apicinterrupt SPURIOUS_APIC_VECTOR \
        spurious_interrupt smp_spurious_interrupt
  
 +#ifdef CONFIG_PERF_COUNTERS
 +apicinterrupt LOCAL_PERF_VECTOR \
 +      perf_counter_interrupt smp_perf_counter_interrupt
 +#endif
 +
  /*
   * Exception entry points.
   */
diff --combined arch/x86/kernel/irqinit_32.c
@@@ -149,8 -149,15 +149,15 @@@ void __init native_init_IRQ(void
         */
        alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
  
-       /* IPI for invalidation */
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+       /* IPIs for invalidation */
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
  
        /* IPI for generic function call */
        alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
        /* IPI vectors for APIC spurious and error interrupts */
        alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
        alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 +# ifdef CONFIG_PERF_COUNTERS
 +      alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
 +# endif
  #endif
  
  #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
diff --combined include/linux/sched.h
@@@ -71,7 -71,6 +71,7 @@@ struct sched_param 
  #include <linux/fs_struct.h>
  #include <linux/compiler.h>
  #include <linux/completion.h>
 +#include <linux/perf_counter.h>
  #include <linux/pid.h>
  #include <linux/percpu.h>
  #include <linux/topology.h>
@@@ -1032,8 -1031,6 +1032,8 @@@ struct sched_entity 
        u64                     last_wakeup;
        u64                     avg_overlap;
  
 +      u64                     nr_migrations;
 +
  #ifdef CONFIG_SCHEDSTATS
        u64                     wait_start;
        u64                     wait_max;
        u64                     exec_max;
        u64                     slice_max;
  
 -      u64                     nr_migrations;
        u64                     nr_migrations_cold;
        u64                     nr_failed_migrations_affine;
        u64                     nr_failed_migrations_running;
@@@ -1159,10 -1157,9 +1159,9 @@@ struct task_struct 
        pid_t pid;
        pid_t tgid;
  
- #ifdef CONFIG_CC_STACKPROTECTOR
        /* Canary value for the -fstack-protector gcc feature */
        unsigned long stack_canary;
- #endif
        /* 
         * pointers to (original) parent process, youngest child, younger sibling,
         * older sibling, respectively.  (p->father can be replaced with 
        struct list_head pi_state_list;
        struct futex_pi_state *pi_state_cache;
  #endif
 +      struct perf_counter_context perf_counter_ctx;
  #ifdef CONFIG_NUMA
        struct mempolicy *mempolicy;
        short il_next;
@@@ -2069,6 -2065,19 +2068,19 @@@ static inline int object_is_on_stack(vo
  
  extern void thread_info_cache_init(void);
  
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ static inline unsigned long stack_not_used(struct task_struct *p)
+ {
+       unsigned long *n = end_of_stack(p);
+       do {    /* Skip over canary */
+               n++;
+       } while (!*n);
+       return (unsigned long)n - (unsigned long)end_of_stack(p);
+ }
+ #endif
  /* set thread flags in other task's structures
   * - see asm/thread_info.h for TIF_xxxx flags available
   */
@@@ -2325,13 -2334,6 +2337,13 @@@ static inline void inc_syscw(struct tas
  #define TASK_SIZE_OF(tsk)     TASK_SIZE
  #endif
  
 +/*
 + * Call the function if the target task is executing on a CPU right now:
 + */
 +extern void task_oncpu_function_call(struct task_struct *p,
 +                                   void (*func) (void *info), void *info);
 +
 +
  #ifdef CONFIG_MM_OWNER
  extern void mm_update_next_owner(struct mm_struct *mm);
  extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --combined kernel/exit.c
@@@ -159,9 -159,6 +159,9 @@@ static void delayed_put_task_struct(str
  {
        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
  
 +#ifdef CONFIG_PERF_COUNTERS
 +      WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
 +#endif
        trace_sched_process_free(tsk);
        put_task_struct(tsk);
  }
@@@ -980,12 -977,9 +980,9 @@@ static void check_stack_usage(void
  {
        static DEFINE_SPINLOCK(low_water_lock);
        static int lowest_to_date = THREAD_SIZE;
-       unsigned long *n = end_of_stack(current);
        unsigned long free;
  
-       while (*n == 0)
-               n++;
-       free = (unsigned long)n - (unsigned long)end_of_stack(current);
+       free = stack_not_used(current);
  
        if (free >= lowest_to_date)
                return;
@@@ -1096,6 -1090,10 +1093,6 @@@ NORET_TYPE void do_exit(long code
        tsk->mempolicy = NULL;
  #endif
  #ifdef CONFIG_FUTEX
 -      /*
 -       * This must happen late, after the PID is not
 -       * hashed anymore:
 -       */
        if (unlikely(!list_empty(&tsk->pi_state_list)))
                exit_pi_state_list(tsk);
        if (unlikely(current->pi_state_cache))
@@@ -1362,12 -1360,6 +1359,12 @@@ static int wait_task_zombie(struct task
         */
        read_unlock(&tasklist_lock);
  
 +      /*
 +       * Flush inherited counters to the parent - before the parent
 +       * gets woken up by child-exit notifications.
 +       */
 +      perf_counter_exit_task(p);
 +
        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
        status = (p->signal->flags & SIGNAL_GROUP_EXIT)
                ? p->signal->group_exit_code : p->exit_code;
diff --combined kernel/fork.c
@@@ -61,6 -61,7 +61,7 @@@
  #include <linux/proc_fs.h>
  #include <linux/blkdev.h>
  #include <trace/sched.h>
+ #include <linux/magic.h>
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
@@@ -212,6 -213,8 +213,8 @@@ static struct task_struct *dup_task_str
  {
        struct task_struct *tsk;
        struct thread_info *ti;
+       unsigned long *stackend;
        int err;
  
        prepare_to_copy(orig);
                goto out;
  
        setup_thread_stack(tsk, orig);
+       stackend = end_of_stack(tsk);
+       *stackend = STACK_END_MAGIC;    /* for overflow detection */
  
  #ifdef CONFIG_CC_STACKPROTECTOR
        tsk->stack_canary = get_random_int();
@@@ -985,7 -990,6 +990,7 @@@ static struct task_struct *copy_process
                goto fork_out;
  
        rt_mutex_init_task(p);
 +      perf_counter_init_task(p);
  
  #ifdef CONFIG_PROVE_LOCKING
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
diff --combined kernel/sched.c
@@@ -668,7 -668,7 +668,7 @@@ static inline int cpu_of(struct rq *rq
  #define task_rq(p)            cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
  
 -static inline void update_rq_clock(struct rq *rq)
 +inline void update_rq_clock(struct rq *rq)
  {
        rq->clock = sched_clock_cpu(cpu_of(rq));
  }
@@@ -979,26 -979,6 +979,26 @@@ static struct rq *task_rq_lock(struct t
        }
  }
  
 +void curr_rq_lock_irq_save(unsigned long *flags)
 +      __acquires(rq->lock)
 +{
 +      struct rq *rq;
 +
 +      local_irq_save(*flags);
 +      rq = cpu_rq(smp_processor_id());
 +      spin_lock(&rq->lock);
 +}
 +
 +void curr_rq_unlock_irq_restore(unsigned long *flags)
 +      __releases(rq->lock)
 +{
 +      struct rq *rq;
 +
 +      rq = cpu_rq(smp_processor_id());
 +      spin_unlock(&rq->lock);
 +      local_irq_restore(*flags);
 +}
 +
  void task_rq_unlock_wait(struct task_struct *p)
  {
        struct rq *rq = task_rq(p);
@@@ -1905,14 -1885,12 +1905,14 @@@ void set_task_cpu(struct task_struct *p
                p->se.sleep_start -= clock_offset;
        if (p->se.block_start)
                p->se.block_start -= clock_offset;
 +#endif
        if (old_cpu != new_cpu) {
 -              schedstat_inc(p, se.nr_migrations);
 +              p->se.nr_migrations++;
 +#ifdef CONFIG_SCHEDSTATS
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
 -      }
  #endif
 +      }
        p->se.vruntime -= old_cfsrq->min_vruntime -
                                         new_cfsrq->min_vruntime;
  
@@@ -2264,27 -2242,6 +2264,27 @@@ static int sched_balance_self(int cpu, 
  
  #endif /* CONFIG_SMP */
  
 +/**
 + * task_oncpu_function_call - call a function on the cpu on which a task runs
 + * @p:                the task to evaluate
 + * @func:     the function to be called
 + * @info:     the function call argument
 + *
 + * Calls the function @func when the task is currently running. This might
 + * be on the current CPU, which just calls the function directly
 + */
 +void task_oncpu_function_call(struct task_struct *p,
 +                            void (*func) (void *info), void *info)
 +{
 +      int cpu;
 +
 +      preempt_disable();
 +      cpu = task_cpu(p);
 +      if (task_curr(p))
 +              smp_call_function_single(cpu, func, info, 1);
 +      preempt_enable();
 +}
 +
  /***
   * try_to_wake_up - wake up a thread
   * @p: the to-be-woken-up thread
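
task_oncpu_function_call(), added in the hunk above, is what the perf counter code builds on to run a callback on whichever CPU a task is currently executing on; if the task is not running, no call is made and the caller handles that case itself. A hedged sketch of the intended call pattern follows; the function names are placeholders, not the real perf-counter helpers.

#include <linux/sched.h>	/* struct task_struct, task_oncpu_function_call() */

/* Placeholder callback: in the perf counter code this would unlink a counter
 * from the running task's perf_counter_ctx.counter_list. */
static void __remove_counter_on_cpu(void *info)
{
	/*
	 * Runs on the CPU the task currently occupies (inline when that is
	 * the calling CPU), so the task cannot schedule its counter context
	 * in or out underneath us while the counter is detached.
	 */
}

static void detach_counter(struct task_struct *task, void *counter)
{
	task_oncpu_function_call(task, __remove_counter_on_cpu, counter);
	/*
	 * If the task was not running, nothing was called; the caller then
	 * does the unlink directly under the context lock.
	 */
}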
@@@ -2427,7 -2384,6 +2427,7 @@@ static void __sched_fork(struct task_st
        p->se.exec_start                = 0;
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
 +      p->se.nr_migrations             = 0;
        p->se.last_wakeup               = 0;
        p->se.avg_overlap               = 0;
  
@@@ -2648,7 -2604,6 +2648,7 @@@ static void finish_task_switch(struct r
         */
        prev_state = prev->state;
        finish_arch_switch(prev);
 +      perf_counter_task_sched_in(current, cpu_of(rq));
        finish_lock_switch(rq, prev);
  #ifdef CONFIG_SMP
        if (current->sched_class->post_schedule)
@@@ -4177,29 -4132,6 +4177,29 @@@ EXPORT_PER_CPU_SYMBOL(kstat)
   * Return any ns on the sched_clock that have not yet been banked in
   * @p in case that task is currently running.
   */
 +unsigned long long __task_delta_exec(struct task_struct *p, int update)
 +{
 +      s64 delta_exec;
 +      struct rq *rq;
 +
 +      rq = task_rq(p);
 +      WARN_ON_ONCE(!runqueue_is_locked());
 +      WARN_ON_ONCE(!task_current(rq, p));
 +
 +      if (update)
 +              update_rq_clock(rq);
 +
 +      delta_exec = rq->clock - p->se.exec_start;
 +
 +      WARN_ON_ONCE(delta_exec < 0);
 +
 +      return delta_exec;
 +}
 +
 +/*
 + * Return any ns on the sched_clock that have not yet been banked in
 + * @p in case that task is currently running.
 + */
  unsigned long long task_delta_exec(struct task_struct *p)
  {
        unsigned long flags;
@@@ -4459,7 -4391,6 +4459,7 @@@ void scheduler_tick(void
        update_rq_clock(rq);
        update_cpu_load(rq);
        curr->sched_class->task_tick(rq, curr, 0);
 +      perf_counter_task_tick(curr, cpu);
        spin_unlock(&rq->lock);
  
  #ifdef CONFIG_SMP
@@@ -4655,7 -4586,6 +4655,7 @@@ need_resched_nonpreemptible
  
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
 +              perf_counter_task_sched_out(prev, cpu);
  
                rq->nr_switches++;
                rq->curr = next;
@@@ -6009,12 -5939,7 +6009,7 @@@ void sched_show_task(struct task_struc
                printk(KERN_CONT " %016lx ", thread_saved_pc(p));
  #endif
  #ifdef CONFIG_DEBUG_STACK_USAGE
-       {
-               unsigned long *n = end_of_stack(p);
-               while (!*n)
-                       n++;
-               free = (unsigned long)n - (unsigned long)end_of_stack(p);
-       }
+       free = stack_not_used(p);
  #endif
        printk(KERN_CONT "%5lu %5d %6d\n", free,
                task_pid_nr(p), task_pid_nr(p->real_parent));