itimers: remove the per-cpu-ish-ness
Peter Zijlstra [Mon, 24 Nov 2008 16:06:57 +0000 (17:06 +0100)]
Either we bounce once cacheline per cpu per tick, yielding n^2 bounces
or we just bounce a single..

Also, using per-cpu allocations for the thread-groups complicates the
per-cpu allocator in that its currently aimed to be a fixed sized
allocator and the only possible extention to that would be vmap based,
which is seriously constrained on 32 bit archs.

So making the per-cpu memory requirement depend on the number of
processes is an issue.

Lastly, it didn't deal with cpu-hotplug, although admittedly that might
be fixable.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/init_task.h
include/linux/sched.h
kernel/fork.c
kernel/posix-cpu-timers.c
kernel/sched_stats.h

index 2f3c2d4..ea0ea1a 100644 (file)
@@ -48,6 +48,12 @@ extern struct fs_struct init_fs;
        .posix_timers    = LIST_HEAD_INIT(sig.posix_timers),            \
        .cpu_timers     = INIT_CPU_TIMERS(sig.cpu_timers),              \
        .rlim           = INIT_RLIMITS,                                 \
+       .cputime        = { .totals = {                                 \
+               .utime = cputime_zero,                                  \
+               .stime = cputime_zero,                                  \
+               .sum_exec_runtime = 0,                                  \
+               .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock),  \
+       }, },                                                           \
 }
 
 extern struct nsproxy init_nsproxy;
index 4cae9b8..c20943e 100644 (file)
@@ -450,6 +450,7 @@ struct task_cputime {
        cputime_t utime;
        cputime_t stime;
        unsigned long long sum_exec_runtime;
+       spinlock_t lock;
 };
 /* Alternate field names when used to cache expirations. */
 #define prof_exp       stime
@@ -465,7 +466,7 @@ struct task_cputime {
  * used for thread group CPU clock calculations.
  */
 struct thread_group_cputime {
-       struct task_cputime *totals;
+       struct task_cputime totals;
 };
 
 /*
@@ -2180,24 +2181,30 @@ static inline int spin_needbreak(spinlock_t *lock)
  * Thread group CPU time accounting.
  */
 
-extern int thread_group_cputime_alloc(struct task_struct *);
-extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
+static inline
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
-       sig->cputime.totals = NULL;
+       struct task_cputime *totals = &tsk->signal->cputime.totals;
+       unsigned long flags;
+
+       spin_lock_irqsave(&totals->lock, flags);
+       *times = *totals;
+       spin_unlock_irqrestore(&totals->lock, flags);
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-       if (curr->signal->cputime.totals)
-               return 0;
-       return thread_group_cputime_alloc(curr);
+       sig->cputime.totals = (struct task_cputime){
+               .utime = cputime_zero,
+               .stime = cputime_zero,
+               .sum_exec_runtime = 0,
+       };
+
+       spin_lock_init(&sig->cputime.totals.lock);
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-       free_percpu(sig->cputime.totals);
 }
 
 /*
index 7b8f2a7..7087d8c 100644 (file)
@@ -820,14 +820,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        int ret;
 
        if (clone_flags & CLONE_THREAD) {
-               ret = thread_group_cputime_clone_thread(current);
-               if (likely(!ret)) {
-                       atomic_inc(&current->signal->count);
-                       atomic_inc(&current->signal->live);
-               }
-               return ret;
+               atomic_inc(&current->signal->count);
+               atomic_inc(&current->signal->live);
+               return 0;
        }
        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+       if (sig)
+               posix_cpu_timers_init_group(sig);
+
        tsk->signal = sig;
        if (!sig)
                return -ENOMEM;
@@ -864,8 +865,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
-       posix_cpu_timers_init_group(sig);
-
        acct_init_pacct(&sig->pacct);
 
        tty_audit_fork(sig);
index 157de3a..fa07da9 100644 (file)
 #include <linux/kernel_stat.h>
 
 /*
- * Allocate the thread_group_cputime structure appropriately and fill in the
- * current values of the fields.  Called from copy_signal() via
- * thread_group_cputime_clone_thread() when adding a second or subsequent
- * thread to a thread group.  Assumes interrupts are enabled when called.
- */
-int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-       struct signal_struct *sig = tsk->signal;
-       struct task_cputime *cputime;
-
-       /*
-        * If we have multiple threads and we don't already have a
-        * per-CPU task_cputime struct (checked in the caller), allocate
-        * one and fill it in with the times accumulated so far.  We may
-        * race with another thread so recheck after we pick up the sighand
-        * lock.
-        */
-       cputime = alloc_percpu(struct task_cputime);
-       if (cputime == NULL)
-               return -ENOMEM;
-       spin_lock_irq(&tsk->sighand->siglock);
-       if (sig->cputime.totals) {
-               spin_unlock_irq(&tsk->sighand->siglock);
-               free_percpu(cputime);
-               return 0;
-       }
-       sig->cputime.totals = cputime;
-       cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
-       cputime->utime = tsk->utime;
-       cputime->stime = tsk->stime;
-       cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-       spin_unlock_irq(&tsk->sighand->siglock);
-       return 0;
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * @tsk:       The task we use to identify the thread group.
- * @times:     task_cputime structure in which we return the summed fields.
- *
- * Walk the list of CPUs to sum the per-CPU time fields in the thread group
- * time structure.
- */
-void thread_group_cputime(
-       struct task_struct *tsk,
-       struct task_cputime *times)
-{
-       struct task_cputime *totals, *tot;
-       int i;
-
-       totals = tsk->signal->cputime.totals;
-       if (!totals) {
-               times->utime = tsk->utime;
-               times->stime = tsk->stime;
-               times->sum_exec_runtime = tsk->se.sum_exec_runtime;
-               return;
-       }
-
-       times->stime = times->utime = cputime_zero;
-       times->sum_exec_runtime = 0;
-       for_each_possible_cpu(i) {
-               tot = per_cpu_ptr(totals, i);
-               times->utime = cputime_add(times->utime, tot->utime);
-               times->stime = cputime_add(times->stime, tot->stime);
-               times->sum_exec_runtime += tot->sum_exec_runtime;
-       }
-}
-
-/*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
 void update_rlimit_cpu(unsigned long rlim_new)
index f2773b5..8ab0cef 100644 (file)
@@ -296,6 +296,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 static inline void account_group_user_time(struct task_struct *tsk,
                                           cputime_t cputime)
 {
+       struct task_cputime *times;
        struct signal_struct *sig;
 
        /* tsk == current, ensure it is safe to use ->signal */
@@ -303,13 +304,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
                return;
 
        sig = tsk->signal;
-       if (sig->cputime.totals) {
-               struct task_cputime *times;
+       times = &sig->cputime.totals;
 
-               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-               times->utime = cputime_add(times->utime, cputime);
-               put_cpu_no_resched();
-       }
+       spin_lock(&times->lock);
+       times->utime = cputime_add(times->utime, cputime);
+       spin_unlock(&times->lock);
 }
 
 /**
@@ -325,6 +324,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 static inline void account_group_system_time(struct task_struct *tsk,
                                             cputime_t cputime)
 {
+       struct task_cputime *times;
        struct signal_struct *sig;
 
        /* tsk == current, ensure it is safe to use ->signal */
@@ -332,13 +332,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
                return;
 
        sig = tsk->signal;
-       if (sig->cputime.totals) {
-               struct task_cputime *times;
+       times = &sig->cputime.totals;
 
-               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-               times->stime = cputime_add(times->stime, cputime);
-               put_cpu_no_resched();
-       }
+       spin_lock(&times->lock);
+       times->stime = cputime_add(times->stime, cputime);
+       spin_unlock(&times->lock);
 }
 
 /**
@@ -354,6 +352,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 static inline void account_group_exec_runtime(struct task_struct *tsk,
                                              unsigned long long ns)
 {
+       struct task_cputime *times;
        struct signal_struct *sig;
 
        sig = tsk->signal;
@@ -362,11 +361,9 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
        if (unlikely(!sig))
                return;
 
-       if (sig->cputime.totals) {
-               struct task_cputime *times;
+       times = &sig->cputime.totals;
 
-               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
-               times->sum_exec_runtime += ns;
-               put_cpu_no_resched();
-       }
+       spin_lock(&times->lock);
+       times->sum_exec_runtime += ns;
+       spin_unlock(&times->lock);
 }