Merge branch 'linus' into timers/core
Thomas Gleixner [Mon, 10 May 2010 09:59:37 +0000 (11:59 +0200)]
Reason: Further posix_cpu_timer patches depend on mainline changes

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

12 files changed:
Documentation/feature-removal-schedule.txt
include/linux/hrtimer.h
include/linux/time.h
include/linux/timer.h
include/linux/timex.h
ipc/mqueue.c
kernel/hrtimer.c
kernel/posix-cpu-timers.c
kernel/time.c
kernel/time/ntp.c
kernel/time/timekeeping.c
kernel/timer.c

index 05df0b7..b93b781 100644 (file)
@@ -564,6 +564,16 @@ Who:       Avi Kivity <avi@redhat.com>
 
 ----------------------------
 
+What:  xtime, wall_to_monotonic
+When:  2.6.36+
+Files: kernel/time/timekeeping.c include/linux/time.h
+Why:   Cleaning up timekeeping internal values. Please use
+       existing timekeeping accessor functions to access
+       the equivalent functionality.
+Who:   John Stultz <johnstul@us.ibm.com>
+
+----------------------------
+
 What:  KVM kernel-allocated memory slots
 When:  July 2010
 Why:   Since 2.6.25, kvm supports user-allocated memory slots, which are
index 5d86fb2..fd0c1b8 100644 (file)
@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 
 extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
                                                const enum hrtimer_mode mode);
+extern int schedule_hrtimeout_range_clock(ktime_t *expires,
+               unsigned long delta, const enum hrtimer_mode mode, int clock);
 extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
 
 /* Soft interrupt function to run the hrtimer queues: */
index 6e026e4..ea3559f 100644 (file)
@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
-extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
 
 struct tms;
index a2d1eb6..ea965b8 100644 (file)
 struct tvec_base;
 
 struct timer_list {
+       /*
+        * All fields that change during normal runtime grouped to the
+        * same cacheline
+        */
        struct list_head entry;
        unsigned long expires;
+       struct tvec_base *base;
 
        void (*function)(unsigned long);
        unsigned long data;
 
-       struct tvec_base *base;
+       int slack;
+
 #ifdef CONFIG_TIMER_STATS
        void *start_site;
        char start_comm[16];
@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
 
+extern void set_timer_slack(struct timer_list *time, int slack_hz);
+
 #define TIMER_NOT_PINNED       0
 #define TIMER_PINNED           1
 /*
index 7a082b3..32d852f 100644 (file)
@@ -232,13 +232,11 @@ struct timex {
  */
 extern unsigned long tick_usec;                /* USER_HZ period (usec) */
 extern unsigned long tick_nsec;                /* ACTHZ          period (nsec) */
-extern int tickadj;                    /* amount of adjustment per tick */
 
 /*
  * phase-lock loop variables
  */
 extern int time_status;                /* clock synchronization status bits */
-extern long time_adjust;       /* The amount of adjtime left */
 
 extern void ntp_init(void);
 extern void ntp_clear(void);
@@ -271,9 +269,6 @@ extern void second_overflow(void);
 extern void update_ntp_one_tick(void);
 extern int do_adjtimex(struct timex *);
 
-/* Don't use! Compatibility define for existing users. */
-#define tickadj        (500/HZ ? : 1)
-
 int read_current_timer(unsigned long *timer_val);
 
 /* The clock frequency of the i8253/i8254 PIT */
index 722b013..d6c09c4 100644 (file)
@@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
  * sr: SEND or RECV
  */
 static int wq_sleep(struct mqueue_inode_info *info, int sr,
-                       long timeout, struct ext_wait_queue *ewp)
+                   ktime_t *timeout, struct ext_wait_queue *ewp)
 {
        int retval;
        signed long time;
@@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
                set_current_state(TASK_INTERRUPTIBLE);
 
                spin_unlock(&info->lock);
-               time = schedule_timeout(timeout);
+               time = schedule_hrtimeout_range_clock(timeout,
+                   HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
 
                while (ewp->state == STATE_PENDING)
                        cpu_relax();
@@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info)
        wake_up(&info->wait_q);
 }
 
-static long prepare_timeout(struct timespec *p)
+static int prepare_timeout(const struct timespec __user *u_abs_timeout,
+                          ktime_t *expires, struct timespec *ts)
 {
-       struct timespec nowts;
-       long timeout;
-
-       if (p) {
-               if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
-                       || p->tv_nsec >= NSEC_PER_SEC))
-                       return -EINVAL;
-               nowts = CURRENT_TIME;
-               /* first subtract as jiffies can't be too big */
-               p->tv_sec -= nowts.tv_sec;
-               if (p->tv_nsec < nowts.tv_nsec) {
-                       p->tv_nsec += NSEC_PER_SEC;
-                       p->tv_sec--;
-               }
-               p->tv_nsec -= nowts.tv_nsec;
-               if (p->tv_sec < 0)
-                       return 0;
-
-               timeout = timespec_to_jiffies(p) + 1;
-       } else
-               return MAX_SCHEDULE_TIMEOUT;
+       if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
+               return -EFAULT;
+       if (!timespec_valid(ts))
+               return -EINVAL;
 
-       return timeout;
+       *expires = timespec_to_ktime(*ts);
+       return 0;
 }
 
 static void remove_notification(struct mqueue_inode_info *info)
@@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        struct ext_wait_queue *receiver;
        struct msg_msg *msg_ptr;
        struct mqueue_inode_info *info;
-       struct timespec ts, *p = NULL;
-       long timeout;
+       ktime_t expires, *timeout = NULL;
+       struct timespec ts;
        int ret;
 
        if (u_abs_timeout) {
-               if (copy_from_user(&ts, u_abs_timeout, 
-                                       sizeof(struct timespec)))
-                       return -EFAULT;
-               p = &ts;
+               int res = prepare_timeout(u_abs_timeout, &expires, &ts);
+               if (res)
+                       return res;
+               timeout = &expires;
        }
 
        if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
                return -EINVAL;
 
-       audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
-       timeout = prepare_timeout(p);
+       audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
 
        filp = fget(mqdes);
        if (unlikely(!filp)) {
@@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
-               } else if (unlikely(timeout < 0)) {
-                       spin_unlock(&info->lock);
-                       ret = timeout;
                } else {
                        wait.task = current;
                        wait.msg = (void *) msg_ptr;
@@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                size_t, msg_len, unsigned int __user *, u_msg_prio,
                const struct timespec __user *, u_abs_timeout)
 {
-       long timeout;
        ssize_t ret;
        struct msg_msg *msg_ptr;
        struct file *filp;
        struct inode *inode;
        struct mqueue_inode_info *info;
        struct ext_wait_queue wait;
-       struct timespec ts, *p = NULL;
+       ktime_t expires, *timeout = NULL;
+       struct timespec ts;
 
        if (u_abs_timeout) {
-               if (copy_from_user(&ts, u_abs_timeout, 
-                                       sizeof(struct timespec)))
-                       return -EFAULT;
-               p = &ts;
+               int res = prepare_timeout(u_abs_timeout, &expires, &ts);
+               if (res)
+                       return res;
+               timeout = &expires;
        }
 
-       audit_mq_sendrecv(mqdes, msg_len, 0, p);
-       timeout = prepare_timeout(p);
+       audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
 
        filp = fget(mqdes);
        if (unlikely(!filp)) {
@@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
-                       msg_ptr = NULL;
-               } else if (unlikely(timeout < 0)) {
-                       spin_unlock(&info->lock);
-                       ret = timeout;
-                       msg_ptr = NULL;
                } else {
                        wait.task = current;
                        wait.state = STATE_NONE;
index 0086628..b9b134b 100644 (file)
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
 }
 
 /**
- * schedule_hrtimeout_range - sleep until timeout
+ * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires:   timeout value (ktime_t)
  * @delta:     slack in expires timeout (ktime_t)
  * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * @clock:     timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
  */
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
-                              const enum hrtimer_mode mode)
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+                              const enum hrtimer_mode mode, int clock)
 {
        struct hrtimer_sleeper t;
 
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
                return -EINTR;
        }
 
-       hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+       hrtimer_init_on_stack(&t.timer, clock, mode);
        hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
        hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 
        return !t.task ? 0 : -EINTR;
 }
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:   timeout value (ktime_t)
+ * @delta:     slack in expires timeout, in nanoseconds (unsigned long)
+ * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel gives the normal best-effort behavior for "@expires+@delta";
+ * it may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - the time given by @expires is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+                                    const enum hrtimer_mode mode)
+{
+       return schedule_hrtimeout_range_clock(expires, delta, mode,
+                                             CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
 
 /**
index bc7704b..799f360 100644 (file)
 #include <trace/events/timer.h>
 
 /*
- * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
  */
 void update_rlimit_cpu(unsigned long rlim_new)
 {
        cputime_t cputime = secs_to_cputime(rlim_new);
-       struct signal_struct *const sig = current->signal;
 
-       if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
-           cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
-               spin_lock_irq(&current->sighand->siglock);
-               set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-               spin_unlock_irq(&current->sighand->siglock);
-       }
+       spin_lock_irq(&current->sighand->siglock);
+       set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+       spin_unlock_irq(&current->sighand->siglock);
 }
 
 static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
               cputime_gt(expires, new_exp);
 }
 
-static inline int expires_le(cputime_t expires, cputime_t new_exp)
-{
-       return !cputime_eq(expires, cputime_zero) &&
-              cputime_le(expires, new_exp);
-}
 /*
  * Insert the timer on the appropriate list before any timers that
  * expire later.  This must be called with the tasklist_lock held
- * for reading, and interrupts disabled.
+ * for reading, interrupts disabled and p->sighand->siglock taken.
  */
-static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
+static void arm_timer(struct k_itimer *timer)
 {
        struct task_struct *p = timer->it.cpu.task;
        struct list_head *head, *listpos;
+       struct task_cputime *cputime_expires;
        struct cpu_timer_list *const nt = &timer->it.cpu;
        struct cpu_timer_list *next;
-       unsigned long i;
 
-       head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
-               p->cpu_timers : p->signal->cpu_timers);
+       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+               head = p->cpu_timers;
+               cputime_expires = &p->cputime_expires;
+       } else {
+               head = p->signal->cpu_timers;
+               cputime_expires = &p->signal->cputime_expires;
+       }
        head += CPUCLOCK_WHICH(timer->it_clock);
 
-       BUG_ON(!irqs_disabled());
-       spin_lock(&p->sighand->siglock);
-
        listpos = head;
-       if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
-               list_for_each_entry(next, head, entry) {
-                       if (next->expires.sched > nt->expires.sched)
-                               break;
-                       listpos = &next->entry;
-               }
-       } else {
-               list_for_each_entry(next, head, entry) {
-                       if (cputime_gt(next->expires.cpu, nt->expires.cpu))
-                               break;
-                       listpos = &next->entry;
-               }
+       list_for_each_entry(next, head, entry) {
+               if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+                       break;
+               listpos = &next->entry;
        }
        list_add(&nt->entry, listpos);
 
        if (listpos == head) {
+               union cpu_time_count *exp = &nt->expires;
+
                /*
-                * We are the new earliest-expiring timer.
-                * If we are a thread timer, there can always
-                * be a process timer telling us to stop earlier.
+                * We are the new earliest-expiring POSIX 1.b timer, hence
+                * need to update expiration cache. Take into account that
+                * for process timers we share expiration cache with itimers
+                * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
                 */
 
-               if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-                       union cpu_time_count *exp = &nt->expires;
-
-                       switch (CPUCLOCK_WHICH(timer->it_clock)) {
-                       default:
-                               BUG();
-                       case CPUCLOCK_PROF:
-                               if (expires_gt(p->cputime_expires.prof_exp,
-                                              exp->cpu))
-                                       p->cputime_expires.prof_exp = exp->cpu;
-                               break;
-                       case CPUCLOCK_VIRT:
-                               if (expires_gt(p->cputime_expires.virt_exp,
-                                              exp->cpu))
-                                       p->cputime_expires.virt_exp = exp->cpu;
-                               break;
-                       case CPUCLOCK_SCHED:
-                               if (p->cputime_expires.sched_exp == 0 ||
-                                   p->cputime_expires.sched_exp > exp->sched)
-                                       p->cputime_expires.sched_exp =
-                                                               exp->sched;
-                               break;
-                       }
-               } else {
-                       struct signal_struct *const sig = p->signal;
-                       union cpu_time_count *exp = &timer->it.cpu.expires;
-
-                       /*
-                        * For a process timer, set the cached expiration time.
-                        */
-                       switch (CPUCLOCK_WHICH(timer->it_clock)) {
-                       default:
-                               BUG();
-                       case CPUCLOCK_VIRT:
-                               if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
-                                              exp->cpu))
-                                       break;
-                               sig->cputime_expires.virt_exp = exp->cpu;
-                               break;
-                       case CPUCLOCK_PROF:
-                               if (expires_le(sig->it[CPUCLOCK_PROF].expires,
-                                              exp->cpu))
-                                       break;
-                               i = sig->rlim[RLIMIT_CPU].rlim_cur;
-                               if (i != RLIM_INFINITY &&
-                                   i <= cputime_to_secs(exp->cpu))
-                                       break;
-                               sig->cputime_expires.prof_exp = exp->cpu;
-                               break;
-                       case CPUCLOCK_SCHED:
-                               sig->cputime_expires.sched_exp = exp->sched;
-                               break;
-                       }
+               switch (CPUCLOCK_WHICH(timer->it_clock)) {
+               case CPUCLOCK_PROF:
+                       if (expires_gt(cputime_expires->prof_exp, exp->cpu))
+                               cputime_expires->prof_exp = exp->cpu;
+                       break;
+               case CPUCLOCK_VIRT:
+                       if (expires_gt(cputime_expires->virt_exp, exp->cpu))
+                               cputime_expires->virt_exp = exp->cpu;
+                       break;
+               case CPUCLOCK_SCHED:
+                       if (cputime_expires->sched_exp == 0 ||
+                           cputime_expires->sched_exp > exp->sched)
+                               cputime_expires->sched_exp = exp->sched;
+                       break;
                }
        }
-
-       spin_unlock(&p->sighand->siglock);
 }
 
 /*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
  */
 static void cpu_timer_fire(struct k_itimer *timer)
 {
-       if (unlikely(timer->sigq == NULL)) {
+       if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+               /*
+                * The user does not want any signal.
+                */
+               timer->it.cpu.expires.sched = 0;
+       } else if (unlikely(timer->sigq == NULL)) {
                /*
                 * This a special case for clock_nanosleep,
                 * not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
                        struct itimerspec *new, struct itimerspec *old)
 {
        struct task_struct *p = timer->it.cpu.task;
-       union cpu_time_count old_expires, new_expires, val;
+       union cpu_time_count old_expires, new_expires, old_incr, val;
        int ret;
 
        if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
        BUG_ON(!irqs_disabled());
 
        ret = 0;
+       old_incr = timer->it.cpu.incr;
        spin_lock(&p->sighand->siglock);
        old_expires = timer->it.cpu.expires;
        if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
                ret = TIMER_RETRY;
        } else
                list_del_init(&timer->it.cpu.entry);
-       spin_unlock(&p->sighand->siglock);
 
        /*
         * We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
                 * disable this firing since we are already reporting
                 * it as an overrun (thanks to bump_cpu_timer above).
                 */
+               spin_unlock(&p->sighand->siglock);
                read_unlock(&tasklist_lock);
                goto out;
        }
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
         */
        timer->it.cpu.expires = new_expires;
        if (new_expires.sched != 0 &&
-           (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
            cpu_time_before(timer->it_clock, val, new_expires)) {
-               arm_timer(timer, val);
+               arm_timer(timer);
        }
 
+       spin_unlock(&p->sighand->siglock);
        read_unlock(&tasklist_lock);
 
        /*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
        timer->it_overrun = -1;
 
        if (new_expires.sched != 0 &&
-           (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
            !cpu_time_before(timer->it_clock, val, new_expires)) {
                /*
                 * The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
  out:
        if (old) {
                sample_to_timespec(timer->it_clock,
-                                  timer->it.cpu.incr, &old->it_interval);
+                                  old_incr, &old->it_interval);
        }
        return ret;
 }
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
                read_unlock(&tasklist_lock);
        }
 
-       if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
-               if (timer->it.cpu.incr.sched == 0 &&
-                   cpu_time_before(timer->it_clock,
-                                   timer->it.cpu.expires, now)) {
-                       /*
-                        * Do-nothing timer expired and has no reload,
-                        * so it's as if it was never set.
-                        */
-                       timer->it.cpu.expires.sched = 0;
-                       itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
-                       return;
-               }
-               /*
-                * Account for any expirations and reloads that should
-                * have happened.
-                */
-               bump_cpu_timer(timer, now);
-       }
-
        if (unlikely(clear_dead)) {
                /*
                 * We've noticed that the thread is dead, but
@@ -1270,6 +1206,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                        goto out;
                }
                read_lock(&tasklist_lock); /* arm_timer needs it.  */
+               spin_lock(&p->sighand->siglock);
        } else {
                read_lock(&tasklist_lock);
                if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1227,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                        clear_dead_task(timer, now);
                        goto out_unlock;
                }
+               spin_lock(&p->sighand->siglock);
                cpu_timer_sample_group(timer->it_clock, p, &now);
                bump_cpu_timer(timer, now);
                /* Leave the tasklist_lock locked for the call below.  */
@@ -1298,7 +1236,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
        /*
         * Now re-arm for the new expiry time.
         */
-       arm_timer(timer, now);
+       BUG_ON(!irqs_disabled());
+       arm_timer(timer);
+       spin_unlock(&p->sighand->siglock);
 
 out_unlock:
        read_unlock(&tasklist_lock);
@@ -1390,7 +1330,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
                        return 1;
        }
 
-       return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+       return 0;
 }
 
 /*
@@ -1456,21 +1396,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 }
 
 /*
- * Set one of the process-wide special case CPU timers.
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.
- * The *newval argument is relative and we update it to be absolute, *oldval
- * is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
                           cputime_t *newval, cputime_t *oldval)
 {
        union cpu_time_count now;
-       struct list_head *head;
 
        BUG_ON(clock_idx == CPUCLOCK_SCHED);
        cpu_timer_sample_group(clock_idx, tsk, &now);
 
        if (oldval) {
+               /*
+                * We are setting itimer. The *oldval is absolute and we update
+                * it to be relative, *newval argument is relative and we update
+                * it to be absolute.
+                */
                if (!cputime_eq(*oldval, cputime_zero)) {
                        if (cputime_le(*oldval, now.cpu)) {
                                /* Just about to fire. */
@@ -1483,33 +1425,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
                if (cputime_eq(*newval, cputime_zero))
                        return;
                *newval = cputime_add(*newval, now.cpu);
-
-               /*
-                * If the RLIMIT_CPU timer will expire before the
-                * ITIMER_PROF timer, we have nothing else to do.
-                */
-               if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
-                   < cputime_to_secs(*newval))
-                       return;
        }
 
        /*
-        * Check whether there are any process timers already set to fire
-        * before this one.  If so, we don't have anything more to do.
+        * Update the expiration cache if we are the earliest timer, or if the
+        * RLIMIT_CPU limit is earlier than the cached prof_exp cpu timer expiry.
         */
-       head = &tsk->signal->cpu_timers[clock_idx];
-       if (list_empty(head) ||
-           cputime_ge(list_first_entry(head,
-                                 struct cpu_timer_list, entry)->expires.cpu,
-                      *newval)) {
-               switch (clock_idx) {
-               case CPUCLOCK_PROF:
+       switch (clock_idx) {
+       case CPUCLOCK_PROF:
+               if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
                        tsk->signal->cputime_expires.prof_exp = *newval;
-                       break;
-               case CPUCLOCK_VIRT:
+               break;
+       case CPUCLOCK_VIRT:
+               if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
                        tsk->signal->cputime_expires.virt_exp = *newval;
-                       break;
-               }
+               break;
        }
 }
 
index 656dccf..50612fa 100644 (file)
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
  */
 static inline void warp_clock(void)
 {
-       write_seqlock_irq(&xtime_lock);
-       wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
-       xtime.tv_sec += sys_tz.tz_minuteswest * 60;
-       update_xtime_cache(0);
-       write_sequnlock_irq(&xtime_lock);
-       clock_was_set();
+       struct timespec delta, adjust;
+       delta.tv_sec = sys_tz.tz_minuteswest * 60;
+       delta.tv_nsec = 0;
+       adjust = timespec_add_safe(current_kernel_time(), delta);
+       do_settimeofday(&adjust);
 }
 
 /*
index 7c0f180..c631168 100644 (file)
@@ -69,7 +69,7 @@ static s64                    time_freq;
 /* time at last adjustment (secs):                                     */
 static long                    time_reftime;
 
-long                           time_adjust;
+static long                    time_adjust;
 
 /* constant (boot-param configurable) NTP tick adjustment (upscaled)   */
 static s64                     ntp_tick_adj;
index 39f6177..caf8d4d 100644 (file)
@@ -165,13 +165,6 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-       xtime_cache = xtime;
-       timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
 
        xtime = *tv;
 
-       update_xtime_cache(0);
-
        timekeeper.ntp_error = 0;
        ntp_clear();
 
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
-       update_xtime_cache(0);
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
                wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                total_sleep_time = timespec_add_safe(total_sleep_time, ts);
        }
-       update_xtime_cache(0);
        /* re-base the last cycle value */
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
 {
        struct clocksource *clock;
        cycle_t offset;
-       u64 nsecs;
        int shift = 0, maxshift;
 
        /* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
                timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
        }
 
-       /* store full nanoseconds into xtime after rounding it up and
+
+       /*
+        * Store full nanoseconds into xtime after rounding it up and
         * add the remainder to the error difference.
         */
        xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
 
-       nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-       update_xtime_cache(nsecs);
+       /*
+        * Finally, make sure that after the rounding
+        * xtime.tv_nsec stays below NSEC_PER_SEC
+        */
+       if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+               xtime.tv_nsec -= NSEC_PER_SEC;
+               xtime.tv_sec++;
+               second_overflow();
+       }
 
        /* check to see if there is a new clocksource to use */
        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {
-       return xtime_cache.tv_sec;
+       return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-       return xtime_cache;
+       return xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
        do {
                seq = read_seqbegin(&xtime_lock);
 
-               now = xtime_cache;
+               now = xtime;
        } while (read_seqretry(&xtime_lock, seq));
 
        return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
        do {
                seq = read_seqbegin(&xtime_lock);
 
-               now = xtime_cache;
+               now = xtime;
                mono = wall_to_monotonic;
        } while (read_seqretry(&xtime_lock, seq));
 
index aeb6a54..9199f3c 100644 (file)
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer whose slack is being set
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between the time
+ * mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+       timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
                                        struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
 {
        timer->entry.next = NULL;
        timer->base = __raw_get_cpu_var(tvec_bases);
+       timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
        timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account.
+ * Returns an expiry between @expires and @expires plus the slack; with
+ * auto slack (slack < 0) an already expired timer gets no slack at all.
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) find the highest bit where the expires and new max are different
+ *   3) mask off all bits below it, rounding the maximum time down
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+       unsigned long expires_limit = expires, mask;
+       long delta = expires - jiffies;
+       int bit;
+
+       if (timer->slack >= 0)
+               expires_limit = expires + timer->slack;
+       else if (delta > 0) /* auto slack: use 0.4%, none if already expired */
+               expires_limit = expires + delta / 256;
+
+       mask = expires ^ expires_limit;
+       if (mask == 0)
+               return expires;
+
+       bit = find_last_bit(&mask, BITS_PER_LONG);
+       /* 1UL, not 1: bit may exceed the width of int on 64-bit */
+       mask = (1UL << bit) - 1;
+
+       expires_limit = expires_limit & ~(mask);
+
+       return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
        if (timer_pending(timer) && timer->expires == expires)
                return 1;
 
+       expires = apply_slack(timer, expires);
+
        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
        return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+                         unsigned long data)
+{
+       int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+       /*
+        * It is permissible to free the timer from inside the
+        * function that is called from it, thus we need to take this
+        * into account for lockdep too. To avoid bogus "held lock
+        * freed" warnings as well as problems when looking into
+        * timer->lockdep_map, make a copy and use that here.
+        */
+       struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+       /*
+        * Couple the lock chain with the lock chain at
+        * del_timer_sync() by acquiring the lock_map around the fn()
+        * call here and in del_timer_sync().
+        */
+       lock_map_acquire(&lockdep_map);
+
+       trace_timer_expire_entry(timer);
+       fn(data);
+       trace_timer_expire_exit(timer);
+
+       lock_map_release(&lockdep_map);
+
+       if (preempt_count != preempt_count()) {
+               WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+                         fn, preempt_count, preempt_count());
+               /*
+                * Restore the preempt count saved before fn() ran. That
+                * gives us a decent chance to survive and extract
+                * information. If the callback kept a lock held, bad luck,
+                * but not worse than the BUG() this check used to trigger.
+                */
+               preempt_count() = preempt_count;
+       }
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
                        detach_timer(timer, 1);
 
                        spin_unlock_irq(&base->lock);
-                       {
-                               int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-                               /*
-                                * It is permissible to free the timer from
-                                * inside the function that is called from
-                                * it, this we need to take into account for
-                                * lockdep too. To avoid bogus "held lock
-                                * freed" warnings as well as problems when
-                                * looking into timer->lockdep_map, make a
-                                * copy and use that here.
-                                */
-                               struct lockdep_map lockdep_map =
-                                       timer->lockdep_map;
-#endif
-                               /*
-                                * Couple the lock chain with the lock chain at
-                                * del_timer_sync() by acquiring the lock_map
-                                * around the fn() call here and in
-                                * del_timer_sync().
-                                */
-                               lock_map_acquire(&lockdep_map);
-
-                               trace_timer_expire_entry(timer);
-                               fn(data);
-                               trace_timer_expire_exit(timer);
-
-                               lock_map_release(&lockdep_map);
-
-                               if (preempt_count != preempt_count()) {
-                                       printk(KERN_ERR "huh, entered %p "
-                                              "with preempt_count %08x, exited"
-                                              " with %08x?\n",
-                                              fn, preempt_count,
-                                              preempt_count());
-                                       BUG();
-                               }
-                       }
+                       call_timer_fn(timer, fn, data);
                        spin_lock_irq(&base->lock);
                }
        }