common implementation of iterative div/mod
Jeremy Fitzhardinge [Thu, 12 Jun 2008 08:47:56 +0000 (10:47 +0200)]
We have a few instances of the open-coded iterative div/mod loop, used
when we don't expect the dividend to be much bigger than the divisor.
Unfortunately, modern versions of gcc tend to strength-"reduce" this
into a full mod operation, which isn't necessarily any faster and,
even if it were, isn't available when gcc implements it in libgcc.

The workaround is to put a dummy asm statement in the loop to prevent
gcc from performing the transformation.

This patch creates a single implementation of this loop, and uses it
to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

arch/x86/xen/time.c
include/linux/math64.h
include/linux/time.h
lib/div64.c

index c39e1a5..52b2e38 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
+#include <linux/math64.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -150,11 +151,7 @@ static void do_stolen_accounting(void)
        if (stolen < 0)
                stolen = 0;
 
-       ticks = 0;
-       while (stolen >= NS_PER_TICK) {
-               ticks++;
-               stolen -= NS_PER_TICK;
-       }
+       ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
        __get_cpu_var(residual_stolen) = stolen;
        account_steal_time(NULL, ticks);
 
@@ -166,11 +163,7 @@ static void do_stolen_accounting(void)
        if (blocked < 0)
                blocked = 0;
 
-       ticks = 0;
-       while (blocked >= NS_PER_TICK) {
-               ticks++;
-               blocked -= NS_PER_TICK;
-       }
+       ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
        __get_cpu_var(residual_blocked) = blocked;
        account_steal_time(idle_task(smp_processor_id()), ticks);
 }
index c1a5f81..177785e 100644 (file)
@@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
 }
 #endif
 
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
+
 #endif /* _LINUX_MATH64_H */
index d32ef0a..05f9517 100644 (file)
@@ -6,6 +6,7 @@
 #ifdef __KERNEL__
 # include <linux/cache.h>
 # include <linux/seqlock.h>
+# include <linux/math64.h>
 #endif
 
 #ifndef _STRUCT_TIMESPEC
@@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec);
  */
 static inline void timespec_add_ns(struct timespec *a, u64 ns)
 {
-       ns += a->tv_nsec;
-       while(unlikely(ns >= NSEC_PER_SEC)) {
-               /* The following asm() prevents the compiler from
-                * optimising this loop into a modulo operation.  */
-               asm("" : "+r"(ns));
-
-               ns -= NSEC_PER_SEC;
-               a->tv_sec++;
-       }
+       a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
        a->tv_nsec = ns;
 }
 #endif /* __KERNEL__ */
index bb5bd0c..76c0154 100644 (file)
@@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64);
 #endif
 
 #endif /* BITS_PER_LONG == 32 */
+
+/*
+ * Iterative div/mod for use when dividend is not expected to be much
+ * bigger than divisor.
+ */
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+       u32 ret = 0;
+
+       while (dividend >= divisor) {
+               /* The following asm() prevents the compiler from
+                  optimising this loop into a modulo operation.  */
+               asm("" : "+rm"(dividend));
+
+               dividend -= divisor;
+               ret++;
+       }
+
+       *remainder = dividend;
+
+       return ret;
+}
+EXPORT_SYMBOL(iter_div_u64_rem);