[NET]: Hashed spinlocks in net/ipv4/route.c
Eric Dumazet [Tue, 5 Jul 2005 21:55:24 +0000 (14:55 -0700)]
- Locking abstraction
- Spinlocks moved out of rt hash table : Less memory (50%) used by rt
  hash table. it's a win even on UP.
- Sizing of spinlocks table depends on NR_CPUS

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

net/ipv4/route.c

index 12a1cf3..daf82f8 100644 (file)
@@ -54,6 +54,7 @@
  *             Marc Boucher    :       routing by fwmark
  *     Robert Olsson           :       Added rt_cache statistics
  *     Arnaldo C. Melo         :       Convert proc stuff to seq_file
+ *     Eric Dumazet            :       hashed spinlocks
  *
  *             This program is free software; you can redistribute it and/or
  *             modify it under the terms of the GNU General Public License
@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = {
 
 struct rt_hash_bucket {
        struct rtable   *chain;
-       spinlock_t      lock;
-} __attribute__((__aligned__(8)));
+};
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+/*
+ * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
+ * The size of this table is a power of two and depends on the number of CPUS.
+ */
+#if NR_CPUS >= 32
+#define RT_HASH_LOCK_SZ        4096
+#elif NR_CPUS >= 16
+#define RT_HASH_LOCK_SZ        2048
+#elif NR_CPUS >= 8
+#define RT_HASH_LOCK_SZ        1024
+#elif NR_CPUS >= 4
+#define RT_HASH_LOCK_SZ        512
+#else
+#define RT_HASH_LOCK_SZ        256
+#endif
+
+static spinlock_t      *rt_hash_locks;
+# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
+# define rt_hash_lock_init()   { \
+               int i; \
+               rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
+               if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
+               for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
+                       spin_lock_init(&rt_hash_locks[i]); \
+               }
+#else
+# define rt_hash_lock_addr(slot) NULL
+# define rt_hash_lock_init()
+#endif
 
 static struct rt_hash_bucket   *rt_hash_table;
 static unsigned                        rt_hash_mask;
@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy)
                i = (i + 1) & rt_hash_mask;
                rthp = &rt_hash_table[i].chain;
 
-               spin_lock(&rt_hash_table[i].lock);
+               spin_lock(rt_hash_lock_addr(i));
                while ((rth = *rthp) != NULL) {
                        if (rth->u.dst.expires) {
                                /* Entry is expired even if it is in use */
@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy)
                        rt_free(rth);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
                }
-               spin_unlock(&rt_hash_table[i].lock);
+               spin_unlock(rt_hash_lock_addr(i));
 
                /* Fallback loop breaker. */
                if (time_after(jiffies, now))
@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy)
        get_random_bytes(&rt_hash_rnd, 4);
 
        for (i = rt_hash_mask; i >= 0; i--) {
-               spin_lock_bh(&rt_hash_table[i].lock);
+               spin_lock_bh(rt_hash_lock_addr(i));
                rth = rt_hash_table[i].chain;
                if (rth)
                        rt_hash_table[i].chain = NULL;
-               spin_unlock_bh(&rt_hash_table[i].lock);
+               spin_unlock_bh(rt_hash_lock_addr(i));
 
                for (; rth; rth = next) {
                        next = rth->u.rt_next;
@@ -780,7 +810,7 @@ static int rt_garbage_collect(void)
 
                        k = (k + 1) & rt_hash_mask;
                        rthp = &rt_hash_table[k].chain;
-                       spin_lock_bh(&rt_hash_table[k].lock);
+                       spin_lock_bh(rt_hash_lock_addr(k));
                        while ((rth = *rthp) != NULL) {
                                if (!rt_may_expire(rth, tmo, expire)) {
                                        tmo >>= 1;
@@ -812,7 +842,7 @@ static int rt_garbage_collect(void)
                                goal--;
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
                        }
-                       spin_unlock_bh(&rt_hash_table[k].lock);
+                       spin_unlock_bh(rt_hash_lock_addr(k));
                        if (goal <= 0)
                                break;
                }
@@ -882,7 +912,7 @@ restart:
 
        rthp = &rt_hash_table[hash].chain;
 
-       spin_lock_bh(&rt_hash_table[hash].lock);
+       spin_lock_bh(rt_hash_lock_addr(hash));
        while ((rth = *rthp) != NULL) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
                if (!(rth->u.dst.flags & DST_BALANCED) &&
@@ -908,7 +938,7 @@ restart:
                        rth->u.dst.__use++;
                        dst_hold(&rth->u.dst);
                        rth->u.dst.lastuse = now;
-                       spin_unlock_bh(&rt_hash_table[hash].lock);
+                       spin_unlock_bh(rt_hash_lock_addr(hash));
 
                        rt_drop(rt);
                        *rp = rth;
@@ -949,7 +979,7 @@ restart:
        if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
                int err = arp_bind_neighbour(&rt->u.dst);
                if (err) {
-                       spin_unlock_bh(&rt_hash_table[hash].lock);
+                       spin_unlock_bh(rt_hash_lock_addr(hash));
 
                        if (err != -ENOBUFS) {
                                rt_drop(rt);
@@ -990,7 +1020,7 @@ restart:
        }
 #endif
        rt_hash_table[hash].chain = rt;
-       spin_unlock_bh(&rt_hash_table[hash].lock);
+       spin_unlock_bh(rt_hash_lock_addr(hash));
        *rp = rt;
        return 0;
 }
@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 {
        struct rtable **rthp;
 
-       spin_lock_bh(&rt_hash_table[hash].lock);
+       spin_lock_bh(rt_hash_lock_addr(hash));
        ip_rt_put(rt);
        for (rthp = &rt_hash_table[hash].chain; *rthp;
             rthp = &(*rthp)->u.rt_next)
@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
                        rt_free(rt);
                        break;
                }
-       spin_unlock_bh(&rt_hash_table[hash].lock);
+       spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries);
 
 int __init ip_rt_init(void)
 {
-       int i, order, goal, rc = 0;
+       int order, goal, rc = 0;
 
        rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
                             (jiffies ^ (jiffies >> 7)));
@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void)
                /* NOTHING */;
 
        rt_hash_mask--;
-       for (i = 0; i <= rt_hash_mask; i++) {
-               spin_lock_init(&rt_hash_table[i].lock);
-               rt_hash_table[i].chain = NULL;
-       }
+       memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
+       rt_hash_lock_init();
 
        ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
        ip_rt_max_size = (rt_hash_mask + 1) * 16;