pkt_sched: Manage qdisc list inside of root qdisc.
David S. Miller [Sat, 19 Jul 2008 05:50:15 +0000 (22:50 -0700)]
Idea is from Patrick McHardy.

Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue.  This solves all kinds of
visibility issues during qdisc destruction.

The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse it's list.

The only special case is to ignore builting qdiscs at the root when
dumping or doing a qdisc_lookup().  That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.

Signed-off-by: David S. Miller <davem@davemloft.net>

include/linux/netdevice.h
net/core/dev.c
net/sched/sch_api.c
net/sched/sch_generic.c

index 9c5a688..812bcd8 100644 (file)
@@ -636,8 +636,6 @@ struct net_device
        unsigned int            real_num_tx_queues;
 
        unsigned long           tx_queue_len;   /* Max frames per queue allowed */
-       spinlock_t              qdisc_list_lock;
-       struct list_head        qdisc_list;
 
 /*
  * One part is mostly used on xmit path (device)
index e54acde..065b981 100644 (file)
@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev)
        net = dev_net(dev);
 
        spin_lock_init(&dev->addr_list_lock);
-       spin_lock_init(&dev->qdisc_list_lock);
-       INIT_LIST_HEAD(&dev->qdisc_list);
        netdev_init_queue_locks(dev);
 
        dev->iflink = -1;
index b3ef830..fb43731 100644 (file)
@@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
-       struct Qdisc *q;
+       unsigned int i;
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+               struct Qdisc *q, *txq_root = txq->qdisc;
 
-       list_for_each_entry(q, &dev->qdisc_list, list) {
-               if (q->handle == handle)
-                       return q;
+               if (!(txq_root->flags & TCQ_F_BUILTIN) &&
+                   txq_root->handle == handle)
+                       return txq_root;
+
+               list_for_each_entry(q, &txq_root->list, list) {
+                       if (q->handle == handle)
+                               return q;
+               }
        }
        return NULL;
 }
@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                                goto err_out3;
                        }
                }
-               spin_lock_bh(&dev->qdisc_list_lock);
-               list_add_tail(&sch->list, &dev->qdisc_list);
-               spin_unlock_bh(&dev->qdisc_list_lock);
+               if (parent)
+                       list_add_tail(&sch->list, &dev_queue->qdisc->list);
 
                return sch;
        }
@@ -1037,13 +1045,57 @@ err_out:
        return -EINVAL;
 }
 
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+       return (q->flags & TCQ_F_BUILTIN) ? true : false;
+}
+
+static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
+                             struct netlink_callback *cb,
+                             int *q_idx_p, int s_q_idx)
+{
+       int ret = 0, q_idx = *q_idx_p;
+       struct Qdisc *q;
+
+       if (!root)
+               return 0;
+
+       q = root;
+       if (q_idx < s_q_idx) {
+               q_idx++;
+       } else {
+               if (!tc_qdisc_dump_ignore(q) &&
+                   tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+                                 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+                       goto done;
+               q_idx++;
+       }
+       list_for_each_entry(q, &root->list, list) {
+               if (q_idx < s_q_idx) {
+                       q_idx++;
+                       continue;
+               }
+               if (!tc_qdisc_dump_ignore(q) && 
+                   tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+                                 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+                       goto done;
+               q_idx++;
+       }
+
+out:
+       *q_idx_p = q_idx;
+       return ret;
+done:
+       ret = -1;
+       goto out;
+}
+
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 {
        struct net *net = sock_net(skb->sk);
        int idx, q_idx;
        int s_idx, s_q_idx;
        struct net_device *dev;
-       struct Qdisc *q;
 
        if (net != &init_net)
                return 0;
@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
        read_lock(&dev_base_lock);
        idx = 0;
        for_each_netdev(&init_net, dev) {
+               struct netdev_queue *dev_queue;
+
                if (idx < s_idx)
                        goto cont;
                if (idx > s_idx)
                        s_q_idx = 0;
                q_idx = 0;
-               list_for_each_entry(q, &dev->qdisc_list, list) {
-                       if (q_idx < s_q_idx) {
-                               q_idx++;
-                               continue;
-                       }
-                       if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-                                         cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
-                               goto done;
-                       q_idx++;
-               }
+
+               dev_queue = netdev_get_tx_queue(dev, 0);
+               if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+                       goto done;
+
+               dev_queue = &dev->rx_queue;
+               if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+                       goto done;
+
 cont:
                idx++;
        }
@@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
 }
 
+static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
+                               struct tcmsg *tcm, struct netlink_callback *cb,
+                               int *t_p, int s_t)
+{
+       struct qdisc_dump_args arg;
+
+       if (tc_qdisc_dump_ignore(q) ||
+           *t_p < s_t || !q->ops->cl_ops ||
+           (tcm->tcm_parent &&
+            TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
+               (*t_p)++;
+               return 0;
+       }
+       if (*t_p > s_t)
+               memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+       arg.w.fn = qdisc_class_dump;
+       arg.skb = skb;
+       arg.cb = cb;
+       arg.w.stop  = 0;
+       arg.w.skip = cb->args[1];
+       arg.w.count = 0;
+       q->ops->cl_ops->walk(q, &arg.w);
+       cb->args[1] = arg.w.count;
+       if (arg.w.stop)
+               return -1;
+       (*t_p)++;
+       return 0;
+}
+
+static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
+                              struct tcmsg *tcm, struct netlink_callback *cb,
+                              int *t_p, int s_t)
+{
+       struct Qdisc *q;
+
+       if (!root)
+               return 0;
+
+       if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
+               return -1;
+
+       list_for_each_entry(q, &root->list, list) {
+               if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
        struct net *net = sock_net(skb->sk);
-       int t;
-       int s_t;
+       struct netdev_queue *dev_queue;
        struct net_device *dev;
-       struct Qdisc *q;
-       struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
-       struct qdisc_dump_args arg;
+       int t, s_t;
 
        if (net != &init_net)
                return 0;
@@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        s_t = cb->args[0];
        t = 0;
 
-       list_for_each_entry(q, &dev->qdisc_list, list) {
-               if (t < s_t || !q->ops->cl_ops ||
-                   (tcm->tcm_parent &&
-                    TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
-                       t++;
-                       continue;
-               }
-               if (t > s_t)
-                       memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-               arg.w.fn = qdisc_class_dump;
-               arg.skb = skb;
-               arg.cb = cb;
-               arg.w.stop  = 0;
-               arg.w.skip = cb->args[1];
-               arg.w.count = 0;
-               q->ops->cl_ops->walk(q, &arg.w);
-               cb->args[1] = arg.w.count;
-               if (arg.w.stop)
-                       break;
-               t++;
-       }
+       dev_queue = netdev_get_tx_queue(dev, 0);
+       if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+               goto done;
+
+       dev_queue = &dev->rx_queue;
+       if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+               goto done;
 
+done:
        cb->args[0] = t;
 
        dev_put(dev);
index e244c46..14cc443 100644 (file)
@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-       struct net_device *dev = qdisc_dev(qdisc);
-
        if (qdisc->flags & TCQ_F_BUILTIN ||
            !atomic_dec_and_test(&qdisc->refcnt))
                return;
 
-       spin_lock_bh(&dev->qdisc_list_lock);
-       list_del(&qdisc->list);
-       spin_unlock_bh(&dev->qdisc_list_lock);
+       if (qdisc->parent)
+               list_del(&qdisc->list);
 
        call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev,
                        printk(KERN_INFO "%s: activation failed\n", dev->name);
                        return;
                }
-               spin_lock_bh(&dev->qdisc_list_lock);
-               list_add_tail(&qdisc->list, &dev->qdisc_list);
-               spin_unlock_bh(&dev->qdisc_list_lock);
        } else {
                qdisc =  &noqueue_qdisc;
        }