qeth: l3 fix rcu splat in xmit
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <linux/rcupdate.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>
25
26 /*
27    How to set it up.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, a large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         unsigned long   tx_bytes;
62         unsigned long   tx_packets;
63         unsigned long   tx_errors;
64         unsigned long   tx_dropped;
65 };
66
67 struct teql_sched_data {
68         struct Qdisc *next;
69         struct teql_master *m;
70         struct neighbour *ncache;
71         struct sk_buff_head q;
72 };
73
74 #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
75
76 #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
77
78 /* "teql*" qdisc routines */
79
80 static int
81 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
82 {
83         struct net_device *dev = qdisc_dev(sch);
84         struct teql_sched_data *q = qdisc_priv(sch);
85
86         if (q->q.qlen < dev->tx_queue_len) {
87                 __skb_queue_tail(&q->q, skb);
88                 return NET_XMIT_SUCCESS;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static struct sk_buff *
97 teql_dequeue(struct Qdisc *sch)
98 {
99         struct teql_sched_data *dat = qdisc_priv(sch);
100         struct netdev_queue *dat_queue;
101         struct sk_buff *skb;
102
103         skb = __skb_dequeue(&dat->q);
104         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         } else {
112                 qdisc_bstats_update(sch, skb);
113         }
114         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
115         return skb;
116 }
117
/*
 * Peeking is not supported: teql is intended to sit at the root, so this
 * always reports "nothing to peek at".
 */
static struct sk_buff *teql_peek(struct Qdisc *sch)
{
        return NULL;
}
124
/* neigh_release() wrapper that tolerates a NULL neighbour. */
static inline void
teql_neigh_release(struct neighbour *n)
{
        if (!n)
                return;

        neigh_release(n);
}
131
132 static void
133 teql_reset(struct Qdisc *sch)
134 {
135         struct teql_sched_data *dat = qdisc_priv(sch);
136
137         skb_queue_purge(&dat->q);
138         sch->q.qlen = 0;
139         teql_neigh_release(xchg(&dat->ncache, NULL));
140 }
141
142 static void
143 teql_destroy(struct Qdisc *sch)
144 {
145         struct Qdisc *q, *prev;
146         struct teql_sched_data *dat = qdisc_priv(sch);
147         struct teql_master *master = dat->m;
148
149         prev = master->slaves;
150         if (prev) {
151                 do {
152                         q = NEXT_SLAVE(prev);
153                         if (q == sch) {
154                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
155                                 if (q == master->slaves) {
156                                         master->slaves = NEXT_SLAVE(q);
157                                         if (q == master->slaves) {
158                                                 struct netdev_queue *txq;
159                                                 spinlock_t *root_lock;
160
161                                                 txq = netdev_get_tx_queue(master->dev, 0);
162                                                 master->slaves = NULL;
163
164                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
165                                                 spin_lock_bh(root_lock);
166                                                 qdisc_reset(txq->qdisc);
167                                                 spin_unlock_bh(root_lock);
168                                         }
169                                 }
170                                 skb_queue_purge(&dat->q);
171                                 teql_neigh_release(xchg(&dat->ncache, NULL));
172                                 break;
173                         }
174
175                 } while ((prev = q) != master->slaves);
176         }
177 }
178
179 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
180 {
181         struct net_device *dev = qdisc_dev(sch);
182         struct teql_master *m = (struct teql_master *)sch->ops;
183         struct teql_sched_data *q = qdisc_priv(sch);
184
185         if (dev->hard_header_len > m->dev->hard_header_len)
186                 return -EINVAL;
187
188         if (m->dev == dev)
189                 return -ELOOP;
190
191         q->m = m;
192
193         skb_queue_head_init(&q->q);
194
195         if (m->slaves) {
196                 if (m->dev->flags & IFF_UP) {
197                         if ((m->dev->flags & IFF_POINTOPOINT &&
198                              !(dev->flags & IFF_POINTOPOINT)) ||
199                             (m->dev->flags & IFF_BROADCAST &&
200                              !(dev->flags & IFF_BROADCAST)) ||
201                             (m->dev->flags & IFF_MULTICAST &&
202                              !(dev->flags & IFF_MULTICAST)) ||
203                             dev->mtu < m->dev->mtu)
204                                 return -EINVAL;
205                 } else {
206                         if (!(dev->flags&IFF_POINTOPOINT))
207                                 m->dev->flags &= ~IFF_POINTOPOINT;
208                         if (!(dev->flags&IFF_BROADCAST))
209                                 m->dev->flags &= ~IFF_BROADCAST;
210                         if (!(dev->flags&IFF_MULTICAST))
211                                 m->dev->flags &= ~IFF_MULTICAST;
212                         if (dev->mtu < m->dev->mtu)
213                                 m->dev->mtu = dev->mtu;
214                 }
215                 q->next = NEXT_SLAVE(m->slaves);
216                 NEXT_SLAVE(m->slaves) = sch;
217         } else {
218                 q->next = sch;
219                 m->slaves = sch;
220                 m->dev->mtu = dev->mtu;
221                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
222         }
223         return 0;
224 }
225
226
227 static int
228 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
229 {
230         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
231         struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
232         struct neighbour *mn = dst_get_neighbour(skb_dst(skb));
233         struct neighbour *n = q->ncache;
234
235         if (mn->tbl == NULL)
236                 return -EINVAL;
237         if (n && n->tbl == mn->tbl &&
238             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
239                 atomic_inc(&n->refcnt);
240         } else {
241                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
242                 if (IS_ERR(n))
243                         return PTR_ERR(n);
244         }
245         if (neigh_event_send(n, skb_res) == 0) {
246                 int err;
247                 char haddr[MAX_ADDR_LEN];
248
249                 neigh_ha_snapshot(haddr, n, dev);
250                 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
251                                       NULL, skb->len);
252
253                 if (err < 0) {
254                         neigh_release(n);
255                         return -EINVAL;
256                 }
257                 teql_neigh_release(xchg(&q->ncache, n));
258                 return 0;
259         }
260         neigh_release(n);
261         return (skb_res == NULL) ? -EAGAIN : 1;
262 }
263
264 static inline int teql_resolve(struct sk_buff *skb,
265                                struct sk_buff *skb_res, struct net_device *dev)
266 {
267         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
268         if (txq->qdisc == &noop_qdisc)
269                 return -ENODEV;
270
271         if (dev->header_ops == NULL ||
272             skb_dst(skb) == NULL ||
273             dst_get_neighbour(skb_dst(skb)) == NULL)
274                 return 0;
275         return __teql_resolve(skb, skb_res, dev);
276 }
277
278 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
279 {
280         struct teql_master *master = netdev_priv(dev);
281         struct Qdisc *start, *q;
282         int busy;
283         int nores;
284         int subq = skb_get_queue_mapping(skb);
285         struct sk_buff *skb_res = NULL;
286
287         start = master->slaves;
288
289 restart:
290         nores = 0;
291         busy = 0;
292
293         q = start;
294         if (!q)
295                 goto drop;
296
297         do {
298                 struct net_device *slave = qdisc_dev(q);
299                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
300                 const struct net_device_ops *slave_ops = slave->netdev_ops;
301
302                 if (slave_txq->qdisc_sleeping != q)
303                         continue;
304                 if (__netif_subqueue_stopped(slave, subq) ||
305                     !netif_running(slave)) {
306                         busy = 1;
307                         continue;
308                 }
309
310                 switch (teql_resolve(skb, skb_res, slave)) {
311                 case 0:
312                         if (__netif_tx_trylock(slave_txq)) {
313                                 unsigned int length = qdisc_pkt_len(skb);
314
315                                 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
316                                     slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
317                                         txq_trans_update(slave_txq);
318                                         __netif_tx_unlock(slave_txq);
319                                         master->slaves = NEXT_SLAVE(q);
320                                         netif_wake_queue(dev);
321                                         master->tx_packets++;
322                                         master->tx_bytes += length;
323                                         return NETDEV_TX_OK;
324                                 }
325                                 __netif_tx_unlock(slave_txq);
326                         }
327                         if (netif_queue_stopped(dev))
328                                 busy = 1;
329                         break;
330                 case 1:
331                         master->slaves = NEXT_SLAVE(q);
332                         return NETDEV_TX_OK;
333                 default:
334                         nores = 1;
335                         break;
336                 }
337                 __skb_pull(skb, skb_network_offset(skb));
338         } while ((q = NEXT_SLAVE(q)) != start);
339
340         if (nores && skb_res == NULL) {
341                 skb_res = skb;
342                 goto restart;
343         }
344
345         if (busy) {
346                 netif_stop_queue(dev);
347                 return NETDEV_TX_BUSY;
348         }
349         master->tx_errors++;
350
351 drop:
352         master->tx_dropped++;
353         dev_kfree_skb(skb);
354         return NETDEV_TX_OK;
355 }
356
357 static int teql_master_open(struct net_device *dev)
358 {
359         struct Qdisc *q;
360         struct teql_master *m = netdev_priv(dev);
361         int mtu = 0xFFFE;
362         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
363
364         if (m->slaves == NULL)
365                 return -EUNATCH;
366
367         flags = FMASK;
368
369         q = m->slaves;
370         do {
371                 struct net_device *slave = qdisc_dev(q);
372
373                 if (slave == NULL)
374                         return -EUNATCH;
375
376                 if (slave->mtu < mtu)
377                         mtu = slave->mtu;
378                 if (slave->hard_header_len > LL_MAX_HEADER)
379                         return -EINVAL;
380
381                 /* If all the slaves are BROADCAST, master is BROADCAST
382                    If all the slaves are PtP, master is PtP
383                    Otherwise, master is NBMA.
384                  */
385                 if (!(slave->flags&IFF_POINTOPOINT))
386                         flags &= ~IFF_POINTOPOINT;
387                 if (!(slave->flags&IFF_BROADCAST))
388                         flags &= ~IFF_BROADCAST;
389                 if (!(slave->flags&IFF_MULTICAST))
390                         flags &= ~IFF_MULTICAST;
391         } while ((q = NEXT_SLAVE(q)) != m->slaves);
392
393         m->dev->mtu = mtu;
394         m->dev->flags = (m->dev->flags&~FMASK) | flags;
395         netif_start_queue(m->dev);
396         return 0;
397 }
398
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
404
405 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
406                                                      struct rtnl_link_stats64 *stats)
407 {
408         struct teql_master *m = netdev_priv(dev);
409
410         stats->tx_packets       = m->tx_packets;
411         stats->tx_bytes         = m->tx_bytes;
412         stats->tx_errors        = m->tx_errors;
413         stats->tx_dropped       = m->tx_dropped;
414         return stats;
415 }
416
417 static int teql_master_mtu(struct net_device *dev, int new_mtu)
418 {
419         struct teql_master *m = netdev_priv(dev);
420         struct Qdisc *q;
421
422         if (new_mtu < 68)
423                 return -EINVAL;
424
425         q = m->slaves;
426         if (q) {
427                 do {
428                         if (new_mtu > qdisc_dev(q)->mtu)
429                                 return -EINVAL;
430                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
431         }
432
433         dev->mtu = new_mtu;
434         return 0;
435 }
436
437 static const struct net_device_ops teql_netdev_ops = {
438         .ndo_open       = teql_master_open,
439         .ndo_stop       = teql_master_close,
440         .ndo_start_xmit = teql_master_xmit,
441         .ndo_get_stats64 = teql_master_stats64,
442         .ndo_change_mtu = teql_master_mtu,
443 };
444
445 static __init void teql_master_setup(struct net_device *dev)
446 {
447         struct teql_master *master = netdev_priv(dev);
448         struct Qdisc_ops *ops = &master->qops;
449
450         master->dev     = dev;
451         ops->priv_size  = sizeof(struct teql_sched_data);
452
453         ops->enqueue    =       teql_enqueue;
454         ops->dequeue    =       teql_dequeue;
455         ops->peek       =       teql_peek;
456         ops->init       =       teql_qdisc_init;
457         ops->reset      =       teql_reset;
458         ops->destroy    =       teql_destroy;
459         ops->owner      =       THIS_MODULE;
460
461         dev->netdev_ops =       &teql_netdev_ops;
462         dev->type               = ARPHRD_VOID;
463         dev->mtu                = 1500;
464         dev->tx_queue_len       = 100;
465         dev->flags              = IFF_NOARP;
466         dev->hard_header_len    = LL_MAX_HEADER;
467         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
468 }
469
470 static LIST_HEAD(master_dev_list);
471 static int max_equalizers = 1;
472 module_param(max_equalizers, int, 0);
473 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
474
475 static int __init teql_init(void)
476 {
477         int i;
478         int err = -ENODEV;
479
480         for (i = 0; i < max_equalizers; i++) {
481                 struct net_device *dev;
482                 struct teql_master *master;
483
484                 dev = alloc_netdev(sizeof(struct teql_master),
485                                   "teql%d", teql_master_setup);
486                 if (!dev) {
487                         err = -ENOMEM;
488                         break;
489                 }
490
491                 if ((err = register_netdev(dev))) {
492                         free_netdev(dev);
493                         break;
494                 }
495
496                 master = netdev_priv(dev);
497
498                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
499                 err = register_qdisc(&master->qops);
500
501                 if (err) {
502                         unregister_netdev(dev);
503                         free_netdev(dev);
504                         break;
505                 }
506
507                 list_add_tail(&master->master_list, &master_dev_list);
508         }
509         return i ? 0 : err;
510 }
511
512 static void __exit teql_exit(void)
513 {
514         struct teql_master *master, *nxt;
515
516         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
517
518                 list_del(&master->master_list);
519
520                 unregister_qdisc(&master->qops);
521                 unregister_netdev(master->dev);
522                 free_netdev(master->dev);
523         }
524 }
525
526 module_init(teql_init);
527 module_exit(teql_exit);
528
529 MODULE_LICENSE("GPL");