bf03e7fa184953b6112554f829bb691ceb9643df
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/if_arp.h>
17 #include <linux/netdevice.h>
18 #include <linux/init.h>
19 #include <linux/skbuff.h>
20 #include <linux/moduleparam.h>
21 #include <net/dst.h>
22 #include <net/neighbour.h>
23 #include <net/pkt_sched.h>
24
25 /*
26    How to setup it.
27    ----------------
28
29    After loading this module you will find a new device teqlN
30    and a new qdisc with the same name. To join a slave to the equalizer
31    you should just set this qdisc on a device, e.g.
32
33    # tc qdisc add dev eth0 root teql0
34    # tc qdisc add dev eth1 root teql0
35
36    That's all. Full PnP 8)
37
38    Applicability.
39    --------------
40
41    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42       signal and generate EOI events. If you want to equalize virtual devices
43       like tunnels, use a normal eql device.
44    2. This device puts no limitations on physical slave characteristics,
45       e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
46       Certainly, a large difference in link speeds will make the resulting
47       equalized link unusable, because of huge packet reordering.
48       I estimate an upper useful difference as ~10 times.
49    3. If the slave requires address resolution, only protocols using
50       neighbour cache (IPv4/IPv6) will work over the equalized link.
51       Other protocols are still allowed to use the slave device directly,
52       which will not break load balancing, though native slave
53       traffic will have the highest priority.  */
54
55 struct teql_master
56 {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         struct net_device_stats stats;
62 };
63
64 struct teql_sched_data
65 {
66         struct Qdisc *next;
67         struct teql_master *m;
68         struct neighbour *ncache;
69         struct sk_buff_head q;
70 };
71
/* Successor of slave qdisc q in the ring; expands to an lvalue, so it can be assigned. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *) qdisc_priv(q))->next)

/* Device flags the master takes over from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75
76 /* "teql*" qdisc routines */
77
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81         struct net_device *dev = qdisc_dev(sch);
82         struct teql_sched_data *q = qdisc_priv(sch);
83
84         if (q->q.qlen < dev->tx_queue_len) {
85                 __skb_queue_tail(&q->q, skb);
86                 sch->bstats.bytes += qdisc_pkt_len(skb);
87                 sch->bstats.packets++;
88                 return 0;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static int
97 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98 {
99         struct teql_sched_data *q = qdisc_priv(sch);
100
101         __skb_queue_head(&q->q, skb);
102         sch->qstats.requeues++;
103         return 0;
104 }
105
106 static struct sk_buff *
107 teql_dequeue(struct Qdisc* sch)
108 {
109         struct teql_sched_data *dat = qdisc_priv(sch);
110         struct netdev_queue *dat_queue;
111         struct sk_buff *skb;
112
113         skb = __skb_dequeue(&dat->q);
114         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
115         if (skb == NULL) {
116                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
117                 if (m) {
118                         dat->m->slaves = sch;
119                         netif_wake_queue(m);
120                 }
121         }
122         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
123         return skb;
124 }
125
126 static struct sk_buff *
127 teql_peek(struct Qdisc* sch)
128 {
129         /* teql is meant to be used as root qdisc */
130         return NULL;
131 }
132
133 static __inline__ void
134 teql_neigh_release(struct neighbour *n)
135 {
136         if (n)
137                 neigh_release(n);
138 }
139
140 static void
141 teql_reset(struct Qdisc* sch)
142 {
143         struct teql_sched_data *dat = qdisc_priv(sch);
144
145         skb_queue_purge(&dat->q);
146         sch->q.qlen = 0;
147         teql_neigh_release(xchg(&dat->ncache, NULL));
148 }
149
150 static void
151 teql_destroy(struct Qdisc* sch)
152 {
153         struct Qdisc *q, *prev;
154         struct teql_sched_data *dat = qdisc_priv(sch);
155         struct teql_master *master = dat->m;
156
157         if ((prev = master->slaves) != NULL) {
158                 do {
159                         q = NEXT_SLAVE(prev);
160                         if (q == sch) {
161                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
162                                 if (q == master->slaves) {
163                                         master->slaves = NEXT_SLAVE(q);
164                                         if (q == master->slaves) {
165                                                 struct netdev_queue *txq;
166                                                 spinlock_t *root_lock;
167
168                                                 txq = netdev_get_tx_queue(master->dev, 0);
169                                                 master->slaves = NULL;
170
171                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
172                                                 spin_lock_bh(root_lock);
173                                                 qdisc_reset(txq->qdisc);
174                                                 spin_unlock_bh(root_lock);
175                                         }
176                                 }
177                                 skb_queue_purge(&dat->q);
178                                 teql_neigh_release(xchg(&dat->ncache, NULL));
179                                 break;
180                         }
181
182                 } while ((prev = q) != master->slaves);
183         }
184 }
185
186 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
187 {
188         struct net_device *dev = qdisc_dev(sch);
189         struct teql_master *m = (struct teql_master*)sch->ops;
190         struct teql_sched_data *q = qdisc_priv(sch);
191
192         if (dev->hard_header_len > m->dev->hard_header_len)
193                 return -EINVAL;
194
195         if (m->dev == dev)
196                 return -ELOOP;
197
198         q->m = m;
199
200         skb_queue_head_init(&q->q);
201
202         if (m->slaves) {
203                 if (m->dev->flags & IFF_UP) {
204                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
205                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
206                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
207                             || dev->mtu < m->dev->mtu)
208                                 return -EINVAL;
209                 } else {
210                         if (!(dev->flags&IFF_POINTOPOINT))
211                                 m->dev->flags &= ~IFF_POINTOPOINT;
212                         if (!(dev->flags&IFF_BROADCAST))
213                                 m->dev->flags &= ~IFF_BROADCAST;
214                         if (!(dev->flags&IFF_MULTICAST))
215                                 m->dev->flags &= ~IFF_MULTICAST;
216                         if (dev->mtu < m->dev->mtu)
217                                 m->dev->mtu = dev->mtu;
218                 }
219                 q->next = NEXT_SLAVE(m->slaves);
220                 NEXT_SLAVE(m->slaves) = sch;
221         } else {
222                 q->next = sch;
223                 m->slaves = sch;
224                 m->dev->mtu = dev->mtu;
225                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
226         }
227         return 0;
228 }
229
230
231 static int
232 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
233 {
234         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
235         struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
236         struct neighbour *mn = skb->dst->neighbour;
237         struct neighbour *n = q->ncache;
238
239         if (mn->tbl == NULL)
240                 return -EINVAL;
241         if (n && n->tbl == mn->tbl &&
242             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
243                 atomic_inc(&n->refcnt);
244         } else {
245                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
246                 if (IS_ERR(n))
247                         return PTR_ERR(n);
248         }
249         if (neigh_event_send(n, skb_res) == 0) {
250                 int err;
251
252                 read_lock(&n->lock);
253                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
254                                       n->ha, NULL, skb->len);
255                 read_unlock(&n->lock);
256
257                 if (err < 0) {
258                         neigh_release(n);
259                         return -EINVAL;
260                 }
261                 teql_neigh_release(xchg(&q->ncache, n));
262                 return 0;
263         }
264         neigh_release(n);
265         return (skb_res == NULL) ? -EAGAIN : 1;
266 }
267
268 static inline int teql_resolve(struct sk_buff *skb,
269                                struct sk_buff *skb_res, struct net_device *dev)
270 {
271         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
272         if (txq->qdisc == &noop_qdisc)
273                 return -ENODEV;
274
275         if (dev->header_ops == NULL ||
276             skb->dst == NULL ||
277             skb->dst->neighbour == NULL)
278                 return 0;
279         return __teql_resolve(skb, skb_res, dev);
280 }
281
282 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
283 {
284         struct teql_master *master = netdev_priv(dev);
285         struct Qdisc *start, *q;
286         int busy;
287         int nores;
288         int subq = skb_get_queue_mapping(skb);
289         struct sk_buff *skb_res = NULL;
290
291         start = master->slaves;
292
293 restart:
294         nores = 0;
295         busy = 0;
296
297         if ((q = start) == NULL)
298                 goto drop;
299
300         do {
301                 struct net_device *slave = qdisc_dev(q);
302                 struct netdev_queue *slave_txq;
303
304                 slave_txq = netdev_get_tx_queue(slave, 0);
305                 if (slave_txq->qdisc_sleeping != q)
306                         continue;
307                 if (__netif_subqueue_stopped(slave, subq) ||
308                     !netif_running(slave)) {
309                         busy = 1;
310                         continue;
311                 }
312
313                 switch (teql_resolve(skb, skb_res, slave)) {
314                 case 0:
315                         if (__netif_tx_trylock(slave_txq)) {
316                                 if (!netif_tx_queue_stopped(slave_txq) &&
317                                     !netif_tx_queue_frozen(slave_txq) &&
318                                     slave->hard_start_xmit(skb, slave) == 0) {
319                                         __netif_tx_unlock(slave_txq);
320                                         master->slaves = NEXT_SLAVE(q);
321                                         netif_wake_queue(dev);
322                                         master->stats.tx_packets++;
323                                         master->stats.tx_bytes +=
324                                                 qdisc_pkt_len(skb);
325                                         return 0;
326                                 }
327                                 __netif_tx_unlock(slave_txq);
328                         }
329                         if (netif_queue_stopped(dev))
330                                 busy = 1;
331                         break;
332                 case 1:
333                         master->slaves = NEXT_SLAVE(q);
334                         return 0;
335                 default:
336                         nores = 1;
337                         break;
338                 }
339                 __skb_pull(skb, skb_network_offset(skb));
340         } while ((q = NEXT_SLAVE(q)) != start);
341
342         if (nores && skb_res == NULL) {
343                 skb_res = skb;
344                 goto restart;
345         }
346
347         if (busy) {
348                 netif_stop_queue(dev);
349                 return 1;
350         }
351         master->stats.tx_errors++;
352
353 drop:
354         master->stats.tx_dropped++;
355         dev_kfree_skb(skb);
356         return 0;
357 }
358
359 static int teql_master_open(struct net_device *dev)
360 {
361         struct Qdisc * q;
362         struct teql_master *m = netdev_priv(dev);
363         int mtu = 0xFFFE;
364         unsigned flags = IFF_NOARP|IFF_MULTICAST;
365
366         if (m->slaves == NULL)
367                 return -EUNATCH;
368
369         flags = FMASK;
370
371         q = m->slaves;
372         do {
373                 struct net_device *slave = qdisc_dev(q);
374
375                 if (slave == NULL)
376                         return -EUNATCH;
377
378                 if (slave->mtu < mtu)
379                         mtu = slave->mtu;
380                 if (slave->hard_header_len > LL_MAX_HEADER)
381                         return -EINVAL;
382
383                 /* If all the slaves are BROADCAST, master is BROADCAST
384                    If all the slaves are PtP, master is PtP
385                    Otherwise, master is NBMA.
386                  */
387                 if (!(slave->flags&IFF_POINTOPOINT))
388                         flags &= ~IFF_POINTOPOINT;
389                 if (!(slave->flags&IFF_BROADCAST))
390                         flags &= ~IFF_BROADCAST;
391                 if (!(slave->flags&IFF_MULTICAST))
392                         flags &= ~IFF_MULTICAST;
393         } while ((q = NEXT_SLAVE(q)) != m->slaves);
394
395         m->dev->mtu = mtu;
396         m->dev->flags = (m->dev->flags&~FMASK) | flags;
397         netif_start_queue(m->dev);
398         return 0;
399 }
400
/* stop hook of the master device: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
406
407 static struct net_device_stats *teql_master_stats(struct net_device *dev)
408 {
409         struct teql_master *m = netdev_priv(dev);
410         return &m->stats;
411 }
412
413 static int teql_master_mtu(struct net_device *dev, int new_mtu)
414 {
415         struct teql_master *m = netdev_priv(dev);
416         struct Qdisc *q;
417
418         if (new_mtu < 68)
419                 return -EINVAL;
420
421         q = m->slaves;
422         if (q) {
423                 do {
424                         if (new_mtu > qdisc_dev(q)->mtu)
425                                 return -EINVAL;
426                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
427         }
428
429         dev->mtu = new_mtu;
430         return 0;
431 }
432
433 static __init void teql_master_setup(struct net_device *dev)
434 {
435         struct teql_master *master = netdev_priv(dev);
436         struct Qdisc_ops *ops = &master->qops;
437
438         master->dev     = dev;
439         ops->priv_size  = sizeof(struct teql_sched_data);
440
441         ops->enqueue    =       teql_enqueue;
442         ops->dequeue    =       teql_dequeue;
443         ops->peek       =       teql_peek;
444         ops->requeue    =       teql_requeue;
445         ops->init       =       teql_qdisc_init;
446         ops->reset      =       teql_reset;
447         ops->destroy    =       teql_destroy;
448         ops->owner      =       THIS_MODULE;
449
450         dev->open               = teql_master_open;
451         dev->hard_start_xmit    = teql_master_xmit;
452         dev->stop               = teql_master_close;
453         dev->get_stats          = teql_master_stats;
454         dev->change_mtu         = teql_master_mtu;
455         dev->type               = ARPHRD_VOID;
456         dev->mtu                = 1500;
457         dev->tx_queue_len       = 100;
458         dev->flags              = IFF_NOARP;
459         dev->hard_header_len    = LL_MAX_HEADER;
460 }
461
462 static LIST_HEAD(master_dev_list);
463 static int max_equalizers = 1;
464 module_param(max_equalizers, int, 0);
465 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
466
467 static int __init teql_init(void)
468 {
469         int i;
470         int err = -ENODEV;
471
472         for (i = 0; i < max_equalizers; i++) {
473                 struct net_device *dev;
474                 struct teql_master *master;
475
476                 dev = alloc_netdev(sizeof(struct teql_master),
477                                   "teql%d", teql_master_setup);
478                 if (!dev) {
479                         err = -ENOMEM;
480                         break;
481                 }
482
483                 if ((err = register_netdev(dev))) {
484                         free_netdev(dev);
485                         break;
486                 }
487
488                 master = netdev_priv(dev);
489
490                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
491                 err = register_qdisc(&master->qops);
492
493                 if (err) {
494                         unregister_netdev(dev);
495                         free_netdev(dev);
496                         break;
497                 }
498
499                 list_add_tail(&master->master_list, &master_dev_list);
500         }
501         return i ? 0 : err;
502 }
503
504 static void __exit teql_exit(void)
505 {
506         struct teql_master *master, *nxt;
507
508         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
509
510                 list_del(&master->master_list);
511
512                 unregister_qdisc(&master->qops);
513                 unregister_netdev(master->dev);
514                 free_netdev(master->dev);
515         }
516 }
517
518 module_init(teql_init);
519 module_exit(teql_exit);
520
521 MODULE_LICENSE("GPL");