106479a7c94a494665f6783578e61182e82453d5
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to set it up.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master
57 {
58         struct Qdisc_ops qops;
59         struct net_device *dev;
60         struct Qdisc *slaves;
61         struct list_head master_list;
62 };
63
64 struct teql_sched_data
65 {
66         struct Qdisc *next;
67         struct teql_master *m;
68         struct neighbour *ncache;
69         struct sk_buff_head q;
70 };
71
/* Successor of slave qdisc q in the circular slave list (an lvalue). */
#define NEXT_SLAVE(q) \
	(((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device mirrors from its slaves. */
#define FMASK	(IFF_BROADCAST | IFF_POINTOPOINT)
75
76 /* "teql*" qdisc routines */
77
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81         struct net_device *dev = qdisc_dev(sch);
82         struct teql_sched_data *q = qdisc_priv(sch);
83
84         if (q->q.qlen < dev->tx_queue_len) {
85                 __skb_queue_tail(&q->q, skb);
86                 sch->bstats.bytes += qdisc_pkt_len(skb);
87                 sch->bstats.packets++;
88                 return NET_XMIT_SUCCESS;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static struct sk_buff *
97 teql_dequeue(struct Qdisc* sch)
98 {
99         struct teql_sched_data *dat = qdisc_priv(sch);
100         struct netdev_queue *dat_queue;
101         struct sk_buff *skb;
102
103         skb = __skb_dequeue(&dat->q);
104         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         }
112         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113         return skb;
114 }
115
116 static struct sk_buff *
117 teql_peek(struct Qdisc* sch)
118 {
119         /* teql is meant to be used as root qdisc */
120         return NULL;
121 }
122
123 static __inline__ void
124 teql_neigh_release(struct neighbour *n)
125 {
126         if (n)
127                 neigh_release(n);
128 }
129
130 static void
131 teql_reset(struct Qdisc* sch)
132 {
133         struct teql_sched_data *dat = qdisc_priv(sch);
134
135         skb_queue_purge(&dat->q);
136         sch->q.qlen = 0;
137         teql_neigh_release(xchg(&dat->ncache, NULL));
138 }
139
140 static void
141 teql_destroy(struct Qdisc* sch)
142 {
143         struct Qdisc *q, *prev;
144         struct teql_sched_data *dat = qdisc_priv(sch);
145         struct teql_master *master = dat->m;
146
147         if ((prev = master->slaves) != NULL) {
148                 do {
149                         q = NEXT_SLAVE(prev);
150                         if (q == sch) {
151                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
152                                 if (q == master->slaves) {
153                                         master->slaves = NEXT_SLAVE(q);
154                                         if (q == master->slaves) {
155                                                 struct netdev_queue *txq;
156                                                 spinlock_t *root_lock;
157
158                                                 txq = netdev_get_tx_queue(master->dev, 0);
159                                                 master->slaves = NULL;
160
161                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
162                                                 spin_lock_bh(root_lock);
163                                                 qdisc_reset(txq->qdisc);
164                                                 spin_unlock_bh(root_lock);
165                                         }
166                                 }
167                                 skb_queue_purge(&dat->q);
168                                 teql_neigh_release(xchg(&dat->ncache, NULL));
169                                 break;
170                         }
171
172                 } while ((prev = q) != master->slaves);
173         }
174 }
175
176 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
177 {
178         struct net_device *dev = qdisc_dev(sch);
179         struct teql_master *m = (struct teql_master*)sch->ops;
180         struct teql_sched_data *q = qdisc_priv(sch);
181
182         if (dev->hard_header_len > m->dev->hard_header_len)
183                 return -EINVAL;
184
185         if (m->dev == dev)
186                 return -ELOOP;
187
188         q->m = m;
189
190         skb_queue_head_init(&q->q);
191
192         if (m->slaves) {
193                 if (m->dev->flags & IFF_UP) {
194                         if ((m->dev->flags & IFF_POINTOPOINT &&
195                              !(dev->flags & IFF_POINTOPOINT)) ||
196                             (m->dev->flags & IFF_BROADCAST &&
197                              !(dev->flags & IFF_BROADCAST)) ||
198                             (m->dev->flags & IFF_MULTICAST &&
199                              !(dev->flags & IFF_MULTICAST)) ||
200                             dev->mtu < m->dev->mtu)
201                                 return -EINVAL;
202                 } else {
203                         if (!(dev->flags&IFF_POINTOPOINT))
204                                 m->dev->flags &= ~IFF_POINTOPOINT;
205                         if (!(dev->flags&IFF_BROADCAST))
206                                 m->dev->flags &= ~IFF_BROADCAST;
207                         if (!(dev->flags&IFF_MULTICAST))
208                                 m->dev->flags &= ~IFF_MULTICAST;
209                         if (dev->mtu < m->dev->mtu)
210                                 m->dev->mtu = dev->mtu;
211                 }
212                 q->next = NEXT_SLAVE(m->slaves);
213                 NEXT_SLAVE(m->slaves) = sch;
214         } else {
215                 q->next = sch;
216                 m->slaves = sch;
217                 m->dev->mtu = dev->mtu;
218                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
219         }
220         return 0;
221 }
222
223
224 static int
225 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
226 {
227         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
228         struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
229         struct neighbour *mn = skb_dst(skb)->neighbour;
230         struct neighbour *n = q->ncache;
231
232         if (mn->tbl == NULL)
233                 return -EINVAL;
234         if (n && n->tbl == mn->tbl &&
235             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
236                 atomic_inc(&n->refcnt);
237         } else {
238                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
239                 if (IS_ERR(n))
240                         return PTR_ERR(n);
241         }
242         if (neigh_event_send(n, skb_res) == 0) {
243                 int err;
244                 char haddr[MAX_ADDR_LEN];
245
246                 neigh_ha_snapshot(haddr, n, dev);
247                 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
248                                       NULL, skb->len);
249
250                 if (err < 0) {
251                         neigh_release(n);
252                         return -EINVAL;
253                 }
254                 teql_neigh_release(xchg(&q->ncache, n));
255                 return 0;
256         }
257         neigh_release(n);
258         return (skb_res == NULL) ? -EAGAIN : 1;
259 }
260
261 static inline int teql_resolve(struct sk_buff *skb,
262                                struct sk_buff *skb_res, struct net_device *dev)
263 {
264         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
265         if (txq->qdisc == &noop_qdisc)
266                 return -ENODEV;
267
268         if (dev->header_ops == NULL ||
269             skb_dst(skb) == NULL ||
270             skb_dst(skb)->neighbour == NULL)
271                 return 0;
272         return __teql_resolve(skb, skb_res, dev);
273 }
274
275 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
276 {
277         struct teql_master *master = netdev_priv(dev);
278         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
279         struct Qdisc *start, *q;
280         int busy;
281         int nores;
282         int subq = skb_get_queue_mapping(skb);
283         struct sk_buff *skb_res = NULL;
284
285         start = master->slaves;
286
287 restart:
288         nores = 0;
289         busy = 0;
290
291         if ((q = start) == NULL)
292                 goto drop;
293
294         do {
295                 struct net_device *slave = qdisc_dev(q);
296                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
297                 const struct net_device_ops *slave_ops = slave->netdev_ops;
298
299                 if (slave_txq->qdisc_sleeping != q)
300                         continue;
301                 if (__netif_subqueue_stopped(slave, subq) ||
302                     !netif_running(slave)) {
303                         busy = 1;
304                         continue;
305                 }
306
307                 switch (teql_resolve(skb, skb_res, slave)) {
308                 case 0:
309                         if (__netif_tx_trylock(slave_txq)) {
310                                 unsigned int length = qdisc_pkt_len(skb);
311
312                                 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
313                                     slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
314                                         txq_trans_update(slave_txq);
315                                         __netif_tx_unlock(slave_txq);
316                                         master->slaves = NEXT_SLAVE(q);
317                                         netif_wake_queue(dev);
318                                         txq->tx_packets++;
319                                         txq->tx_bytes += length;
320                                         return NETDEV_TX_OK;
321                                 }
322                                 __netif_tx_unlock(slave_txq);
323                         }
324                         if (netif_queue_stopped(dev))
325                                 busy = 1;
326                         break;
327                 case 1:
328                         master->slaves = NEXT_SLAVE(q);
329                         return NETDEV_TX_OK;
330                 default:
331                         nores = 1;
332                         break;
333                 }
334                 __skb_pull(skb, skb_network_offset(skb));
335         } while ((q = NEXT_SLAVE(q)) != start);
336
337         if (nores && skb_res == NULL) {
338                 skb_res = skb;
339                 goto restart;
340         }
341
342         if (busy) {
343                 netif_stop_queue(dev);
344                 return NETDEV_TX_BUSY;
345         }
346         dev->stats.tx_errors++;
347
348 drop:
349         txq->tx_dropped++;
350         dev_kfree_skb(skb);
351         return NETDEV_TX_OK;
352 }
353
354 static int teql_master_open(struct net_device *dev)
355 {
356         struct Qdisc * q;
357         struct teql_master *m = netdev_priv(dev);
358         int mtu = 0xFFFE;
359         unsigned flags = IFF_NOARP|IFF_MULTICAST;
360
361         if (m->slaves == NULL)
362                 return -EUNATCH;
363
364         flags = FMASK;
365
366         q = m->slaves;
367         do {
368                 struct net_device *slave = qdisc_dev(q);
369
370                 if (slave == NULL)
371                         return -EUNATCH;
372
373                 if (slave->mtu < mtu)
374                         mtu = slave->mtu;
375                 if (slave->hard_header_len > LL_MAX_HEADER)
376                         return -EINVAL;
377
378                 /* If all the slaves are BROADCAST, master is BROADCAST
379                    If all the slaves are PtP, master is PtP
380                    Otherwise, master is NBMA.
381                  */
382                 if (!(slave->flags&IFF_POINTOPOINT))
383                         flags &= ~IFF_POINTOPOINT;
384                 if (!(slave->flags&IFF_BROADCAST))
385                         flags &= ~IFF_BROADCAST;
386                 if (!(slave->flags&IFF_MULTICAST))
387                         flags &= ~IFF_MULTICAST;
388         } while ((q = NEXT_SLAVE(q)) != m->slaves);
389
390         m->dev->mtu = mtu;
391         m->dev->flags = (m->dev->flags&~FMASK) | flags;
392         netif_start_queue(m->dev);
393         return 0;
394 }
395
/* ndo_stop of the master device: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
401
402 static int teql_master_mtu(struct net_device *dev, int new_mtu)
403 {
404         struct teql_master *m = netdev_priv(dev);
405         struct Qdisc *q;
406
407         if (new_mtu < 68)
408                 return -EINVAL;
409
410         q = m->slaves;
411         if (q) {
412                 do {
413                         if (new_mtu > qdisc_dev(q)->mtu)
414                                 return -EINVAL;
415                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
416         }
417
418         dev->mtu = new_mtu;
419         return 0;
420 }
421
422 static const struct net_device_ops teql_netdev_ops = {
423         .ndo_open       = teql_master_open,
424         .ndo_stop       = teql_master_close,
425         .ndo_start_xmit = teql_master_xmit,
426         .ndo_change_mtu = teql_master_mtu,
427 };
428
429 static __init void teql_master_setup(struct net_device *dev)
430 {
431         struct teql_master *master = netdev_priv(dev);
432         struct Qdisc_ops *ops = &master->qops;
433
434         master->dev     = dev;
435         ops->priv_size  = sizeof(struct teql_sched_data);
436
437         ops->enqueue    =       teql_enqueue;
438         ops->dequeue    =       teql_dequeue;
439         ops->peek       =       teql_peek;
440         ops->init       =       teql_qdisc_init;
441         ops->reset      =       teql_reset;
442         ops->destroy    =       teql_destroy;
443         ops->owner      =       THIS_MODULE;
444
445         dev->netdev_ops =       &teql_netdev_ops;
446         dev->type               = ARPHRD_VOID;
447         dev->mtu                = 1500;
448         dev->tx_queue_len       = 100;
449         dev->flags              = IFF_NOARP;
450         dev->hard_header_len    = LL_MAX_HEADER;
451         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
452 }
453
454 static LIST_HEAD(master_dev_list);
455 static int max_equalizers = 1;
456 module_param(max_equalizers, int, 0);
457 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
458
459 static int __init teql_init(void)
460 {
461         int i;
462         int err = -ENODEV;
463
464         for (i = 0; i < max_equalizers; i++) {
465                 struct net_device *dev;
466                 struct teql_master *master;
467
468                 dev = alloc_netdev(sizeof(struct teql_master),
469                                   "teql%d", teql_master_setup);
470                 if (!dev) {
471                         err = -ENOMEM;
472                         break;
473                 }
474
475                 if ((err = register_netdev(dev))) {
476                         free_netdev(dev);
477                         break;
478                 }
479
480                 master = netdev_priv(dev);
481
482                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
483                 err = register_qdisc(&master->qops);
484
485                 if (err) {
486                         unregister_netdev(dev);
487                         free_netdev(dev);
488                         break;
489                 }
490
491                 list_add_tail(&master->master_list, &master_dev_list);
492         }
493         return i ? 0 : err;
494 }
495
496 static void __exit teql_exit(void)
497 {
498         struct teql_master *master, *nxt;
499
500         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
501
502                 list_del(&master->master_list);
503
504                 unregister_qdisc(&master->qops);
505                 unregister_netdev(master->dev);
506                 free_netdev(master->dev);
507         }
508 }
509
510 module_init(teql_init);
511 module_exit(teql_exit);
512
513 MODULE_LICENSE("GPL");