[SK_BUFF]: Introduce skb_network_offset()
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <linux/bitops.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/mm.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/in.h>
22 #include <linux/errno.h>
23 #include <linux/interrupt.h>
24 #include <linux/if_arp.h>
25 #include <linux/if_ether.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/etherdevice.h>
29 #include <linux/notifier.h>
30 #include <linux/init.h>
31 #include <net/ip.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
34 #include <linux/moduleparam.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38 /*
39    How to set it up.
40    -----------------
41
42    After loading this module you will find a new device teqlN
43    and new qdisc with the same name. To join a slave to the equalizer
44    you should just set this qdisc on a device f.e.
45
46    # tc qdisc add dev eth0 root teql0
47    # tc qdisc add dev eth1 root teql0
48
49    That's all. Full PnP 8)
50
51    Applicability.
52    --------------
53
54    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
55       signal and generate EOI events. If you want to equalize virtual devices
56       like tunnels, use a normal eql device.
57    2. This device puts no limitations on physical slave characteristics
58       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
59       Certainly, large difference in link speeds will make the resulting
60       equalized link unusable, because of huge packet reordering.
61       I estimate an upper useful difference as ~10 times.
62    3. If the slave requires address resolution, only protocols using
63       neighbour cache (IPv4/IPv6) will work over the equalized link.
64       Other protocols are still allowed to use the slave device directly,
65       which will not break load balancing, though native slave
66       traffic will have the highest priority.  */
67
68 struct teql_master
69 {
70         struct Qdisc_ops qops;
71         struct net_device *dev;
72         struct Qdisc *slaves;
73         struct list_head master_list;
74         struct net_device_stats stats;
75 };
76
77 struct teql_sched_data
78 {
79         struct Qdisc *next;
80         struct teql_master *m;
81         struct neighbour *ncache;
82         struct sk_buff_head q;
83 };
84
/* Successor of slave qdisc q in the slave ring; usable as an lvalue. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
86
87 #define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST)
88
89 /* "teql*" qdisc routines */
90
91 static int
92 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
93 {
94         struct net_device *dev = sch->dev;
95         struct teql_sched_data *q = qdisc_priv(sch);
96
97         __skb_queue_tail(&q->q, skb);
98         if (q->q.qlen <= dev->tx_queue_len) {
99                 sch->bstats.bytes += skb->len;
100                 sch->bstats.packets++;
101                 return 0;
102         }
103
104         __skb_unlink(skb, &q->q);
105         kfree_skb(skb);
106         sch->qstats.drops++;
107         return NET_XMIT_DROP;
108 }
109
110 static int
111 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct teql_sched_data *q = qdisc_priv(sch);
114
115         __skb_queue_head(&q->q, skb);
116         sch->qstats.requeues++;
117         return 0;
118 }
119
120 static struct sk_buff *
121 teql_dequeue(struct Qdisc* sch)
122 {
123         struct teql_sched_data *dat = qdisc_priv(sch);
124         struct sk_buff *skb;
125
126         skb = __skb_dequeue(&dat->q);
127         if (skb == NULL) {
128                 struct net_device *m = dat->m->dev->qdisc->dev;
129                 if (m) {
130                         dat->m->slaves = sch;
131                         netif_wake_queue(m);
132                 }
133         }
134         sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
135         return skb;
136 }
137
138 static __inline__ void
139 teql_neigh_release(struct neighbour *n)
140 {
141         if (n)
142                 neigh_release(n);
143 }
144
145 static void
146 teql_reset(struct Qdisc* sch)
147 {
148         struct teql_sched_data *dat = qdisc_priv(sch);
149
150         skb_queue_purge(&dat->q);
151         sch->q.qlen = 0;
152         teql_neigh_release(xchg(&dat->ncache, NULL));
153 }
154
155 static void
156 teql_destroy(struct Qdisc* sch)
157 {
158         struct Qdisc *q, *prev;
159         struct teql_sched_data *dat = qdisc_priv(sch);
160         struct teql_master *master = dat->m;
161
162         if ((prev = master->slaves) != NULL) {
163                 do {
164                         q = NEXT_SLAVE(prev);
165                         if (q == sch) {
166                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
167                                 if (q == master->slaves) {
168                                         master->slaves = NEXT_SLAVE(q);
169                                         if (q == master->slaves) {
170                                                 master->slaves = NULL;
171                                                 spin_lock_bh(&master->dev->queue_lock);
172                                                 qdisc_reset(master->dev->qdisc);
173                                                 spin_unlock_bh(&master->dev->queue_lock);
174                                         }
175                                 }
176                                 skb_queue_purge(&dat->q);
177                                 teql_neigh_release(xchg(&dat->ncache, NULL));
178                                 break;
179                         }
180
181                 } while ((prev = q) != master->slaves);
182         }
183 }
184
185 static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
186 {
187         struct net_device *dev = sch->dev;
188         struct teql_master *m = (struct teql_master*)sch->ops;
189         struct teql_sched_data *q = qdisc_priv(sch);
190
191         if (dev->hard_header_len > m->dev->hard_header_len)
192                 return -EINVAL;
193
194         if (m->dev == dev)
195                 return -ELOOP;
196
197         q->m = m;
198
199         skb_queue_head_init(&q->q);
200
201         if (m->slaves) {
202                 if (m->dev->flags & IFF_UP) {
203                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
204                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
205                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
206                             || dev->mtu < m->dev->mtu)
207                                 return -EINVAL;
208                 } else {
209                         if (!(dev->flags&IFF_POINTOPOINT))
210                                 m->dev->flags &= ~IFF_POINTOPOINT;
211                         if (!(dev->flags&IFF_BROADCAST))
212                                 m->dev->flags &= ~IFF_BROADCAST;
213                         if (!(dev->flags&IFF_MULTICAST))
214                                 m->dev->flags &= ~IFF_MULTICAST;
215                         if (dev->mtu < m->dev->mtu)
216                                 m->dev->mtu = dev->mtu;
217                 }
218                 q->next = NEXT_SLAVE(m->slaves);
219                 NEXT_SLAVE(m->slaves) = sch;
220         } else {
221                 q->next = sch;
222                 m->slaves = sch;
223                 m->dev->mtu = dev->mtu;
224                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
225         }
226         return 0;
227 }
228
229 /* "teql*" netdevice routines */
230
231 static int
232 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
233 {
234         struct teql_sched_data *q = qdisc_priv(dev->qdisc);
235         struct neighbour *mn = skb->dst->neighbour;
236         struct neighbour *n = q->ncache;
237
238         if (mn->tbl == NULL)
239                 return -EINVAL;
240         if (n && n->tbl == mn->tbl &&
241             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
242                 atomic_inc(&n->refcnt);
243         } else {
244                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
245                 if (IS_ERR(n))
246                         return PTR_ERR(n);
247         }
248         if (neigh_event_send(n, skb_res) == 0) {
249                 int err;
250                 read_lock(&n->lock);
251                 err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
252                 read_unlock(&n->lock);
253                 if (err < 0) {
254                         neigh_release(n);
255                         return -EINVAL;
256                 }
257                 teql_neigh_release(xchg(&q->ncache, n));
258                 return 0;
259         }
260         neigh_release(n);
261         return (skb_res == NULL) ? -EAGAIN : 1;
262 }
263
264 static __inline__ int
265 teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
266 {
267         if (dev->hard_header == NULL ||
268             skb->dst == NULL ||
269             skb->dst->neighbour == NULL)
270                 return 0;
271         return __teql_resolve(skb, skb_res, dev);
272 }
273
274 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
275 {
276         struct teql_master *master = netdev_priv(dev);
277         struct Qdisc *start, *q;
278         int busy;
279         int nores;
280         int len = skb->len;
281         struct sk_buff *skb_res = NULL;
282
283         start = master->slaves;
284
285 restart:
286         nores = 0;
287         busy = 0;
288
289         if ((q = start) == NULL)
290                 goto drop;
291
292         do {
293                 struct net_device *slave = q->dev;
294
295                 if (slave->qdisc_sleeping != q)
296                         continue;
297                 if (netif_queue_stopped(slave) || ! netif_running(slave)) {
298                         busy = 1;
299                         continue;
300                 }
301
302                 switch (teql_resolve(skb, skb_res, slave)) {
303                 case 0:
304                         if (netif_tx_trylock(slave)) {
305                                 if (!netif_queue_stopped(slave) &&
306                                     slave->hard_start_xmit(skb, slave) == 0) {
307                                         netif_tx_unlock(slave);
308                                         master->slaves = NEXT_SLAVE(q);
309                                         netif_wake_queue(dev);
310                                         master->stats.tx_packets++;
311                                         master->stats.tx_bytes += len;
312                                         return 0;
313                                 }
314                                 netif_tx_unlock(slave);
315                         }
316                         if (netif_queue_stopped(dev))
317                                 busy = 1;
318                         break;
319                 case 1:
320                         master->slaves = NEXT_SLAVE(q);
321                         return 0;
322                 default:
323                         nores = 1;
324                         break;
325                 }
326                 __skb_pull(skb, skb_network_offset(skb));
327         } while ((q = NEXT_SLAVE(q)) != start);
328
329         if (nores && skb_res == NULL) {
330                 skb_res = skb;
331                 goto restart;
332         }
333
334         if (busy) {
335                 netif_stop_queue(dev);
336                 return 1;
337         }
338         master->stats.tx_errors++;
339
340 drop:
341         master->stats.tx_dropped++;
342         dev_kfree_skb(skb);
343         return 0;
344 }
345
346 static int teql_master_open(struct net_device *dev)
347 {
348         struct Qdisc * q;
349         struct teql_master *m = netdev_priv(dev);
350         int mtu = 0xFFFE;
351         unsigned flags = IFF_NOARP|IFF_MULTICAST;
352
353         if (m->slaves == NULL)
354                 return -EUNATCH;
355
356         flags = FMASK;
357
358         q = m->slaves;
359         do {
360                 struct net_device *slave = q->dev;
361
362                 if (slave == NULL)
363                         return -EUNATCH;
364
365                 if (slave->mtu < mtu)
366                         mtu = slave->mtu;
367                 if (slave->hard_header_len > LL_MAX_HEADER)
368                         return -EINVAL;
369
370                 /* If all the slaves are BROADCAST, master is BROADCAST
371                    If all the slaves are PtP, master is PtP
372                    Otherwise, master is NBMA.
373                  */
374                 if (!(slave->flags&IFF_POINTOPOINT))
375                         flags &= ~IFF_POINTOPOINT;
376                 if (!(slave->flags&IFF_BROADCAST))
377                         flags &= ~IFF_BROADCAST;
378                 if (!(slave->flags&IFF_MULTICAST))
379                         flags &= ~IFF_MULTICAST;
380         } while ((q = NEXT_SLAVE(q)) != m->slaves);
381
382         m->dev->mtu = mtu;
383         m->dev->flags = (m->dev->flags&~FMASK) | flags;
384         netif_start_queue(m->dev);
385         return 0;
386 }
387
/* Stop the master's transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
393
394 static struct net_device_stats *teql_master_stats(struct net_device *dev)
395 {
396         struct teql_master *m = netdev_priv(dev);
397         return &m->stats;
398 }
399
400 static int teql_master_mtu(struct net_device *dev, int new_mtu)
401 {
402         struct teql_master *m = netdev_priv(dev);
403         struct Qdisc *q;
404
405         if (new_mtu < 68)
406                 return -EINVAL;
407
408         q = m->slaves;
409         if (q) {
410                 do {
411                         if (new_mtu > q->dev->mtu)
412                                 return -EINVAL;
413                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
414         }
415
416         dev->mtu = new_mtu;
417         return 0;
418 }
419
420 static __init void teql_master_setup(struct net_device *dev)
421 {
422         struct teql_master *master = netdev_priv(dev);
423         struct Qdisc_ops *ops = &master->qops;
424
425         master->dev     = dev;
426         ops->priv_size  = sizeof(struct teql_sched_data);
427
428         ops->enqueue    =       teql_enqueue;
429         ops->dequeue    =       teql_dequeue;
430         ops->requeue    =       teql_requeue;
431         ops->init       =       teql_qdisc_init;
432         ops->reset      =       teql_reset;
433         ops->destroy    =       teql_destroy;
434         ops->owner      =       THIS_MODULE;
435
436         dev->open               = teql_master_open;
437         dev->hard_start_xmit    = teql_master_xmit;
438         dev->stop               = teql_master_close;
439         dev->get_stats          = teql_master_stats;
440         dev->change_mtu         = teql_master_mtu;
441         dev->type               = ARPHRD_VOID;
442         dev->mtu                = 1500;
443         dev->tx_queue_len       = 100;
444         dev->flags              = IFF_NOARP;
445         dev->hard_header_len    = LL_MAX_HEADER;
446         SET_MODULE_OWNER(dev);
447 }
448
449 static LIST_HEAD(master_dev_list);
450 static int max_equalizers = 1;
451 module_param(max_equalizers, int, 0);
452 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
453
454 static int __init teql_init(void)
455 {
456         int i;
457         int err = -ENODEV;
458
459         for (i = 0; i < max_equalizers; i++) {
460                 struct net_device *dev;
461                 struct teql_master *master;
462
463                 dev = alloc_netdev(sizeof(struct teql_master),
464                                   "teql%d", teql_master_setup);
465                 if (!dev) {
466                         err = -ENOMEM;
467                         break;
468                 }
469
470                 if ((err = register_netdev(dev))) {
471                         free_netdev(dev);
472                         break;
473                 }
474
475                 master = netdev_priv(dev);
476
477                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
478                 err = register_qdisc(&master->qops);
479
480                 if (err) {
481                         unregister_netdev(dev);
482                         free_netdev(dev);
483                         break;
484                 }
485
486                 list_add_tail(&master->master_list, &master_dev_list);
487         }
488         return i ? 0 : err;
489 }
490
491 static void __exit teql_exit(void)
492 {
493         struct teql_master *master, *nxt;
494
495         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
496
497                 list_del(&master->master_list);
498
499                 unregister_qdisc(&master->qops);
500                 unregister_netdev(master->dev);
501                 free_netdev(master->dev);
502         }
503 }
504
505 module_init(teql_init);
506 module_exit(teql_exit);
507
508 MODULE_LICENSE("GPL");