net_sched: accurate bytes/packets stats/rates
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to setup it.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master
57 {
58         struct Qdisc_ops qops;
59         struct net_device *dev;
60         struct Qdisc *slaves;
61         struct list_head master_list;
62         unsigned long   tx_bytes;
63         unsigned long   tx_packets;
64         unsigned long   tx_errors;
65         unsigned long   tx_dropped;
66 };
67
68 struct teql_sched_data
69 {
70         struct Qdisc *next;
71         struct teql_master *m;
72         struct neighbour *ncache;
73         struct sk_buff_head q;
74 };
75
/*
 * Successor of slave qdisc @q in the circular slave list.  Expands to an
 * lvalue, so it is used both to read and to rewrite the link.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
79
80 /* "teql*" qdisc routines */
81
82 static int
83 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
84 {
85         struct net_device *dev = qdisc_dev(sch);
86         struct teql_sched_data *q = qdisc_priv(sch);
87
88         if (q->q.qlen < dev->tx_queue_len) {
89                 __skb_queue_tail(&q->q, skb);
90                 return NET_XMIT_SUCCESS;
91         }
92
93         kfree_skb(skb);
94         sch->qstats.drops++;
95         return NET_XMIT_DROP;
96 }
97
98 static struct sk_buff *
99 teql_dequeue(struct Qdisc* sch)
100 {
101         struct teql_sched_data *dat = qdisc_priv(sch);
102         struct netdev_queue *dat_queue;
103         struct sk_buff *skb;
104
105         skb = __skb_dequeue(&dat->q);
106         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
107         if (skb == NULL) {
108                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
109                 if (m) {
110                         dat->m->slaves = sch;
111                         netif_wake_queue(m);
112                 }
113         } else {
114                 qdisc_bstats_update(sch, skb);
115         }
116         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
117         return skb;
118 }
119
120 static struct sk_buff *
121 teql_peek(struct Qdisc* sch)
122 {
123         /* teql is meant to be used as root qdisc */
124         return NULL;
125 }
126
127 static __inline__ void
128 teql_neigh_release(struct neighbour *n)
129 {
130         if (n)
131                 neigh_release(n);
132 }
133
134 static void
135 teql_reset(struct Qdisc* sch)
136 {
137         struct teql_sched_data *dat = qdisc_priv(sch);
138
139         skb_queue_purge(&dat->q);
140         sch->q.qlen = 0;
141         teql_neigh_release(xchg(&dat->ncache, NULL));
142 }
143
144 static void
145 teql_destroy(struct Qdisc* sch)
146 {
147         struct Qdisc *q, *prev;
148         struct teql_sched_data *dat = qdisc_priv(sch);
149         struct teql_master *master = dat->m;
150
151         if ((prev = master->slaves) != NULL) {
152                 do {
153                         q = NEXT_SLAVE(prev);
154                         if (q == sch) {
155                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
156                                 if (q == master->slaves) {
157                                         master->slaves = NEXT_SLAVE(q);
158                                         if (q == master->slaves) {
159                                                 struct netdev_queue *txq;
160                                                 spinlock_t *root_lock;
161
162                                                 txq = netdev_get_tx_queue(master->dev, 0);
163                                                 master->slaves = NULL;
164
165                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
166                                                 spin_lock_bh(root_lock);
167                                                 qdisc_reset(txq->qdisc);
168                                                 spin_unlock_bh(root_lock);
169                                         }
170                                 }
171                                 skb_queue_purge(&dat->q);
172                                 teql_neigh_release(xchg(&dat->ncache, NULL));
173                                 break;
174                         }
175
176                 } while ((prev = q) != master->slaves);
177         }
178 }
179
180 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
181 {
182         struct net_device *dev = qdisc_dev(sch);
183         struct teql_master *m = (struct teql_master*)sch->ops;
184         struct teql_sched_data *q = qdisc_priv(sch);
185
186         if (dev->hard_header_len > m->dev->hard_header_len)
187                 return -EINVAL;
188
189         if (m->dev == dev)
190                 return -ELOOP;
191
192         q->m = m;
193
194         skb_queue_head_init(&q->q);
195
196         if (m->slaves) {
197                 if (m->dev->flags & IFF_UP) {
198                         if ((m->dev->flags & IFF_POINTOPOINT &&
199                              !(dev->flags & IFF_POINTOPOINT)) ||
200                             (m->dev->flags & IFF_BROADCAST &&
201                              !(dev->flags & IFF_BROADCAST)) ||
202                             (m->dev->flags & IFF_MULTICAST &&
203                              !(dev->flags & IFF_MULTICAST)) ||
204                             dev->mtu < m->dev->mtu)
205                                 return -EINVAL;
206                 } else {
207                         if (!(dev->flags&IFF_POINTOPOINT))
208                                 m->dev->flags &= ~IFF_POINTOPOINT;
209                         if (!(dev->flags&IFF_BROADCAST))
210                                 m->dev->flags &= ~IFF_BROADCAST;
211                         if (!(dev->flags&IFF_MULTICAST))
212                                 m->dev->flags &= ~IFF_MULTICAST;
213                         if (dev->mtu < m->dev->mtu)
214                                 m->dev->mtu = dev->mtu;
215                 }
216                 q->next = NEXT_SLAVE(m->slaves);
217                 NEXT_SLAVE(m->slaves) = sch;
218         } else {
219                 q->next = sch;
220                 m->slaves = sch;
221                 m->dev->mtu = dev->mtu;
222                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
223         }
224         return 0;
225 }
226
227
228 static int
229 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
230 {
231         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
232         struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
233         struct neighbour *mn = skb_dst(skb)->neighbour;
234         struct neighbour *n = q->ncache;
235
236         if (mn->tbl == NULL)
237                 return -EINVAL;
238         if (n && n->tbl == mn->tbl &&
239             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
240                 atomic_inc(&n->refcnt);
241         } else {
242                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
243                 if (IS_ERR(n))
244                         return PTR_ERR(n);
245         }
246         if (neigh_event_send(n, skb_res) == 0) {
247                 int err;
248                 char haddr[MAX_ADDR_LEN];
249
250                 neigh_ha_snapshot(haddr, n, dev);
251                 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
252                                       NULL, skb->len);
253
254                 if (err < 0) {
255                         neigh_release(n);
256                         return -EINVAL;
257                 }
258                 teql_neigh_release(xchg(&q->ncache, n));
259                 return 0;
260         }
261         neigh_release(n);
262         return (skb_res == NULL) ? -EAGAIN : 1;
263 }
264
265 static inline int teql_resolve(struct sk_buff *skb,
266                                struct sk_buff *skb_res, struct net_device *dev)
267 {
268         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
269         if (txq->qdisc == &noop_qdisc)
270                 return -ENODEV;
271
272         if (dev->header_ops == NULL ||
273             skb_dst(skb) == NULL ||
274             skb_dst(skb)->neighbour == NULL)
275                 return 0;
276         return __teql_resolve(skb, skb_res, dev);
277 }
278
279 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
280 {
281         struct teql_master *master = netdev_priv(dev);
282         struct Qdisc *start, *q;
283         int busy;
284         int nores;
285         int subq = skb_get_queue_mapping(skb);
286         struct sk_buff *skb_res = NULL;
287
288         start = master->slaves;
289
290 restart:
291         nores = 0;
292         busy = 0;
293
294         if ((q = start) == NULL)
295                 goto drop;
296
297         do {
298                 struct net_device *slave = qdisc_dev(q);
299                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
300                 const struct net_device_ops *slave_ops = slave->netdev_ops;
301
302                 if (slave_txq->qdisc_sleeping != q)
303                         continue;
304                 if (__netif_subqueue_stopped(slave, subq) ||
305                     !netif_running(slave)) {
306                         busy = 1;
307                         continue;
308                 }
309
310                 switch (teql_resolve(skb, skb_res, slave)) {
311                 case 0:
312                         if (__netif_tx_trylock(slave_txq)) {
313                                 unsigned int length = qdisc_pkt_len(skb);
314
315                                 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
316                                     slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
317                                         txq_trans_update(slave_txq);
318                                         __netif_tx_unlock(slave_txq);
319                                         master->slaves = NEXT_SLAVE(q);
320                                         netif_wake_queue(dev);
321                                         master->tx_packets++;
322                                         master->tx_bytes += length;
323                                         return NETDEV_TX_OK;
324                                 }
325                                 __netif_tx_unlock(slave_txq);
326                         }
327                         if (netif_queue_stopped(dev))
328                                 busy = 1;
329                         break;
330                 case 1:
331                         master->slaves = NEXT_SLAVE(q);
332                         return NETDEV_TX_OK;
333                 default:
334                         nores = 1;
335                         break;
336                 }
337                 __skb_pull(skb, skb_network_offset(skb));
338         } while ((q = NEXT_SLAVE(q)) != start);
339
340         if (nores && skb_res == NULL) {
341                 skb_res = skb;
342                 goto restart;
343         }
344
345         if (busy) {
346                 netif_stop_queue(dev);
347                 return NETDEV_TX_BUSY;
348         }
349         master->tx_errors++;
350
351 drop:
352         master->tx_dropped++;
353         dev_kfree_skb(skb);
354         return NETDEV_TX_OK;
355 }
356
357 static int teql_master_open(struct net_device *dev)
358 {
359         struct Qdisc * q;
360         struct teql_master *m = netdev_priv(dev);
361         int mtu = 0xFFFE;
362         unsigned flags = IFF_NOARP|IFF_MULTICAST;
363
364         if (m->slaves == NULL)
365                 return -EUNATCH;
366
367         flags = FMASK;
368
369         q = m->slaves;
370         do {
371                 struct net_device *slave = qdisc_dev(q);
372
373                 if (slave == NULL)
374                         return -EUNATCH;
375
376                 if (slave->mtu < mtu)
377                         mtu = slave->mtu;
378                 if (slave->hard_header_len > LL_MAX_HEADER)
379                         return -EINVAL;
380
381                 /* If all the slaves are BROADCAST, master is BROADCAST
382                    If all the slaves are PtP, master is PtP
383                    Otherwise, master is NBMA.
384                  */
385                 if (!(slave->flags&IFF_POINTOPOINT))
386                         flags &= ~IFF_POINTOPOINT;
387                 if (!(slave->flags&IFF_BROADCAST))
388                         flags &= ~IFF_BROADCAST;
389                 if (!(slave->flags&IFF_MULTICAST))
390                         flags &= ~IFF_MULTICAST;
391         } while ((q = NEXT_SLAVE(q)) != m->slaves);
392
393         m->dev->mtu = mtu;
394         m->dev->flags = (m->dev->flags&~FMASK) | flags;
395         netif_start_queue(m->dev);
396         return 0;
397 }
398
/* ndo_stop for the master device: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
404
405 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
406                                                      struct rtnl_link_stats64 *stats)
407 {
408         struct teql_master *m = netdev_priv(dev);
409
410         stats->tx_packets       = m->tx_packets;
411         stats->tx_bytes         = m->tx_bytes;
412         stats->tx_errors        = m->tx_errors;
413         stats->tx_dropped       = m->tx_dropped;
414         return stats;
415 }
416
417 static int teql_master_mtu(struct net_device *dev, int new_mtu)
418 {
419         struct teql_master *m = netdev_priv(dev);
420         struct Qdisc *q;
421
422         if (new_mtu < 68)
423                 return -EINVAL;
424
425         q = m->slaves;
426         if (q) {
427                 do {
428                         if (new_mtu > qdisc_dev(q)->mtu)
429                                 return -EINVAL;
430                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
431         }
432
433         dev->mtu = new_mtu;
434         return 0;
435 }
436
437 static const struct net_device_ops teql_netdev_ops = {
438         .ndo_open       = teql_master_open,
439         .ndo_stop       = teql_master_close,
440         .ndo_start_xmit = teql_master_xmit,
441         .ndo_get_stats64 = teql_master_stats64,
442         .ndo_change_mtu = teql_master_mtu,
443 };
444
445 static __init void teql_master_setup(struct net_device *dev)
446 {
447         struct teql_master *master = netdev_priv(dev);
448         struct Qdisc_ops *ops = &master->qops;
449
450         master->dev     = dev;
451         ops->priv_size  = sizeof(struct teql_sched_data);
452
453         ops->enqueue    =       teql_enqueue;
454         ops->dequeue    =       teql_dequeue;
455         ops->peek       =       teql_peek;
456         ops->init       =       teql_qdisc_init;
457         ops->reset      =       teql_reset;
458         ops->destroy    =       teql_destroy;
459         ops->owner      =       THIS_MODULE;
460
461         dev->netdev_ops =       &teql_netdev_ops;
462         dev->type               = ARPHRD_VOID;
463         dev->mtu                = 1500;
464         dev->tx_queue_len       = 100;
465         dev->flags              = IFF_NOARP;
466         dev->hard_header_len    = LL_MAX_HEADER;
467         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
468 }
469
470 static LIST_HEAD(master_dev_list);
471 static int max_equalizers = 1;
472 module_param(max_equalizers, int, 0);
473 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
474
475 static int __init teql_init(void)
476 {
477         int i;
478         int err = -ENODEV;
479
480         for (i = 0; i < max_equalizers; i++) {
481                 struct net_device *dev;
482                 struct teql_master *master;
483
484                 dev = alloc_netdev(sizeof(struct teql_master),
485                                   "teql%d", teql_master_setup);
486                 if (!dev) {
487                         err = -ENOMEM;
488                         break;
489                 }
490
491                 if ((err = register_netdev(dev))) {
492                         free_netdev(dev);
493                         break;
494                 }
495
496                 master = netdev_priv(dev);
497
498                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
499                 err = register_qdisc(&master->qops);
500
501                 if (err) {
502                         unregister_netdev(dev);
503                         free_netdev(dev);
504                         break;
505                 }
506
507                 list_add_tail(&master->master_list, &master_dev_list);
508         }
509         return i ? 0 : err;
510 }
511
512 static void __exit teql_exit(void)
513 {
514         struct teql_master *master, *nxt;
515
516         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
517
518                 list_del(&master->master_list);
519
520                 unregister_qdisc(&master->qops);
521                 unregister_netdev(master->dev);
522                 free_netdev(master->dev);
523         }
524 }
525
526 module_init(teql_init);
527 module_exit(teql_exit);
528
529 MODULE_LICENSE("GPL");