[NETLINK]: Missing initializations in dumped data
net/sched/sch_cbq.c
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         Algorithm skeleton was taken from NS simulator cbq.cc.
58         If someone wants to check this code against the LBL version,
59         he should take into account that ONLY the skeleton was borrowed,
60         the implementation is different. Particularly:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real time classes
65         have small rates. Note that the statement of [3] is
66         incomplete, delay may actually be estimated even if class
67         per-round allotment is less than MTU. Namely, if per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
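
        As a rough worked instance of this bound (numbers chosen purely
        for illustration): take MTU = 1500 bytes, k = 2 classes of equal
        rate with per-round allotments W*r_1 = W*r_2 = 500 bytes (so
        W*r = 1000 bytes) and B = 125000 bytes/sec. Then [MTU/(W*r_1)] = 3
        and

        delay_1 <= (3*1000 + 1000 + 2*1500)/125000 sec = 56 msec.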
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like wrong translations
78         from NS. Anyone is advised to find these differences
79         and explain to me why I am wrong 8).
80
81         --- Linux has no EOI event, so we cannot estimate true class
82         idle time. The workaround is to consider the next dequeue event
83         as a sign that the previous packet has finished. This is wrong because of
84         internal device queueing, but on a permanently loaded link it is true.
85         Moreover, combined with the clock integrator, this scheme looks
86         very close to an ideal solution.  */
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct gnet_stats_basic bstats;
150         struct gnet_stats_queue qstats;
151         struct gnet_stats_rate_est rate_est;
152         spinlock_t              *stats_lock;
153         struct tc_cbq_xstats    xstats;
154
155         struct tcf_proto        *filter_list;
156
157         int                     refcnt;
158         int                     filters;
159
160         struct cbq_class        *defaults[TC_PRIO_MAX+1];
161 };
162
163 struct cbq_sched_data
164 {
165         struct cbq_class        *classes[16];           /* Hash table of all classes */
166         int                     nclasses[TC_CBQ_MAXPRIO+1];
167         unsigned                quanta[TC_CBQ_MAXPRIO+1];
168
169         struct cbq_class        link;
170
171         unsigned                activemask;
172         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
173                                                                    with backlog */
174
175 #ifdef CONFIG_NET_CLS_POLICE
176         struct cbq_class        *rx_class;
177 #endif
178         struct cbq_class        *tx_class;
179         struct cbq_class        *tx_borrowed;
180         int                     tx_len;
181         psched_time_t           now;            /* Cached timestamp */
182         psched_time_t           now_rt;         /* Cached real time */
183         unsigned                pmask;
184
185         struct timer_list       delay_timer;
186         struct timer_list       wd_timer;       /* Watchdog timer,
187                                                    started when CBQ has
188                                                    backlog, but cannot
189                                                    transmit just now */
190         long                    wd_expires;
191         int                     toplevel;
192         u32                     hgenerator;
193 };
194
195
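/* L2T ("length to time"): the macro below looks up, in the class's rate
 * table, the time needed to transmit a packet of the given length at the
 * class's configured rate; the table is indexed in cells of 2^cell_log
 * bytes, hence the shift by cell_log.
 */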
196 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
197
198
199 static __inline__ unsigned cbq_hash(u32 h)
200 {
201         h ^= h>>8;
202         h ^= h>>4;
203         return h&0xF;
204 }
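
/* cbq_hash() folds a 32-bit classid into one of the 16 hash buckets by
 * XOR-ing the upper bits down and keeping the low nibble. For example
 * (an illustrative value): classid 0x00010001 becomes 0x00010101, then
 * 0x00011111, and lands in bucket 1.
 */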
205
206 static __inline__ struct cbq_class *
207 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
208 {
209         struct cbq_class *cl;
210
211         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
212                 if (cl->classid == classid)
213                         return cl;
214         return NULL;
215 }
216
217 #ifdef CONFIG_NET_CLS_POLICE
218
219 static struct cbq_class *
220 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
221 {
222         struct cbq_class *cl, *new;
223
224         for (cl = this->tparent; cl; cl = cl->tparent)
225                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
226                         return new;
227
228         return NULL;
229 }
230
231 #endif
232
233 /* Classify packet. The procedure is pretty complicated, but
234    it allows us to combine link sharing and priority scheduling
235    transparently.
236
237    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
238    so that it resolves to split nodes. Then packets are classified
239    by logical priority, or a more specific classifier may be attached
240    to the split node.
241  */
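
/* For example (hypothetical handles): if this qdisc is 1:0 and an
 * application sets skb->priority to the TC handle 1:2 (0x00010002),
 * step 1 below matches on the major number and classifies the packet
 * directly into class 1:2 without consulting any filters.
 */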
242
243 static struct cbq_class *
244 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
245 {
246         struct cbq_sched_data *q = qdisc_priv(sch);
247         struct cbq_class *head = &q->link;
248         struct cbq_class **defmap;
249         struct cbq_class *cl = NULL;
250         u32 prio = skb->priority;
251         struct tcf_result res;
252
253         /*
254          *  Step 1. If skb->priority points to one of our classes, use it.
255          */
256         if (TC_H_MAJ(prio^sch->handle) == 0 &&
257             (cl = cbq_class_lookup(q, prio)) != NULL)
258                 return cl;
259
260         *qerr = NET_XMIT_DROP;
261         for (;;) {
262                 int result = 0;
263                 defmap = head->defaults;
264
265                 /*
266                  * Step 2+n. Apply classifier.
267                  */
268                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269                         goto fallback;
270
271                 if ((cl = (void*)res.class) == NULL) {
272                         if (TC_H_MAJ(res.classid))
273                                 cl = cbq_class_lookup(q, res.classid);
274                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
275                                 cl = defmap[TC_PRIO_BESTEFFORT];
276
277                         if (cl == NULL || cl->level >= head->level)
278                                 goto fallback;
279                 }
280
281 #ifdef CONFIG_NET_CLS_ACT
282                 switch (result) {
283                 case TC_ACT_QUEUED:
284                 case TC_ACT_STOLEN: 
285                         *qerr = NET_XMIT_SUCCESS;
286                 case TC_ACT_SHOT:
287                         return NULL;
288                 }
289 #elif defined(CONFIG_NET_CLS_POLICE)
290                 switch (result) {
291                 case TC_POLICE_RECLASSIFY:
292                         return cbq_reclassify(skb, cl);
293                 case TC_POLICE_SHOT:
294                         return NULL;
295                 default:
296                         break;
297                 }
298 #endif
299                 if (cl->level == 0)
300                         return cl;
301
302                 /*
303                  * Step 3+n. If the classifier selected a link sharing class,
304                  *         apply the agency-specific classifier.
305                  *         Repeat this procedure until we hit a leaf node.
306                  */
307                 head = cl;
308         }
309
310 fallback:
311         cl = head;
312
313         /*
314          * Step 4. No success...
315          */
316         if (TC_H_MAJ(prio) == 0 &&
317             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
318             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
319                 return head;
320
321         return cl;
322 }
323
324 /*
325    A packet has just been enqueued on an empty class.
326    cbq_activate_class adds it to the tail of the active class list
327    of its priority band.
328  */
329
330 static __inline__ void cbq_activate_class(struct cbq_class *cl)
331 {
332         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
333         int prio = cl->cpriority;
334         struct cbq_class *cl_tail;
335
336         cl_tail = q->active[prio];
337         q->active[prio] = cl;
338
339         if (cl_tail != NULL) {
340                 cl->next_alive = cl_tail->next_alive;
341                 cl_tail->next_alive = cl;
342         } else {
343                 cl->next_alive = cl;
344                 q->activemask |= (1<<prio);
345         }
346 }
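
/* Layout note for the lists maintained above and in cbq_deactivate_class()
 * and cbq_dequeue_prio(): q->active[prio] points at the *tail* of a
 * circular singly-linked list threaded through next_alive, so
 * tail->next_alive is the head and a one-element list points to itself.
 */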
347
348 /*
349    Unlink class from active chain.
350    Note that this same procedure is done directly in cbq_dequeue*
351    during round-robin procedure.
352  */
353
354 static void cbq_deactivate_class(struct cbq_class *this)
355 {
356         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
357         int prio = this->cpriority;
358         struct cbq_class *cl;
359         struct cbq_class *cl_prev = q->active[prio];
360
361         do {
362                 cl = cl_prev->next_alive;
363                 if (cl == this) {
364                         cl_prev->next_alive = cl->next_alive;
365                         cl->next_alive = NULL;
366
367                         if (cl == q->active[prio]) {
368                                 q->active[prio] = cl_prev;
369                                 if (cl == q->active[prio]) {
370                                         q->active[prio] = NULL;
371                                         q->activemask &= ~(1<<prio);
372                                         return;
373                                 }
374                         }
375
376                         cl = cl_prev->next_alive;
377                         return;
378                 }
379         } while ((cl_prev = cl) != q->active[prio]);
380 }
381
382 static void
383 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
384 {
385         int toplevel = q->toplevel;
386
387         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
388                 psched_time_t now;
389                 psched_tdiff_t incr;
390
391                 PSCHED_GET_TIME(now);
392                 incr = PSCHED_TDIFF(now, q->now_rt);
393                 PSCHED_TADD2(q->now, incr, now);
394
395                 do {
396                         if (PSCHED_TLESS(cl->undertime, now)) {
397                                 q->toplevel = cl->level;
398                                 return;
399                         }
400                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
401         }
402 }
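
/* q->toplevel implements the top-level link-sharing guideline of [1]:
 * cbq_under_limit() refuses to borrow from an ancestor whose level is
 * above q->toplevel, and cbq_mark_toplevel() lowers that cutoff when a
 * packet arrives for a class that is underlimit (or has an underlimit
 * ancestor below the current cutoff).
 */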
403
404 static int
405 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
406 {
407         struct cbq_sched_data *q = qdisc_priv(sch);
408         int len = skb->len;
409         int ret;
410         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
411
412 #ifdef CONFIG_NET_CLS_POLICE
413         q->rx_class = cl;
414 #endif
415         if (cl == NULL) {
416                 if (ret == NET_XMIT_DROP)
417                         sch->qstats.drops++;
418                 kfree_skb(skb);
419                 return ret;
420         }
421
422 #ifdef CONFIG_NET_CLS_POLICE
423         cl->q->__parent = sch;
424 #endif
425         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
426                 sch->q.qlen++;
427                 sch->bstats.packets++;
428                 sch->bstats.bytes+=len;
429                 cbq_mark_toplevel(q, cl);
430                 if (!cl->next_alive)
431                         cbq_activate_class(cl);
432                 return ret;
433         }
434
435         sch->qstats.drops++;
436         cbq_mark_toplevel(q, cl);
437         cl->qstats.drops++;
438         return ret;
439 }
440
441 static int
442 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
443 {
444         struct cbq_sched_data *q = qdisc_priv(sch);
445         struct cbq_class *cl;
446         int ret;
447
448         if ((cl = q->tx_class) == NULL) {
449                 kfree_skb(skb);
450                 sch->qstats.drops++;
451                 return NET_XMIT_CN;
452         }
453         q->tx_class = NULL;
454
455         cbq_mark_toplevel(q, cl);
456
457 #ifdef CONFIG_NET_CLS_POLICE
458         q->rx_class = cl;
459         cl->q->__parent = sch;
460 #endif
461         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
462                 sch->q.qlen++;
463                 sch->qstats.requeues++;
464                 if (!cl->next_alive)
465                         cbq_activate_class(cl);
466                 return 0;
467         }
468         sch->qstats.drops++;
469         cl->qstats.drops++;
470         return ret;
471 }
472
473 /* Overlimit actions */
474
475 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
476
477 static void cbq_ovl_classic(struct cbq_class *cl)
478 {
479         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
480         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
481
482         if (!cl->delayed) {
483                 delay += cl->offtime;
484
485                 /* 
486                    Class goes to sleep, so that it will have no
487                    chance to work avgidle. Let's forgive it 8)
488
489                    BTW cbq-2.0 has a bug in this
490                    place; apparently they forgot to shift it by cl->ewma_log.
491                  */
492                 if (cl->avgidle < 0)
493                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
494                 if (cl->avgidle < cl->minidle)
495                         cl->avgidle = cl->minidle;
496                 if (delay <= 0)
497                         delay = 1;
498                 PSCHED_TADD2(q->now, delay, cl->undertime);
499
500                 cl->xstats.overactions++;
501                 cl->delayed = 1;
502         }
503         if (q->wd_expires == 0 || q->wd_expires > delay)
504                 q->wd_expires = delay;
505
506         /* Dirty work! We must schedule wakeups based on
507            real available rate, rather than leaf rate,
508            which may be tiny (even zero).
509          */
510         if (q->toplevel == TC_CBQ_MAXLEVEL) {
511                 struct cbq_class *b;
512                 psched_tdiff_t base_delay = q->wd_expires;
513
514                 for (b = cl->borrow; b; b = b->borrow) {
515                         delay = PSCHED_TDIFF(b->undertime, q->now);
516                         if (delay < base_delay) {
517                                 if (delay <= 0)
518                                         delay = 1;
519                                 base_delay = delay;
520                         }
521                 }
522
523                 q->wd_expires = base_delay;
524         }
525 }
526
527 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
528    they go overlimit
529  */
530
531 static void cbq_ovl_rclassic(struct cbq_class *cl)
532 {
533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
534         struct cbq_class *this = cl;
535
536         do {
537                 if (cl->level > q->toplevel) {
538                         cl = NULL;
539                         break;
540                 }
541         } while ((cl = cl->borrow) != NULL);
542
543         if (cl == NULL)
544                 cl = this;
545         cbq_ovl_classic(cl);
546 }
547
548 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
549
550 static void cbq_ovl_delay(struct cbq_class *cl)
551 {
552         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
553         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
554
555         if (!cl->delayed) {
556                 unsigned long sched = jiffies;
557
558                 delay += cl->offtime;
559                 if (cl->avgidle < 0)
560                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
561                 if (cl->avgidle < cl->minidle)
562                         cl->avgidle = cl->minidle;
563                 PSCHED_TADD2(q->now, delay, cl->undertime);
564
565                 if (delay > 0) {
566                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
567                         cl->penalized = sched;
568                         cl->cpriority = TC_CBQ_MAXPRIO;
569                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
570                         if (del_timer(&q->delay_timer) &&
571                             (long)(q->delay_timer.expires - sched) > 0)
572                                 q->delay_timer.expires = sched;
573                         add_timer(&q->delay_timer);
574                         cl->delayed = 1;
575                         cl->xstats.overactions++;
576                         return;
577                 }
578                 delay = 1;
579         }
580         if (q->wd_expires == 0 || q->wd_expires > delay)
581                 q->wd_expires = delay;
582 }
583
584 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
585
586 static void cbq_ovl_lowprio(struct cbq_class *cl)
587 {
588         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
589
590         cl->penalized = jiffies + cl->penalty;
591
592         if (cl->cpriority != cl->priority2) {
593                 cl->cpriority = cl->priority2;
594                 q->pmask |= (1<<cl->cpriority);
595                 cl->xstats.overactions++;
596         }
597         cbq_ovl_classic(cl);
598 }
599
600 /* TC_CBQ_OVL_DROP: penalize class by dropping */
601
602 static void cbq_ovl_drop(struct cbq_class *cl)
603 {
604         if (cl->q->ops->drop)
605                 if (cl->q->ops->drop(cl->q))
606                         cl->qdisc->q.qlen--;
607         cl->xstats.overactions++;
608         cbq_ovl_classic(cl);
609 }
610
611 static void cbq_watchdog(unsigned long arg)
612 {
613         struct Qdisc *sch = (struct Qdisc*)arg;
614
615         sch->flags &= ~TCQ_F_THROTTLED;
616         netif_schedule(sch->dev);
617 }
618
619 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
620 {
621         struct cbq_class *cl;
622         struct cbq_class *cl_prev = q->active[prio];
623         unsigned long now = jiffies;
624         unsigned long sched = now;
625
626         if (cl_prev == NULL)
627                 return now;
628
629         do {
630                 cl = cl_prev->next_alive;
631                 if ((long)(now - cl->penalized) > 0) {
632                         cl_prev->next_alive = cl->next_alive;
633                         cl->next_alive = NULL;
634                         cl->cpriority = cl->priority;
635                         cl->delayed = 0;
636                         cbq_activate_class(cl);
637
638                         if (cl == q->active[prio]) {
639                                 q->active[prio] = cl_prev;
640                                 if (cl == q->active[prio]) {
641                                         q->active[prio] = NULL;
642                                         return 0;
643                                 }
644                         }
645
646                         cl = cl_prev->next_alive;
647                 } else if ((long)(sched - cl->penalized) > 0)
648                         sched = cl->penalized;
649         } while ((cl_prev = cl) != q->active[prio]);
650
651         return (long)(sched - now);
652 }
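
/* The (long)(now - cl->penalized) comparisons above are the usual
 * wrap-safe jiffies idiom (same idea as time_after()): the signed
 * difference remains correct across jiffies wrap-around as long as the
 * two timestamps are less than half the counter range apart.
 */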
653
654 static void cbq_undelay(unsigned long arg)
655 {
656         struct Qdisc *sch = (struct Qdisc*)arg;
657         struct cbq_sched_data *q = qdisc_priv(sch);
658         long delay = 0;
659         unsigned pmask;
660
661         pmask = q->pmask;
662         q->pmask = 0;
663
664         while (pmask) {
665                 int prio = ffz(~pmask);
666                 long tmp;
667
668                 pmask &= ~(1<<prio);
669
670                 tmp = cbq_undelay_prio(q, prio);
671                 if (tmp > 0) {
672                         q->pmask |= 1<<prio;
673                         if (tmp < delay || delay == 0)
674                                 delay = tmp;
675                 }
676         }
677
678         if (delay) {
679                 q->delay_timer.expires = jiffies + delay;
680                 add_timer(&q->delay_timer);
681         }
682
683         sch->flags &= ~TCQ_F_THROTTLED;
684         netif_schedule(sch->dev);
685 }
686
687
688 #ifdef CONFIG_NET_CLS_POLICE
689
690 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
691 {
692         int len = skb->len;
693         struct Qdisc *sch = child->__parent;
694         struct cbq_sched_data *q = qdisc_priv(sch);
695         struct cbq_class *cl = q->rx_class;
696
697         q->rx_class = NULL;
698
699         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
700
701                 cbq_mark_toplevel(q, cl);
702
703                 q->rx_class = cl;
704                 cl->q->__parent = sch;
705
706                 if (cl->q->enqueue(skb, cl->q) == 0) {
707                         sch->q.qlen++;
708                         sch->bstats.packets++;
709                         sch->bstats.bytes+=len;
710                         if (!cl->next_alive)
711                                 cbq_activate_class(cl);
712                         return 0;
713                 }
714                 sch->qstats.drops++;
715                 return 0;
716         }
717
718         sch->qstats.drops++;
719         return -1;
720 }
721 #endif
722
723 /* 
724    This is a mission-critical procedure.
725
726    We "regenerate" the toplevel cutoff if the transmitting class
727    has backlog and is not regulated. It is not part of the
728    original CBQ description, but it looks more reasonable.
729    Probably it is wrong; this question needs further investigation.
730 */
731
732 static __inline__ void
733 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
734                     struct cbq_class *borrowed)
735 {
736         if (cl && q->toplevel >= borrowed->level) {
737                 if (cl->q->q.qlen > 1) {
738                         do {
739                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
740                                         q->toplevel = borrowed->level;
741                                         return;
742                                 }
743                         } while ((borrowed=borrowed->borrow) != NULL);
744                 }
745 #if 0   
746         /* It is not necessary now. Uncommenting it
747            will save CPU cycles, but decrease fairness.
748          */
749                 q->toplevel = TC_CBQ_MAXLEVEL;
750 #endif
751         }
752 }
753
754 static void
755 cbq_update(struct cbq_sched_data *q)
756 {
757         struct cbq_class *this = q->tx_class;
758         struct cbq_class *cl = this;
759         int len = q->tx_len;
760
761         q->tx_class = NULL;
762
763         for ( ; cl; cl = cl->share) {
764                 long avgidle = cl->avgidle;
765                 long idle;
766
767                 cl->bstats.packets++;
768                 cl->bstats.bytes += len;
769
770                 /*
771                    (now - last) is total time between packet right edges.
772                    (last_pktlen/rate) is "virtual" busy time, so that
773
774                          idle = (now - last) - last_pktlen/rate
775                  */
776
777                 idle = PSCHED_TDIFF(q->now, cl->last);
778                 if ((unsigned long)idle > 128*1024*1024) {
779                         avgidle = cl->maxidle;
780                 } else {
781                         idle -= L2T(cl, len);
782
783                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
784                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
785                    cl->avgidle == true_avgidle/W,
786                    hence:
787                  */
788                         avgidle += idle - (avgidle>>cl->ewma_log);
789                 }
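                /* A numeric sketch of the scaled EWMA above (values
                   invented for illustration): with ewma_log = 5 (W = 1/32),
                   cl->avgidle = -3200 (true_avgidle = -100) and idle = 64,
                   the update yields -3200 + 64 - (-3200>>5) = -3036, i.e.
                   true_avgidle becomes -94.875 = (1-W)*(-100) + W*64.
                 */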
790
791                 if (avgidle <= 0) {
792                         /* Overlimit or at-limit */
793
794                         if (avgidle < cl->minidle)
795                                 avgidle = cl->minidle;
796
797                         cl->avgidle = avgidle;
798
799                         /* Calculate the expected time when this class
800                            will be allowed to send.
801                            It will occur when:
802                            (1-W)*true_avgidle + W*idle = 0, i.e.
803                            idle = (1/W - 1)*(-true_avgidle)
804                            or
805                            idle = (1 - W)*(-cl->avgidle);
806                          */
807                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
808
809                         /*
810                            That is not all.
811                            To maintain the rate allocated to the class,
812                            we add to undertime the virtual clock time
813                            necessary to complete the transmitted packet.
814                            (len/phys_bandwidth has already passed by
815                            the moment of cbq_update.)
816                          */
817
818                         idle -= L2T(&q->link, len);
819                         idle += L2T(cl, len);
820
821                         PSCHED_AUDIT_TDIFF(idle);
822
823                         PSCHED_TADD2(q->now, idle, cl->undertime);
824                 } else {
825                         /* Underlimit */
826
827                         PSCHED_SET_PASTPERFECT(cl->undertime);
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
849             !PSCHED_TLESS(q->now, cl->undertime)) {
850                 cl->delayed = 0;
851                 return cl;
852         }
853
854         do {
855                 /* This is a very suspicious place. Currently the overlimit
856                    action is generated for non-bounded classes
857                    only if the link is completely congested.
858                    Though this agrees with the ancestor-only paradigm,
859                    it looks very stupid. In particular,
860                    it means that this chunk of code will either
861                    never be called or result in strong amplification
862                    of burstiness. Dangerous and silly, but
863                    no other solution exists.
864                  */
865                 if ((cl = cl->borrow) == NULL) {
866                         this_cl->qstats.overlimits++;
867                         this_cl->overlimit(this_cl);
868                         return NULL;
869                 }
870                 if (cl->level > q->toplevel)
871                         return NULL;
872         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
873                  PSCHED_TLESS(q->now, cl->undertime));
874
875         cl->delayed = 0;
876         return cl;
877 }
878
879 static __inline__ struct sk_buff *
880 cbq_dequeue_prio(struct Qdisc *sch, int prio)
881 {
882         struct cbq_sched_data *q = qdisc_priv(sch);
883         struct cbq_class *cl_tail, *cl_prev, *cl;
884         struct sk_buff *skb;
885         int deficit;
886
887         cl_tail = cl_prev = q->active[prio];
888         cl = cl_prev->next_alive;
889
890         do {
891                 deficit = 0;
892
893                 /* Start round */
894                 do {
895                         struct cbq_class *borrow = cl;
896
897                         if (cl->q->q.qlen &&
898                             (borrow = cbq_under_limit(cl)) == NULL)
899                                 goto skip_class;
900
901                         if (cl->deficit <= 0) {
902                                 /* Class exhausted its allotment per
903                                    this round. Switch to the next one.
904                                  */
905                                 deficit = 1;
906                                 cl->deficit += cl->quantum;
907                                 goto next_class;
908                         }
909
910                         skb = cl->q->dequeue(cl->q);
911
912                         /* The class did not give us any skb :-(
913                            This can occur even if cl->q->q.qlen != 0,
914                            e.g. if cl->q == "tbf".
915                          */
916                         if (skb == NULL)
917                                 goto skip_class;
918
919                         cl->deficit -= skb->len;
920                         q->tx_class = cl;
921                         q->tx_borrowed = borrow;
922                         if (borrow != cl) {
923 #ifndef CBQ_XSTATS_BORROWS_BYTES
924                                 borrow->xstats.borrows++;
925                                 cl->xstats.borrows++;
926 #else
927                                 borrow->xstats.borrows += skb->len;
928                                 cl->xstats.borrows += skb->len;
929 #endif
930                         }
931                         q->tx_len = skb->len;
932
933                         if (cl->deficit <= 0) {
934                                 q->active[prio] = cl;
935                                 cl = cl->next_alive;
936                                 cl->deficit += cl->quantum;
937                         }
938                         return skb;
939
940 skip_class:
941                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
942                                 /* Class is empty or penalized.
943                                    Unlink it from active chain.
944                                  */
945                                 cl_prev->next_alive = cl->next_alive;
946                                 cl->next_alive = NULL;
947
948                                 /* Did cl_tail point to it? */
949                                 if (cl == cl_tail) {
950                                         /* Repair it! */
951                                         cl_tail = cl_prev;
952
953                                         /* Was it the last class in this band? */
954                                         if (cl == cl_tail) {
955                                                 /* Kill the band! */
956                                                 q->active[prio] = NULL;
957                                                 q->activemask &= ~(1<<prio);
958                                                 if (cl->q->q.qlen)
959                                                         cbq_activate_class(cl);
960                                                 return NULL;
961                                         }
962
963                                         q->active[prio] = cl_tail;
964                                 }
965                                 if (cl->q->q.qlen)
966                                         cbq_activate_class(cl);
967
968                                 cl = cl_prev;
969                         }
970
971 next_class:
972                         cl_prev = cl;
973                         cl = cl->next_alive;
974                 } while (cl_prev != cl_tail);
975         } while (deficit);
976
977         q->active[prio] = cl_prev;
978
979         return NULL;
980 }
981
982 static __inline__ struct sk_buff *
983 cbq_dequeue_1(struct Qdisc *sch)
984 {
985         struct cbq_sched_data *q = qdisc_priv(sch);
986         struct sk_buff *skb;
987         unsigned activemask;
988
989         activemask = q->activemask&0xFF;
990         while (activemask) {
991                 int prio = ffz(~activemask);
992                 activemask &= ~(1<<prio);
993                 skb = cbq_dequeue_prio(sch, prio);
994                 if (skb)
995                         return skb;
996         }
997         return NULL;
998 }
999
1000 static struct sk_buff *
1001 cbq_dequeue(struct Qdisc *sch)
1002 {
1003         struct sk_buff *skb;
1004         struct cbq_sched_data *q = qdisc_priv(sch);
1005         psched_time_t now;
1006         psched_tdiff_t incr;
1007
1008         PSCHED_GET_TIME(now);
1009         incr = PSCHED_TDIFF(now, q->now_rt);
1010
1011         if (q->tx_class) {
1012                 psched_tdiff_t incr2;
1013                 /* Time integrator. We calculate EOS time
1014                    by adding expected packet transmission time.
1015                    If real time is greater, we warp artificial clock,
1016                    so that:
1017
1018                    cbq_time = max(real_time, work);
1019                  */
1020                 incr2 = L2T(&q->link, q->tx_len);
1021                 PSCHED_TADD(q->now, incr2);
1022                 cbq_update(q);
1023                 if ((incr -= incr2) < 0)
1024                         incr = 0;
1025         }
1026         PSCHED_TADD(q->now, incr);
1027         q->now_rt = now;
1028
1029         for (;;) {
1030                 q->wd_expires = 0;
1031
1032                 skb = cbq_dequeue_1(sch);
1033                 if (skb) {
1034                         sch->q.qlen--;
1035                         sch->flags &= ~TCQ_F_THROTTLED;
1036                         return skb;
1037                 }
1038
1039                 /* All the classes are overlimit.
1040
1041                    This is possible if:
1042
1043                    1. The scheduler is empty.
1044                    2. The toplevel cutoff inhibited borrowing.
1045                    3. The root class is overlimit.
1046
1047                    Reset the 2nd and 3rd conditions and retry.
1048
1049                    Note that NS and cbq-2.0 are buggy: peeking
1050                    an arbitrary class is appropriate for ancestor-only
1051                    sharing, but not for the toplevel algorithm.
1052
1053                    Our version is better but slower, because it requires
1054                    two passes; this is unavoidable with top-level sharing.
1055                 */
1056
1057                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1058                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1059                         break;
1060
1061                 q->toplevel = TC_CBQ_MAXLEVEL;
1062                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1063         }
1064
1065         /* No packets in the scheduler, or nobody wants to give them to us :-(
1066            Sigh... start the watchdog timer in the latter case. */
1067
1068         if (sch->q.qlen) {
1069                 sch->qstats.overlimits++;
1070                 if (q->wd_expires) {
1071                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1072                         if (delay <= 0)
1073                                 delay = 1;
1074                         mod_timer(&q->wd_timer, jiffies + delay);
1075                         sch->flags |= TCQ_F_THROTTLED;
1076                 }
1077         }
1078         return NULL;
1079 }
1080
1081 /* CBQ class maintenance routines */
1082
1083 static void cbq_adjust_levels(struct cbq_class *this)
1084 {
1085         if (this == NULL)
1086                 return;
1087
1088         do {
1089                 int level = 0;
1090                 struct cbq_class *cl;
1091
1092                 if ((cl = this->children) != NULL) {
1093                         do {
1094                                 if (cl->level > level)
1095                                         level = cl->level;
1096                         } while ((cl = cl->sibling) != this->children);
1097                 }
1098                 this->level = level+1;
1099         } while ((this = this->tparent) != NULL);
1100 }
1101
1102 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1103 {
1104         struct cbq_class *cl;
1105         unsigned h;
1106
1107         if (q->quanta[prio] == 0)
1108                 return;
1109
1110         for (h=0; h<16; h++) {
1111                 for (cl = q->classes[h]; cl; cl = cl->next) {
1112                         /* BUGGGG... Beware! This expression suffers from
1113                            arithmetic overflows!
1114                          */
1115                         if (cl->priority == prio) {
1116                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1117                                         q->quanta[prio];
1118                         }
1119                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1120                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1121                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1122                         }
1123                 }
1124         }
1125 }
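
/* A worked example of the normalization above (weights invented for
 * illustration): two classes at one priority, both with allot 1500, with
 * weights 1 and 3, give nclasses = 2 and quanta = 4, so their per-round
 * quanta become 1*1500*2/4 = 750 and 3*1500*2/4 = 2250 bytes respectively.
 */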
1126
1127 static void cbq_sync_defmap(struct cbq_class *cl)
1128 {
1129         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1130         struct cbq_class *split = cl->split;
1131         unsigned h;
1132         int i;
1133
1134         if (split == NULL)
1135                 return;
1136
1137         for (i=0; i<=TC_PRIO_MAX; i++) {
1138                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1139                         split->defaults[i] = NULL;
1140         }
1141
1142         for (i=0; i<=TC_PRIO_MAX; i++) {
1143                 int level = split->level;
1144
1145                 if (split->defaults[i])
1146                         continue;
1147
1148                 for (h=0; h<16; h++) {
1149                         struct cbq_class *c;
1150
1151                         for (c = q->classes[h]; c; c = c->next) {
1152                                 if (c->split == split && c->level < level &&
1153                                     c->defmap&(1<<i)) {
1154                                         split->defaults[i] = c;
1155                                         level = c->level;
1156                                 }
1157                         }
1158                 }
1159         }
1160 }
1161
1162 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1163 {
1164         struct cbq_class *split = NULL;
1165
1166         if (splitid == 0) {
1167                 if ((split = cl->split) == NULL)
1168                         return;
1169                 splitid = split->classid;
1170         }
1171
1172         if (split == NULL || split->classid != splitid) {
1173                 for (split = cl->tparent; split; split = split->tparent)
1174                         if (split->classid == splitid)
1175                                 break;
1176         }
1177
1178         if (split == NULL)
1179                 return;
1180
1181         if (cl->split != split) {
1182                 cl->defmap = 0;
1183                 cbq_sync_defmap(cl);
1184                 cl->split = split;
1185                 cl->defmap = def&mask;
1186         } else
1187                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1188
1189         cbq_sync_defmap(cl);
1190 }
1191
1192 static void cbq_unlink_class(struct cbq_class *this)
1193 {
1194         struct cbq_class *cl, **clp;
1195         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1196
1197         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1198                 if (cl == this) {
1199                         *clp = cl->next;
1200                         cl->next = NULL;
1201                         break;
1202                 }
1203         }
1204
1205         if (this->tparent) {
1206                 clp=&this->sibling;
1207                 cl = *clp;
1208                 do {
1209                         if (cl == this) {
1210                                 *clp = cl->sibling;
1211                                 break;
1212                         }
1213                         clp = &cl->sibling;
1214                 } while ((cl = *clp) != this->sibling);
1215
1216                 if (this->tparent->children == this) {
1217                         this->tparent->children = this->sibling;
1218                         if (this->sibling == this)
1219                                 this->tparent->children = NULL;
1220                 }
1221         } else {
1222                 BUG_TRAP(this->sibling == this);
1223         }
1224 }
1225
1226 static void cbq_link_class(struct cbq_class *this)
1227 {
1228         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1229         unsigned h = cbq_hash(this->classid);
1230         struct cbq_class *parent = this->tparent;
1231
1232         this->sibling = this;
1233         this->next = q->classes[h];
1234         q->classes[h] = this;
1235
1236         if (parent == NULL)
1237                 return;
1238
1239         if (parent->children == NULL) {
1240                 parent->children = this;
1241         } else {
1242                 this->sibling = parent->children->sibling;
1243                 parent->children->sibling = this;
1244         }
1245 }
1246
1247 static unsigned int cbq_drop(struct Qdisc* sch)
1248 {
1249         struct cbq_sched_data *q = qdisc_priv(sch);
1250         struct cbq_class *cl, *cl_head;
1251         int prio;
1252         unsigned int len;
1253
1254         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1255                 if ((cl_head = q->active[prio]) == NULL)
1256                         continue;
1257
1258                 cl = cl_head;
1259                 do {
1260                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1261                                 sch->q.qlen--;
1262                                 return len;
1263                         }
1264                 } while ((cl = cl->next_alive) != cl_head);
1265         }
1266         return 0;
1267 }
1268
1269 static void
1270 cbq_reset(struct Qdisc* sch)
1271 {
1272         struct cbq_sched_data *q = qdisc_priv(sch);
1273         struct cbq_class *cl;
1274         int prio;
1275         unsigned h;
1276
1277         q->activemask = 0;
1278         q->pmask = 0;
1279         q->tx_class = NULL;
1280         q->tx_borrowed = NULL;
1281         del_timer(&q->wd_timer);
1282         del_timer(&q->delay_timer);
1283         q->toplevel = TC_CBQ_MAXLEVEL;
1284         PSCHED_GET_TIME(q->now);
1285         q->now_rt = q->now;
1286
1287         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1288                 q->active[prio] = NULL;
1289
1290         for (h = 0; h < 16; h++) {
1291                 for (cl = q->classes[h]; cl; cl = cl->next) {
1292                         qdisc_reset(cl->q);
1293
1294                         cl->next_alive = NULL;
1295                         PSCHED_SET_PASTPERFECT(cl->undertime);
1296                         cl->avgidle = cl->maxidle;
1297                         cl->deficit = cl->quantum;
1298                         cl->cpriority = cl->priority;
1299                 }
1300         }
1301         sch->q.qlen = 0;
1302 }
1303
1304
1305 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1306 {
1307         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1308                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1309                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1310         }
1311         if (lss->change&TCF_CBQ_LSS_EWMA)
1312                 cl->ewma_log = lss->ewma_log;
1313         if (lss->change&TCF_CBQ_LSS_AVPKT)
1314                 cl->avpkt = lss->avpkt;
1315         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1316                 cl->minidle = -(long)lss->minidle;
1317         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1318                 cl->maxidle = lss->maxidle;
1319                 cl->avgidle = lss->maxidle;
1320         }
1321         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1322                 cl->offtime = lss->offtime;
1323         return 0;
1324 }
1325
1326 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1327 {
1328         q->nclasses[cl->priority]--;
1329         q->quanta[cl->priority] -= cl->weight;
1330         cbq_normalize_quanta(q, cl->priority);
1331 }
1332
1333 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1334 {
1335         q->nclasses[cl->priority]++;
1336         q->quanta[cl->priority] += cl->weight;
1337         cbq_normalize_quanta(q, cl->priority);
1338 }
1339
1340 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1341 {
1342         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343
1344         if (wrr->allot)
1345                 cl->allot = wrr->allot;
1346         if (wrr->weight)
1347                 cl->weight = wrr->weight;
1348         if (wrr->priority) {
1349                 cl->priority = wrr->priority-1;
1350                 cl->cpriority = cl->priority;
1351                 if (cl->priority >= cl->priority2)
1352                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1353         }
1354
1355         cbq_addprio(q, cl);
1356         return 0;
1357 }
1358
1359 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1360 {
1361         switch (ovl->strategy) {
1362         case TC_CBQ_OVL_CLASSIC:
1363                 cl->overlimit = cbq_ovl_classic;
1364                 break;
1365         case TC_CBQ_OVL_DELAY:
1366                 cl->overlimit = cbq_ovl_delay;
1367                 break;
1368         case TC_CBQ_OVL_LOWPRIO:
1369                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1370                     ovl->priority2-1 <= cl->priority)
1371                         return -EINVAL;
1372                 cl->priority2 = ovl->priority2-1;
1373                 cl->overlimit = cbq_ovl_lowprio;
1374                 break;
1375         case TC_CBQ_OVL_DROP:
1376                 cl->overlimit = cbq_ovl_drop;
1377                 break;
1378         case TC_CBQ_OVL_RCLASSIC:
1379                 cl->overlimit = cbq_ovl_rclassic;
1380                 break;
1381         default:
1382                 return -EINVAL;
1383         }
1384         cl->penalty = (ovl->penalty*HZ)/1000;
1385         return 0;
1386 }
1387
1388 #ifdef CONFIG_NET_CLS_POLICE
1389 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1390 {
1391         cl->police = p->police;
1392
1393         if (cl->q->handle) {
1394                 if (p->police == TC_POLICE_RECLASSIFY)
1395                         cl->q->reshape_fail = cbq_reshape_fail;
1396                 else
1397                         cl->q->reshape_fail = NULL;
1398         }
1399         return 0;
1400 }
1401 #endif
1402
1403 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1404 {
1405         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1406         return 0;
1407 }
1408
1409 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1410 {
1411         struct cbq_sched_data *q = qdisc_priv(sch);
1412         struct rtattr *tb[TCA_CBQ_MAX];
1413         struct tc_ratespec *r;
1414
1415         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1416             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1417             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1418                 return -EINVAL;
1419
1420         if (tb[TCA_CBQ_LSSOPT-1] &&
1421             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1422                 return -EINVAL;
1423
1424         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1425
1426         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1427                 return -EINVAL;
1428
1429         q->link.refcnt = 1;
1430         q->link.sibling = &q->link;
1431         q->link.classid = sch->handle;
1432         q->link.qdisc = sch;
1433         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1434                 q->link.q = &noop_qdisc;
1435
1436         q->link.priority = TC_CBQ_MAXPRIO-1;
1437         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440         q->link.overlimit = cbq_ovl_classic;
1441         q->link.allot = psched_mtu(sch->dev);
1442         q->link.quantum = q->link.allot;
1443         q->link.weight = q->link.R_tab->rate.rate;
1444
1445         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446         q->link.avpkt = q->link.allot/2;
1447         q->link.minidle = -0x7FFFFFFF;
1448         q->link.stats_lock = &sch->dev->queue_lock;
1449
1450         init_timer(&q->wd_timer);
1451         q->wd_timer.data = (unsigned long)sch;
1452         q->wd_timer.function = cbq_watchdog;
1453         init_timer(&q->delay_timer);
1454         q->delay_timer.data = (unsigned long)sch;
1455         q->delay_timer.function = cbq_undelay;
1456         q->toplevel = TC_CBQ_MAXLEVEL;
1457         PSCHED_GET_TIME(q->now);
1458         q->now_rt = q->now;
1459
1460         cbq_link_class(&q->link);
1461
1462         if (tb[TCA_CBQ_LSSOPT-1])
1463                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465         cbq_addprio(q, &q->link);
1466         return 0;
1467 }
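
/* For orientation, a typical way this init path is exercised from user
 * space (device and numbers are only an example):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
 *
 * where tc derives the TCA_CBQ_RATE and TCA_CBQ_RTAB attributes from
 * "bandwidth" and a TCA_CBQ_LSSOPT attribute from "avpkt".
 */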
1468
1469 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 {
1471         unsigned char    *b = skb->tail;
1472
1473         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474         return skb->len;
1475
1476 rtattr_failure:
1477         skb_trim(skb, b - skb->data);
1478         return -1;
1479 }
1480
1481 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 {
1483         unsigned char    *b = skb->tail;
1484         struct tc_cbq_lssopt opt;
1485
1486         opt.flags = 0;
1487         if (cl->borrow == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489         if (cl->share == NULL)
1490                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491         opt.ewma_log = cl->ewma_log;
1492         opt.level = cl->level;
1493         opt.avpkt = cl->avpkt;
1494         opt.maxidle = cl->maxidle;
1495         opt.minidle = (u32)(-cl->minidle);
1496         opt.offtime = cl->offtime;
1497         opt.change = ~0;
1498         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499         return skb->len;
1500
1501 rtattr_failure:
1502         skb_trim(skb, b - skb->data);
1503         return -1;
1504 }
1505
1506 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 {
1508         unsigned char    *b = skb->tail;
1509         struct tc_cbq_wrropt opt;
1510
1511         opt.flags = 0;
1512         opt.allot = cl->allot;
1513         opt.priority = cl->priority+1;
1514         opt.cpriority = cl->cpriority+1;
1515         opt.weight = cl->weight;
1516         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517         return skb->len;
1518
1519 rtattr_failure:
1520         skb_trim(skb, b - skb->data);
1521         return -1;
1522 }
1523
1524 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char    *b = skb->tail;
1527         struct tc_cbq_ovl opt;
1528
1529         opt.strategy = cl->ovl_strategy;
1530         opt.priority2 = cl->priority2+1;
1531         opt.penalty = (cl->penalty*1000)/HZ;
1532         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1533         return skb->len;
1534
1535 rtattr_failure:
1536         skb_trim(skb, b - skb->data);
1537         return -1;
1538 }
1539
1540 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1541 {
1542         unsigned char    *b = skb->tail;
1543         struct tc_cbq_fopt opt;
1544
1545         if (cl->split || cl->defmap) {
1546                 opt.split = cl->split ? cl->split->classid : 0;
1547                 opt.defmap = cl->defmap;
1548                 opt.defchange = ~0;
1549                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1550         }
1551         return skb->len;
1552
1553 rtattr_failure:
1554         skb_trim(skb, b - skb->data);
1555         return -1;
1556 }
1557
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1560 {
1561         unsigned char    *b = skb->tail;
1562         struct tc_cbq_police opt;
1563
1564         if (cl->police) {
1565                 opt.police = cl->police;
1566                 opt.__res1 = 0;
1567                 opt.__res2 = 0;
1568                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1569         }
1570         return skb->len;
1571
1572 rtattr_failure:
1573         skb_trim(skb, b - skb->data);
1574         return -1;
1575 }
1576 #endif
1577
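     /*
      * Emit the complete attribute set for one class.  A negative return from
      * any helper means the skb ran out of tailroom and the dump is aborted.
      */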
1578 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1579 {
1580         if (cbq_dump_lss(skb, cl) < 0 ||
1581             cbq_dump_rate(skb, cl) < 0 ||
1582             cbq_dump_wrr(skb, cl) < 0 ||
1583             cbq_dump_ovl(skb, cl) < 0 ||
1584 #ifdef CONFIG_NET_CLS_POLICE
1585             cbq_dump_police(skb, cl) < 0 ||
1586 #endif
1587             cbq_dump_fopt(skb, cl) < 0)
1588                 return -1;
1589         return 0;
1590 }
1591
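     /*
      * Qdisc-level dump: wrap the root (link) class attributes in a nested
      * TCA_OPTIONS attribute and fix up its length afterwards.
      */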
1592 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1593 {
1594         struct cbq_sched_data *q = qdisc_priv(sch);
1595         unsigned char    *b = skb->tail;
1596         struct rtattr *rta;
1597
1598         rta = (struct rtattr*)b;
1599         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1600         if (cbq_dump_attr(skb, &q->link) < 0)
1601                 goto rtattr_failure;
1602         rta->rta_len = skb->tail - b;
1603         return skb->len;
1604
1605 rtattr_failure:
1606         skb_trim(skb, b - skb->data);
1607         return -1;
1608 }
1609
1610 static int
1611 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1612 {
1613         struct cbq_sched_data *q = qdisc_priv(sch);
1614
1615         q->link.xstats.avgidle = q->link.avgidle;
1616         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1617 }
1618
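     /*
      * Per-class dump: fill in the tcmsg header (parent, handle, child qdisc)
      * and append the same nested attribute set as cbq_dump().
      */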
1619 static int
1620 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1621                struct sk_buff *skb, struct tcmsg *tcm)
1622 {
1623         struct cbq_class *cl = (struct cbq_class*)arg;
1624         unsigned char    *b = skb->tail;
1625         struct rtattr *rta;
1626
1627         if (cl->tparent)
1628                 tcm->tcm_parent = cl->tparent->classid;
1629         else
1630                 tcm->tcm_parent = TC_H_ROOT;
1631         tcm->tcm_handle = cl->classid;
1632         tcm->tcm_info = cl->q->handle;
1633
1634         rta = (struct rtattr*)b;
1635         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636         if (cbq_dump_attr(skb, cl) < 0)
1637                 goto rtattr_failure;
1638         rta->rta_len = skb->tail - b;
1639         return skb->len;
1640
1641 rtattr_failure:
1642         skb_trim(skb, b - skb->data);
1643         return -1;
1644 }
1645
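     /*
      * Snapshot the volatile per-class state (queue length, avgidle,
      * undertime) before copying the basic, rate estimator and queue
      * statistics to userspace.
      */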
1646 static int
1647 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1648         struct gnet_dump *d)
1649 {
1650         struct cbq_sched_data *q = qdisc_priv(sch);
1651         struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653         cl->qstats.qlen = cl->q->q.qlen;
1654         cl->xstats.avgidle = cl->avgidle;
1655         cl->xstats.undertime = 0;
1656
1657         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1658                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1659
1660         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1661 #ifdef CONFIG_NET_ESTIMATOR
1662             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1663 #endif
1664             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1665                 return -1;
1666
1667         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1668 }
1669
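     /*
      * Replace the child qdisc of a class.  A NULL replacement attaches a
      * default pfifo; the previous qdisc is handed back to the caller.
      */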
1670 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1671                      struct Qdisc **old)
1672 {
1673         struct cbq_class *cl = (struct cbq_class*)arg;
1674
1675         if (cl) {
1676                 if (new == NULL) {
1677                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1678                                 return -ENOBUFS;
1679                 } else {
1680 #ifdef CONFIG_NET_CLS_POLICE
1681                         if (cl->police == TC_POLICE_RECLASSIFY)
1682                                 new->reshape_fail = cbq_reshape_fail;
1683 #endif
1684                 }
1685                 sch_tree_lock(sch);
1686                 *old = cl->q;
1687                 cl->q = new;
1688                 sch->q.qlen -= (*old)->q.qlen;
1689                 qdisc_reset(*old);
1690                 sch_tree_unlock(sch);
1691
1692                 return 0;
1693         }
1694         return -ENOENT;
1695 }
1696
1697 static struct Qdisc *
1698 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1699 {
1700         struct cbq_class *cl = (struct cbq_class*)arg;
1701
1702         return cl ? cl->q : NULL;
1703 }
1704
1705 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1706 {
1707         struct cbq_sched_data *q = qdisc_priv(sch);
1708         struct cbq_class *cl = cbq_class_lookup(q, classid);
1709
1710         if (cl) {
1711                 cl->refcnt++;
1712                 return (unsigned long)cl;
1713         }
1714         return 0;
1715 }
1716
1717 static void cbq_destroy_filters(struct cbq_class *cl)
1718 {
1719         struct tcf_proto *tp;
1720
1721         while ((tp = cl->filter_list) != NULL) {
1722                 cl->filter_list = tp->next;
1723                 tcf_destroy(tp);
1724         }
1725 }
1726
1727 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1728 {
1729         struct cbq_sched_data *q = qdisc_priv(sch);
1730
1731         BUG_TRAP(!cl->filters);
1732
1733         cbq_destroy_filters(cl);
1734         qdisc_destroy(cl->q);
1735         qdisc_put_rtab(cl->R_tab);
1736 #ifdef CONFIG_NET_ESTIMATOR
1737         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1738 #endif
1739         if (cl != &q->link)
1740                 kfree(cl);
1741 }
1742
1743 static void
1744 cbq_destroy(struct Qdisc* sch)
1745 {
1746         struct cbq_sched_data *q = qdisc_priv(sch);
1747         struct cbq_class *cl;
1748         unsigned h;
1749
1750 #ifdef CONFIG_NET_CLS_POLICE
1751         q->rx_class = NULL;
1752 #endif
1753         /*
1754          * Filters must be destroyed first because we don't destroy the
1755          * classes from root to leaves, which means that filters can still
1756          * be bound to classes which have been destroyed already. --TGR '04
1757          */
1758         for (h = 0; h < 16; h++)
1759                 for (cl = q->classes[h]; cl; cl = cl->next)
1760                         cbq_destroy_filters(cl);
1761
1762         for (h = 0; h < 16; h++) {
1763                 struct cbq_class *next;
1764
1765                 for (cl = q->classes[h]; cl; cl = next) {
1766                         next = cl->next;
1767                         cbq_destroy_class(sch, cl);
1768                 }
1769         }
1770 }
1771
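     /*
      * Drop a class reference; on the final put make sure the class is no
      * longer cached as rx_class (when policing is configured) before it is
      * destroyed.
      */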
1772 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1773 {
1774         struct cbq_class *cl = (struct cbq_class*)arg;
1775
1776         if (--cl->refcnt == 0) {
1777 #ifdef CONFIG_NET_CLS_POLICE
1778                 struct cbq_sched_data *q = qdisc_priv(sch);
1779
1780                 spin_lock_bh(&sch->dev->queue_lock);
1781                 if (q->rx_class == cl)
1782                         q->rx_class = NULL;
1783                 spin_unlock_bh(&sch->dev->queue_lock);
1784 #endif
1785
1786                 cbq_destroy_class(sch, cl);
1787         }
1788 }
1789
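     /*
      * Create a new class or change an existing one (*arg is non-zero for a
      * change request).  All attribute payload sizes are validated up front
      * so nothing is modified if the request is malformed.
      */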
1790 static int
1791 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1792                  unsigned long *arg)
1793 {
1794         int err;
1795         struct cbq_sched_data *q = qdisc_priv(sch);
1796         struct cbq_class *cl = (struct cbq_class*)*arg;
1797         struct rtattr *opt = tca[TCA_OPTIONS-1];
1798         struct rtattr *tb[TCA_CBQ_MAX];
1799         struct cbq_class *parent;
1800         struct qdisc_rate_table *rtab = NULL;
1801
1802         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1803                 return -EINVAL;
1804
1805         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1806             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1807                 return -EINVAL;
1808
1809         if (tb[TCA_CBQ_FOPT-1] &&
1810             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1811                 return -EINVAL;
1812
1813         if (tb[TCA_CBQ_RATE-1] &&
1814             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1815                 return -EINVAL;
1816
1817         if (tb[TCA_CBQ_LSSOPT-1] &&
1818             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1819                 return -EINVAL;
1820
1821         if (tb[TCA_CBQ_WRROPT-1] &&
1822             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1823                 return -EINVAL;
1824
1825 #ifdef CONFIG_NET_CLS_POLICE
1826         if (tb[TCA_CBQ_POLICE-1] &&
1827             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1828                 return -EINVAL;
1829 #endif
1830
1831         if (cl) {
1832                 /* Check parent */
1833                 if (parentid) {
1834                         if (cl->tparent && cl->tparent->classid != parentid)
1835                                 return -EINVAL;
1836                         if (!cl->tparent && parentid != TC_H_ROOT)
1837                                 return -EINVAL;
1838                 }
1839
1840                 if (tb[TCA_CBQ_RATE-1]) {
1841                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1842                         if (rtab == NULL)
1843                                 return -EINVAL;
1844                 }
1845
1846                 /* Change class parameters */
1847                 sch_tree_lock(sch);
1848
1849                 if (cl->next_alive != NULL)
1850                         cbq_deactivate_class(cl);
1851
1852                 if (rtab) {
1853                         rtab = xchg(&cl->R_tab, rtab);
1854                         qdisc_put_rtab(rtab);
1855                 }
1856
1857                 if (tb[TCA_CBQ_LSSOPT-1])
1858                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1859
1860                 if (tb[TCA_CBQ_WRROPT-1]) {
1861                         cbq_rmprio(q, cl);
1862                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1863                 }
1864
1865                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1866                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1867
1868 #ifdef CONFIG_NET_CLS_POLICE
1869                 if (tb[TCA_CBQ_POLICE-1])
1870                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1871 #endif
1872
1873                 if (tb[TCA_CBQ_FOPT-1])
1874                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1875
1876                 if (cl->q->q.qlen)
1877                         cbq_activate_class(cl);
1878
1879                 sch_tree_unlock(sch);
1880
1881 #ifdef CONFIG_NET_ESTIMATOR
1882                 if (tca[TCA_RATE-1])
1883                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1884                                 cl->stats_lock, tca[TCA_RATE-1]);
1885 #endif
1886                 return 0;
1887         }
1888
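             /* No existing class: validate the mandatory attributes and create one. */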
1889         if (parentid == TC_H_ROOT)
1890                 return -EINVAL;
1891
1892         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1893             tb[TCA_CBQ_LSSOPT-1] == NULL)
1894                 return -EINVAL;
1895
1896         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1897         if (rtab == NULL)
1898                 return -EINVAL;
1899
1900         if (classid) {
1901                 err = -EINVAL;
1902                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1903                         goto failure;
1904         } else {
1905                 int i;
1906                 classid = TC_H_MAKE(sch->handle,0x8000);
1907
1908                 for (i=0; i<0x8000; i++) {
1909                         if (++q->hgenerator >= 0x8000)
1910                                 q->hgenerator = 1;
1911                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1912                                 break;
1913                 }
1914                 err = -ENOSR;
1915                 if (i >= 0x8000)
1916                         goto failure;
1917                 classid = classid|q->hgenerator;
1918         }
1919
1920         parent = &q->link;
1921         if (parentid) {
1922                 parent = cbq_class_lookup(q, parentid);
1923                 err = -EINVAL;
1924                 if (parent == NULL)
1925                         goto failure;
1926         }
1927
1928         err = -ENOBUFS;
1929         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1930         if (cl == NULL)
1931                 goto failure;
1932         memset(cl, 0, sizeof(*cl));
1933         cl->R_tab = rtab;
1934         rtab = NULL;
1935         cl->refcnt = 1;
1936         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1937                 cl->q = &noop_qdisc;
1938         cl->classid = classid;
1939         cl->tparent = parent;
1940         cl->qdisc = sch;
1941         cl->allot = parent->allot;
1942         cl->quantum = cl->allot;
1943         cl->weight = cl->R_tab->rate.rate;
1944         cl->stats_lock = &sch->dev->queue_lock;
1945
1946         sch_tree_lock(sch);
1947         cbq_link_class(cl);
1948         cl->borrow = cl->tparent;
1949         if (cl->tparent != &q->link)
1950                 cl->share = cl->tparent;
1951         cbq_adjust_levels(parent);
1952         cl->minidle = -0x7FFFFFFF;
1953         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1954         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1955         if (cl->ewma_log==0)
1956                 cl->ewma_log = q->link.ewma_log;
1957         if (cl->maxidle==0)
1958                 cl->maxidle = q->link.maxidle;
1959         if (cl->avpkt==0)
1960                 cl->avpkt = q->link.avpkt;
1961         cl->overlimit = cbq_ovl_classic;
1962         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1963                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1964 #ifdef CONFIG_NET_CLS_POLICE
1965         if (tb[TCA_CBQ_POLICE-1])
1966                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1967 #endif
1968         if (tb[TCA_CBQ_FOPT-1])
1969                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1970         sch_tree_unlock(sch);
1971
1972 #ifdef CONFIG_NET_ESTIMATOR
1973         if (tca[TCA_RATE-1])
1974                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1975                         cl->stats_lock, tca[TCA_RATE-1]);
1976 #endif
1977
1978         *arg = (unsigned long)cl;
1979         return 0;
1980
1981 failure:
1982         qdisc_put_rtab(rtab);
1983         return err;
1984 }
1985
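     /*
      * A class can only be deleted when no filters or child classes refer to
      * it; it is unlinked under the tree lock and freed once the last
      * reference is dropped.
      */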
1986 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1987 {
1988         struct cbq_sched_data *q = qdisc_priv(sch);
1989         struct cbq_class *cl = (struct cbq_class*)arg;
1990
1991         if (cl->filters || cl->children || cl == &q->link)
1992                 return -EBUSY;
1993
1994         sch_tree_lock(sch);
1995
1996         if (cl->next_alive)
1997                 cbq_deactivate_class(cl);
1998
1999         if (q->tx_borrowed == cl)
2000                 q->tx_borrowed = q->tx_class;
2001         if (q->tx_class == cl) {
2002                 q->tx_class = NULL;
2003                 q->tx_borrowed = NULL;
2004         }
2005 #ifdef CONFIG_NET_CLS_POLICE
2006         if (q->rx_class == cl)
2007                 q->rx_class = NULL;
2008 #endif
2009
2010         cbq_unlink_class(cl);
2011         cbq_adjust_levels(cl->tparent);
2012         cl->defmap = 0;
2013         cbq_sync_defmap(cl);
2014
2015         cbq_rmprio(q, cl);
2016         sch_tree_unlock(sch);
2017
2018         if (--cl->refcnt == 0)
2019                 cbq_destroy_class(sch, cl);
2020
2021         return 0;
2022 }
2023
2024 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2025 {
2026         struct cbq_sched_data *q = qdisc_priv(sch);
2027         struct cbq_class *cl = (struct cbq_class *)arg;
2028
2029         if (cl == NULL)
2030                 cl = &q->link;
2031
2032         return &cl->filter_list;
2033 }
2034
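     /*
      * Binding a filter result to a class bumps its filter count; the target
      * class must sit below the filter's parent class in the hierarchy.
      */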
2035 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2036                                      u32 classid)
2037 {
2038         struct cbq_sched_data *q = qdisc_priv(sch);
2039         struct cbq_class *p = (struct cbq_class*)parent;
2040         struct cbq_class *cl = cbq_class_lookup(q, classid);
2041
2042         if (cl) {
2043                 if (p && p->level <= cl->level)
2044                         return 0;
2045                 cl->filters++;
2046                 return (unsigned long)cl;
2047         }
2048         return 0;
2049 }
2050
2051 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2052 {
2053         struct cbq_class *cl = (struct cbq_class*)arg;
2054
2055         cl->filters--;
2056 }
2057
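     /*
      * Walk every class in the 16-bucket hash table, honouring the walker's
      * skip/count/stop bookkeeping.
      */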
2058 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2059 {
2060         struct cbq_sched_data *q = qdisc_priv(sch);
2061         unsigned h;
2062
2063         if (arg->stop)
2064                 return;
2065
2066         for (h = 0; h < 16; h++) {
2067                 struct cbq_class *cl;
2068
2069                 for (cl = q->classes[h]; cl; cl = cl->next) {
2070                         if (arg->count < arg->skip) {
2071                                 arg->count++;
2072                                 continue;
2073                         }
2074                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2075                                 arg->stop = 1;
2076                                 return;
2077                         }
2078                         arg->count++;
2079                 }
2080         }
2081 }
2082
2083 static struct Qdisc_class_ops cbq_class_ops = {
2084         .graft          =       cbq_graft,
2085         .leaf           =       cbq_leaf,
2086         .get            =       cbq_get,
2087         .put            =       cbq_put,
2088         .change         =       cbq_change_class,
2089         .delete         =       cbq_delete,
2090         .walk           =       cbq_walk,
2091         .tcf_chain      =       cbq_find_tcf,
2092         .bind_tcf       =       cbq_bind_filter,
2093         .unbind_tcf     =       cbq_unbind_filter,
2094         .dump           =       cbq_dump_class,
2095         .dump_stats     =       cbq_dump_class_stats,
2096 };
2097
2098 static struct Qdisc_ops cbq_qdisc_ops = {
2099         .next           =       NULL,
2100         .cl_ops         =       &cbq_class_ops,
2101         .id             =       "cbq",
2102         .priv_size      =       sizeof(struct cbq_sched_data),
2103         .enqueue        =       cbq_enqueue,
2104         .dequeue        =       cbq_dequeue,
2105         .requeue        =       cbq_requeue,
2106         .drop           =       cbq_drop,
2107         .init           =       cbq_init,
2108         .reset          =       cbq_reset,
2109         .destroy        =       cbq_destroy,
2110         .change         =       NULL,
2111         .dump           =       cbq_dump,
2112         .dump_stats     =       cbq_dump_stats,
2113         .owner          =       THIS_MODULE,
2114 };
2115
2116 static int __init cbq_module_init(void)
2117 {
2118         return register_qdisc(&cbq_qdisc_ops);
2119 }
2120 static void __exit cbq_module_exit(void)
2121 {
2122         unregister_qdisc(&cbq_qdisc_ops);
2123 }
2124 module_init(cbq_module_init)
2125 module_exit(cbq_module_exit)
2126 MODULE_LICENSE("GPL");