[NETLINK]: Use nlmsg_trim() where appropriate
[linux-2.6.git] / net / sched / sch_cbq.c
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/netlink.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         Algorithm skeleton was taken from NS simulator cbq.cc.
57         If someone wants to check this code against the LBL version,
58         he should take into account that ONLY the skeleton was borrowed,
59         the implementation is different. Particularly:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than MTU, which is always the case when real time classes
64         have small rates. Note, that the statement of [3] is
65         incomplete, delay may actually be estimated even if class
66         per-round allotment is less than MTU. Namely, if per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like wrong translations
77         from NS. Anyone is advised to find these differences
78         and explain to me, why I am wrong 8).
79
80         --- Linux has no EOI event, so that we cannot estimate true class
81         idle time. Workaround is to consider the next dequeue event
82         as sign that previous packet is finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it is true.
84         Moreover, combined with clock integrator, this scheme looks
85         very close to an ideal solution.  */
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         psched_tdiff_t          penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         psched_time_t           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct hrtimer          delay_timer;
185         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         psched_tdiff_t          wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
237    so that it resolves to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
240  */
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If classifier selected a link sharing class,
303                  *         apply agency specific classifier.
304                  *         Repeat this procdure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued on the empty class.
325    cbq_activate_class adds it to the tail of active class list
326    of its priority band.
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
346
347 /*
348    Unlink class from active chain.
349    Note that this same procedure is done directly in cbq_dequeue*
350    during round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374                         return;
375                 }
376         } while ((cl_prev = cl) != q->active[prio]);
377 }
378
379 static void
380 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 {
382         int toplevel = q->toplevel;
383
384         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385                 psched_time_t now;
386                 psched_tdiff_t incr;
387
388                 PSCHED_GET_TIME(now);
389                 incr = PSCHED_TDIFF(now, q->now_rt);
390                 PSCHED_TADD2(q->now, incr, now);
391
392                 do {
393                         if (PSCHED_TLESS(cl->undertime, now)) {
394                                 q->toplevel = cl->level;
395                                 return;
396                         }
397                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398         }
399 }
400
401 static int
402 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 {
404         struct cbq_sched_data *q = qdisc_priv(sch);
405         int len = skb->len;
406         int ret;
407         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409 #ifdef CONFIG_NET_CLS_POLICE
410         q->rx_class = cl;
411 #endif
412         if (cl == NULL) {
413                 if (ret == NET_XMIT_BYPASS)
414                         sch->qstats.drops++;
415                 kfree_skb(skb);
416                 return ret;
417         }
418
419 #ifdef CONFIG_NET_CLS_POLICE
420         cl->q->__parent = sch;
421 #endif
422         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423                 sch->q.qlen++;
424                 sch->bstats.packets++;
425                 sch->bstats.bytes+=len;
426                 cbq_mark_toplevel(q, cl);
427                 if (!cl->next_alive)
428                         cbq_activate_class(cl);
429                 return ret;
430         }
431
432         sch->qstats.drops++;
433         cbq_mark_toplevel(q, cl);
434         cl->qstats.drops++;
435         return ret;
436 }
437
438 static int
439 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 {
441         struct cbq_sched_data *q = qdisc_priv(sch);
442         struct cbq_class *cl;
443         int ret;
444
445         if ((cl = q->tx_class) == NULL) {
446                 kfree_skb(skb);
447                 sch->qstats.drops++;
448                 return NET_XMIT_CN;
449         }
450         q->tx_class = NULL;
451
452         cbq_mark_toplevel(q, cl);
453
454 #ifdef CONFIG_NET_CLS_POLICE
455         q->rx_class = cl;
456         cl->q->__parent = sch;
457 #endif
458         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459                 sch->q.qlen++;
460                 sch->qstats.requeues++;
461                 if (!cl->next_alive)
462                         cbq_activate_class(cl);
463                 return 0;
464         }
465         sch->qstats.drops++;
466         cl->qstats.drops++;
467         return ret;
468 }
469
470 /* Overlimit actions */
471
472 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474 static void cbq_ovl_classic(struct cbq_class *cl)
475 {
476         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478
479         if (!cl->delayed) {
480                 delay += cl->offtime;
481
482                 /*
483                    Class goes to sleep, so that it will have no
484                    chance to work avgidle. Let's forgive it 8)
485
486                    BTW cbq-2.0 has a crap in this
487                    place, apparently they forgot to shift it by cl->ewma_log.
488                  */
489                 if (cl->avgidle < 0)
490                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491                 if (cl->avgidle < cl->minidle)
492                         cl->avgidle = cl->minidle;
493                 if (delay <= 0)
494                         delay = 1;
495                 PSCHED_TADD2(q->now, delay, cl->undertime);
496
497                 cl->xstats.overactions++;
498                 cl->delayed = 1;
499         }
500         if (q->wd_expires == 0 || q->wd_expires > delay)
501                 q->wd_expires = delay;
502
503         /* Dirty work! We must schedule wakeups based on
504            real available rate, rather than leaf rate,
505            which may be tiny (even zero).
506          */
507         if (q->toplevel == TC_CBQ_MAXLEVEL) {
508                 struct cbq_class *b;
509                 psched_tdiff_t base_delay = q->wd_expires;
510
511                 for (b = cl->borrow; b; b = b->borrow) {
512                         delay = PSCHED_TDIFF(b->undertime, q->now);
513                         if (delay < base_delay) {
514                                 if (delay <= 0)
515                                         delay = 1;
516                                 base_delay = delay;
517                         }
518                 }
519
520                 q->wd_expires = base_delay;
521         }
522 }
523
524 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
525    they go overlimit
526  */
527
528 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 {
530         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531         struct cbq_class *this = cl;
532
533         do {
534                 if (cl->level > q->toplevel) {
535                         cl = NULL;
536                         break;
537                 }
538         } while ((cl = cl->borrow) != NULL);
539
540         if (cl == NULL)
541                 cl = this;
542         cbq_ovl_classic(cl);
543 }
544
545 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
546
547 static void cbq_ovl_delay(struct cbq_class *cl)
548 {
549         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551
552         if (!cl->delayed) {
553                 psched_time_t sched = q->now;
554                 ktime_t expires;
555
556                 delay += cl->offtime;
557                 if (cl->avgidle < 0)
558                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559                 if (cl->avgidle < cl->minidle)
560                         cl->avgidle = cl->minidle;
561                 PSCHED_TADD2(q->now, delay, cl->undertime);
562
563                 if (delay > 0) {
564                         sched += delay + cl->penalty;
565                         cl->penalized = sched;
566                         cl->cpriority = TC_CBQ_MAXPRIO;
567                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
568
569                         expires = ktime_set(0, 0);
570                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
572                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
573                                                   expires)) > 0)
574                                 q->delay_timer.expires = expires;
575                         hrtimer_restart(&q->delay_timer);
576                         cl->delayed = 1;
577                         cl->xstats.overactions++;
578                         return;
579                 }
580                 delay = 1;
581         }
582         if (q->wd_expires == 0 || q->wd_expires > delay)
583                 q->wd_expires = delay;
584 }
585
586 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588 static void cbq_ovl_lowprio(struct cbq_class *cl)
589 {
590         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
592         cl->penalized = q->now + cl->penalty;
593
594         if (cl->cpriority != cl->priority2) {
595                 cl->cpriority = cl->priority2;
596                 q->pmask |= (1<<cl->cpriority);
597                 cl->xstats.overactions++;
598         }
599         cbq_ovl_classic(cl);
600 }
601
602 /* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604 static void cbq_ovl_drop(struct cbq_class *cl)
605 {
606         if (cl->q->ops->drop)
607                 if (cl->q->ops->drop(cl->q))
608                         cl->qdisc->q.qlen--;
609         cl->xstats.overactions++;
610         cbq_ovl_classic(cl);
611 }
612
613 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614                                        psched_time_t now)
615 {
616         struct cbq_class *cl;
617         struct cbq_class *cl_prev = q->active[prio];
618         psched_time_t sched = now;
619
620         if (cl_prev == NULL)
621                 return 0;
622
623         do {
624                 cl = cl_prev->next_alive;
625                 if (now - cl->penalized > 0) {
626                         cl_prev->next_alive = cl->next_alive;
627                         cl->next_alive = NULL;
628                         cl->cpriority = cl->priority;
629                         cl->delayed = 0;
630                         cbq_activate_class(cl);
631
632                         if (cl == q->active[prio]) {
633                                 q->active[prio] = cl_prev;
634                                 if (cl == q->active[prio]) {
635                                         q->active[prio] = NULL;
636                                         return 0;
637                                 }
638                         }
639
640                         cl = cl_prev->next_alive;
641                 } else if (sched - cl->penalized > 0)
642                         sched = cl->penalized;
643         } while ((cl_prev = cl) != q->active[prio]);
644
645         return sched - now;
646 }
647
648 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649 {
650         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651                                                 delay_timer);
652         struct Qdisc *sch = q->watchdog.qdisc;
653         psched_time_t now;
654         psched_tdiff_t delay = 0;
655         unsigned pmask;
656
657         PSCHED_GET_TIME(now);
658
659         pmask = q->pmask;
660         q->pmask = 0;
661
662         while (pmask) {
663                 int prio = ffz(~pmask);
664                 psched_tdiff_t tmp;
665
666                 pmask &= ~(1<<prio);
667
668                 tmp = cbq_undelay_prio(q, prio, now);
669                 if (tmp > 0) {
670                         q->pmask |= 1<<prio;
671                         if (tmp < delay || delay == 0)
672                                 delay = tmp;
673                 }
674         }
675
676         if (delay) {
677                 ktime_t time;
678
679                 time = ktime_set(0, 0);
680                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
682         }
683
684         sch->flags &= ~TCQ_F_THROTTLED;
685         netif_schedule(sch->dev);
686         return HRTIMER_NORESTART;
687 }
688
689
690 #ifdef CONFIG_NET_CLS_POLICE
691
692 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693 {
694         int len = skb->len;
695         struct Qdisc *sch = child->__parent;
696         struct cbq_sched_data *q = qdisc_priv(sch);
697         struct cbq_class *cl = q->rx_class;
698
699         q->rx_class = NULL;
700
701         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703                 cbq_mark_toplevel(q, cl);
704
705                 q->rx_class = cl;
706                 cl->q->__parent = sch;
707
708                 if (cl->q->enqueue(skb, cl->q) == 0) {
709                         sch->q.qlen++;
710                         sch->bstats.packets++;
711                         sch->bstats.bytes+=len;
712                         if (!cl->next_alive)
713                                 cbq_activate_class(cl);
714                         return 0;
715                 }
716                 sch->qstats.drops++;
717                 return 0;
718         }
719
720         sch->qstats.drops++;
721         return -1;
722 }
723 #endif
724
725 /*
726    It is mission critical procedure.
727
728    We "regenerate" toplevel cutoff, if transmitting class
729    has backlog and it is not regulated. It is not part of
730    original CBQ description, but looks more reasonable.
731    Probably, it is wrong. This question needs further investigation.
732 */
733
734 static __inline__ void
735 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736                     struct cbq_class *borrowed)
737 {
738         if (cl && q->toplevel >= borrowed->level) {
739                 if (cl->q->q.qlen > 1) {
740                         do {
741                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
742                                         q->toplevel = borrowed->level;
743                                         return;
744                                 }
745                         } while ((borrowed=borrowed->borrow) != NULL);
746                 }
747 #if 0
748         /* It is not necessary now. Uncommenting it
749            will save CPU cycles, but decrease fairness.
750          */
751                 q->toplevel = TC_CBQ_MAXLEVEL;
752 #endif
753         }
754 }
755
756 static void
757 cbq_update(struct cbq_sched_data *q)
758 {
759         struct cbq_class *this = q->tx_class;
760         struct cbq_class *cl = this;
761         int len = q->tx_len;
762
763         q->tx_class = NULL;
764
765         for ( ; cl; cl = cl->share) {
766                 long avgidle = cl->avgidle;
767                 long idle;
768
769                 cl->bstats.packets++;
770                 cl->bstats.bytes += len;
771
772                 /*
773                    (now - last) is total time between packet right edges.
774                    (last_pktlen/rate) is "virtual" busy time, so that
775
776                          idle = (now - last) - last_pktlen/rate
777                  */
778
779                 idle = PSCHED_TDIFF(q->now, cl->last);
780                 if ((unsigned long)idle > 128*1024*1024) {
781                         avgidle = cl->maxidle;
782                 } else {
783                         idle -= L2T(cl, len);
784
785                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
787                    cl->avgidle == true_avgidle/W,
788                    hence:
789                  */
790                         avgidle += idle - (avgidle>>cl->ewma_log);
791                 }
792
793                 if (avgidle <= 0) {
794                         /* Overlimit or at-limit */
795
796                         if (avgidle < cl->minidle)
797                                 avgidle = cl->minidle;
798
799                         cl->avgidle = avgidle;
800
801                         /* Calculate expected time, when this class
802                            will be allowed to send.
803                            It will occur, when:
804                            (1-W)*true_avgidle + W*delay = 0, i.e.
805                            idle = (1/W - 1)*(-true_avgidle)
806                            or
807                            idle = (1 - W)*(-cl->avgidle);
808                          */
809                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811                         /*
812                            That is not all.
813                            To maintain the rate allocated to the class,
814                            we add to undertime virtual clock,
815                            necessary to complete transmitted packet.
816                            (len/phys_bandwidth has been already passed
817                            to the moment of cbq_update)
818                          */
819
820                         idle -= L2T(&q->link, len);
821                         idle += L2T(cl, len);
822
823                         PSCHED_AUDIT_TDIFF(idle);
824
825                         PSCHED_TADD2(q->now, idle, cl->undertime);
826                 } else {
827                         /* Underlimit */
828
829                         PSCHED_SET_PASTPERFECT(cl->undertime);
830                         if (avgidle > cl->maxidle)
831                                 cl->avgidle = cl->maxidle;
832                         else
833                                 cl->avgidle = avgidle;
834                 }
835                 cl->last = q->now;
836         }
837
838         cbq_update_toplevel(q, this, q->tx_borrowed);
839 }
840
841 static __inline__ struct cbq_class *
842 cbq_under_limit(struct cbq_class *cl)
843 {
844         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
845         struct cbq_class *this_cl = cl;
846
847         if (cl->tparent == NULL)
848                 return cl;
849
850         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
851             !PSCHED_TLESS(q->now, cl->undertime)) {
852                 cl->delayed = 0;
853                 return cl;
854         }
855
856         do {
857                 /* It is very suspicious place. Now overlimit
858                    action is generated for not bounded classes
859                    only if link is completely congested.
860                    Though it is in agree with ancestor-only paradigm,
861                    it looks very stupid. Particularly,
862                    it means that this chunk of code will either
863                    never be called or result in strong amplification
864                    of burstiness. Dangerous, silly, and, however,
865                    no another solution exists.
866                  */
867                 if ((cl = cl->borrow) == NULL) {
868                         this_cl->qstats.overlimits++;
869                         this_cl->overlimit(this_cl);
870                         return NULL;
871                 }
872                 if (cl->level > q->toplevel)
873                         return NULL;
874         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
875                  PSCHED_TLESS(q->now, cl->undertime));
876
877         cl->delayed = 0;
878         return cl;
879 }
880
881 static __inline__ struct sk_buff *
882 cbq_dequeue_prio(struct Qdisc *sch, int prio)
883 {
884         struct cbq_sched_data *q = qdisc_priv(sch);
885         struct cbq_class *cl_tail, *cl_prev, *cl;
886         struct sk_buff *skb;
887         int deficit;
888
889         cl_tail = cl_prev = q->active[prio];
890         cl = cl_prev->next_alive;
891
892         do {
893                 deficit = 0;
894
895                 /* Start round */
896                 do {
897                         struct cbq_class *borrow = cl;
898
899                         if (cl->q->q.qlen &&
900                             (borrow = cbq_under_limit(cl)) == NULL)
901                                 goto skip_class;
902
903                         if (cl->deficit <= 0) {
904                                 /* Class exhausted its allotment per
905                                    this round. Switch to the next one.
906                                  */
907                                 deficit = 1;
908                                 cl->deficit += cl->quantum;
909                                 goto next_class;
910                         }
911
912                         skb = cl->q->dequeue(cl->q);
913
914                         /* Class did not give us any skb :-(
915                            It could occur even if cl->q->q.qlen != 0
916                            f.e. if cl->q == "tbf"
917                          */
918                         if (skb == NULL)
919                                 goto skip_class;
920
921                         cl->deficit -= skb->len;
922                         q->tx_class = cl;
923                         q->tx_borrowed = borrow;
924                         if (borrow != cl) {
925 #ifndef CBQ_XSTATS_BORROWS_BYTES
926                                 borrow->xstats.borrows++;
927                                 cl->xstats.borrows++;
928 #else
929                                 borrow->xstats.borrows += skb->len;
930                                 cl->xstats.borrows += skb->len;
931 #endif
932                         }
933                         q->tx_len = skb->len;
934
935                         if (cl->deficit <= 0) {
936                                 q->active[prio] = cl;
937                                 cl = cl->next_alive;
938                                 cl->deficit += cl->quantum;
939                         }
940                         return skb;
941
942 skip_class:
943                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
944                                 /* Class is empty or penalized.
945                                    Unlink it from active chain.
946                                  */
947                                 cl_prev->next_alive = cl->next_alive;
948                                 cl->next_alive = NULL;
949
950                                 /* Did cl_tail point to it? */
951                                 if (cl == cl_tail) {
952                                         /* Repair it! */
953                                         cl_tail = cl_prev;
954
955                                         /* Was it the last class in this band? */
956                                         if (cl == cl_tail) {
957                                                 /* Kill the band! */
958                                                 q->active[prio] = NULL;
959                                                 q->activemask &= ~(1<<prio);
960                                                 if (cl->q->q.qlen)
961                                                         cbq_activate_class(cl);
962                                                 return NULL;
963                                         }
964
965                                         q->active[prio] = cl_tail;
966                                 }
967                                 if (cl->q->q.qlen)
968                                         cbq_activate_class(cl);
969
970                                 cl = cl_prev;
971                         }
972
973 next_class:
974                         cl_prev = cl;
975                         cl = cl->next_alive;
976                 } while (cl_prev != cl_tail);
977         } while (deficit);
978
979         q->active[prio] = cl_prev;
980
981         return NULL;
982 }
983
984 static __inline__ struct sk_buff *
985 cbq_dequeue_1(struct Qdisc *sch)
986 {
987         struct cbq_sched_data *q = qdisc_priv(sch);
988         struct sk_buff *skb;
989         unsigned activemask;
990
991         activemask = q->activemask&0xFF;
992         while (activemask) {
993                 int prio = ffz(~activemask);
994                 activemask &= ~(1<<prio);
995                 skb = cbq_dequeue_prio(sch, prio);
996                 if (skb)
997                         return skb;
998         }
999         return NULL;
1000 }
1001
1002 static struct sk_buff *
1003 cbq_dequeue(struct Qdisc *sch)
1004 {
1005         struct sk_buff *skb;
1006         struct cbq_sched_data *q = qdisc_priv(sch);
1007         psched_time_t now;
1008         psched_tdiff_t incr;
1009
1010         PSCHED_GET_TIME(now);
1011         incr = PSCHED_TDIFF(now, q->now_rt);
1012
1013         if (q->tx_class) {
1014                 psched_tdiff_t incr2;
1015                 /* Time integrator. We calculate EOS time
1016                    by adding expected packet transmission time.
1017                    If real time is greater, we warp artificial clock,
1018                    so that:
1019
1020                    cbq_time = max(real_time, work);
1021                  */
1022                 incr2 = L2T(&q->link, q->tx_len);
1023                 PSCHED_TADD(q->now, incr2);
1024                 cbq_update(q);
1025                 if ((incr -= incr2) < 0)
1026                         incr = 0;
1027         }
1028         PSCHED_TADD(q->now, incr);
1029         q->now_rt = now;
1030
1031         for (;;) {
1032                 q->wd_expires = 0;
1033
1034                 skb = cbq_dequeue_1(sch);
1035                 if (skb) {
1036                         sch->q.qlen--;
1037                         sch->flags &= ~TCQ_F_THROTTLED;
1038                         return skb;
1039                 }
1040
1041                 /* All the classes are overlimit.
1042
1043                    It is possible, if:
1044
1045                    1. Scheduler is empty.
1046                    2. Toplevel cutoff inhibited borrowing.
1047                    3. Root class is overlimit.
1048
1049                    Reset 2d and 3d conditions and retry.
1050
1051                    Note, that NS and cbq-2.0 are buggy, peeking
1052                    an arbitrary class is appropriate for ancestor-only
1053                    sharing, but not for toplevel algorithm.
1054
1055                    Our version is better, but slower, because it requires
1056                    two passes, but it is unavoidable with top-level sharing.
1057                 */
1058
1059                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1060                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1061                         break;
1062
1063                 q->toplevel = TC_CBQ_MAXLEVEL;
1064                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1065         }
1066
1067         /* No packets in scheduler or nobody wants to give them to us :-(
1068            Sigh... start watchdog timer in the last case. */
1069
1070         if (sch->q.qlen) {
1071                 sch->qstats.overlimits++;
1072                 if (q->wd_expires)
1073                         qdisc_watchdog_schedule(&q->watchdog,
1074                                                 now + q->wd_expires);
1075         }
1076         return NULL;
1077 }
1078
1079 /* CBQ class maintanance routines */
1080
1081 static void cbq_adjust_levels(struct cbq_class *this)
1082 {
1083         if (this == NULL)
1084                 return;
1085
1086         do {
1087                 int level = 0;
1088                 struct cbq_class *cl;
1089
1090                 if ((cl = this->children) != NULL) {
1091                         do {
1092                                 if (cl->level > level)
1093                                         level = cl->level;
1094                         } while ((cl = cl->sibling) != this->children);
1095                 }
1096                 this->level = level+1;
1097         } while ((this = this->tparent) != NULL);
1098 }
1099
1100 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1101 {
1102         struct cbq_class *cl;
1103         unsigned h;
1104
1105         if (q->quanta[prio] == 0)
1106                 return;
1107
1108         for (h=0; h<16; h++) {
1109                 for (cl = q->classes[h]; cl; cl = cl->next) {
1110                         /* BUGGGG... Beware! This expression suffer of
1111                            arithmetic overflows!
1112                          */
1113                         if (cl->priority == prio) {
1114                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1115                                         q->quanta[prio];
1116                         }
1117                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1118                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1119                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1120                         }
1121                 }
1122         }
1123 }
1124
1125 static void cbq_sync_defmap(struct cbq_class *cl)
1126 {
1127         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1128         struct cbq_class *split = cl->split;
1129         unsigned h;
1130         int i;
1131
1132         if (split == NULL)
1133                 return;
1134
1135         for (i=0; i<=TC_PRIO_MAX; i++) {
1136                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1137                         split->defaults[i] = NULL;
1138         }
1139
1140         for (i=0; i<=TC_PRIO_MAX; i++) {
1141                 int level = split->level;
1142
1143                 if (split->defaults[i])
1144                         continue;
1145
1146                 for (h=0; h<16; h++) {
1147                         struct cbq_class *c;
1148
1149                         for (c = q->classes[h]; c; c = c->next) {
1150                                 if (c->split == split && c->level < level &&
1151                                     c->defmap&(1<<i)) {
1152                                         split->defaults[i] = c;
1153                                         level = c->level;
1154                                 }
1155                         }
1156                 }
1157         }
1158 }
1159
1160 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1161 {
1162         struct cbq_class *split = NULL;
1163
1164         if (splitid == 0) {
1165                 if ((split = cl->split) == NULL)
1166                         return;
1167                 splitid = split->classid;
1168         }
1169
1170         if (split == NULL || split->classid != splitid) {
1171                 for (split = cl->tparent; split; split = split->tparent)
1172                         if (split->classid == splitid)
1173                                 break;
1174         }
1175
1176         if (split == NULL)
1177                 return;
1178
1179         if (cl->split != split) {
1180                 cl->defmap = 0;
1181                 cbq_sync_defmap(cl);
1182                 cl->split = split;
1183                 cl->defmap = def&mask;
1184         } else
1185                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1186
1187         cbq_sync_defmap(cl);
1188 }
1189
1190 static void cbq_unlink_class(struct cbq_class *this)
1191 {
1192         struct cbq_class *cl, **clp;
1193         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1194
1195         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1196                 if (cl == this) {
1197                         *clp = cl->next;
1198                         cl->next = NULL;
1199                         break;
1200                 }
1201         }
1202
1203         if (this->tparent) {
1204                 clp=&this->sibling;
1205                 cl = *clp;
1206                 do {
1207                         if (cl == this) {
1208                                 *clp = cl->sibling;
1209                                 break;
1210                         }
1211                         clp = &cl->sibling;
1212                 } while ((cl = *clp) != this->sibling);
1213
1214                 if (this->tparent->children == this) {
1215                         this->tparent->children = this->sibling;
1216                         if (this->sibling == this)
1217                                 this->tparent->children = NULL;
1218                 }
1219         } else {
1220                 BUG_TRAP(this->sibling == this);
1221         }
1222 }
1223
1224 static void cbq_link_class(struct cbq_class *this)
1225 {
1226         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1227         unsigned h = cbq_hash(this->classid);
1228         struct cbq_class *parent = this->tparent;
1229
1230         this->sibling = this;
1231         this->next = q->classes[h];
1232         q->classes[h] = this;
1233
1234         if (parent == NULL)
1235                 return;
1236
1237         if (parent->children == NULL) {
1238                 parent->children = this;
1239         } else {
1240                 this->sibling = parent->children->sibling;
1241                 parent->children->sibling = this;
1242         }
1243 }
1244
1245 static unsigned int cbq_drop(struct Qdisc* sch)
1246 {
1247         struct cbq_sched_data *q = qdisc_priv(sch);
1248         struct cbq_class *cl, *cl_head;
1249         int prio;
1250         unsigned int len;
1251
1252         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1253                 if ((cl_head = q->active[prio]) == NULL)
1254                         continue;
1255
1256                 cl = cl_head;
1257                 do {
1258                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1259                                 sch->q.qlen--;
1260                                 if (!cl->q->q.qlen)
1261                                         cbq_deactivate_class(cl);
1262                                 return len;
1263                         }
1264                 } while ((cl = cl->next_alive) != cl_head);
1265         }
1266         return 0;
1267 }
1268
1269 static void
1270 cbq_reset(struct Qdisc* sch)
1271 {
1272         struct cbq_sched_data *q = qdisc_priv(sch);
1273         struct cbq_class *cl;
1274         int prio;
1275         unsigned h;
1276
1277         q->activemask = 0;
1278         q->pmask = 0;
1279         q->tx_class = NULL;
1280         q->tx_borrowed = NULL;
1281         qdisc_watchdog_cancel(&q->watchdog);
1282         hrtimer_cancel(&q->delay_timer);
1283         q->toplevel = TC_CBQ_MAXLEVEL;
1284         PSCHED_GET_TIME(q->now);
1285         q->now_rt = q->now;
1286
1287         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1288                 q->active[prio] = NULL;
1289
1290         for (h = 0; h < 16; h++) {
1291                 for (cl = q->classes[h]; cl; cl = cl->next) {
1292                         qdisc_reset(cl->q);
1293
1294                         cl->next_alive = NULL;
1295                         PSCHED_SET_PASTPERFECT(cl->undertime);
1296                         cl->avgidle = cl->maxidle;
1297                         cl->deficit = cl->quantum;
1298                         cl->cpriority = cl->priority;
1299                 }
1300         }
1301         sch->q.qlen = 0;
1302 }
1303
1304
1305 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1306 {
1307         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1308                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1309                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1310         }
1311         if (lss->change&TCF_CBQ_LSS_EWMA)
1312                 cl->ewma_log = lss->ewma_log;
1313         if (lss->change&TCF_CBQ_LSS_AVPKT)
1314                 cl->avpkt = lss->avpkt;
1315         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1316                 cl->minidle = -(long)lss->minidle;
1317         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1318                 cl->maxidle = lss->maxidle;
1319                 cl->avgidle = lss->maxidle;
1320         }
1321         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1322                 cl->offtime = lss->offtime;
1323         return 0;
1324 }
1325
1326 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1327 {
1328         q->nclasses[cl->priority]--;
1329         q->quanta[cl->priority] -= cl->weight;
1330         cbq_normalize_quanta(q, cl->priority);
1331 }
1332
1333 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1334 {
1335         q->nclasses[cl->priority]++;
1336         q->quanta[cl->priority] += cl->weight;
1337         cbq_normalize_quanta(q, cl->priority);
1338 }
1339
1340 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1341 {
1342         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343
1344         if (wrr->allot)
1345                 cl->allot = wrr->allot;
1346         if (wrr->weight)
1347                 cl->weight = wrr->weight;
1348         if (wrr->priority) {
1349                 cl->priority = wrr->priority-1;
1350                 cl->cpriority = cl->priority;
1351                 if (cl->priority >= cl->priority2)
1352                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1353         }
1354
1355         cbq_addprio(q, cl);
1356         return 0;
1357 }
1358
1359 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1360 {
1361         switch (ovl->strategy) {
1362         case TC_CBQ_OVL_CLASSIC:
1363                 cl->overlimit = cbq_ovl_classic;
1364                 break;
1365         case TC_CBQ_OVL_DELAY:
1366                 cl->overlimit = cbq_ovl_delay;
1367                 break;
1368         case TC_CBQ_OVL_LOWPRIO:
1369                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1370                     ovl->priority2-1 <= cl->priority)
1371                         return -EINVAL;
1372                 cl->priority2 = ovl->priority2-1;
1373                 cl->overlimit = cbq_ovl_lowprio;
1374                 break;
1375         case TC_CBQ_OVL_DROP:
1376                 cl->overlimit = cbq_ovl_drop;
1377                 break;
1378         case TC_CBQ_OVL_RCLASSIC:
1379                 cl->overlimit = cbq_ovl_rclassic;
1380                 break;
1381         default:
1382                 return -EINVAL;
1383         }
1384         cl->penalty = ovl->penalty;
1385         return 0;
1386 }
1387
1388 #ifdef CONFIG_NET_CLS_POLICE
1389 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1390 {
1391         cl->police = p->police;
1392
1393         if (cl->q->handle) {
1394                 if (p->police == TC_POLICE_RECLASSIFY)
1395                         cl->q->reshape_fail = cbq_reshape_fail;
1396                 else
1397                         cl->q->reshape_fail = NULL;
1398         }
1399         return 0;
1400 }
1401 #endif
1402
1403 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1404 {
1405         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1406         return 0;
1407 }
1408
1409 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1410 {
1411         struct cbq_sched_data *q = qdisc_priv(sch);
1412         struct rtattr *tb[TCA_CBQ_MAX];
1413         struct tc_ratespec *r;
1414
1415         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1416             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1417             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1418                 return -EINVAL;
1419
1420         if (tb[TCA_CBQ_LSSOPT-1] &&
1421             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1422                 return -EINVAL;
1423
1424         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1425
1426         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1427                 return -EINVAL;
1428
1429         q->link.refcnt = 1;
1430         q->link.sibling = &q->link;
1431         q->link.classid = sch->handle;
1432         q->link.qdisc = sch;
1433         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1434                                             sch->handle)))
1435                 q->link.q = &noop_qdisc;
1436
1437         q->link.priority = TC_CBQ_MAXPRIO-1;
1438         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1439         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1440         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1441         q->link.overlimit = cbq_ovl_classic;
1442         q->link.allot = psched_mtu(sch->dev);
1443         q->link.quantum = q->link.allot;
1444         q->link.weight = q->link.R_tab->rate.rate;
1445
1446         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1447         q->link.avpkt = q->link.allot/2;
1448         q->link.minidle = -0x7FFFFFFF;
1449         q->link.stats_lock = &sch->dev->queue_lock;
1450
1451         qdisc_watchdog_init(&q->watchdog, sch);
1452         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1453         q->delay_timer.function = cbq_undelay;
1454         q->toplevel = TC_CBQ_MAXLEVEL;
1455         PSCHED_GET_TIME(q->now);
1456         q->now_rt = q->now;
1457
1458         cbq_link_class(&q->link);
1459
1460         if (tb[TCA_CBQ_LSSOPT-1])
1461                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1462
1463         cbq_addprio(q, &q->link);
1464         return 0;
1465 }
1466
1467 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1468 {
1469         unsigned char *b = skb_tail_pointer(skb);
1470
1471         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1472         return skb->len;
1473
1474 rtattr_failure:
1475         nlmsg_trim(skb, b);
1476         return -1;
1477 }
1478
1479 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1480 {
1481         unsigned char *b = skb_tail_pointer(skb);
1482         struct tc_cbq_lssopt opt;
1483
1484         opt.flags = 0;
1485         if (cl->borrow == NULL)
1486                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1487         if (cl->share == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1489         opt.ewma_log = cl->ewma_log;
1490         opt.level = cl->level;
1491         opt.avpkt = cl->avpkt;
1492         opt.maxidle = cl->maxidle;
1493         opt.minidle = (u32)(-cl->minidle);
1494         opt.offtime = cl->offtime;
1495         opt.change = ~0;
1496         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1497         return skb->len;
1498
1499 rtattr_failure:
1500         nlmsg_trim(skb, b);
1501         return -1;
1502 }
1503
1504 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1505 {
1506         unsigned char *b = skb_tail_pointer(skb);
1507         struct tc_cbq_wrropt opt;
1508
1509         opt.flags = 0;
1510         opt.allot = cl->allot;
1511         opt.priority = cl->priority+1;
1512         opt.cpriority = cl->cpriority+1;
1513         opt.weight = cl->weight;
1514         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1515         return skb->len;
1516
1517 rtattr_failure:
1518         nlmsg_trim(skb, b);
1519         return -1;
1520 }
1521
1522 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1523 {
1524         unsigned char *b = skb_tail_pointer(skb);
1525         struct tc_cbq_ovl opt;
1526
1527         opt.strategy = cl->ovl_strategy;
1528         opt.priority2 = cl->priority2+1;
1529         opt.pad = 0;
1530         opt.penalty = cl->penalty;
1531         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1532         return skb->len;
1533
1534 rtattr_failure:
1535         nlmsg_trim(skb, b);
1536         return -1;
1537 }
1538
1539 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1540 {
1541         unsigned char *b = skb_tail_pointer(skb);
1542         struct tc_cbq_fopt opt;
1543
1544         if (cl->split || cl->defmap) {
1545                 opt.split = cl->split ? cl->split->classid : 0;
1546                 opt.defmap = cl->defmap;
1547                 opt.defchange = ~0;
1548                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1549         }
1550         return skb->len;
1551
1552 rtattr_failure:
1553         nlmsg_trim(skb, b);
1554         return -1;
1555 }
1556
1557 #ifdef CONFIG_NET_CLS_POLICE
1558 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1559 {
1560         unsigned char *b = skb_tail_pointer(skb);
1561         struct tc_cbq_police opt;
1562
1563         if (cl->police) {
1564                 opt.police = cl->police;
1565                 opt.__res1 = 0;
1566                 opt.__res2 = 0;
1567                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1568         }
1569         return skb->len;
1570
1571 rtattr_failure:
1572         nlmsg_trim(skb, b);
1573         return -1;
1574 }
1575 #endif
1576
1577 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1578 {
1579         if (cbq_dump_lss(skb, cl) < 0 ||
1580             cbq_dump_rate(skb, cl) < 0 ||
1581             cbq_dump_wrr(skb, cl) < 0 ||
1582             cbq_dump_ovl(skb, cl) < 0 ||
1583 #ifdef CONFIG_NET_CLS_POLICE
1584             cbq_dump_police(skb, cl) < 0 ||
1585 #endif
1586             cbq_dump_fopt(skb, cl) < 0)
1587                 return -1;
1588         return 0;
1589 }
1590
1591 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1592 {
1593         struct cbq_sched_data *q = qdisc_priv(sch);
1594         unsigned char *b = skb_tail_pointer(skb);
1595         struct rtattr *rta;
1596
1597         rta = (struct rtattr*)b;
1598         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1599         if (cbq_dump_attr(skb, &q->link) < 0)
1600                 goto rtattr_failure;
1601         rta->rta_len = skb_tail_pointer(skb) - b;
1602         return skb->len;
1603
1604 rtattr_failure:
1605         nlmsg_trim(skb, b);
1606         return -1;
1607 }
1608
1609 static int
1610 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1611 {
1612         struct cbq_sched_data *q = qdisc_priv(sch);
1613
1614         q->link.xstats.avgidle = q->link.avgidle;
1615         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1616 }
1617
1618 static int
1619 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1620                struct sk_buff *skb, struct tcmsg *tcm)
1621 {
1622         struct cbq_class *cl = (struct cbq_class*)arg;
1623         unsigned char *b = skb_tail_pointer(skb);
1624         struct rtattr *rta;
1625
1626         if (cl->tparent)
1627                 tcm->tcm_parent = cl->tparent->classid;
1628         else
1629                 tcm->tcm_parent = TC_H_ROOT;
1630         tcm->tcm_handle = cl->classid;
1631         tcm->tcm_info = cl->q->handle;
1632
1633         rta = (struct rtattr*)b;
1634         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1635         if (cbq_dump_attr(skb, cl) < 0)
1636                 goto rtattr_failure;
1637         rta->rta_len = skb_tail_pointer(skb) - b;
1638         return skb->len;
1639
1640 rtattr_failure:
1641         nlmsg_trim(skb, b);
1642         return -1;
1643 }
1644
1645 static int
1646 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1647         struct gnet_dump *d)
1648 {
1649         struct cbq_sched_data *q = qdisc_priv(sch);
1650         struct cbq_class *cl = (struct cbq_class*)arg;
1651
1652         cl->qstats.qlen = cl->q->q.qlen;
1653         cl->xstats.avgidle = cl->avgidle;
1654         cl->xstats.undertime = 0;
1655
1656         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1657                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1658
1659         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1660 #ifdef CONFIG_NET_ESTIMATOR
1661             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1662 #endif
1663             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1664                 return -1;
1665
1666         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1667 }
1668
1669 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1670                      struct Qdisc **old)
1671 {
1672         struct cbq_class *cl = (struct cbq_class*)arg;
1673
1674         if (cl) {
1675                 if (new == NULL) {
1676                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1677                                                      cl->classid)) == NULL)
1678                                 return -ENOBUFS;
1679                 } else {
1680 #ifdef CONFIG_NET_CLS_POLICE
1681                         if (cl->police == TC_POLICE_RECLASSIFY)
1682                                 new->reshape_fail = cbq_reshape_fail;
1683 #endif
1684                 }
1685                 sch_tree_lock(sch);
1686                 *old = xchg(&cl->q, new);
1687                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1688                 qdisc_reset(*old);
1689                 sch_tree_unlock(sch);
1690
1691                 return 0;
1692         }
1693         return -ENOENT;
1694 }
1695
1696 static struct Qdisc *
1697 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1698 {
1699         struct cbq_class *cl = (struct cbq_class*)arg;
1700
1701         return cl ? cl->q : NULL;
1702 }
1703
1704 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1705 {
1706         struct cbq_class *cl = (struct cbq_class *)arg;
1707
1708         if (cl->q->q.qlen == 0)
1709                 cbq_deactivate_class(cl);
1710 }
1711
1712 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1713 {
1714         struct cbq_sched_data *q = qdisc_priv(sch);
1715         struct cbq_class *cl = cbq_class_lookup(q, classid);
1716
1717         if (cl) {
1718                 cl->refcnt++;
1719                 return (unsigned long)cl;
1720         }
1721         return 0;
1722 }
1723
1724 static void cbq_destroy_filters(struct cbq_class *cl)
1725 {
1726         struct tcf_proto *tp;
1727
1728         while ((tp = cl->filter_list) != NULL) {
1729                 cl->filter_list = tp->next;
1730                 tcf_destroy(tp);
1731         }
1732 }
1733
1734 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1735 {
1736         struct cbq_sched_data *q = qdisc_priv(sch);
1737
1738         BUG_TRAP(!cl->filters);
1739
1740         cbq_destroy_filters(cl);
1741         qdisc_destroy(cl->q);
1742         qdisc_put_rtab(cl->R_tab);
1743 #ifdef CONFIG_NET_ESTIMATOR
1744         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1745 #endif
1746         if (cl != &q->link)
1747                 kfree(cl);
1748 }
1749
1750 static void
1751 cbq_destroy(struct Qdisc* sch)
1752 {
1753         struct cbq_sched_data *q = qdisc_priv(sch);
1754         struct cbq_class *cl;
1755         unsigned h;
1756
1757 #ifdef CONFIG_NET_CLS_POLICE
1758         q->rx_class = NULL;
1759 #endif
1760         /*
1761          * Filters must be destroyed first because we don't destroy the
1762          * classes from root to leafs which means that filters can still
1763          * be bound to classes which have been destroyed already. --TGR '04
1764          */
1765         for (h = 0; h < 16; h++)
1766                 for (cl = q->classes[h]; cl; cl = cl->next)
1767                         cbq_destroy_filters(cl);
1768
1769         for (h = 0; h < 16; h++) {
1770                 struct cbq_class *next;
1771
1772                 for (cl = q->classes[h]; cl; cl = next) {
1773                         next = cl->next;
1774                         cbq_destroy_class(sch, cl);
1775                 }
1776         }
1777 }
1778
1779 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1780 {
1781         struct cbq_class *cl = (struct cbq_class*)arg;
1782
1783         if (--cl->refcnt == 0) {
1784 #ifdef CONFIG_NET_CLS_POLICE
1785                 struct cbq_sched_data *q = qdisc_priv(sch);
1786
1787                 spin_lock_bh(&sch->dev->queue_lock);
1788                 if (q->rx_class == cl)
1789                         q->rx_class = NULL;
1790                 spin_unlock_bh(&sch->dev->queue_lock);
1791 #endif
1792
1793                 cbq_destroy_class(sch, cl);
1794         }
1795 }
1796
1797 static int
1798 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1799                  unsigned long *arg)
1800 {
1801         int err;
1802         struct cbq_sched_data *q = qdisc_priv(sch);
1803         struct cbq_class *cl = (struct cbq_class*)*arg;
1804         struct rtattr *opt = tca[TCA_OPTIONS-1];
1805         struct rtattr *tb[TCA_CBQ_MAX];
1806         struct cbq_class *parent;
1807         struct qdisc_rate_table *rtab = NULL;
1808
1809         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1810                 return -EINVAL;
1811
1812         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1813             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1814                 return -EINVAL;
1815
1816         if (tb[TCA_CBQ_FOPT-1] &&
1817             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1818                 return -EINVAL;
1819
1820         if (tb[TCA_CBQ_RATE-1] &&
1821             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1822                         return -EINVAL;
1823
1824         if (tb[TCA_CBQ_LSSOPT-1] &&
1825             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1826                         return -EINVAL;
1827
1828         if (tb[TCA_CBQ_WRROPT-1] &&
1829             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1830                         return -EINVAL;
1831
1832 #ifdef CONFIG_NET_CLS_POLICE
1833         if (tb[TCA_CBQ_POLICE-1] &&
1834             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1835                         return -EINVAL;
1836 #endif
1837
1838         if (cl) {
1839                 /* Check parent */
1840                 if (parentid) {
1841                         if (cl->tparent && cl->tparent->classid != parentid)
1842                                 return -EINVAL;
1843                         if (!cl->tparent && parentid != TC_H_ROOT)
1844                                 return -EINVAL;
1845                 }
1846
1847                 if (tb[TCA_CBQ_RATE-1]) {
1848                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1849                         if (rtab == NULL)
1850                                 return -EINVAL;
1851                 }
1852
1853                 /* Change class parameters */
1854                 sch_tree_lock(sch);
1855
1856                 if (cl->next_alive != NULL)
1857                         cbq_deactivate_class(cl);
1858
1859                 if (rtab) {
1860                         rtab = xchg(&cl->R_tab, rtab);
1861                         qdisc_put_rtab(rtab);
1862                 }
1863
1864                 if (tb[TCA_CBQ_LSSOPT-1])
1865                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1866
1867                 if (tb[TCA_CBQ_WRROPT-1]) {
1868                         cbq_rmprio(q, cl);
1869                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1870                 }
1871
1872                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1873                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1874
1875 #ifdef CONFIG_NET_CLS_POLICE
1876                 if (tb[TCA_CBQ_POLICE-1])
1877                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1878 #endif
1879
1880                 if (tb[TCA_CBQ_FOPT-1])
1881                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1882
1883                 if (cl->q->q.qlen)
1884                         cbq_activate_class(cl);
1885
1886                 sch_tree_unlock(sch);
1887
1888 #ifdef CONFIG_NET_ESTIMATOR
1889                 if (tca[TCA_RATE-1])
1890                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1891                                 cl->stats_lock, tca[TCA_RATE-1]);
1892 #endif
1893                 return 0;
1894         }
1895
1896         if (parentid == TC_H_ROOT)
1897                 return -EINVAL;
1898
1899         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1900             tb[TCA_CBQ_LSSOPT-1] == NULL)
1901                 return -EINVAL;
1902
1903         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1904         if (rtab == NULL)
1905                 return -EINVAL;
1906
1907         if (classid) {
1908                 err = -EINVAL;
1909                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1910                         goto failure;
1911         } else {
1912                 int i;
1913                 classid = TC_H_MAKE(sch->handle,0x8000);
1914
1915                 for (i=0; i<0x8000; i++) {
1916                         if (++q->hgenerator >= 0x8000)
1917                                 q->hgenerator = 1;
1918                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1919                                 break;
1920                 }
1921                 err = -ENOSR;
1922                 if (i >= 0x8000)
1923                         goto failure;
1924                 classid = classid|q->hgenerator;
1925         }
1926
1927         parent = &q->link;
1928         if (parentid) {
1929                 parent = cbq_class_lookup(q, parentid);
1930                 err = -EINVAL;
1931                 if (parent == NULL)
1932                         goto failure;
1933         }
1934
1935         err = -ENOBUFS;
1936         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1937         if (cl == NULL)
1938                 goto failure;
1939         cl->R_tab = rtab;
1940         rtab = NULL;
1941         cl->refcnt = 1;
1942         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1943                 cl->q = &noop_qdisc;
1944         cl->classid = classid;
1945         cl->tparent = parent;
1946         cl->qdisc = sch;
1947         cl->allot = parent->allot;
1948         cl->quantum = cl->allot;
1949         cl->weight = cl->R_tab->rate.rate;
1950         cl->stats_lock = &sch->dev->queue_lock;
1951
1952         sch_tree_lock(sch);
1953         cbq_link_class(cl);
1954         cl->borrow = cl->tparent;
1955         if (cl->tparent != &q->link)
1956                 cl->share = cl->tparent;
1957         cbq_adjust_levels(parent);
1958         cl->minidle = -0x7FFFFFFF;
1959         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1960         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1961         if (cl->ewma_log==0)
1962                 cl->ewma_log = q->link.ewma_log;
1963         if (cl->maxidle==0)
1964                 cl->maxidle = q->link.maxidle;
1965         if (cl->avpkt==0)
1966                 cl->avpkt = q->link.avpkt;
1967         cl->overlimit = cbq_ovl_classic;
1968         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1969                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1970 #ifdef CONFIG_NET_CLS_POLICE
1971         if (tb[TCA_CBQ_POLICE-1])
1972                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1973 #endif
1974         if (tb[TCA_CBQ_FOPT-1])
1975                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1976         sch_tree_unlock(sch);
1977
1978 #ifdef CONFIG_NET_ESTIMATOR
1979         if (tca[TCA_RATE-1])
1980                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1981                         cl->stats_lock, tca[TCA_RATE-1]);
1982 #endif
1983
1984         *arg = (unsigned long)cl;
1985         return 0;
1986
1987 failure:
1988         qdisc_put_rtab(rtab);
1989         return err;
1990 }
1991
1992 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1993 {
1994         struct cbq_sched_data *q = qdisc_priv(sch);
1995         struct cbq_class *cl = (struct cbq_class*)arg;
1996         unsigned int qlen;
1997
1998         if (cl->filters || cl->children || cl == &q->link)
1999                 return -EBUSY;
2000
2001         sch_tree_lock(sch);
2002
2003         qlen = cl->q->q.qlen;
2004         qdisc_reset(cl->q);
2005         qdisc_tree_decrease_qlen(cl->q, qlen);
2006
2007         if (cl->next_alive)
2008                 cbq_deactivate_class(cl);
2009
2010         if (q->tx_borrowed == cl)
2011                 q->tx_borrowed = q->tx_class;
2012         if (q->tx_class == cl) {
2013                 q->tx_class = NULL;
2014                 q->tx_borrowed = NULL;
2015         }
2016 #ifdef CONFIG_NET_CLS_POLICE
2017         if (q->rx_class == cl)
2018                 q->rx_class = NULL;
2019 #endif
2020
2021         cbq_unlink_class(cl);
2022         cbq_adjust_levels(cl->tparent);
2023         cl->defmap = 0;
2024         cbq_sync_defmap(cl);
2025
2026         cbq_rmprio(q, cl);
2027         sch_tree_unlock(sch);
2028
2029         if (--cl->refcnt == 0)
2030                 cbq_destroy_class(sch, cl);
2031
2032         return 0;
2033 }
2034
2035 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2036 {
2037         struct cbq_sched_data *q = qdisc_priv(sch);
2038         struct cbq_class *cl = (struct cbq_class *)arg;
2039
2040         if (cl == NULL)
2041                 cl = &q->link;
2042
2043         return &cl->filter_list;
2044 }
2045
2046 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2047                                      u32 classid)
2048 {
2049         struct cbq_sched_data *q = qdisc_priv(sch);
2050         struct cbq_class *p = (struct cbq_class*)parent;
2051         struct cbq_class *cl = cbq_class_lookup(q, classid);
2052
2053         if (cl) {
2054                 if (p && p->level <= cl->level)
2055                         return 0;
2056                 cl->filters++;
2057                 return (unsigned long)cl;
2058         }
2059         return 0;
2060 }
2061
2062 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2063 {
2064         struct cbq_class *cl = (struct cbq_class*)arg;
2065
2066         cl->filters--;
2067 }
2068
2069 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2070 {
2071         struct cbq_sched_data *q = qdisc_priv(sch);
2072         unsigned h;
2073
2074         if (arg->stop)
2075                 return;
2076
2077         for (h = 0; h < 16; h++) {
2078                 struct cbq_class *cl;
2079
2080                 for (cl = q->classes[h]; cl; cl = cl->next) {
2081                         if (arg->count < arg->skip) {
2082                                 arg->count++;
2083                                 continue;
2084                         }
2085                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2086                                 arg->stop = 1;
2087                                 return;
2088                         }
2089                         arg->count++;
2090                 }
2091         }
2092 }
2093
2094 static struct Qdisc_class_ops cbq_class_ops = {
2095         .graft          =       cbq_graft,
2096         .leaf           =       cbq_leaf,
2097         .qlen_notify    =       cbq_qlen_notify,
2098         .get            =       cbq_get,
2099         .put            =       cbq_put,
2100         .change         =       cbq_change_class,
2101         .delete         =       cbq_delete,
2102         .walk           =       cbq_walk,
2103         .tcf_chain      =       cbq_find_tcf,
2104         .bind_tcf       =       cbq_bind_filter,
2105         .unbind_tcf     =       cbq_unbind_filter,
2106         .dump           =       cbq_dump_class,
2107         .dump_stats     =       cbq_dump_class_stats,
2108 };
2109
2110 static struct Qdisc_ops cbq_qdisc_ops = {
2111         .next           =       NULL,
2112         .cl_ops         =       &cbq_class_ops,
2113         .id             =       "cbq",
2114         .priv_size      =       sizeof(struct cbq_sched_data),
2115         .enqueue        =       cbq_enqueue,
2116         .dequeue        =       cbq_dequeue,
2117         .requeue        =       cbq_requeue,
2118         .drop           =       cbq_drop,
2119         .init           =       cbq_init,
2120         .reset          =       cbq_reset,
2121         .destroy        =       cbq_destroy,
2122         .change         =       NULL,
2123         .dump           =       cbq_dump,
2124         .dump_stats     =       cbq_dump_stats,
2125         .owner          =       THIS_MODULE,
2126 };
2127
2128 static int __init cbq_module_init(void)
2129 {
2130         return register_qdisc(&cbq_qdisc_ops);
2131 }
2132 static void __exit cbq_module_exit(void)
2133 {
2134         unregister_qdisc(&cbq_qdisc_ops);
2135 }
2136 module_init(cbq_module_init)
2137 module_exit(cbq_module_exit)
2138 MODULE_LICENSE("GPL");