[PKT_SCHED]: GRED: Cleanup equalize flag and add new WRED mode detection
[linux-3.10.git] / net / sched / sch_gred.c
1 /*
2  * net/sched/sch_gred.c Generic Random Early Detection queue.
3  *
4  *
5  *              This program is free software; you can redistribute it and/or
6  *              modify it under the terms of the GNU General Public License
7  *              as published by the Free Software Foundation; either version
8  *              2 of the License, or (at your option) any later version.
9  *
10  * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
11  *
12  *             991129: -  Bug fix with grio mode
 13  *                     - a better single AvgQ mode with Grio(WRED)
 14  *                     - A finer grained VQ dequeue based on suggestion
 15  *                       from Ren Liu
16  *                     - More error checks
17  *
18  *
19  *
20  *  For all the glorious comments look at Alexey's sch_red.c
21  */
22
23 #include <linux/config.h>
24 #include <linux/module.h>
25 #include <asm/uaccess.h>
26 #include <asm/system.h>
27 #include <linux/bitops.h>
28 #include <linux/types.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/string.h>
32 #include <linux/mm.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/in.h>
36 #include <linux/errno.h>
37 #include <linux/interrupt.h>
38 #include <linux/if_ether.h>
39 #include <linux/inet.h>
40 #include <linux/netdevice.h>
41 #include <linux/etherdevice.h>
42 #include <linux/notifier.h>
43 #include <net/ip.h>
44 #include <net/route.h>
45 #include <linux/skbuff.h>
46 #include <net/sock.h>
47 #include <net/pkt_sched.h>
48
/* Control-path debug output: compiled in (#if 1) by default. */
#if 1 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif

/* Data-path (per-packet) debug output: compiled out (#if 0) by default. */
#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif
60
struct gred_sched_data;
struct gred_sched;

/*
 * Per-virtual-queue (DP) state.  One instance exists for every
 * configured drop parameter set; packets are mapped to a VQ via the
 * low nibble of skb->tc_index.
 */
struct gred_sched_data
{
/* Parameters */
	u32		limit;		/* HARD maximal queue length	*/
	u32		qth_min;	/* Min average length threshold: A scaled */
	u32		qth_max;	/* Max average length threshold: A scaled */
	u32		DP;		/* the drop parameters (index of this VQ) */
	char		Wlog;		/* log(W)		*/
	char		Plog;		/* random number bits	*/
	u32		Scell_max;	/* clamp for idle-time difference */
	u32		Rmask;		/* mask applied to net_random() for qR */
	u32		bytesin;	/* bytes seen on virtualQ so far*/
	u32		packetsin;	/* packets seen on virtualQ so far*/
	u32		backlog;	/* bytes on the virtualQ */
	u32		forced;	/* packets dropped for exceeding limits */
	u32		early;	/* packets dropped as a warning */
	u32		other;	/* packets dropped by invoking drop() */
	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
	char		Scell_log;	/* shift applied to idle time before Stab lookup */
	u8		Stab[256];	/* idle-period qave decay lookup table */
	u8		prio;        /* the prio of this vq */

/* Variables */
	unsigned long	qave;		/* Average queue length: A scaled */
	int		qcount;		/* Packets since last random number generation */
	u32		qR;		/* Cached random number */

	psched_time_t	qidlestart;	/* Start of idle period */
};
93
/* Bit numbers for gred_sched.flags */
enum {
	GRED_WRED_MODE = 1,	/* all VQs share the default VQ's qave/idle clock */
};

/*
 * Qdisc-wide state: the table of virtual queues plus global mode bits.
 */
struct gred_sched
{
	struct gred_sched_data *tab[MAX_DPs];	/* per-DP state, NULL if unconfigured */
	unsigned long	flags;	/* GRED_* mode bits */
	u32		DPs;	/* number of drop parameter sets in use */
	u32		def;	/* default DP used for unclassified packets */
	u8		initd;	/* non-zero once per-DP parameters were set */
	u8		grio;	/* priority-based (RIO-like) mode enabled */
};
107
/* Non-zero when the qdisc operates in WRED mode (shared average queue). */
static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}
112
/* Switch the qdisc into WRED mode (non-atomic; caller holds config lock). */
static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}
117
/* Leave WRED mode (non-atomic; caller holds config lock). */
static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}
122
123 static inline int gred_wred_mode_check(struct Qdisc *sch)
124 {
125         struct gred_sched *table = qdisc_priv(sch);
126         int i;
127
128         /* Really ugly O(n^2) but shouldn't be necessary too frequent. */
129         for (i = 0; i < table->DPs; i++) {
130                 struct gred_sched_data *q = table->tab[i];
131                 int n;
132
133                 if (q == NULL)
134                         continue;
135
136                 for (n = 0; n < table->DPs; n++)
137                         if (table->tab[n] && table->tab[n] != q &&
138                             table->tab[n]->prio == q->prio)
139                                 return 1;
140         }
141
142         return 0;
143 }
144
/*
 * Enqueue a packet: map it to a virtual queue via the low nibble of
 * skb->tc_index, update the (possibly shared) average queue length and
 * apply the RED drop decision for that VQ.
 *
 * Returns 0 on enqueue, NET_XMIT_DROP when the packet is dropped.
 */
static int
gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	psched_time_t now;
	struct gred_sched_data *q=NULL;
	struct gred_sched *t= qdisc_priv(sch);
	unsigned long	qave=0;	/* sum of higher-priority VQ averages (grio mode) */
	int i=0;

	/* Not configured yet: pass traffic through until the device's
	 * tx_queue_len (at least 1) worth of packets is queued. */
	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
		goto do_enqueue;
	}


	/* Out-of-range or unconfigured DP: fall back to the default VQ. */
	if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
		printk("GRED: setting to default (%d)\n ",t->def);
		if (!(q=t->tab[t->def])) {
			DPRINTK("GRED: setting to default FAILED! dropping!! "
			    "(%d)\n ", t->def);
			goto drop;
		}
		/* fix tc_index? --could be controversial but needed for
		   requeueing */
		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
	}

	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
	    sch->qstats.backlog);
	/* grio: sum the qaves of all VQs with strictly smaller prio value
	 * (i.e. higher priority) that are currently idle-clock-reset, so
	 * lower-priority queues see the load above them. */
	if (!gred_wred_mode(t) && t->grio) {
		for (i=0;i<t->DPs;i++) {
			if ((!t->tab[i]) || (i==q->DP)) 
				continue; 
				
			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
				qave +=t->tab[i]->qave;
		}
			
	}

	q->packetsin++;
	q->bytesin+=skb->len;

	/* WRED mode: all VQs share the default VQ's average and idle clock.
	 * NOTE(review): t->tab[t->def] is dereferenced here without a NULL
	 * check — confirm the default VQ always exists once initd is set. */
	if (gred_wred_mode(t)) {
		qave=0;
		q->qave=t->tab[t->def]->qave;
		q->qidlestart=t->tab[t->def]->qidlestart;
	}

	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
		/* Coming out of an idle period: decay qave according to how
		 * long the queue was idle (Stab lookup). */
		long us_idle;
		PSCHED_GET_TIME(now);
		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
		PSCHED_SET_PASTPERFECT(q->qidlestart);

		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
	} else {
		/* EWMA update: avg += (backlog - avg) * W, with W = 2^-Wlog.
		 * WRED mode averages the shared physical backlog. */
		if (gred_wred_mode(t)) {
			q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
		} else {
			q->qave += q->backlog - (q->qave >> q->Wlog);
		}

	}
	

	/* Write the shared average back to the default VQ in WRED mode. */
	if (gred_wred_mode(t))
		t->tab[t->def]->qave=q->qave;

	if ((q->qave+qave) < q->qth_min) {
		/* Below min threshold: always accept (subject to hard limit). */
		q->qcount = -1;
enqueue:
		if (q->backlog + skb->len <= q->limit) {
			q->backlog += skb->len;
do_enqueue:
			__skb_queue_tail(&sch->q, skb);
			sch->qstats.backlog += skb->len;
			sch->bstats.bytes += skb->len;
			sch->bstats.packets++;
			return 0;
		} else {
			/* Hard per-VQ byte limit exceeded. */
			q->pdrop++;
		}

drop:
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_DROP;
	}
	if ((q->qave+qave) >= q->qth_max) {
		/* Above max threshold: forced drop. */
		q->qcount = -1;
		sch->qstats.overlimits++;
		q->forced++;
		goto drop;
	}
	/* Between thresholds: probabilistic early drop (classic RED). */
	if (++q->qcount) {
		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
			goto enqueue;
		q->qcount = 0;
		q->qR = net_random()&q->Rmask;
		sch->qstats.overlimits++;
		q->early++;
		goto drop;
	}
	q->qR = net_random()&q->Rmask;
	goto enqueue;
}
254
255 static int
256 gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
257 {
258         struct gred_sched_data *q;
259         struct gred_sched *t= qdisc_priv(sch);
260         q= t->tab[(skb->tc_index&0xf)];
261 /* error checking here -- probably unnecessary */
262         PSCHED_SET_PASTPERFECT(q->qidlestart);
263
264         __skb_queue_head(&sch->q, skb);
265         sch->qstats.backlog += skb->len;
266         sch->qstats.requeues++;
267         q->backlog += skb->len;
268         return 0;
269 }
270
/*
 * Dequeue from the single physical queue and discharge the packet from
 * its virtual queue; start the idle-period clock when a VQ drains.
 */
static struct sk_buff *
gred_dequeue(struct Qdisc* sch)
{
	struct sk_buff *skb;
	struct gred_sched_data *q;
	struct gred_sched *t= qdisc_priv(sch);

	skb = __skb_dequeue(&sch->q);
	if (skb) {
		sch->qstats.backlog -= skb->len;
		q= t->tab[(skb->tc_index&0xf)];
		if (q) {
			q->backlog -= skb->len;
			/* In WRED mode the idle clock lives on the default VQ
			 * (handled below), not per-VQ. */
			if (!q->backlog && !gred_wred_mode(t))
				PSCHED_GET_TIME(q->qidlestart);
		} else {
			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
		}
		return skb;
	}

	/* Queue empty: in WRED mode, mark the start of the idle period on
	 * the default VQ whose qave is shared by everyone. */
	if (gred_wred_mode(t)) {
			q= t->tab[t->def];
			if (!q) 
				D2PRINTK("no default VQ set: Results will be "
				       "screwed up\n");
			else
				PSCHED_GET_TIME(q->qidlestart);
	}

	return NULL;
}
303
/*
 * Drop one packet from the tail of the queue (invoked by the qdisc
 * layer when it needs to free space) and account it to its VQ.
 *
 * Returns the number of bytes freed, 0 if the queue was empty.
 */
static unsigned int gred_drop(struct Qdisc* sch)
{
	struct sk_buff *skb;

	struct gred_sched_data *q;
	struct gred_sched *t= qdisc_priv(sch);

	skb = __skb_dequeue_tail(&sch->q);
	if (skb) {
		unsigned int len = skb->len;
		sch->qstats.backlog -= len;
		sch->qstats.drops++;
		q= t->tab[(skb->tc_index&0xf)];
		if (q) {
			q->backlog -= len;
			q->other++;	/* dropped via drop(), not by RED */
			/* Per-VQ idle clock only outside WRED mode. */
			if (!q->backlog && !gred_wred_mode(t))
				PSCHED_GET_TIME(q->qidlestart);
		} else {
			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
		}

		kfree_skb(skb);
		return len;
	}

	/* Nothing to drop: treat as start of an idle period on the
	 * default VQ so the shared average decays. */
	q=t->tab[t->def];
	if (!q) {
		D2PRINTK("no default VQ set: Results might be screwed up\n");
		return 0;
	}

	PSCHED_GET_TIME(q->qidlestart);
	return 0;

}
340
341 static void gred_reset(struct Qdisc* sch)
342 {
343         int i;
344         struct gred_sched_data *q;
345         struct gred_sched *t= qdisc_priv(sch);
346
347         __skb_queue_purge(&sch->q);
348
349         sch->qstats.backlog = 0;
350
351         for (i=0;i<t->DPs;i++) {
352                 q= t->tab[i];
353                 if (!q) 
354                         continue; 
355                 PSCHED_SET_PASTPERFECT(q->qidlestart);
356                 q->qave = 0;
357                 q->qcount = -1;
358                 q->backlog = 0;
359                 q->other=0;
360                 q->forced=0;
361                 q->pdrop=0;
362                 q->early=0;
363         }
364 }
365
/*
 * Apply a configuration update.  Two message shapes are handled:
 *  - a DPS-only message (no PARMS/STAB): sets the global DP count,
 *    default DP and grio mode, and re-evaluates WRED mode;
 *  - a full per-DP message: (re)configures one virtual queue's RED
 *    parameters and its 256-byte idle-decay table.
 *
 * Returns 0 on success, -EINVAL on malformed input, -ENOMEM on
 * allocation failure.
 */
static int gred_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q;
	struct tc_gred_qopt *ctl;
	struct tc_gred_sopt *sopt;
	struct rtattr *tb[TCA_GRED_STAB];
	struct rtattr *tb2[TCA_GRED_DPS];

	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
		return -EINVAL;

	/* Global (DPS-only) reconfiguration path. */
	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);

	    if (tb2[TCA_GRED_DPS-1] == 0) 
			return -EINVAL;

		/* NOTE(review): sopt->DPs is not validated against MAX_DPs
		 * and the attribute payload size is not checked — an
		 * oversized DPs would make later tab[] walks read out of
		 * bounds; confirm and clamp. */
		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
		table->DPs=sopt->DPs;	
		table->def=sopt->def_DP; 

		/* Re-detect WRED mode whenever grio mode changes. */
		if (sopt->grio) {
			table->grio = 1;
			gred_disable_wred_mode(table);
			if (gred_wred_mode_check(sch))
				gred_enable_wred_mode(table);
		} else {
			table->grio = 0;
			gred_disable_wred_mode(table);
		}

		table->initd=0;
		/* probably need to clear all the table DP entries as well */
		return 0;
	    }


	/* Per-DP path: require both PARMS and STAB with sane sizes. */
	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
			return -EINVAL;

	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
	if (ctl->DP > MAX_DPs-1 ) {
		/* misbehaving is punished! Put in the default drop probability */
		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
			"set to default at %d\n",ctl->DP,table->def);
		ctl->DP=table->def;
	}
	
	/* Lazily allocate the VQ on first configuration. */
	if (table->tab[ctl->DP] == NULL) {
		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
					    GFP_KERNEL);
		if (NULL == table->tab[ctl->DP])
			return -ENOMEM;
		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
	}
	q= table->tab[ctl->DP]; 

	/* Priority handling: in grio mode, missing prio falls back to the
	 * default VQ's prio (or 8); outside grio mode prio is fixed at 8. */
	if (table->grio) {
		if (ctl->prio <=0) {
			if (table->def && table->tab[table->def]) {
				DPRINTK("\nGRED: DP %u does not have a prio"
					"setting default to %d\n",ctl->DP,
					table->tab[table->def]->prio);
				q->prio=table->tab[table->def]->prio;
			} else { 
				DPRINTK("\nGRED: DP %u does not have a prio"
					" setting default to 8\n",ctl->DP);
				q->prio=8;
			}
		} else {
			q->prio=ctl->prio;
		}
	} else {
		q->prio=8;
	}


	/* Copy RED parameters; thresholds are stored pre-scaled by Wlog. */
	q->DP=ctl->DP;
	q->Wlog = ctl->Wlog;
	q->Plog = ctl->Plog;
	q->limit = ctl->limit;
	q->Scell_log = ctl->Scell_log;
	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
	q->Scell_max = (255<<q->Scell_log);
	q->qth_min = ctl->qth_min<<ctl->Wlog;
	q->qth_max = ctl->qth_max<<ctl->Wlog;
	q->qave=0;
	q->backlog=0;
	q->qcount = -1;
	q->other=0;
	q->forced=0;
	q->pdrop=0;
	q->early=0;

	PSCHED_SET_PASTPERFECT(q->qidlestart);
	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);

	/* A prio change may create or remove a collision: re-detect WRED. */
	if (table->grio) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	if (!table->initd) {
		table->initd=1;
		/* 
		the first entry also goes into the default until
		over-written 
		*/

		if (table->tab[table->def] == NULL) {
			table->tab[table->def]=
				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
			if (NULL == table->tab[table->def])
				return -ENOMEM;

			memset(table->tab[table->def], 0,
			       (sizeof(struct gred_sched_data)));
		}
		/* Mirror the just-configured parameters into the default VQ. */
		q= table->tab[table->def]; 
		q->DP=table->def;
		q->Wlog = ctl->Wlog;
		q->Plog = ctl->Plog;
		q->limit = ctl->limit;
		q->Scell_log = ctl->Scell_log;
		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
		q->Scell_max = (255<<q->Scell_log);
		q->qth_min = ctl->qth_min<<ctl->Wlog;
		q->qth_max = ctl->qth_max<<ctl->Wlog;

		if (table->grio)
			q->prio=table->tab[ctl->DP]->prio;
		else
			q->prio=8;

		q->qcount = -1;
		PSCHED_SET_PASTPERFECT(q->qidlestart);
		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
	}
	return 0;

}
511
512 static int gred_init(struct Qdisc *sch, struct rtattr *opt)
513 {
514         struct gred_sched *table = qdisc_priv(sch);
515         struct tc_gred_sopt *sopt;
516         struct rtattr *tb[TCA_GRED_STAB];
517         struct rtattr *tb2[TCA_GRED_DPS];
518
519         if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
520                 return -EINVAL;
521
522         if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
523                 rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
524
525             if (tb2[TCA_GRED_DPS-1] == 0) 
526                         return -EINVAL;
527
528                 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
529                 table->DPs=sopt->DPs;   
530                 table->def=sopt->def_DP; 
531                 table->grio=sopt->grio; 
532                 table->initd=0;
533                 return 0;
534         }
535
536         DPRINTK("\n GRED_INIT error!\n");
537         return -EINVAL;
538 }
539
540 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
541 {
542         unsigned long qave;
543         struct rtattr *rta;
544         struct tc_gred_qopt *opt = NULL ;
545         struct tc_gred_qopt *dst;
546         struct gred_sched *table = qdisc_priv(sch);
547         struct gred_sched_data *q;
548         int i;
549         unsigned char    *b = skb->tail;
550
551         rta = (struct rtattr*)b;
552         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
553
554         opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
555
556         if (opt  == NULL) {
557                 DPRINTK("gred_dump:failed to malloc for %Zd\n",
558                     sizeof(struct tc_gred_qopt)*MAX_DPs);
559                 goto rtattr_failure;
560         }
561
562         memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
563
564         if (!table->initd) {
565                 DPRINTK("NO GRED Queues setup!\n");
566         }
567
568         for (i=0;i<MAX_DPs;i++) {
569                 dst= &opt[i]; 
570                 q= table->tab[i]; 
571
572                 if (!q) {
573                         /* hack -- fix at some point with proper message
574                            This is how we indicate to tc that there is no VQ
575                            at this DP */
576
577                         dst->DP=MAX_DPs+i;
578                         continue;
579                 }
580
581                 dst->limit=q->limit;
582                 dst->qth_min=q->qth_min>>q->Wlog;
583                 dst->qth_max=q->qth_max>>q->Wlog;
584                 dst->DP=q->DP;
585                 dst->backlog=q->backlog;
586                 if (q->qave) {
587                         if (gred_wred_mode(table)) {
588                                 q->qidlestart=table->tab[table->def]->qidlestart;
589                                 q->qave=table->tab[table->def]->qave;
590                         }
591                         if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
592                                 long idle;
593                                 psched_time_t now;
594                                 PSCHED_GET_TIME(now);
595                                 idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
596                                 qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
597                                 dst->qave = qave >> q->Wlog;
598
599                         } else {
600                                 dst->qave = q->qave >> q->Wlog;
601                         }
602                 } else {
603                         dst->qave = 0;
604                 }
605                 
606
607                 dst->Wlog = q->Wlog;
608                 dst->Plog = q->Plog;
609                 dst->Scell_log = q->Scell_log;
610                 dst->other = q->other;
611                 dst->forced = q->forced;
612                 dst->early = q->early;
613                 dst->pdrop = q->pdrop;
614                 dst->prio = q->prio;
615                 dst->packets=q->packetsin;
616                 dst->bytesin=q->bytesin;
617         }
618
619         RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
620         rta->rta_len = skb->tail - b;
621
622         kfree(opt);
623         return skb->len;
624
625 rtattr_failure:
626         if (opt)
627                 kfree(opt);
628         DPRINTK("gred_dump: FAILURE!!!!\n");
629
630 /* also free the opt struct here */
631         skb_trim(skb, b - skb->data);
632         return -1;
633 }
634
635 static void gred_destroy(struct Qdisc *sch)
636 {
637         struct gred_sched *table = qdisc_priv(sch);
638         int i;
639
640         for (i = 0;i < table->DPs; i++) {
641                 if (table->tab[i])
642                         kfree(table->tab[i]);
643         }
644 }
645
/* GRED qdisc operations; classless, hence no class ops. */
static struct Qdisc_ops gred_qdisc_ops = {
	.next		=	NULL,
	.cl_ops		=	NULL,
	.id		=	"gred",
	.priv_size	=	sizeof(struct gred_sched),
	.enqueue	=	gred_enqueue,
	.dequeue	=	gred_dequeue,
	.requeue	=	gred_requeue,
	.drop		=	gred_drop,
	.init		=	gred_init,
	.reset		=	gred_reset,
	.destroy	=	gred_destroy,
	.change		=	gred_change,
	.dump		=	gred_dump,
	.owner		=	THIS_MODULE,
};
662
/* Module plumbing: register/unregister the "gred" qdisc with the
 * packet scheduler core. */
static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}
static void __exit gred_module_exit(void) 
{
	unregister_qdisc(&gred_qdisc_ops);
}
module_init(gred_module_init)
module_exit(gred_module_exit)
MODULE_LICENSE("GPL");