[PKT_SCHED]: Convert sch_red to a classful qdisc
net/sched/sch_red.c
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816: ECN support
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than ram size).
        Really, this limit will never be reached
        if RED works correctly.
 */
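
/*
 * Illustrative userspace setup (the numeric values are examples only,
 * not tuning advice); "limit" here corresponds to q->limit below:
 *
 *      tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *              avpkt 1000 burst 55 ecn probability 0.02 bandwidth 10mbit
 */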

struct red_sched_data
{
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct red_parms        parms;
        struct red_stats        stats;
        struct Qdisc            *qdisc;         /* child (leaf) qdisc owning the packet queue */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

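/*
 * Enqueue: the average queue size is computed from the child qdisc's
 * byte backlog, then red_action() decides whether to pass the packet
 * through, ECN-mark it, or drop it. Packets that survive are handed
 * to the child qdisc, which does the actual queueing.
 */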
static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);

        if (red_is_idling(&q->parms))
                red_end_of_idle_period(&q->parms);

        switch (red_action(&q->parms, q->parms.qavg)) {
                case RED_DONT_MARK:
                        break;

                case RED_PROB_MARK:
                        sch->qstats.overlimits++;
                        if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                q->stats.prob_drop++;
                                goto congestion_drop;
                        }

                        q->stats.prob_mark++;
                        break;

                case RED_HARD_MARK:
                        sch->qstats.overlimits++;
                        if (red_use_harddrop(q) || !red_use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                        break;
        }

        ret = child->enqueue(skb, child);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
                sch->q.qlen++;
        } else {
                q->stats.pdrop++;
                sch->qstats.drops++;
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch);
        return NET_XMIT_CN;
}

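/*
 * Requeue goes straight to the child; the average queue size is not
 * recomputed, but a pending idle period is terminated since the queue
 * is non-empty again.
 */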
static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        if (red_is_idling(&q->parms))
                red_end_of_idle_period(&q->parms);

        ret = child->ops->requeue(skb, child);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->qstats.requeues++;
                sch->q.qlen++;
        }
        return ret;
}

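/*
 * Dequeue from the child qdisc. When the child runs empty, RED enters
 * an idle period so that the average queue size decays correctly while
 * the link is unused.
 */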
static struct sk_buff * red_dequeue(struct Qdisc* sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb)
                sch->q.qlen--;
        else if (!red_is_idling(&q->parms))
                red_start_of_idle_period(&q->parms);

        return skb;
}

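/*
 * Drop one packet by delegating to the child's ->drop(), if it has one,
 * and account for it in the "other" drop counter.
 */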
static unsigned int red_drop(struct Qdisc* sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        unsigned int len;

        if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
                q->stats.other++;
                sch->qstats.drops++;
                sch->q.qlen--;
                return len;
        }

        if (!red_is_idling(&q->parms))
                red_start_of_idle_period(&q->parms);

        return 0;
}

static void red_reset(struct Qdisc* sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->q.qlen = 0;
        red_restart(&q->parms);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        qdisc_destroy(q->qdisc);
}

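/*
 * Create the default child: a bfifo limited to the RED byte limit, so
 * that the hard-limit behaviour of the old classless sch_red is kept
 * when no explicit child has been grafted.
 */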
static struct Qdisc *red_create_dflt(struct net_device *dev, u32 limit)
{
        struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
        struct rtattr *rta;
        int ret;

        if (q) {
                rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
                              GFP_KERNEL);
                if (rta) {
                        rta->rta_type = RTM_NEWQDISC;
                        rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
                        ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

                        ret = q->ops->change(q, rta);
                        kfree(rta);

                        if (ret == 0)
                                return q;
                }
                qdisc_destroy(q);
        }
        return NULL;
}

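/*
 * Parse TCA_RED_PARMS and the TCA_RED_STAB lookup table, build a fresh
 * default child when a limit is given, and swap it in under the qdisc
 * tree lock so enqueue/dequeue never see a half-configured state.
 */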
static int red_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct rtattr *tb[TCA_RED_MAX];
        struct tc_red_qopt *ctl;
        struct Qdisc *child = NULL;

        if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
                return -EINVAL;

        if (tb[TCA_RED_PARMS-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
            tb[TCA_RED_STAB-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
                return -EINVAL;

        ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);

        if (ctl->limit > 0) {
                child = red_create_dflt(sch->dev, ctl->limit);
                if (child == NULL)
                        return -ENOMEM;
        }

        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child)
                qdisc_destroy(xchg(&q->qdisc, child));

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                                 ctl->Plog, ctl->Scell_log,
                                 RTA_DATA(tb[TCA_RED_STAB-1]));

        if (skb_queue_empty(&sch->q))
                red_end_of_idle_period(&q->parms);

        sch_tree_unlock(sch);
        return 0;
}

static int red_init(struct Qdisc* sch, struct rtattr *opt)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        return red_change(sch, opt);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct rtattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = RTA_NEST(skb, TCA_OPTIONS);
        RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
        return RTA_NEST_END(skb, opts);

rtattr_failure:
        return RTA_NEST_CANCEL(skb, opts);
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct tc_red_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

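/*
 * Class interface: RED exposes exactly one pseudo-class (minor number 1)
 * through which the child qdisc can be inspected and replaced.
 */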
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (cl != 1)
                return -ENOENT;
        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

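/*
 * Graft a new child qdisc onto the single class. The old child is
 * reset and handed back via *old; the caller is responsible for
 * destroying it.
 */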
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        sch_tree_lock(sch);
        *old = xchg(&q->qdisc, new);
        qdisc_reset(*old);
        sch->q.qlen = 0;
        sch_tree_unlock(sch);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_put(struct Qdisc *sch, unsigned long arg)
{
        return;
}

static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                            struct rtattr **tca, unsigned long *arg)
{
        return -ENOSYS;
}

static int red_delete(struct Qdisc *sch, unsigned long cl)
{
        return -ENOSYS;
}

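/*
 * Walk the (single-entry) class space: report class 1 once, honouring
 * the walker's skip/count/stop protocol.
 */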
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
{
        return NULL;
}

static struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .get            =       red_get,
        .put            =       red_put,
        .change         =       red_change_class,
        .delete         =       red_delete,
        .walk           =       red_walk,
        .tcf_chain      =       red_find_tcf,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .requeue        =       red_requeue,
        .drop           =       red_drop,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");