2005-04-17 00:20:36 +02:00
|
|
|
/*
|
|
|
|
* net/sched/sch_red.c Random Early Detection queue.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
|
|
*
|
|
|
|
* Changes:
|
2005-11-05 21:14:08 +01:00
|
|
|
* J Hadi Salim 980914: computation fixes
|
2005-04-17 00:20:36 +02:00
|
|
|
* Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
|
2005-11-05 21:14:08 +01:00
|
|
|
* J Hadi Salim 980816: ECN support
|
2005-04-17 00:20:36 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <net/pkt_sched.h>
|
|
|
|
#include <net/inet_ecn.h>
|
2005-11-05 21:14:05 +01:00
|
|
|
#include <net/red.h>
|
2005-04-17 00:20:36 +02:00
|
|
|
|
|
|
|
|
2005-11-05 21:14:05 +01:00
|
|
|
/* Parameters, settable by user:
|
2005-04-17 00:20:36 +02:00
|
|
|
-----------------------------
|
|
|
|
|
|
|
|
limit - bytes (must be > qth_max + burst)
|
|
|
|
|
|
|
|
Hard limit on queue length, should be chosen >qth_max
|
|
|
|
to allow packet bursts. This parameter does not
|
|
|
|
affect the algorithm's behaviour and can be chosen
|
|
|
|
arbitrarily high (well, less than ram size)
|
|
|
|
Really, this limit will never be reached
|
|
|
|
if RED works correctly.
|
|
|
|
*/
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
struct red_sched_data {
|
2005-11-05 21:14:05 +01:00
|
|
|
u32 limit; /* HARD maximal queue length */
|
|
|
|
unsigned char flags;
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
struct timer_list adapt_timer;
|
2005-11-05 21:14:05 +01:00
|
|
|
struct red_parms parms;
|
2012-01-05 03:25:16 +01:00
|
|
|
struct red_vars vars;
|
2005-11-05 21:14:05 +01:00
|
|
|
struct red_stats stats;
|
2006-03-21 04:20:44 +01:00
|
|
|
struct Qdisc *qdisc;
|
2005-04-17 00:20:36 +02:00
|
|
|
};
|
|
|
|
|
2005-11-05 21:14:05 +01:00
|
|
|
static inline int red_use_ecn(struct red_sched_data *q)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
2005-11-05 21:14:05 +01:00
|
|
|
return q->flags & TC_RED_ECN;
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
2005-11-05 21:14:28 +01:00
|
|
|
static inline int red_use_harddrop(struct red_sched_data *q)
|
|
|
|
{
|
|
|
|
return q->flags & TC_RED_HARDDROP;
|
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2006-03-21 04:20:44 +01:00
|
|
|
struct Qdisc *child = q->qdisc;
|
|
|
|
int ret;
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2012-01-05 03:25:16 +01:00
|
|
|
q->vars.qavg = red_calc_qavg(&q->parms,
|
|
|
|
&q->vars,
|
|
|
|
child->qstats.backlog);
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2012-01-05 03:25:16 +01:00
|
|
|
if (red_is_idling(&q->vars))
|
|
|
|
red_end_of_idle_period(&q->vars);
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2012-01-05 03:25:16 +01:00
|
|
|
switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
|
2011-01-19 20:26:56 +01:00
|
|
|
case RED_DONT_MARK:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case RED_PROB_MARK:
|
|
|
|
sch->qstats.overlimits++;
|
|
|
|
if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
|
|
|
|
q->stats.prob_drop++;
|
|
|
|
goto congestion_drop;
|
|
|
|
}
|
|
|
|
|
|
|
|
q->stats.prob_mark++;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case RED_HARD_MARK:
|
|
|
|
sch->qstats.overlimits++;
|
|
|
|
if (red_use_harddrop(q) || !red_use_ecn(q) ||
|
|
|
|
!INET_ECN_set_ce(skb)) {
|
|
|
|
q->stats.forced_drop++;
|
|
|
|
goto congestion_drop;
|
|
|
|
}
|
|
|
|
|
|
|
|
q->stats.forced_mark++;
|
|
|
|
break;
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
2008-07-20 09:08:04 +02:00
|
|
|
ret = qdisc_enqueue(skb, child);
|
2006-03-21 04:20:44 +01:00
|
|
|
if (likely(ret == NET_XMIT_SUCCESS)) {
|
|
|
|
sch->q.qlen++;
|
2008-08-05 07:31:03 +02:00
|
|
|
} else if (net_xmit_drop_count(ret)) {
|
2006-03-21 04:20:44 +01:00
|
|
|
q->stats.pdrop++;
|
|
|
|
sch->qstats.drops++;
|
|
|
|
}
|
|
|
|
return ret;
|
2005-11-05 21:14:05 +01:00
|
|
|
|
|
|
|
congestion_drop:
|
2005-11-05 21:14:06 +01:00
|
|
|
qdisc_drop(skb, sch);
|
2005-04-17 00:20:36 +02:00
|
|
|
return NET_XMIT_CN;
|
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static struct sk_buff *red_dequeue(struct Qdisc *sch)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2006-03-21 04:20:44 +01:00
|
|
|
struct Qdisc *child = q->qdisc;
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
skb = child->dequeue(child);
|
2011-01-21 08:31:33 +01:00
|
|
|
if (skb) {
|
|
|
|
qdisc_bstats_update(sch, skb);
|
2006-03-21 04:20:44 +01:00
|
|
|
sch->q.qlen--;
|
2011-01-21 08:31:33 +01:00
|
|
|
} else {
|
2012-01-05 03:25:16 +01:00
|
|
|
if (!red_is_idling(&q->vars))
|
|
|
|
red_start_of_idle_period(&q->vars);
|
2011-01-21 08:31:33 +01:00
|
|
|
}
|
2005-11-05 21:14:06 +01:00
|
|
|
return skb;
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static struct sk_buff *red_peek(struct Qdisc *sch)
|
2008-10-31 08:45:55 +01:00
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
struct Qdisc *child = q->qdisc;
|
|
|
|
|
|
|
|
return child->ops->peek(child);
|
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static unsigned int red_drop(struct Qdisc *sch)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2006-03-21 04:20:44 +01:00
|
|
|
struct Qdisc *child = q->qdisc;
|
|
|
|
unsigned int len;
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
|
2005-11-05 21:14:05 +01:00
|
|
|
q->stats.other++;
|
2006-03-21 04:20:44 +01:00
|
|
|
sch->qstats.drops++;
|
|
|
|
sch->q.qlen--;
|
2005-04-17 00:20:36 +02:00
|
|
|
return len;
|
|
|
|
}
|
2005-11-05 21:14:05 +01:00
|
|
|
|
2012-01-05 03:25:16 +01:00
|
|
|
if (!red_is_idling(&q->vars))
|
|
|
|
red_start_of_idle_period(&q->vars);
|
2005-11-05 21:14:07 +01:00
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static void red_reset(struct Qdisc *sch)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
qdisc_reset(q->qdisc);
|
|
|
|
sch->q.qlen = 0;
|
2012-01-05 03:25:16 +01:00
|
|
|
red_restart(&q->vars);
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
static void red_destroy(struct Qdisc *sch)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
|
|
|
|
del_timer_sync(&q->adapt_timer);
|
2006-03-21 04:20:44 +01:00
|
|
|
qdisc_destroy(q->qdisc);
|
|
|
|
}
|
|
|
|
|
2008-01-24 05:35:39 +01:00
|
|
|
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
|
|
|
|
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
|
|
|
|
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
|
2011-12-09 03:46:45 +01:00
|
|
|
[TCA_RED_MAX_P] = { .type = NLA_U32 },
|
2008-01-24 05:35:39 +01:00
|
|
|
};
|
|
|
|
|
2008-01-23 07:11:17 +01:00
|
|
|
static int red_change(struct Qdisc *sch, struct nlattr *opt)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2008-01-23 07:11:17 +01:00
|
|
|
struct nlattr *tb[TCA_RED_MAX + 1];
|
2005-04-17 00:20:36 +02:00
|
|
|
struct tc_red_qopt *ctl;
|
2006-03-21 04:20:44 +01:00
|
|
|
struct Qdisc *child = NULL;
|
2008-01-24 05:33:32 +01:00
|
|
|
int err;
|
2011-12-09 03:46:45 +01:00
|
|
|
u32 max_P;
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2008-01-24 05:33:32 +01:00
|
|
|
if (opt == NULL)
|
2005-11-05 21:14:08 +01:00
|
|
|
return -EINVAL;
|
|
|
|
|
2008-01-24 05:35:39 +01:00
|
|
|
err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
|
2008-01-24 05:33:32 +01:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
2008-01-23 07:11:17 +01:00
|
|
|
if (tb[TCA_RED_PARMS] == NULL ||
|
2008-01-24 05:35:39 +01:00
|
|
|
tb[TCA_RED_STAB] == NULL)
|
2005-04-17 00:20:36 +02:00
|
|
|
return -EINVAL;
|
|
|
|
|
2011-12-09 03:46:45 +01:00
|
|
|
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
|
|
|
|
|
2008-01-23 07:11:17 +01:00
|
|
|
ctl = nla_data(tb[TCA_RED_PARMS]);
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
if (ctl->limit > 0) {
|
2008-07-06 08:40:21 +02:00
|
|
|
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
|
|
|
|
if (IS_ERR(child))
|
|
|
|
return PTR_ERR(child);
|
2006-03-21 04:20:44 +01:00
|
|
|
}
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
sch_tree_lock(sch);
|
|
|
|
q->flags = ctl->flags;
|
|
|
|
q->limit = ctl->limit;
|
2006-11-30 02:36:20 +01:00
|
|
|
if (child) {
|
|
|
|
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
|
2008-11-20 13:11:36 +01:00
|
|
|
qdisc_destroy(q->qdisc);
|
|
|
|
q->qdisc = child;
|
2006-11-30 02:36:20 +01:00
|
|
|
}
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2012-01-05 03:25:16 +01:00
|
|
|
red_set_parms(&q->parms,
|
|
|
|
ctl->qth_min, ctl->qth_max, ctl->Wlog,
|
2011-12-09 03:46:45 +01:00
|
|
|
ctl->Plog, ctl->Scell_log,
|
|
|
|
nla_data(tb[TCA_RED_STAB]),
|
|
|
|
max_P);
|
2012-01-05 03:25:16 +01:00
|
|
|
red_set_vars(&q->vars);
|
2005-11-05 21:14:05 +01:00
|
|
|
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
del_timer(&q->adapt_timer);
|
|
|
|
if (ctl->flags & TC_RED_ADAPTATIVE)
|
|
|
|
mod_timer(&q->adapt_timer, jiffies + HZ/2);
|
|
|
|
|
sch_red: fix red_change
Le mercredi 30 novembre 2011 à 14:36 -0800, Stephen Hemminger a écrit :
> (Almost) nobody uses RED because they can't figure it out.
> According to Wikipedia, VJ says that:
> "there are not one, but two bugs in classic RED."
RED is useful for high throughput routers, I doubt many linux machines
act as such devices.
I was considering adding Adaptative RED (Sally Floyd, Ramakrishna
Gummadi, Scott Shender), August 2001
In this version, maxp is dynamic (from 1% to 50%), and user only have to
setup min_th (target average queue size)
(max_th and wq (burst in linux RED) are automatically setup)
By the way it seems we have a small bug in red_change()
if (skb_queue_empty(&sch->q))
red_end_of_idle_period(&q->parms);
First, if queue is empty, we should call
red_start_of_idle_period(&q->parms);
Second, since we dont use anymore sch->q, but q->qdisc, the test is
meaningless.
Oh well...
[PATCH] sch_red: fix red_change()
Now RED is classful, we must check q->qdisc->q.qlen, and if queue is empty,
we start an idle period, not end it.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-01 12:06:34 +01:00
|
|
|
if (!q->qdisc->q.qlen)
|
2012-01-05 03:25:16 +01:00
|
|
|
red_start_of_idle_period(&q->vars);
|
2005-11-05 21:14:08 +01:00
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
sch_tree_unlock(sch);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (min_th + max_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(max_th - min_th),
min_th + 0.6*(max_th - min_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow the full range described in the Adaptative
paper.
To deliver a random number, we now use a reciprocal divide (that's really
a multiply), but this operation is done once per marked/dropped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
static inline void red_adaptative_timer(unsigned long arg)
|
|
|
|
{
|
|
|
|
struct Qdisc *sch = (struct Qdisc *)arg;
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
|
|
|
|
|
|
|
|
spin_lock(root_lock);
|
2012-01-05 03:25:16 +01:00
|
|
|
red_adaptative_algo(&q->parms, &q->vars);
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
mod_timer(&q->adapt_timer, jiffies + HZ/2);
|
|
|
|
spin_unlock(root_lock);
|
|
|
|
}
|
|
|
|
|
2011-01-19 20:26:56 +01:00
|
|
|
static int red_init(struct Qdisc *sch, struct nlattr *opt)
|
2005-04-17 00:20:36 +02:00
|
|
|
{
|
2006-03-21 04:20:44 +01:00
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
|
|
|
|
q->qdisc = &noop_qdisc;
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 07:06:03 +01:00
|
|
|
setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
|
2005-04-17 00:20:36 +02:00
|
|
|
return red_change(sch, opt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2008-01-23 07:11:17 +01:00
|
|
|
struct nlattr *opts = NULL;
|
2005-11-05 21:14:05 +01:00
|
|
|
struct tc_red_qopt opt = {
|
|
|
|
.limit = q->limit,
|
|
|
|
.flags = q->flags,
|
|
|
|
.qth_min = q->parms.qth_min >> q->parms.Wlog,
|
|
|
|
.qth_max = q->parms.qth_max >> q->parms.Wlog,
|
|
|
|
.Wlog = q->parms.Wlog,
|
|
|
|
.Plog = q->parms.Plog,
|
|
|
|
.Scell_log = q->parms.Scell_log,
|
|
|
|
};
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2011-01-03 09:11:38 +01:00
|
|
|
sch->qstats.backlog = q->qdisc->qstats.backlog;
|
2008-01-23 07:11:17 +01:00
|
|
|
opts = nla_nest_start(skb, TCA_OPTIONS);
|
|
|
|
if (opts == NULL)
|
|
|
|
goto nla_put_failure;
|
2012-03-29 11:11:39 +02:00
|
|
|
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
|
|
|
|
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
|
|
|
|
goto nla_put_failure;
|
2008-01-23 07:11:17 +01:00
|
|
|
return nla_nest_end(skb, opts);
|
2005-04-17 00:20:36 +02:00
|
|
|
|
2008-01-23 07:11:17 +01:00
|
|
|
nla_put_failure:
|
2008-06-04 01:36:54 +02:00
|
|
|
nla_nest_cancel(skb, opts);
|
|
|
|
return -EMSGSIZE;
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
2005-11-05 21:14:05 +01:00
|
|
|
struct tc_red_xstats st = {
|
|
|
|
.early = q->stats.prob_drop + q->stats.forced_drop,
|
|
|
|
.pdrop = q->stats.pdrop,
|
|
|
|
.other = q->stats.other,
|
|
|
|
.marked = q->stats.prob_mark + q->stats.forced_mark,
|
|
|
|
};
|
|
|
|
|
|
|
|
return gnet_stats_copy_app(d, &st, sizeof(st));
|
2005-04-17 00:20:36 +02:00
|
|
|
}
|
|
|
|
|
2006-03-21 04:20:44 +01:00
|
|
|
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
|
|
|
|
tcm->tcm_handle |= TC_H_MIN(1);
|
|
|
|
tcm->tcm_info = q->qdisc->handle;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
|
|
|
|
struct Qdisc **old)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
|
|
|
|
if (new == NULL)
|
|
|
|
new = &noop_qdisc;
|
|
|
|
|
|
|
|
sch_tree_lock(sch);
|
2008-11-20 13:11:36 +01:00
|
|
|
*old = q->qdisc;
|
|
|
|
q->qdisc = new;
|
2006-11-30 02:36:20 +01:00
|
|
|
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
|
2006-03-21 04:20:44 +01:00
|
|
|
qdisc_reset(*old);
|
|
|
|
sch_tree_unlock(sch);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct red_sched_data *q = qdisc_priv(sch);
|
|
|
|
return q->qdisc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Class lookup: every classid resolves to the same class reference, 1. */
static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	const unsigned long only_class = 1;

	return only_class;
}
|
|
|
|
|
|
|
|
/* Release a class reference obtained from red_get(); nothing to undo. */
static void red_put(struct Qdisc *sch, unsigned long arg)
{
	return;
}
|
|
|
|
|
|
|
|
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
|
|
|
|
{
|
|
|
|
if (!walker->stop) {
|
|
|
|
if (walker->count >= walker->skip)
|
|
|
|
if (walker->fn(sch, 1, walker) < 0) {
|
|
|
|
walker->stop = 1;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
walker->count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-11-14 10:44:41 +01:00
|
|
|
static const struct Qdisc_class_ops red_class_ops = {
|
2006-03-21 04:20:44 +01:00
|
|
|
.graft = red_graft,
|
|
|
|
.leaf = red_leaf,
|
|
|
|
.get = red_get,
|
|
|
|
.put = red_put,
|
|
|
|
.walk = red_walk,
|
|
|
|
.dump = red_dump_class,
|
|
|
|
};
|
|
|
|
|
2007-11-14 10:44:41 +01:00
|
|
|
/* Qdisc operations registered under the "red" identifier. */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		= "red",
	.priv_size	= sizeof(struct red_sched_data),
	.cl_ops		= &red_class_ops,
	.enqueue	= red_enqueue,
	.dequeue	= red_dequeue,
	.peek		= red_peek,
	.drop		= red_drop,
	.init		= red_init,
	.reset		= red_reset,
	.destroy	= red_destroy,
	.change		= red_change,
	.dump		= red_dump,
	.dump_stats	= red_dump_stats,
	.owner		= THIS_MODULE,
};
|
|
|
|
|
|
|
|
/* Module load: register the RED qdisc; returns register_qdisc()'s result. */
static int __init red_module_init(void)
{
	int rc = register_qdisc(&red_qdisc_ops);

	return rc;
}
|
2005-11-05 21:14:08 +01:00
|
|
|
|
|
|
|
/* Module unload: unregister the RED qdisc. */
static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
|
2005-11-05 21:14:08 +01:00
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
module_init(red_module_init)
|
|
|
|
module_exit(red_module_exit)
|
2005-11-05 21:14:08 +01:00
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
MODULE_LICENSE("GPL");
|