net: sched: Tegra Network Device Queue packet scheduling algorithm
Michael Hsu [Tue, 12 May 2015 00:54:19 +0000 (17:54 -0700)]
Tegra devices reserve the highest-priority queue for real-time network
devices, such as an audio stream to a network (Wi-Fi) gaming controller.

Bug 1640086

Change-Id: I34a97edc87404c630c483389459f8e7b2474e892
Signed-off-by: Michael Hsu <mhsu@nvidia.com>
Signed-off-by: Xiao Bo Zhao <xiaoboz@nvidia.com>
Reviewed-on: http://git-master/r/795857
(cherry picked from commit fae05854103a872c8b9913079ffc83f3a6c79c68)
Reviewed-on: http://git-master/r/815685
Reviewed-by: Vinayak Pane <vpane@nvidia.com>

net/ipv4/tcp_output.c
net/sched/Kconfig
net/sched/Makefile
net/sched/sch_generic.c
net/sched/sch_mq.c
net/sched/sch_mqprio.c
net/sched/sch_tegra.c [new file with mode: 0644]

index fd776c8..6a40f8e 100644 (file)
 #include <linux/gfp.h>
 #include <linux/module.h>
 
+/* allow Tegra qdisc to restrict tcp rx datarate */
+#ifdef CONFIG_NET_SCH_TEGRA
+uint tcp_window_divisor = 1;
+module_param(tcp_window_divisor, uint, 0644);
+#endif
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
@@ -917,6 +923,17 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                th->window      = htons(min(tp->rcv_wnd, 65535U));
        } else {
                th->window      = htons(tcp_select_window(sk));
+#ifdef CONFIG_NET_SCH_TEGRA
+               if (tcp_window_divisor > 1) {
+                       unsigned short window = ntohs(th->window);
+                       pr_debug("%s: skb %p len %d window %hu"
+                               " scale %d tp->rcv_wnd %lu\n",
+                               __func__, skb, skb->len, window,
+                               tp->rx_opt.rcv_wscale, tp->rcv_wnd);
+                       window /= tcp_window_divisor;
+                       th->window = htons(window);
+               }
+#endif
        }
        th->check               = 0;
        th->urg_ptr             = 0;
index 235e01a..a485232 100644 (file)
@@ -272,6 +272,17 @@ config NET_SCH_FQ_CODEL
 
          If unsure, say N.
 
+config NET_SCH_TEGRA
+       tristate "Tegra Network Device Queue (TEGRA)"
+       help
+         Say Y here if you want to use the Tegra Network Device Queue (TEGRA)
+         packet scheduling algorithm.
+
+         To compile this driver as a module, choose M here: the module
+         will be called sch_tegra.
+
+         If unsure, say N.
+
 config NET_SCH_INGRESS
        tristate "Ingress Qdisc"
        depends on NET_CLS_ACT
index 978cbf0..075f0b6 100644 (file)
@@ -39,6 +39,7 @@ obj-$(CONFIG_NET_SCH_CHOKE)   += sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)      += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)    += sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_TEGRA)    += sch_tegra.o
 
 obj-$(CONFIG_NET_CLS_U32)      += cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)   += cls_route.o
index a7f838b..5b642e6 100644 (file)
@@ -680,8 +680,14 @@ static void attach_one_default_qdisc(struct net_device *dev,
        struct Qdisc *qdisc = &noqueue_qdisc;
 
        if (dev->tx_queue_len) {
+#ifdef CONFIG_NET_SCH_TEGRA
+               extern struct Qdisc_ops sch_tegra_pfifo_fast_ops;
+               qdisc = qdisc_create_dflt(dev_queue,
+                                         &sch_tegra_pfifo_fast_ops, TC_H_ROOT);
+#else
                qdisc = qdisc_create_dflt(dev_queue,
                                          &pfifo_fast_ops, TC_H_ROOT);
+#endif
                if (!qdisc) {
                        netdev_info(dev, "activation failed\n");
                        return;
index 5da78a1..35f78e0 100644 (file)
@@ -57,9 +57,16 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 
        for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
                dev_queue = netdev_get_tx_queue(dev, ntx);
+#ifdef CONFIG_NET_SCH_TEGRA
+               extern struct Qdisc_ops sch_tegra_pfifo_fast_ops;
+               qdisc = qdisc_create_dflt(dev_queue, &sch_tegra_pfifo_fast_ops,
+                                         TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                   TC_H_MIN(ntx + 1)));
+#else
                qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
                                          TC_H_MAKE(TC_H_MAJ(sch->handle),
                                                    TC_H_MIN(ntx + 1)));
+#endif
                if (qdisc == NULL)
                        goto err;
                priv->qdiscs[ntx] = qdisc;
index accec33..eeb824b 100644 (file)
@@ -124,9 +124,16 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 
        for (i = 0; i < dev->num_tx_queues; i++) {
                dev_queue = netdev_get_tx_queue(dev, i);
+#ifdef CONFIG_NET_SCH_TEGRA
+               extern struct Qdisc_ops sch_tegra_pfifo_fast_ops;
+               qdisc = qdisc_create_dflt(dev_queue, &sch_tegra_pfifo_fast_ops,
+                                         TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                   TC_H_MIN(i + 1)));
+#else
                qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
                                          TC_H_MAKE(TC_H_MAJ(sch->handle),
                                                    TC_H_MIN(i + 1)));
+#endif
                if (qdisc == NULL) {
                        err = -ENOMEM;
                        goto err;
diff --git a/net/sched/sch_tegra.c b/net/sched/sch_tegra.c
new file mode 100644 (file)
index 0000000..d3c8787
--- /dev/null
@@ -0,0 +1,340 @@
+/*
+ * net/sched/sch_tegra.c
+ *
+ * Tegra Network Device Queue (TEGRA) packet scheduling algorithm.
+ *
+ * Copyright (C) 2015 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/moduleparam.h>
+#include <linux/atomic.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+#define SCH_TEGRA_PR_INFO\
+       switch (sch_tegra_debug) case 1: pr_info\
+
+/*
+ * Number of bytes reserved at the start of our private area for the
+ * superclass (pfifo_fast) private data: one u32 followed by three
+ * struct sk_buff_head.  The u32 is rounded up to the alignment of
+ * struct sk_buff_head so that the padding a compiler inserts between the
+ * two (whenever struct sk_buff_head is more strictly aligned than u32)
+ * is counted; without it the reservation would be too small and the
+ * subclass fields would overlap the tail of the superclass data.
+ *
+ * NOTE(review): this mirrors the layout of pfifo_fast's private struct in
+ * sch_generic.c, which is not visible from this file - confirm that the
+ * two still match whenever pfifo_fast changes.
+ */
+#define PFIFO_FAST_PRIV_SIZEOF \
+       (ALIGN(sizeof(u32), __alignof__(struct sk_buff_head)) \
+       + 3 * sizeof(struct sk_buff_head))
+
+/* EtherType (host byte order) that the enqueue path compares against
+ * skb->protocol to steer a packet into the highest-priority band.
+ */
+#define OZ_ETHERTYPE   0x892e
+
+/* When set to 1, SCH_TEGRA_PR_INFO trace messages are printed. */
+static int sch_tegra_debug;
+module_param(sch_tegra_debug, int, 0644);
+
+/* When zero, enqueue no longer classifies packets into the
+ * highest-priority band and dequeue skips its throttling checks; both
+ * then defer to pfifo_fast.  Packets already sitting in the
+ * highest-priority band are still dequeued first.
+ */
+static int sch_tegra_enable = 1;
+module_param(sch_tegra_enable, int, 0644);
+
+/* Bits dequeued through pfifo_fast during the current measurement
+ * window (scaled down to a per-jiffy average when the window restarts).
+ */
+static unsigned long sch_tegra_pfifo_fast_dequeue_bits;
+module_param(sch_tegra_pfifo_fast_dequeue_bits, ulong, 0644);
+
+/* jiffies value at the start of the current measurement window;
+ * zero means "not started yet".
+ */
+static unsigned long sch_tegra_pfifo_fast_dequeue_jiffies0;
+module_param(sch_tegra_pfifo_fast_dequeue_jiffies0, ulong, 0644);
+
+/* jiffies value at the most recent data-rate sample. */
+static unsigned long sch_tegra_pfifo_fast_dequeue_jiffies1;
+module_param(sch_tegra_pfifo_fast_dequeue_jiffies1, ulong, 0644);
+
+/* Most recently computed dequeue data rate, in bits per second. */
+static unsigned long sch_tegra_pfifo_fast_dequeue_bits_per_sec;
+module_param(sch_tegra_pfifo_fast_dequeue_bits_per_sec, ulong, 0644);
+
+/* Data rate (bits per second) at or above which dequeue refuses to hand
+ * out lower-priority packets.
+ */
+static unsigned long sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold
+       = 100000000UL;
+module_param(sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold, ulong, 0644);
+
+/* Number of sch_tegra_pfifo_fast qdisc instances whose highest-priority
+ * band currently holds at least one packet.  Incremented by enqueue when
+ * an instance's band becomes non-empty, decremented by dequeue/reset when
+ * it becomes empty again.  While this is greater than zero, dequeue
+ * returns no lower-priority packets.
+ */
+static atomic_t sch_tegra_pfifo_fast_highest_prio = ATOMIC_INIT(0);
+
+/*
+ * Private data for a sch_tegra_pfifo_fast scheduler containing:
+ *     - pfifo_fast private data
+ *       (sch_tegra_pfifo_fast subclasses pfifo_fast, whose callbacks are
+ *       invoked on the same qdisc and therefore use the start of this
+ *       structure as their own private area)
+ *     - bitmap + queue for the highest-priority band
+ *       (reserved for real-time tegra devices, such as an audio stream to
+ *       a network gaming controller)
+ */
+struct sch_tegra_pfifo_fast_priv {
+       /* Opaque storage for the superclass; this must be at the beginning
+        * of the structure for subclassing to work, and must be at least
+        * as large as the superclass private data.
+        */
+       char pfifo_fast_priv[PFIFO_FAST_PRIV_SIZEOF];
+       /* bitmap for highest priority traffic; only bit 0 is used and it
+        * is set while q_highest_prio is non-empty
+        */
+       u32 bitmap_highest_prio;
+       /* queue for the highest priority traffic
+        * - such as audio stream to network gaming controller
+        * - more...
+        */
+       struct sk_buff_head q_highest_prio;
+};
+
+/*
+ * Account one dequeued packet (or just take a sample when @skb is NULL)
+ * and refresh sch_tegra_pfifo_fast_dequeue_bits_per_sec.
+ *
+ * The rate is left untouched until at least 10 jiffies of the current
+ * window have elapsed.  Once the window is older than one second, or the
+ * per-jiffy average would not fit when scaled by HZ, the bit counter is
+ * collapsed to its per-jiffy average and a new window is started.
+ *
+ * Returns @skb unchanged so the call can wrap a dequeue expression.
+ */
+static struct sk_buff *
+sch_tegra_pfifo_fast_dequeue_datarate(struct sk_buff *skb)
+{
+       unsigned long elapsed;
+       bool restart_window;
+
+       if (skb)
+               sch_tegra_pfifo_fast_dequeue_bits += skb->len * 8;
+
+       /* zero means no window has been opened yet */
+       if (!sch_tegra_pfifo_fast_dequeue_jiffies0)
+               sch_tegra_pfifo_fast_dequeue_jiffies0 = jiffies;
+       sch_tegra_pfifo_fast_dequeue_jiffies1 = jiffies;
+
+       elapsed = sch_tegra_pfifo_fast_dequeue_jiffies1
+               - sch_tegra_pfifo_fast_dequeue_jiffies0;
+       if (elapsed < 10)
+               return skb;
+
+       restart_window = (elapsed > msecs_to_jiffies(1000)) ||
+               (sch_tegra_pfifo_fast_dequeue_bits / (elapsed + 1)
+                       > ULONG_MAX / HZ);
+       if (restart_window) {
+               /* keep only the per-jiffy average of the old window */
+               sch_tegra_pfifo_fast_dequeue_bits /= (elapsed + 1);
+               sch_tegra_pfifo_fast_dequeue_jiffies0 = jiffies;
+               sch_tegra_pfifo_fast_dequeue_jiffies1
+                       = sch_tegra_pfifo_fast_dequeue_jiffies0;
+               elapsed = 0;
+       }
+
+       sch_tegra_pfifo_fast_dequeue_bits_per_sec
+               = sch_tegra_pfifo_fast_dequeue_bits / (elapsed + 1) * HZ;
+
+       return skb;
+}
+
+
+/*
+ * Enqueue hook.  While sch_tegra_enable is set, a packet whose protocol
+ * is OZ_ETHERTYPE is appended to the highest-priority band, provided the
+ * qdisc holds fewer packets than the device's tx_queue_len.  Every other
+ * packet is handed to the pfifo_fast enqueue hook.
+ *
+ * Returns the result of the queueing helper that accepted the packet, or
+ * 0 when pfifo_fast provides no enqueue hook.
+ */
+static int
+sch_tegra_pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
+{
+       bool use_highest_prio;
+
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* subclass sch_tegra_pfifo_fast_ops -> enqueue() */
+       use_highest_prio = sch_tegra_enable &&
+               (skb->protocol == htons(OZ_ETHERTYPE)) &&
+               (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len);
+
+       if (use_highest_prio) {
+               struct sch_tegra_pfifo_fast_priv *state = qdisc_priv(qdisc);
+               struct sk_buff_head *band = &state->q_highest_prio;
+
+               SCH_TEGRA_PR_INFO("%s: OZ_ETHERTYPE\n", __func__);
+
+               /* first packet in this band: mark the qdisc as busy */
+               if ((state->bitmap_highest_prio & 1) == 0) {
+                       atomic_inc(&sch_tegra_pfifo_fast_highest_prio);
+                       state->bitmap_highest_prio |= 1;
+               }
+               qdisc->q.qlen++;
+               return __qdisc_enqueue_tail(skb, qdisc, band);
+       }
+
+       /* superclass pfifo_fast_ops -> enqueue() */
+       if (!pfifo_fast_ops.enqueue)
+               return 0;
+       return pfifo_fast_ops.enqueue(skb, qdisc);
+}
+
+/*
+ * Dequeue hook.
+ *
+ * 1. If this qdisc has packets in its highest-priority band, the head of
+ *    that band is returned (regardless of sch_tegra_enable, so packets
+ *    queued before a disable still drain).
+ * 2. Otherwise, while sch_tegra_enable is set, NULL is returned when
+ *    either
+ *      - any sch_tegra_pfifo_fast qdisc still holds highest-priority
+ *        packets, or
+ *      - the measured dequeue data rate has reached
+ *        sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold.
+ * 3. Otherwise the packet comes from the pfifo_fast dequeue hook and is
+ *    accounted in the data-rate measurement.
+ *
+ * NOTE(review): in case 2 lower-priority packets stay queued and nothing
+ * in this file reschedules the qdisc - confirm that callers retry.
+ *
+ * Returns the dequeued packet, or NULL when nothing may be sent now.
+ */
+static struct sk_buff *
+sch_tegra_pfifo_fast_dequeue
+       (struct Qdisc *qdisc)
+{
+       struct sch_tegra_pfifo_fast_priv *priv = qdisc_priv(qdisc);
+       int busy;
+
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* subclass sch_tegra_pfifo_fast_ops -> dequeue() */
+       if (priv->bitmap_highest_prio & 1) {
+               struct sk_buff_head *list = &priv->q_highest_prio;
+               struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
+
+               qdisc->q.qlen--;
+               /* band drained: this qdisc no longer blocks the others */
+               if (skb_queue_empty(list)) {
+                       atomic_dec(&sch_tegra_pfifo_fast_highest_prio);
+                       priv->bitmap_highest_prio &= ~1;
+               }
+
+               return skb;
+       }
+
+       if (sch_tegra_enable) {
+               busy = atomic_read(&sch_tegra_pfifo_fast_highest_prio);
+               if (busy > 0) {
+                       SCH_TEGRA_PR_INFO("%s:"
+                               " sch_tegra_pfifo_fast_highest_prio (%d) > 0"
+                               " - do not dequeue lower priority packet(s)\n",
+                               __func__,
+                               busy);
+                       return NULL;
+               }
+
+               /* refresh the rate without accounting a packet */
+               sch_tegra_pfifo_fast_dequeue_datarate(NULL);
+               if (sch_tegra_pfifo_fast_dequeue_bits_per_sec
+                       >= sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold) {
+                       /* both values are unsigned long, hence %lu */
+                       SCH_TEGRA_PR_INFO("%s:"
+                               " sch_tegra_pfifo_fast_dequeue_bits_per_sec"
+                               " (%lu) >="
+                               " sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold"
+                               " (%lu)"
+                               " - do not dequeue lower priority packet(s)\n",
+                               __func__,
+                               sch_tegra_pfifo_fast_dequeue_bits_per_sec,
+                               sch_tegra_pfifo_fast_dequeue_bits_per_sec_threshold);
+                       return NULL;
+               }
+       }
+
+       /* superclass pfifo_fast_ops -> dequeue() */
+       if (pfifo_fast_ops.dequeue)
+               return sch_tegra_pfifo_fast_dequeue_datarate
+                       (pfifo_fast_ops.dequeue(qdisc));
+       return NULL;
+
+}
+
+/*
+ * Peek hook: returns, without removing it, the head of the
+ * highest-priority band when that band is marked non-empty; otherwise
+ * whatever the pfifo_fast peek hook reports (NULL if it has none).
+ */
+static struct sk_buff *
+sch_tegra_pfifo_fast_peek(struct Qdisc *qdisc)
+{
+       struct sch_tegra_pfifo_fast_priv *state = qdisc_priv(qdisc);
+
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* subclass sch_tegra_pfifo_fast_ops -> peek() */
+       if ((state->bitmap_highest_prio & 1) != 0)
+               return skb_peek(&state->q_highest_prio);
+
+       /* superclass pfifo_fast_ops -> peek() */
+       if (!pfifo_fast_ops.peek)
+               return NULL;
+       return pfifo_fast_ops.peek(qdisc);
+}
+
+/*
+ * Init hook: runs the pfifo_fast init hook on the qdisc, then sets up the
+ * highest-priority band.
+ *
+ * @qdisc: qdisc being initialised
+ * @opt:   netlink options, passed through to pfifo_fast unchanged
+ *
+ * Returns 0 on success, or the negative error reported by the pfifo_fast
+ * init hook, in which case the subclass state is left untouched.
+ */
+static int
+sch_tegra_pfifo_fast_init
+       (struct Qdisc *qdisc, struct nlattr *opt)
+{
+       struct sch_tegra_pfifo_fast_priv *priv = qdisc_priv(qdisc);
+       int err;
+
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* superclass pfifo_fast_ops -> init() */
+       if (pfifo_fast_ops.init) {
+               err = pfifo_fast_ops.init(qdisc, opt);
+               if (err < 0)
+                       return err;
+       }
+
+       /* override flag set by superclass pfifo_fast_ops -> init();
+        * presumably so that every packet passes through enqueue() and
+        * dequeue() above - TODO confirm
+        */
+       qdisc->flags &= ~TCQ_F_CAN_BYPASS;
+
+       /* subclass sch_tegra_pfifo_fast_ops -> init() */
+       priv->bitmap_highest_prio = 0;
+       skb_queue_head_init(&priv->q_highest_prio);
+
+       return 0;
+
+}
+
+/*
+ * Reset hook: empties the highest-priority band, releases this qdisc's
+ * contribution to the global highest-priority counter, zeroes the queue
+ * length and backlog, and finally lets pfifo_fast reset its own bands.
+ */
+static void
+sch_tegra_pfifo_fast_reset(struct Qdisc *qdisc)
+{
+       struct sch_tegra_pfifo_fast_priv *state = qdisc_priv(qdisc);
+
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* subclass sch_tegra_pfifo_fast_ops -> reset() */
+       __qdisc_reset_queue(qdisc, &state->q_highest_prio);
+       if (state->bitmap_highest_prio != 0) {
+               atomic_dec(&sch_tegra_pfifo_fast_highest_prio);
+               state->bitmap_highest_prio = 0;
+       }
+       qdisc->q.qlen = 0;
+       qdisc->qstats.backlog = 0;
+
+       /* superclass pfifo_fast_ops -> reset() */
+       if (pfifo_fast_ops.reset)
+               pfifo_fast_ops.reset(qdisc);
+}
+
+/*
+ * Dump hook: the subclass adds nothing of its own, so the result is
+ * whatever the pfifo_fast dump hook returns (0 if it has none).
+ */
+static int
+sch_tegra_pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+       SCH_TEGRA_PR_INFO("%s\n", __func__);
+
+       /* superclass pfifo_fast_ops -> dump() */
+       if (!pfifo_fast_ops.dump)
+               return 0;
+       return pfifo_fast_ops.dump(qdisc, skb);
+}
+
+/*
+ * Operations table for the "tegra_pfifo" qdisc.  Deliberately not static:
+ * attach_one_default_qdisc(), mq_init() and mqprio_init() reference it
+ * through extern declarations when CONFIG_NET_SCH_TEGRA is defined, so
+ * that it is used in place of pfifo_fast_ops as the default qdisc.
+ */
+/*static*/ struct Qdisc_ops sch_tegra_pfifo_fast_ops __read_mostly = {
+       .id             =       "tegra_pfifo",
+       .priv_size      =       sizeof(struct sch_tegra_pfifo_fast_priv),
+       .enqueue        =       sch_tegra_pfifo_fast_enqueue,
+       .dequeue        =       sch_tegra_pfifo_fast_dequeue,
+       .peek           =       sch_tegra_pfifo_fast_peek,
+       .init           =       sch_tegra_pfifo_fast_init,
+       .reset          =       sch_tegra_pfifo_fast_reset,
+       .dump           =       sch_tegra_pfifo_fast_dump,
+       .owner          =       THIS_MODULE,
+};
+EXPORT_SYMBOL(sch_tegra_pfifo_fast_ops);
+
+/*
+ * Module entry point: registers the tegra_pfifo qdisc.
+ *
+ * Returns 0 on success, or the negative error code reported by
+ * register_qdisc() so the caller can see why registration failed.
+ */
+static int __init sch_tegra_init(void)
+{
+       int err;
+
+       pr_info("%s\n", __func__);
+
+       err = register_qdisc(&sch_tegra_pfifo_fast_ops);
+       if (err < 0) {
+               pr_err("%s: failed to register qdisc: %d\n", __func__, err);
+               return err;
+       }
+
+       return 0;
+
+}
+
+/* Module exit point: logs the call and unregisters the tegra_pfifo qdisc. */
+static void __exit sch_tegra_exit(void)
+{
+       pr_info("%s\n", __func__);
+       unregister_qdisc(&sch_tegra_pfifo_fast_ops);
+}
+
+module_init(sch_tegra_init);
+module_exit(sch_tegra_exit);
+
+MODULE_LICENSE("GPL v2");