// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */
| 8 | |
| 9 | #include <linux/types.h> |
| 10 | #include <linux/slab.h> |
| 11 | #include <linux/kernel.h> |
| 12 | #include <linux/string.h> |
| 13 | #include <linux/list.h> |
| 14 | #include <linux/errno.h> |
| 15 | #include <linux/skbuff.h> |
| 16 | #include <linux/module.h> |
| 17 | #include <linux/spinlock.h> |
| 18 | #include <net/netlink.h> |
| 19 | #include <net/pkt_sched.h> |
| 20 | #include <net/pkt_cls.h> |
| 21 | #include <net/sch_generic.h> |
| 22 | |
/* Gate mask with every bit set. It is used while no schedule entry is
 * active, so that no traffic class is blocked. The value is parenthesized
 * so the macro expands safely inside any expression.
 */
#define TAPRIO_ALL_GATES_OPEN (-1)
| 24 | |
| 25 | struct sched_entry { |
| 26 | struct list_head list; |
| 27 | |
| 28 | /* The instant that this entry "closes" and the next one |
| 29 | * should open, the qdisc will make some effort so that no |
| 30 | * packet leaves after this time. |
| 31 | */ |
| 32 | ktime_t close_time; |
| 33 | atomic_t budget; |
| 34 | int index; |
| 35 | u32 gate_mask; |
| 36 | u32 interval; |
| 37 | u8 command; |
| 38 | }; |
| 39 | |
| 40 | struct taprio_sched { |
| 41 | struct Qdisc **qdiscs; |
| 42 | struct Qdisc *root; |
| 43 | s64 base_time; |
| 44 | int clockid; |
| 45 | int picos_per_byte; /* Using picoseconds because for 10Gbps+ |
| 46 | * speeds it's sub-nanoseconds per byte |
| 47 | */ |
| 48 | size_t num_entries; |
| 49 | |
| 50 | /* Protects the update side of the RCU protected current_entry */ |
| 51 | spinlock_t current_entry_lock; |
| 52 | struct sched_entry __rcu *current_entry; |
| 53 | struct list_head entries; |
| 54 | ktime_t (*get_time)(void); |
| 55 | struct hrtimer advance_timer; |
| 56 | }; |
| 57 | |
| 58 | static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
| 59 | struct sk_buff **to_free) |
| 60 | { |
| 61 | struct taprio_sched *q = qdisc_priv(sch); |
| 62 | struct Qdisc *child; |
| 63 | int queue; |
| 64 | |
| 65 | queue = skb_get_queue_mapping(skb); |
| 66 | |
| 67 | child = q->qdiscs[queue]; |
| 68 | if (unlikely(!child)) |
| 69 | return qdisc_drop(skb, sch, to_free); |
| 70 | |
| 71 | qdisc_qstats_backlog_inc(sch, skb); |
| 72 | sch->q.qlen++; |
| 73 | |
| 74 | return qdisc_enqueue(skb, child, to_free); |
| 75 | } |
| 76 | |
| 77 | static struct sk_buff *taprio_peek(struct Qdisc *sch) |
| 78 | { |
| 79 | struct taprio_sched *q = qdisc_priv(sch); |
| 80 | struct net_device *dev = qdisc_dev(sch); |
| 81 | struct sched_entry *entry; |
| 82 | struct sk_buff *skb; |
| 83 | u32 gate_mask; |
| 84 | int i; |
| 85 | |
| 86 | rcu_read_lock(); |
| 87 | entry = rcu_dereference(q->current_entry); |
| 88 | gate_mask = entry ? entry->gate_mask : -1; |
| 89 | rcu_read_unlock(); |
| 90 | |
| 91 | if (!gate_mask) |
| 92 | return NULL; |
| 93 | |
| 94 | for (i = 0; i < dev->num_tx_queues; i++) { |
| 95 | struct Qdisc *child = q->qdiscs[i]; |
| 96 | int prio; |
| 97 | u8 tc; |
| 98 | |
| 99 | if (unlikely(!child)) |
| 100 | continue; |
| 101 | |
| 102 | skb = child->ops->peek(child); |
| 103 | if (!skb) |
| 104 | continue; |
| 105 | |
| 106 | prio = skb->priority; |
| 107 | tc = netdev_get_prio_tc_map(dev, prio); |
| 108 | |
| 109 | if (!(gate_mask & BIT(tc))) |
| 110 | return NULL; |
| 111 | |
| 112 | return skb; |
| 113 | } |
| 114 | |
| 115 | return NULL; |
| 116 | } |
| 117 | |
| 118 | static inline int length_to_duration(struct taprio_sched *q, int len) |
| 119 | { |
| 120 | return (len * q->picos_per_byte) / 1000; |
| 121 | } |
| 122 | |
| 123 | static struct sk_buff *taprio_dequeue(struct Qdisc *sch) |
| 124 | { |
| 125 | struct taprio_sched *q = qdisc_priv(sch); |
| 126 | struct net_device *dev = qdisc_dev(sch); |
| 127 | struct sched_entry *entry; |
| 128 | struct sk_buff *skb; |
| 129 | u32 gate_mask; |
| 130 | int i; |
| 131 | |
| 132 | rcu_read_lock(); |
| 133 | entry = rcu_dereference(q->current_entry); |
| 134 | /* if there's no entry, it means that the schedule didn't |
| 135 | * start yet, so force all gates to be open, this is in |
| 136 | * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 |
| 137 | * "AdminGateSates" |
| 138 | */ |
| 139 | gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; |
| 140 | rcu_read_unlock(); |
| 141 | |
| 142 | if (!gate_mask) |
| 143 | return NULL; |
| 144 | |
| 145 | for (i = 0; i < dev->num_tx_queues; i++) { |
| 146 | struct Qdisc *child = q->qdiscs[i]; |
| 147 | ktime_t guard; |
| 148 | int prio; |
| 149 | int len; |
| 150 | u8 tc; |
| 151 | |
| 152 | if (unlikely(!child)) |
| 153 | continue; |
| 154 | |
| 155 | skb = child->ops->peek(child); |
| 156 | if (!skb) |
| 157 | continue; |
| 158 | |
| 159 | prio = skb->priority; |
| 160 | tc = netdev_get_prio_tc_map(dev, prio); |
| 161 | |
| 162 | if (!(gate_mask & BIT(tc))) |
| 163 | continue; |
| 164 | |
| 165 | len = qdisc_pkt_len(skb); |
| 166 | guard = ktime_add_ns(q->get_time(), |
| 167 | length_to_duration(q, len)); |
| 168 | |
| 169 | /* In the case that there's no gate entry, there's no |
| 170 | * guard band ... |
| 171 | */ |
| 172 | if (gate_mask != TAPRIO_ALL_GATES_OPEN && |
| 173 | ktime_after(guard, entry->close_time)) |
| 174 | return NULL; |
| 175 | |
| 176 | /* ... and no budget. */ |
| 177 | if (gate_mask != TAPRIO_ALL_GATES_OPEN && |
| 178 | atomic_sub_return(len, &entry->budget) < 0) |
| 179 | return NULL; |
| 180 | |
| 181 | skb = child->ops->dequeue(child); |
| 182 | if (unlikely(!skb)) |
| 183 | return NULL; |
| 184 | |
| 185 | qdisc_bstats_update(sch, skb); |
| 186 | qdisc_qstats_backlog_dec(sch, skb); |
| 187 | sch->q.qlen--; |
| 188 | |
| 189 | return skb; |
| 190 | } |
| 191 | |
| 192 | return NULL; |
| 193 | } |
| 194 | |
| 195 | static bool should_restart_cycle(const struct taprio_sched *q, |
| 196 | const struct sched_entry *entry) |
| 197 | { |
| 198 | WARN_ON(!entry); |
| 199 | |
| 200 | return list_is_last(&entry->list, &q->entries); |
| 201 | } |
| 202 | |
| 203 | static enum hrtimer_restart advance_sched(struct hrtimer *timer) |
| 204 | { |
| 205 | struct taprio_sched *q = container_of(timer, struct taprio_sched, |
| 206 | advance_timer); |
| 207 | struct sched_entry *entry, *next; |
| 208 | struct Qdisc *sch = q->root; |
| 209 | ktime_t close_time; |
| 210 | |
| 211 | spin_lock(&q->current_entry_lock); |
| 212 | entry = rcu_dereference_protected(q->current_entry, |
| 213 | lockdep_is_held(&q->current_entry_lock)); |
| 214 | |
| 215 | /* This is the case that it's the first time that the schedule |
| 216 | * runs, so it only happens once per schedule. The first entry |
| 217 | * is pre-calculated during the schedule initialization. |
| 218 | */ |
| 219 | if (unlikely(!entry)) { |
| 220 | next = list_first_entry(&q->entries, struct sched_entry, |
| 221 | list); |
| 222 | close_time = next->close_time; |
| 223 | goto first_run; |
| 224 | } |
| 225 | |
| 226 | if (should_restart_cycle(q, entry)) |
| 227 | next = list_first_entry(&q->entries, struct sched_entry, |
| 228 | list); |
| 229 | else |
| 230 | next = list_next_entry(entry, list); |
| 231 | |
| 232 | close_time = ktime_add_ns(entry->close_time, next->interval); |
| 233 | |
| 234 | next->close_time = close_time; |
| 235 | atomic_set(&next->budget, |
| 236 | (next->interval * 1000) / q->picos_per_byte); |
| 237 | |
| 238 | first_run: |
| 239 | rcu_assign_pointer(q->current_entry, next); |
| 240 | spin_unlock(&q->current_entry_lock); |
| 241 | |
| 242 | hrtimer_set_expires(&q->advance_timer, close_time); |
| 243 | |
| 244 | rcu_read_lock(); |
| 245 | __netif_schedule(sch); |
| 246 | rcu_read_unlock(); |
| 247 | |
| 248 | return HRTIMER_RESTART; |
| 249 | } |
| 250 | |
| 251 | static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { |
| 252 | [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, |
| 253 | [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, |
| 254 | [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, |
| 255 | [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, |
| 256 | }; |
| 257 | |
| 258 | static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { |
| 259 | [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, |
| 260 | }; |
| 261 | |
| 262 | static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { |
| 263 | [TCA_TAPRIO_ATTR_PRIOMAP] = { |
| 264 | .len = sizeof(struct tc_mqprio_qopt) |
| 265 | }, |
| 266 | [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, |
| 267 | [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, |
| 268 | [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, |
| 269 | [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, |
| 270 | }; |
| 271 | |
| 272 | static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, |
| 273 | struct netlink_ext_ack *extack) |
| 274 | { |
| 275 | u32 interval = 0; |
| 276 | |
| 277 | if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) |
| 278 | entry->command = nla_get_u8( |
| 279 | tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); |
| 280 | |
| 281 | if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) |
| 282 | entry->gate_mask = nla_get_u32( |
| 283 | tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); |
| 284 | |
| 285 | if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) |
| 286 | interval = nla_get_u32( |
| 287 | tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); |
| 288 | |
| 289 | if (interval == 0) { |
| 290 | NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); |
| 291 | return -EINVAL; |
| 292 | } |
| 293 | |
| 294 | entry->interval = interval; |
| 295 | |
| 296 | return 0; |
| 297 | } |
| 298 | |
| 299 | static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, |
| 300 | int index, struct netlink_ext_ack *extack) |
| 301 | { |
| 302 | struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; |
| 303 | int err; |
| 304 | |
| 305 | err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, |
| 306 | entry_policy, NULL); |
| 307 | if (err < 0) { |
| 308 | NL_SET_ERR_MSG(extack, "Could not parse nested entry"); |
| 309 | return -EINVAL; |
| 310 | } |
| 311 | |
| 312 | entry->index = index; |
| 313 | |
| 314 | return fill_sched_entry(tb, entry, extack); |
| 315 | } |
| 316 | |
| 317 | /* Returns the number of entries in case of success */ |
| 318 | static int parse_sched_single_entry(struct nlattr *n, |
| 319 | struct taprio_sched *q, |
| 320 | struct netlink_ext_ack *extack) |
| 321 | { |
| 322 | struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; |
| 323 | struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; |
| 324 | struct sched_entry *entry; |
| 325 | bool found = false; |
| 326 | u32 index; |
| 327 | int err; |
| 328 | |
| 329 | err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, |
| 330 | n, entry_list_policy, NULL); |
| 331 | if (err < 0) { |
| 332 | NL_SET_ERR_MSG(extack, "Could not parse nested entry"); |
| 333 | return -EINVAL; |
| 334 | } |
| 335 | |
| 336 | if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { |
| 337 | NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); |
| 338 | return -EINVAL; |
| 339 | } |
| 340 | |
| 341 | err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, |
| 342 | tb_list[TCA_TAPRIO_SCHED_ENTRY], |
| 343 | entry_policy, NULL); |
| 344 | if (err < 0) { |
| 345 | NL_SET_ERR_MSG(extack, "Could not parse nested entry"); |
| 346 | return -EINVAL; |
| 347 | } |
| 348 | |
| 349 | if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { |
| 350 | NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); |
| 351 | return -EINVAL; |
| 352 | } |
| 353 | |
| 354 | index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); |
| 355 | if (index >= q->num_entries) { |
| 356 | NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); |
| 357 | return -EINVAL; |
| 358 | } |
| 359 | |
| 360 | list_for_each_entry(entry, &q->entries, list) { |
| 361 | if (entry->index == index) { |
| 362 | found = true; |
| 363 | break; |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | if (!found) { |
| 368 | NL_SET_ERR_MSG(extack, "Could not find entry"); |
| 369 | return -ENOENT; |
| 370 | } |
| 371 | |
| 372 | err = fill_sched_entry(tb_entry, entry, extack); |
| 373 | if (err < 0) |
| 374 | return err; |
| 375 | |
| 376 | return q->num_entries; |
| 377 | } |
| 378 | |
| 379 | static int parse_sched_list(struct nlattr *list, |
| 380 | struct taprio_sched *q, |
| 381 | struct netlink_ext_ack *extack) |
| 382 | { |
| 383 | struct nlattr *n; |
| 384 | int err, rem; |
| 385 | int i = 0; |
| 386 | |
| 387 | if (!list) |
| 388 | return -EINVAL; |
| 389 | |
| 390 | nla_for_each_nested(n, list, rem) { |
| 391 | struct sched_entry *entry; |
| 392 | |
| 393 | if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { |
| 394 | NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); |
| 395 | continue; |
| 396 | } |
| 397 | |
| 398 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
| 399 | if (!entry) { |
| 400 | NL_SET_ERR_MSG(extack, "Not enough memory for entry"); |
| 401 | return -ENOMEM; |
| 402 | } |
| 403 | |
| 404 | err = parse_sched_entry(n, entry, i, extack); |
| 405 | if (err < 0) { |
| 406 | kfree(entry); |
| 407 | return err; |
| 408 | } |
| 409 | |
| 410 | list_add_tail(&entry->list, &q->entries); |
| 411 | i++; |
| 412 | } |
| 413 | |
| 414 | q->num_entries = i; |
| 415 | |
| 416 | return i; |
| 417 | } |
| 418 | |
| 419 | /* Returns the number of entries in case of success */ |
| 420 | static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, |
| 421 | struct netlink_ext_ack *extack) |
| 422 | { |
| 423 | int err = 0; |
| 424 | int clockid; |
| 425 | |
| 426 | if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && |
| 427 | tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) |
| 428 | return -EINVAL; |
| 429 | |
| 430 | if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) |
| 431 | return -EINVAL; |
| 432 | |
| 433 | if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) |
| 434 | return -EINVAL; |
| 435 | |
| 436 | if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) |
| 437 | q->base_time = nla_get_s64( |
| 438 | tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); |
| 439 | |
| 440 | if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { |
| 441 | clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); |
| 442 | |
| 443 | /* We only support static clockids and we don't allow |
| 444 | * for it to be modified after the first init. |
| 445 | */ |
| 446 | if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) |
| 447 | return -EINVAL; |
| 448 | |
| 449 | q->clockid = clockid; |
| 450 | } |
| 451 | |
| 452 | if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) |
| 453 | err = parse_sched_list( |
| 454 | tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); |
| 455 | else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) |
| 456 | err = parse_sched_single_entry( |
| 457 | tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); |
| 458 | |
| 459 | /* parse_sched_* return the number of entries in the schedule, |
| 460 | * a schedule with zero entries is an error. |
| 461 | */ |
| 462 | if (err == 0) { |
| 463 | NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); |
| 464 | return -EINVAL; |
| 465 | } |
| 466 | |
| 467 | return err; |
| 468 | } |
| 469 | |
| 470 | static int taprio_parse_mqprio_opt(struct net_device *dev, |
| 471 | struct tc_mqprio_qopt *qopt, |
| 472 | struct netlink_ext_ack *extack) |
| 473 | { |
| 474 | int i, j; |
| 475 | |
| 476 | if (!qopt) { |
| 477 | NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); |
| 478 | return -EINVAL; |
| 479 | } |
| 480 | |
| 481 | /* Verify num_tc is not out of max range */ |
| 482 | if (qopt->num_tc > TC_MAX_QUEUE) { |
| 483 | NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); |
| 484 | return -EINVAL; |
| 485 | } |
| 486 | |
| 487 | /* taprio imposes that traffic classes map 1:n to tx queues */ |
| 488 | if (qopt->num_tc > dev->num_tx_queues) { |
| 489 | NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); |
| 490 | return -EINVAL; |
| 491 | } |
| 492 | |
| 493 | /* Verify priority mapping uses valid tcs */ |
| 494 | for (i = 0; i < TC_BITMASK + 1; i++) { |
| 495 | if (qopt->prio_tc_map[i] >= qopt->num_tc) { |
| 496 | NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); |
| 497 | return -EINVAL; |
| 498 | } |
| 499 | } |
| 500 | |
| 501 | for (i = 0; i < qopt->num_tc; i++) { |
| 502 | unsigned int last = qopt->offset[i] + qopt->count[i]; |
| 503 | |
| 504 | /* Verify the queue count is in tx range being equal to the |
| 505 | * real_num_tx_queues indicates the last queue is in use. |
| 506 | */ |
| 507 | if (qopt->offset[i] >= dev->num_tx_queues || |
| 508 | !qopt->count[i] || |
| 509 | last > dev->real_num_tx_queues) { |
| 510 | NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); |
| 511 | return -EINVAL; |
| 512 | } |
| 513 | |
| 514 | /* Verify that the offset and counts do not overlap */ |
| 515 | for (j = i + 1; j < qopt->num_tc; j++) { |
| 516 | if (last > qopt->offset[j]) { |
| 517 | NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); |
| 518 | return -EINVAL; |
| 519 | } |
| 520 | } |
| 521 | } |
| 522 | |
| 523 | return 0; |
| 524 | } |
| 525 | |
| 526 | static ktime_t taprio_get_start_time(struct Qdisc *sch) |
| 527 | { |
| 528 | struct taprio_sched *q = qdisc_priv(sch); |
| 529 | struct sched_entry *entry; |
| 530 | ktime_t now, base, cycle; |
| 531 | s64 n; |
| 532 | |
| 533 | base = ns_to_ktime(q->base_time); |
| 534 | cycle = 0; |
| 535 | |
| 536 | /* Calculate the cycle_time, by summing all the intervals. |
| 537 | */ |
| 538 | list_for_each_entry(entry, &q->entries, list) |
| 539 | cycle = ktime_add_ns(cycle, entry->interval); |
| 540 | |
| 541 | if (!cycle) |
| 542 | return base; |
| 543 | |
| 544 | now = q->get_time(); |
| 545 | |
| 546 | if (ktime_after(base, now)) |
| 547 | return base; |
| 548 | |
| 549 | /* Schedule the start time for the beginning of the next |
| 550 | * cycle. |
| 551 | */ |
| 552 | n = div64_s64(ktime_sub_ns(now, base), cycle); |
| 553 | |
| 554 | return ktime_add_ns(base, (n + 1) * cycle); |
| 555 | } |
| 556 | |
| 557 | static void taprio_start_sched(struct Qdisc *sch, ktime_t start) |
| 558 | { |
| 559 | struct taprio_sched *q = qdisc_priv(sch); |
| 560 | struct sched_entry *first; |
| 561 | unsigned long flags; |
| 562 | |
| 563 | spin_lock_irqsave(&q->current_entry_lock, flags); |
| 564 | |
| 565 | first = list_first_entry(&q->entries, struct sched_entry, |
| 566 | list); |
| 567 | |
| 568 | first->close_time = ktime_add_ns(start, first->interval); |
| 569 | atomic_set(&first->budget, |
| 570 | (first->interval * 1000) / q->picos_per_byte); |
| 571 | rcu_assign_pointer(q->current_entry, NULL); |
| 572 | |
| 573 | spin_unlock_irqrestore(&q->current_entry_lock, flags); |
| 574 | |
| 575 | hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); |
| 576 | } |
| 577 | |
| 578 | static int taprio_change(struct Qdisc *sch, struct nlattr *opt, |
| 579 | struct netlink_ext_ack *extack) |
| 580 | { |
| 581 | struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; |
| 582 | struct taprio_sched *q = qdisc_priv(sch); |
| 583 | struct net_device *dev = qdisc_dev(sch); |
| 584 | struct tc_mqprio_qopt *mqprio = NULL; |
| 585 | struct ethtool_link_ksettings ecmd; |
| 586 | int i, err, size; |
| 587 | s64 link_speed; |
| 588 | ktime_t start; |
| 589 | |
| 590 | err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, |
| 591 | taprio_policy, extack); |
| 592 | if (err < 0) |
| 593 | return err; |
| 594 | |
| 595 | err = -EINVAL; |
| 596 | if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) |
| 597 | mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); |
| 598 | |
| 599 | err = taprio_parse_mqprio_opt(dev, mqprio, extack); |
| 600 | if (err < 0) |
| 601 | return err; |
| 602 | |
| 603 | /* A schedule with less than one entry is an error */ |
| 604 | size = parse_taprio_opt(tb, q, extack); |
| 605 | if (size < 0) |
| 606 | return size; |
| 607 | |
| 608 | hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); |
| 609 | q->advance_timer.function = advance_sched; |
| 610 | |
| 611 | switch (q->clockid) { |
| 612 | case CLOCK_REALTIME: |
| 613 | q->get_time = ktime_get_real; |
| 614 | break; |
| 615 | case CLOCK_MONOTONIC: |
| 616 | q->get_time = ktime_get; |
| 617 | break; |
| 618 | case CLOCK_BOOTTIME: |
| 619 | q->get_time = ktime_get_boottime; |
| 620 | break; |
| 621 | case CLOCK_TAI: |
| 622 | q->get_time = ktime_get_clocktai; |
| 623 | break; |
| 624 | default: |
| 625 | return -ENOTSUPP; |
| 626 | } |
| 627 | |
| 628 | for (i = 0; i < dev->num_tx_queues; i++) { |
| 629 | struct netdev_queue *dev_queue; |
| 630 | struct Qdisc *qdisc; |
| 631 | |
| 632 | dev_queue = netdev_get_tx_queue(dev, i); |
| 633 | qdisc = qdisc_create_dflt(dev_queue, |
| 634 | &pfifo_qdisc_ops, |
| 635 | TC_H_MAKE(TC_H_MAJ(sch->handle), |
| 636 | TC_H_MIN(i + 1)), |
| 637 | extack); |
| 638 | if (!qdisc) |
| 639 | return -ENOMEM; |
| 640 | |
| 641 | if (i < dev->real_num_tx_queues) |
| 642 | qdisc_hash_add(qdisc, false); |
| 643 | |
| 644 | q->qdiscs[i] = qdisc; |
| 645 | } |
| 646 | |
| 647 | if (mqprio) { |
| 648 | netdev_set_num_tc(dev, mqprio->num_tc); |
| 649 | for (i = 0; i < mqprio->num_tc; i++) |
| 650 | netdev_set_tc_queue(dev, i, |
| 651 | mqprio->count[i], |
| 652 | mqprio->offset[i]); |
| 653 | |
| 654 | /* Always use supplied priority mappings */ |
| 655 | for (i = 0; i < TC_BITMASK + 1; i++) |
| 656 | netdev_set_prio_tc_map(dev, i, |
| 657 | mqprio->prio_tc_map[i]); |
| 658 | } |
| 659 | |
| 660 | if (!__ethtool_get_link_ksettings(dev, &ecmd)) |
| 661 | link_speed = ecmd.base.speed; |
| 662 | else |
| 663 | link_speed = SPEED_1000; |
| 664 | |
| 665 | q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, |
| 666 | link_speed * 1000 * 1000); |
| 667 | |
| 668 | start = taprio_get_start_time(sch); |
| 669 | if (!start) |
| 670 | return 0; |
| 671 | |
| 672 | taprio_start_sched(sch, start); |
| 673 | |
| 674 | return 0; |
| 675 | } |
| 676 | |
| 677 | static void taprio_destroy(struct Qdisc *sch) |
| 678 | { |
| 679 | struct taprio_sched *q = qdisc_priv(sch); |
| 680 | struct net_device *dev = qdisc_dev(sch); |
| 681 | struct sched_entry *entry, *n; |
| 682 | unsigned int i; |
| 683 | |
| 684 | hrtimer_cancel(&q->advance_timer); |
| 685 | |
| 686 | if (q->qdiscs) { |
| 687 | for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) |
| 688 | qdisc_put(q->qdiscs[i]); |
| 689 | |
| 690 | kfree(q->qdiscs); |
| 691 | } |
| 692 | q->qdiscs = NULL; |
| 693 | |
| 694 | netdev_set_num_tc(dev, 0); |
| 695 | |
| 696 | list_for_each_entry_safe(entry, n, &q->entries, list) { |
| 697 | list_del(&entry->list); |
| 698 | kfree(entry); |
| 699 | } |
| 700 | } |
| 701 | |
| 702 | static int taprio_init(struct Qdisc *sch, struct nlattr *opt, |
| 703 | struct netlink_ext_ack *extack) |
| 704 | { |
| 705 | struct taprio_sched *q = qdisc_priv(sch); |
| 706 | struct net_device *dev = qdisc_dev(sch); |
| 707 | |
| 708 | INIT_LIST_HEAD(&q->entries); |
| 709 | spin_lock_init(&q->current_entry_lock); |
| 710 | |
| 711 | /* We may overwrite the configuration later */ |
| 712 | hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); |
| 713 | |
| 714 | q->root = sch; |
| 715 | |
| 716 | /* We only support static clockids. Use an invalid value as default |
| 717 | * and get the valid one on taprio_change(). |
| 718 | */ |
| 719 | q->clockid = -1; |
| 720 | |
| 721 | if (sch->parent != TC_H_ROOT) |
| 722 | return -EOPNOTSUPP; |
| 723 | |
| 724 | if (!netif_is_multiqueue(dev)) |
| 725 | return -EOPNOTSUPP; |
| 726 | |
| 727 | /* pre-allocate qdisc, attachment can't fail */ |
| 728 | q->qdiscs = kcalloc(dev->num_tx_queues, |
| 729 | sizeof(q->qdiscs[0]), |
| 730 | GFP_KERNEL); |
| 731 | |
| 732 | if (!q->qdiscs) |
| 733 | return -ENOMEM; |
| 734 | |
| 735 | if (!opt) |
| 736 | return -EINVAL; |
| 737 | |
| 738 | return taprio_change(sch, opt, extack); |
| 739 | } |
| 740 | |
| 741 | static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, |
| 742 | unsigned long cl) |
| 743 | { |
| 744 | struct net_device *dev = qdisc_dev(sch); |
| 745 | unsigned long ntx = cl - 1; |
| 746 | |
| 747 | if (ntx >= dev->num_tx_queues) |
| 748 | return NULL; |
| 749 | |
| 750 | return netdev_get_tx_queue(dev, ntx); |
| 751 | } |
| 752 | |
| 753 | static int taprio_graft(struct Qdisc *sch, unsigned long cl, |
| 754 | struct Qdisc *new, struct Qdisc **old, |
| 755 | struct netlink_ext_ack *extack) |
| 756 | { |
| 757 | struct taprio_sched *q = qdisc_priv(sch); |
| 758 | struct net_device *dev = qdisc_dev(sch); |
| 759 | struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); |
| 760 | |
| 761 | if (!dev_queue) |
| 762 | return -EINVAL; |
| 763 | |
| 764 | if (dev->flags & IFF_UP) |
| 765 | dev_deactivate(dev); |
| 766 | |
| 767 | *old = q->qdiscs[cl - 1]; |
| 768 | q->qdiscs[cl - 1] = new; |
| 769 | |
| 770 | if (new) |
| 771 | new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; |
| 772 | |
| 773 | if (dev->flags & IFF_UP) |
| 774 | dev_activate(dev); |
| 775 | |
| 776 | return 0; |
| 777 | } |
| 778 | |
| 779 | static int dump_entry(struct sk_buff *msg, |
| 780 | const struct sched_entry *entry) |
| 781 | { |
| 782 | struct nlattr *item; |
| 783 | |
| 784 | item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); |
| 785 | if (!item) |
| 786 | return -ENOSPC; |
| 787 | |
| 788 | if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index)) |
| 789 | goto nla_put_failure; |
| 790 | |
| 791 | if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command)) |
| 792 | goto nla_put_failure; |
| 793 | |
| 794 | if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, |
| 795 | entry->gate_mask)) |
| 796 | goto nla_put_failure; |
| 797 | |
| 798 | if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, |
| 799 | entry->interval)) |
| 800 | goto nla_put_failure; |
| 801 | |
| 802 | return nla_nest_end(msg, item); |
| 803 | |
| 804 | nla_put_failure: |
| 805 | nla_nest_cancel(msg, item); |
| 806 | return -1; |
| 807 | } |
| 808 | |
| 809 | static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) |
| 810 | { |
| 811 | struct taprio_sched *q = qdisc_priv(sch); |
| 812 | struct net_device *dev = qdisc_dev(sch); |
| 813 | struct tc_mqprio_qopt opt = { 0 }; |
| 814 | struct nlattr *nest, *entry_list; |
| 815 | struct sched_entry *entry; |
| 816 | unsigned int i; |
| 817 | |
| 818 | opt.num_tc = netdev_get_num_tc(dev); |
| 819 | memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); |
| 820 | |
| 821 | for (i = 0; i < netdev_get_num_tc(dev); i++) { |
| 822 | opt.count[i] = dev->tc_to_txq[i].count; |
| 823 | opt.offset[i] = dev->tc_to_txq[i].offset; |
| 824 | } |
| 825 | |
| 826 | nest = nla_nest_start(skb, TCA_OPTIONS); |
| 827 | if (!nest) |
| 828 | return -ENOSPC; |
| 829 | |
| 830 | if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) |
| 831 | goto options_error; |
| 832 | |
| 833 | if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, |
| 834 | q->base_time, TCA_TAPRIO_PAD)) |
| 835 | goto options_error; |
| 836 | |
| 837 | if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) |
| 838 | goto options_error; |
| 839 | |
| 840 | entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); |
| 841 | if (!entry_list) |
| 842 | goto options_error; |
| 843 | |
| 844 | list_for_each_entry(entry, &q->entries, list) { |
| 845 | if (dump_entry(skb, entry) < 0) |
| 846 | goto options_error; |
| 847 | } |
| 848 | |
| 849 | nla_nest_end(skb, entry_list); |
| 850 | |
| 851 | return nla_nest_end(skb, nest); |
| 852 | |
| 853 | options_error: |
| 854 | nla_nest_cancel(skb, nest); |
| 855 | return -1; |
| 856 | } |
| 857 | |
| 858 | static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) |
| 859 | { |
| 860 | struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); |
| 861 | |
| 862 | if (!dev_queue) |
| 863 | return NULL; |
| 864 | |
| 865 | return dev_queue->qdisc_sleeping; |
| 866 | } |
| 867 | |
| 868 | static unsigned long taprio_find(struct Qdisc *sch, u32 classid) |
| 869 | { |
| 870 | unsigned int ntx = TC_H_MIN(classid); |
| 871 | |
| 872 | if (!taprio_queue_get(sch, ntx)) |
| 873 | return 0; |
| 874 | return ntx; |
| 875 | } |
| 876 | |
| 877 | static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, |
| 878 | struct sk_buff *skb, struct tcmsg *tcm) |
| 879 | { |
| 880 | struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); |
| 881 | |
| 882 | tcm->tcm_parent = TC_H_ROOT; |
| 883 | tcm->tcm_handle |= TC_H_MIN(cl); |
| 884 | tcm->tcm_info = dev_queue->qdisc_sleeping->handle; |
| 885 | |
| 886 | return 0; |
| 887 | } |
| 888 | |
| 889 | static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| 890 | struct gnet_dump *d) |
| 891 | __releases(d->lock) |
| 892 | __acquires(d->lock) |
| 893 | { |
| 894 | struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); |
| 895 | |
| 896 | sch = dev_queue->qdisc_sleeping; |
| 897 | if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || |
Paolo Abeni | 5dd431b | 2019-03-28 16:53:12 +0100 | [diff] [blame] | 898 | qdisc_qstats_copy(d, sch) < 0) |
Vinicius Costa Gomes | 5a781cc | 2018-09-28 17:59:43 -0700 | [diff] [blame] | 899 | return -1; |
| 900 | return 0; |
| 901 | } |
| 902 | |
| 903 | static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) |
| 904 | { |
| 905 | struct net_device *dev = qdisc_dev(sch); |
| 906 | unsigned long ntx; |
| 907 | |
| 908 | if (arg->stop) |
| 909 | return; |
| 910 | |
| 911 | arg->count = arg->skip; |
| 912 | for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { |
| 913 | if (arg->fn(sch, ntx + 1, arg) < 0) { |
| 914 | arg->stop = 1; |
| 915 | break; |
| 916 | } |
| 917 | arg->count++; |
| 918 | } |
| 919 | } |
| 920 | |
| 921 | static struct netdev_queue *taprio_select_queue(struct Qdisc *sch, |
| 922 | struct tcmsg *tcm) |
| 923 | { |
| 924 | return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); |
| 925 | } |
| 926 | |
| 927 | static const struct Qdisc_class_ops taprio_class_ops = { |
| 928 | .graft = taprio_graft, |
| 929 | .leaf = taprio_leaf, |
| 930 | .find = taprio_find, |
| 931 | .walk = taprio_walk, |
| 932 | .dump = taprio_dump_class, |
| 933 | .dump_stats = taprio_dump_class_stats, |
| 934 | .select_queue = taprio_select_queue, |
| 935 | }; |
| 936 | |
| 937 | static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { |
| 938 | .cl_ops = &taprio_class_ops, |
| 939 | .id = "taprio", |
| 940 | .priv_size = sizeof(struct taprio_sched), |
| 941 | .init = taprio_init, |
| 942 | .destroy = taprio_destroy, |
| 943 | .peek = taprio_peek, |
| 944 | .dequeue = taprio_dequeue, |
| 945 | .enqueue = taprio_enqueue, |
| 946 | .dump = taprio_dump, |
| 947 | .owner = THIS_MODULE, |
| 948 | }; |
| 949 | |
| 950 | static int __init taprio_module_init(void) |
| 951 | { |
| 952 | return register_qdisc(&taprio_qdisc_ops); |
| 953 | } |
| 954 | |
| 955 | static void __exit taprio_module_exit(void) |
| 956 | { |
| 957 | unregister_qdisc(&taprio_qdisc_ops); |
| 958 | } |
| 959 | |
| 960 | module_init(taprio_module_init); |
| 961 | module_exit(taprio_module_exit); |
| 962 | MODULE_LICENSE("GPL"); |