net/sched: taprio: fix picos_per_byte miscalculation
// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c        Time Aware Priority Scheduler
 *
 * Authors:     Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */

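/* Example configuration (illustrative, using the iproute2 syntax for
 * this qdisc):
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *             num_tc 3 \
 *             map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
 *             queues 1@0 1@1 2@2 \
 *             base-time 1528743495910289987 \
 *             sched-entry S 01 300000 \
 *             sched-entry S 02 300000 \
 *             sched-entry S 04 400000 \
 *             clockid CLOCK_TAI
 *
 * "sched-entry S 01 300000" keeps only the gate for traffic class 0
 * (gate_mask 0x1) open for 300000 ns; the gate_mask is a bitmap of
 * traffic classes and the interval is in nanoseconds.
 */
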
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);

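/* Assigning -1 to a u32 gate_mask sets every bit, i.e. it marks the
 * gates of all traffic classes as open.
 */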
#define TAPRIO_ALL_GATES_OPEN -1

struct sched_entry {
        struct list_head list;

        /* The instant that this entry "closes" and the next one
         * should open, the qdisc will make some effort so that no
         * packet leaves after this time.
         */
        ktime_t close_time;
        atomic_t budget;
        int index;
        u32 gate_mask;
        u32 interval;
        u8 command;
};

struct taprio_sched {
        struct Qdisc **qdiscs;
        struct Qdisc *root;
        s64 base_time;
        int clockid;
        atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
                                    * speeds it's sub-nanoseconds per byte
                                    */
        size_t num_entries;

        /* Protects the update side of the RCU protected current_entry */
        spinlock_t current_entry_lock;
        struct sched_entry __rcu *current_entry;
        struct list_head entries;
        ktime_t (*get_time)(void);
        struct hrtimer advance_timer;
        struct list_head taprio_list;
};

static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                          struct sk_buff **to_free)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct Qdisc *child;
        int queue;

        queue = skb_get_queue_mapping(skb);

        child = q->qdiscs[queue];
        if (unlikely(!child))
                return qdisc_drop(skb, sch, to_free);

        qdisc_qstats_backlog_inc(sch, skb);
        sch->q.qlen++;

        return qdisc_enqueue(skb, child, to_free);
}

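/* A gate_mask is a bitmap of traffic classes, per IEEE 802.1Qbv: bit N
 * set means the transmission gate for traffic class N is open. A mask
 * of zero means every gate is closed, so nothing can be sent.
 */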
static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry;
        struct sk_buff *skb;
        u32 gate_mask;
        int i;

        rcu_read_lock();
        entry = rcu_dereference(q->current_entry);
        gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
        rcu_read_unlock();

        if (!gate_mask)
                return NULL;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct Qdisc *child = q->qdiscs[i];
                int prio;
                u8 tc;

                if (unlikely(!child))
                        continue;

                skb = child->ops->peek(child);
                if (!skb)
                        continue;

                prio = skb->priority;
                tc = netdev_get_prio_tc_map(dev, prio);

                if (!(gate_mask & BIT(tc)))
                        continue;

                return skb;
        }

        return NULL;
}

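/* Convert a packet length in bytes into transmission time in
 * nanoseconds. As an illustration (values assumed, not taken from the
 * code): at 1 Gbps picos_per_byte is 8000, so a 1500 byte frame takes
 * 1500 * 8000 / 1000 = 12000 ns (12 us) on the wire.
 */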
static inline int length_to_duration(struct taprio_sched *q, int len)
{
        return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
}

static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry;
        struct sk_buff *skb;
        u32 gate_mask;
        int i;

        if (atomic64_read(&q->picos_per_byte) == -1) {
                WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
                return NULL;
        }

        rcu_read_lock();
        entry = rcu_dereference(q->current_entry);
        /* if there's no entry, it means that the schedule didn't
         * start yet, so force all gates to be open, this is in
         * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
         * "AdminGateStates"
         */
        gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
        rcu_read_unlock();

        if (!gate_mask)
                return NULL;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct Qdisc *child = q->qdiscs[i];
                ktime_t guard;
                int prio;
                int len;
                u8 tc;

                if (unlikely(!child))
                        continue;

                skb = child->ops->peek(child);
                if (!skb)
                        continue;

                prio = skb->priority;
                tc = netdev_get_prio_tc_map(dev, prio);

                if (!(gate_mask & BIT(tc)))
                        continue;

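                /* Estimate when this packet would finish being
                 * transmitted; if that instant falls after the current
                 * entry's close_time, sending it now would overrun the
                 * gate, so hold it back (this is the guard band).
                 */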
                len = qdisc_pkt_len(skb);
                guard = ktime_add_ns(q->get_time(),
                                     length_to_duration(q, len));

                /* In the case that there's no gate entry, there's no
                 * guard band ...
                 */
                if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
                    ktime_after(guard, entry->close_time))
                        return NULL;

                /* ... and no budget. */
                if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
                    atomic_sub_return(len, &entry->budget) < 0)
                        return NULL;

                skb = child->ops->dequeue(child);
                if (unlikely(!skb))
                        return NULL;

                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;

                return skb;
        }

        return NULL;
}

static bool should_restart_cycle(const struct taprio_sched *q,
                                 const struct sched_entry *entry)
{
        WARN_ON(!entry);

        return list_is_last(&entry->list, &q->entries);
}

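/* hrtimer callback that advances the schedule. It fires at the current
 * entry's close_time, picks the next entry (wrapping around at the end
 * of the list), pre-computes that entry's close_time and byte budget,
 * publishes it through RCU and then kicks the qdisc so that dequeue()
 * runs against the new entry.
 */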
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
        struct taprio_sched *q = container_of(timer, struct taprio_sched,
                                              advance_timer);
        struct sched_entry *entry, *next;
        struct Qdisc *sch = q->root;
        ktime_t close_time;

        spin_lock(&q->current_entry_lock);
        entry = rcu_dereference_protected(q->current_entry,
                                          lockdep_is_held(&q->current_entry_lock));

        /* This is the case that it's the first time that the schedule
         * runs, so it only happens once per schedule. The first entry
         * is pre-calculated during the schedule initialization.
         */
        if (unlikely(!entry)) {
                next = list_first_entry(&q->entries, struct sched_entry,
                                        list);
                close_time = next->close_time;
                goto first_run;
        }

        if (should_restart_cycle(q, entry))
                next = list_first_entry(&q->entries, struct sched_entry,
                                        list);
        else
                next = list_next_entry(entry, list);

        close_time = ktime_add_ns(entry->close_time, next->interval);

        next->close_time = close_time;
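        /* The budget is how many bytes fit in this entry's interval:
         * interval (ns) * 1000 gives picoseconds, divided by the link's
         * picos_per_byte. E.g. (illustrative) a 100 us interval at
         * 1 Gbps: 100000 * 1000 / 8000 = 12500 bytes.
         */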
        atomic_set(&next->budget,
                   div64_u64((u64)next->interval * 1000,
                             atomic64_read(&q->picos_per_byte)));

first_run:
        rcu_assign_pointer(q->current_entry, next);
        spin_unlock(&q->current_entry_lock);

        hrtimer_set_expires(&q->advance_timer, close_time);

        rcu_read_lock();
        __netif_schedule(sch);
        rcu_read_unlock();

        return HRTIMER_RESTART;
}

static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
        [TCA_TAPRIO_SCHED_ENTRY_INDEX]     = { .type = NLA_U32 },
        [TCA_TAPRIO_SCHED_ENTRY_CMD]       = { .type = NLA_U8 },
        [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
        [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};

static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
        [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
};

static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
        [TCA_TAPRIO_ATTR_PRIOMAP]              = {
                .len = sizeof(struct tc_mqprio_qopt)
        },
        [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]     = { .type = NLA_NESTED },
        [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]      = { .type = NLA_S64 },
        [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]   = { .type = NLA_NESTED },
        [TCA_TAPRIO_ATTR_SCHED_CLOCKID]        = { .type = NLA_S32 },
};

static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
                            struct netlink_ext_ack *extack)
{
        u32 interval = 0;

        if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
                entry->command = nla_get_u8(
                        tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);

        if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
                entry->gate_mask = nla_get_u32(
                        tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);

        if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
                interval = nla_get_u32(
                        tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);

        if (interval == 0) {
                NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
                return -EINVAL;
        }

        entry->interval = interval;

        return 0;
}

static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
                             int index, struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
        int err;

        err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
                               entry_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        entry->index = index;

        return fill_sched_entry(tb, entry, extack);
}

/* Returns the number of entries in case of success */
static int parse_sched_single_entry(struct nlattr *n,
                                    struct taprio_sched *q,
                                    struct netlink_ext_ack *extack)
{
        struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
        struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
        struct sched_entry *entry;
        bool found = false;
        u32 index;
        int err;

        err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
                               n, entry_list_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
                NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
                return -EINVAL;
        }

        err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
                               tb_list[TCA_TAPRIO_SCHED_ENTRY],
                               entry_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
                NL_SET_ERR_MSG(extack, "Entry must specify an index");
                return -EINVAL;
        }

        index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
        if (index >= q->num_entries) {
                NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
                return -EINVAL;
        }

        list_for_each_entry(entry, &q->entries, list) {
                if (entry->index == index) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                NL_SET_ERR_MSG(extack, "Could not find entry");
                return -ENOENT;
        }

        err = fill_sched_entry(tb_entry, entry, extack);
        if (err < 0)
                return err;

        return q->num_entries;
}

static int parse_sched_list(struct nlattr *list,
                            struct taprio_sched *q,
                            struct netlink_ext_ack *extack)
{
        struct nlattr *n;
        int err, rem;
        int i = 0;

        if (!list)
                return -EINVAL;

        nla_for_each_nested(n, list, rem) {
                struct sched_entry *entry;

                if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
                        NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
                        continue;
                }

                entry = kzalloc(sizeof(*entry), GFP_KERNEL);
                if (!entry) {
                        NL_SET_ERR_MSG(extack, "Not enough memory for entry");
                        return -ENOMEM;
                }

                err = parse_sched_entry(n, entry, i, extack);
                if (err < 0) {
                        kfree(entry);
                        return err;
                }

                list_add_tail(&entry->list, &q->entries);
                i++;
        }

        q->num_entries = i;

        return i;
}

/* Returns the number of entries in case of success */
static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
                            struct netlink_ext_ack *extack)
{
        int err = 0;
        int clockid;

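        /* A full entry list and a single-entry update are mutually
         * exclusive, and a single-entry update only makes sense once a
         * schedule with entries already exists.
         */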
        if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
            tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
                return -EINVAL;

        if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
                return -EINVAL;

        if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
                return -EINVAL;

        if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
                q->base_time = nla_get_s64(
                        tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

        if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
                clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);

                /* We only support static clockids, and we don't allow
                 * the clockid to be changed after the first init.
                 */
                if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
                        return -EINVAL;

                q->clockid = clockid;
        }

        if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
                err = parse_sched_list(
                        tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
        else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
                err = parse_sched_single_entry(
                        tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);

        /* parse_sched_* return the number of entries in the schedule,
         * a schedule with zero entries is an error.
         */
        if (err == 0) {
                NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
                return -EINVAL;
        }

        return err;
}

static int taprio_parse_mqprio_opt(struct net_device *dev,
                                   struct tc_mqprio_qopt *qopt,
                                   struct netlink_ext_ack *extack)
{
        int i, j;

        if (!qopt) {
                NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
                return -EINVAL;
        }

        /* Verify num_tc is not out of max range */
        if (qopt->num_tc > TC_MAX_QUEUE) {
                NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
                return -EINVAL;
        }

        /* taprio imposes that traffic classes map 1:n to tx queues */
        if (qopt->num_tc > dev->num_tx_queues) {
                NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
                return -EINVAL;
        }

        /* Verify priority mapping uses valid tcs */
        for (i = 0; i < TC_BITMASK + 1; i++) {
                if (qopt->prio_tc_map[i] >= qopt->num_tc) {
                        NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
                        return -EINVAL;
                }
        }

        for (i = 0; i < qopt->num_tc; i++) {
                unsigned int last = qopt->offset[i] + qopt->count[i];

                /* Verify the queue count is within the tx range; "last"
                 * being equal to real_num_tx_queues indicates the last
                 * queue is in use.
                 */
                if (qopt->offset[i] >= dev->num_tx_queues ||
                    !qopt->count[i] ||
                    last > dev->real_num_tx_queues) {
                        NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
                        return -EINVAL;
                }

                /* Verify that the offset and counts do not overlap */
                for (j = i + 1; j < qopt->num_tc; j++) {
                        if (last > qopt->offset[j]) {
                                NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

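/* Returns the absolute time at which the schedule should start: the
 * configured base_time if it is still in the future, otherwise the
 * beginning of the next full cycle. E.g. (illustrative) with
 * base_time = 0, a 1 ms cycle and now = 2.3 ms, n = 2 and the
 * schedule starts at 3 ms.
 */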
static ktime_t taprio_get_start_time(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct sched_entry *entry;
        ktime_t now, base, cycle;
        s64 n;

        base = ns_to_ktime(q->base_time);
        cycle = 0;

        /* Calculate the cycle_time by summing all the intervals. */
        list_for_each_entry(entry, &q->entries, list)
                cycle = ktime_add_ns(cycle, entry->interval);

        if (!cycle)
                return base;

        now = q->get_time();

        if (ktime_after(base, now))
                return base;

        /* Schedule the start time for the beginning of the next
         * cycle.
         */
        n = div64_s64(ktime_sub_ns(now, base), cycle);

        return ktime_add_ns(base, (n + 1) * cycle);
}

static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct sched_entry *first;
        unsigned long flags;

        spin_lock_irqsave(&q->current_entry_lock, flags);

        first = list_first_entry(&q->entries, struct sched_entry,
                                 list);

        first->close_time = ktime_add_ns(start, first->interval);
        atomic_set(&first->budget,
                   div64_u64((u64)first->interval * 1000,
                             atomic64_read(&q->picos_per_byte)));
        rcu_assign_pointer(q->current_entry, NULL);

        spin_unlock_irqrestore(&q->current_entry_lock, flags);

        hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

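/* picos_per_byte is (picoseconds per second * 8 bits per byte) divided
 * by the link speed in bits per second; ecmd.base.speed is in Mbps.
 * E.g. (illustrative) at SPEED_1000 this is 8 * 10^12 / 10^9 = 8000 ps
 * per byte, and at SPEED_10000 it is 800 ps, which is why sub-nanosecond
 * (picosecond) resolution is needed. -1 is kept as the "link speed
 * unknown" sentinel that dequeue() checks for.
 */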
static void taprio_set_picos_per_byte(struct net_device *dev,
                                      struct taprio_sched *q)
{
        struct ethtool_link_ksettings ecmd;
        int picos_per_byte = -1;

        if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
            ecmd.base.speed != SPEED_UNKNOWN)
                picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
                                           (s64)ecmd.base.speed * 1000 * 1000);

        atomic64_set(&q->picos_per_byte, picos_per_byte);
        netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
                   dev->name, (long long)atomic64_read(&q->picos_per_byte),
                   ecmd.base.speed);
}

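/* The link speed can change after init (autonegotiation, cable swap),
 * so watch NETDEV_UP/NETDEV_CHANGE events for devices that have a
 * taprio qdisc attached and refresh their picos_per_byte.
 */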
static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
                               void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net_device *qdev;
        struct taprio_sched *q;
        bool found = false;

        ASSERT_RTNL();

        if (event != NETDEV_UP && event != NETDEV_CHANGE)
                return NOTIFY_DONE;

        spin_lock(&taprio_list_lock);
        list_for_each_entry(q, &taprio_list, taprio_list) {
                qdev = qdisc_dev(q->root);
                if (qdev == dev) {
                        found = true;
                        break;
                }
        }
        spin_unlock(&taprio_list_lock);

        if (found)
                taprio_set_picos_per_byte(dev, q);

        return NOTIFY_DONE;
}

static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
                         struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_mqprio_qopt *mqprio = NULL;
        int i, err, size;
        ktime_t start;

        err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
                               taprio_policy, extack);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
                mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

        err = taprio_parse_mqprio_opt(dev, mqprio, extack);
        if (err < 0)
                return err;

        /* A schedule with less than one entry is an error */
        size = parse_taprio_opt(tb, q, extack);
        if (size < 0)
                return size;

        hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
        q->advance_timer.function = advance_sched;

        switch (q->clockid) {
        case CLOCK_REALTIME:
                q->get_time = ktime_get_real;
                break;
        case CLOCK_MONOTONIC:
                q->get_time = ktime_get;
                break;
        case CLOCK_BOOTTIME:
                q->get_time = ktime_get_boottime;
                break;
        case CLOCK_TAI:
                q->get_time = ktime_get_clocktai;
                break;
        default:
                return -EOPNOTSUPP;
        }

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *dev_queue;
                struct Qdisc *qdisc;

                dev_queue = netdev_get_tx_queue(dev, i);
                qdisc = qdisc_create_dflt(dev_queue,
                                          &pfifo_qdisc_ops,
                                          TC_H_MAKE(TC_H_MAJ(sch->handle),
                                                    TC_H_MIN(i + 1)),
                                          extack);
                if (!qdisc)
                        return -ENOMEM;

                if (i < dev->real_num_tx_queues)
                        qdisc_hash_add(qdisc, false);

                q->qdiscs[i] = qdisc;
        }

        if (mqprio) {
                netdev_set_num_tc(dev, mqprio->num_tc);
                for (i = 0; i < mqprio->num_tc; i++)
                        netdev_set_tc_queue(dev, i,
                                            mqprio->count[i],
                                            mqprio->offset[i]);

                /* Always use supplied priority mappings */
                for (i = 0; i < TC_BITMASK + 1; i++)
                        netdev_set_prio_tc_map(dev, i,
                                               mqprio->prio_tc_map[i]);
        }

        taprio_set_picos_per_byte(dev, q);
        start = taprio_get_start_time(sch);
        if (!start)
                return 0;

        taprio_start_sched(sch, start);

        return 0;
}

static void taprio_destroy(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry, *n;
        unsigned int i;

        spin_lock(&taprio_list_lock);
        list_del(&q->taprio_list);
        spin_unlock(&taprio_list_lock);

        hrtimer_cancel(&q->advance_timer);

        if (q->qdiscs) {
                for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
                        qdisc_put(q->qdiscs[i]);

                kfree(q->qdiscs);
        }
        q->qdiscs = NULL;

        netdev_set_num_tc(dev, 0);

        list_for_each_entry_safe(entry, n, &q->entries, list) {
                list_del(&entry->list);
                kfree(entry);
        }
}

static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
                       struct netlink_ext_ack *extack)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);

        INIT_LIST_HEAD(&q->entries);
        spin_lock_init(&q->current_entry_lock);

        /* We may overwrite the configuration later */
        hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);

        q->root = sch;

        /* We only support static clockids. Use an invalid value as default
         * and get the valid one on taprio_change().
         */
        q->clockid = -1;

        if (sch->parent != TC_H_ROOT)
                return -EOPNOTSUPP;

        if (!netif_is_multiqueue(dev))
                return -EOPNOTSUPP;

        /* pre-allocate qdisc, attachment can't fail */
        q->qdiscs = kcalloc(dev->num_tx_queues,
                            sizeof(q->qdiscs[0]),
                            GFP_KERNEL);

        if (!q->qdiscs)
                return -ENOMEM;

        if (!opt)
                return -EINVAL;

        spin_lock(&taprio_list_lock);
        list_add(&q->taprio_list, &taprio_list);
        spin_unlock(&taprio_list_lock);

        return taprio_change(sch, opt, extack);
}

static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
                                             unsigned long cl)
{
        struct net_device *dev = qdisc_dev(sch);
        unsigned long ntx = cl - 1;

        if (ntx >= dev->num_tx_queues)
                return NULL;

        return netdev_get_tx_queue(dev, ntx);
}

static int taprio_graft(struct Qdisc *sch, unsigned long cl,
                        struct Qdisc *new, struct Qdisc **old,
                        struct netlink_ext_ack *extack)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        if (!dev_queue)
                return -EINVAL;

        if (dev->flags & IFF_UP)
                dev_deactivate(dev);

        *old = q->qdiscs[cl - 1];
        q->qdiscs[cl - 1] = new;

        if (new)
                new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

        if (dev->flags & IFF_UP)
                dev_activate(dev);

        return 0;
}

static int dump_entry(struct sk_buff *msg,
                      const struct sched_entry *entry)
{
        struct nlattr *item;

        item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
        if (!item)
                return -ENOSPC;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
                goto nla_put_failure;

        if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
                goto nla_put_failure;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
                        entry->gate_mask))
                goto nla_put_failure;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
                        entry->interval))
                goto nla_put_failure;

        return nla_nest_end(msg, item);

nla_put_failure:
        nla_nest_cancel(msg, item);
        return -1;
}

static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_mqprio_qopt opt = { 0 };
        struct nlattr *nest, *entry_list;
        struct sched_entry *entry;
        unsigned int i;

        opt.num_tc = netdev_get_num_tc(dev);
        memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));

        for (i = 0; i < netdev_get_num_tc(dev); i++) {
                opt.count[i] = dev->tc_to_txq[i].count;
                opt.offset[i] = dev->tc_to_txq[i].offset;
        }

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (!nest)
                return -ENOSPC;

        if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
                goto options_error;

        if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
                        q->base_time, TCA_TAPRIO_PAD))
                goto options_error;

        if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
                goto options_error;

        entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
        if (!entry_list)
                goto options_error;

        list_for_each_entry(entry, &q->entries, list) {
                if (dump_entry(skb, entry) < 0)
                        goto options_error;
        }

        nla_nest_end(skb, entry_list);

        return nla_nest_end(skb, nest);

options_error:
        nla_nest_cancel(skb, nest);
        return -1;
}

static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        if (!dev_queue)
                return NULL;

        return dev_queue->qdisc_sleeping;
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
        unsigned int ntx = TC_H_MIN(classid);

        if (!taprio_queue_get(sch, ntx))
                return 0;
        return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
                             struct sk_buff *skb, struct tcmsg *tcm)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        tcm->tcm_parent = TC_H_ROOT;
        tcm->tcm_handle |= TC_H_MIN(cl);
        tcm->tcm_info = dev_queue->qdisc_sleeping->handle;

        return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                   struct gnet_dump *d)
        __releases(d->lock)
        __acquires(d->lock)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        sch = dev_queue->qdisc_sleeping;
        if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
            qdisc_qstats_copy(d, sch) < 0)
                return -1;
        return 0;
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct net_device *dev = qdisc_dev(sch);
        unsigned long ntx;

        if (arg->stop)
                return;

        arg->count = arg->skip;
        for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
                if (arg->fn(sch, ntx + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
                                                struct tcmsg *tcm)
{
        return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
        .graft          = taprio_graft,
        .leaf           = taprio_leaf,
        .find           = taprio_find,
        .walk           = taprio_walk,
        .dump           = taprio_dump_class,
        .dump_stats     = taprio_dump_class_stats,
        .select_queue   = taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
        .cl_ops         = &taprio_class_ops,
        .id             = "taprio",
        .priv_size      = sizeof(struct taprio_sched),
        .init           = taprio_init,
        .destroy        = taprio_destroy,
        .peek           = taprio_peek,
        .dequeue        = taprio_dequeue,
        .enqueue        = taprio_enqueue,
        .dump           = taprio_dump,
        .owner          = THIS_MODULE,
};

static struct notifier_block taprio_device_notifier = {
        .notifier_call = taprio_dev_notifier,
};

static int __init taprio_module_init(void)
{
        int err = register_netdevice_notifier(&taprio_device_notifier);

        if (err)
                return err;

        return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
        unregister_qdisc(&taprio_qdisc_ops);
        unregister_netdevice_notifier(&taprio_device_notifier);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");