2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the names of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
21 * Alternatively, this software may be distributed under the terms of the
22 * GNU General Public License ("GPL") version 2 as published by the Free
23 * Software Foundation.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
62 #include <net/fib_notifier.h>
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
75 struct mlxsw_sp_lpm_tree;
76 struct mlxsw_sp_rif_ops;
78 struct mlxsw_sp_router {
79 struct mlxsw_sp *mlxsw_sp;
80 struct mlxsw_sp_rif **rifs;
81 struct mlxsw_sp_vr *vrs;
82 struct rhashtable neigh_ht;
83 struct rhashtable nexthop_group_ht;
84 struct rhashtable nexthop_ht;
85 struct list_head nexthop_list;
87 struct mlxsw_sp_lpm_tree *trees;
88 unsigned int tree_count;
91 struct delayed_work dw;
92 unsigned long interval; /* ms */
94 struct delayed_work nexthop_probe_dw;
95 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
96 struct list_head nexthop_neighs_list;
97 struct list_head ipip_list;
99 struct notifier_block fib_nb;
100 struct notifier_block netevent_nb;
101 const struct mlxsw_sp_rif_ops **rif_ops_arr;
102 const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
105 struct mlxsw_sp_rif {
106 struct list_head nexthop_list;
107 struct list_head neigh_list;
108 struct net_device *dev;
109 struct mlxsw_sp_fid *fid;
110 unsigned char addr[ETH_ALEN];
114 const struct mlxsw_sp_rif_ops *ops;
115 struct mlxsw_sp *mlxsw_sp;
117 unsigned int counter_ingress;
118 bool counter_ingress_valid;
119 unsigned int counter_egress;
120 bool counter_egress_valid;
123 struct mlxsw_sp_rif_params {
124 struct net_device *dev;
133 struct mlxsw_sp_rif_subport {
134 struct mlxsw_sp_rif common;
143 struct mlxsw_sp_rif_ipip_lb {
144 struct mlxsw_sp_rif common;
145 struct mlxsw_sp_rif_ipip_lb_config lb_config;
146 u16 ul_vr_id; /* Reserved for Spectrum-2. */
149 struct mlxsw_sp_rif_params_ipip_lb {
150 struct mlxsw_sp_rif_params common;
151 struct mlxsw_sp_rif_ipip_lb_config lb_config;
154 struct mlxsw_sp_rif_ops {
155 enum mlxsw_sp_rif_type type;
158 void (*setup)(struct mlxsw_sp_rif *rif,
159 const struct mlxsw_sp_rif_params *params);
160 int (*configure)(struct mlxsw_sp_rif *rif);
161 void (*deconfigure)(struct mlxsw_sp_rif *rif);
162 struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
165 static unsigned int *
166 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
167 enum mlxsw_sp_rif_counter_dir dir)
170 case MLXSW_SP_RIF_COUNTER_EGRESS:
171 return &rif->counter_egress;
172 case MLXSW_SP_RIF_COUNTER_INGRESS:
173 return &rif->counter_ingress;
179 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
180 enum mlxsw_sp_rif_counter_dir dir)
183 case MLXSW_SP_RIF_COUNTER_EGRESS:
184 return rif->counter_egress_valid;
185 case MLXSW_SP_RIF_COUNTER_INGRESS:
186 return rif->counter_ingress_valid;
192 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
193 enum mlxsw_sp_rif_counter_dir dir,
197 case MLXSW_SP_RIF_COUNTER_EGRESS:
198 rif->counter_egress_valid = valid;
200 case MLXSW_SP_RIF_COUNTER_INGRESS:
201 rif->counter_ingress_valid = valid;
206 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
207 unsigned int counter_index, bool enable,
208 enum mlxsw_sp_rif_counter_dir dir)
210 char ritr_pl[MLXSW_REG_RITR_LEN];
211 bool is_egress = false;
214 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
216 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
217 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
221 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
223 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
226 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
227 struct mlxsw_sp_rif *rif,
228 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
230 char ricnt_pl[MLXSW_REG_RICNT_LEN];
231 unsigned int *p_counter_index;
235 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
239 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
240 if (!p_counter_index)
242 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
243 MLXSW_REG_RICNT_OPCODE_NOP);
244 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
247 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
251 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
252 unsigned int counter_index)
254 char ricnt_pl[MLXSW_REG_RICNT_LEN];
256 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
257 MLXSW_REG_RICNT_OPCODE_CLEAR);
258 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
261 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
262 struct mlxsw_sp_rif *rif,
263 enum mlxsw_sp_rif_counter_dir dir)
265 unsigned int *p_counter_index;
268 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
269 if (!p_counter_index)
271 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
276 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
278 goto err_counter_clear;
280 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
281 *p_counter_index, true, dir);
283 goto err_counter_edit;
284 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
289 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
294 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
295 struct mlxsw_sp_rif *rif,
296 enum mlxsw_sp_rif_counter_dir dir)
298 unsigned int *p_counter_index;
300 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
303 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
304 if (WARN_ON(!p_counter_index))
306 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
307 *p_counter_index, false, dir);
308 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
310 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
313 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
315 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
316 struct devlink *devlink;
318 devlink = priv_to_devlink(mlxsw_sp->core);
319 if (!devlink_dpipe_table_counter_enabled(devlink,
320 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
322 mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
325 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
327 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
329 mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
332 static struct mlxsw_sp_rif *
333 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
334 const struct net_device *dev);
336 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
338 struct mlxsw_sp_prefix_usage {
339 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
342 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
343 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
346 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
347 struct mlxsw_sp_prefix_usage *prefix_usage2)
349 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
353 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
355 struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
357 return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
361 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
362 struct mlxsw_sp_prefix_usage *prefix_usage2)
364 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
368 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
369 unsigned char prefix_len)
371 set_bit(prefix_len, prefix_usage->b);
375 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
376 unsigned char prefix_len)
378 clear_bit(prefix_len, prefix_usage->b);
381 struct mlxsw_sp_fib_key {
382 unsigned char addr[sizeof(struct in6_addr)];
383 unsigned char prefix_len;
386 enum mlxsw_sp_fib_entry_type {
387 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
388 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
389 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
391 /* This is a special case of local delivery, where a packet should be
392 * decapsulated on reception. Note that there is no corresponding ENCAP,
393 * because that's a type of next hop, not of FIB entry. (There can be
394 * several next hops in a REMOTE entry, and some of them may be
395 * encapsulating entries.)
397 MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
400 struct mlxsw_sp_nexthop_group;
403 struct mlxsw_sp_fib_node {
404 struct list_head entry_list;
405 struct list_head list;
406 struct rhash_head ht_node;
407 struct mlxsw_sp_fib *fib;
408 struct mlxsw_sp_fib_key key;
411 struct mlxsw_sp_fib_entry_decap {
412 struct mlxsw_sp_ipip_entry *ipip_entry;
416 struct mlxsw_sp_fib_entry {
417 struct list_head list;
418 struct mlxsw_sp_fib_node *fib_node;
419 enum mlxsw_sp_fib_entry_type type;
420 struct list_head nexthop_group_node;
421 struct mlxsw_sp_nexthop_group *nh_group;
422 struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
425 struct mlxsw_sp_fib4_entry {
426 struct mlxsw_sp_fib_entry common;
433 struct mlxsw_sp_fib6_entry {
434 struct mlxsw_sp_fib_entry common;
435 struct list_head rt6_list;
439 struct mlxsw_sp_rt6 {
440 struct list_head list;
444 struct mlxsw_sp_lpm_tree {
446 unsigned int ref_count;
447 enum mlxsw_sp_l3proto proto;
448 struct mlxsw_sp_prefix_usage prefix_usage;
451 struct mlxsw_sp_fib {
452 struct rhashtable ht;
453 struct list_head node_list;
454 struct mlxsw_sp_vr *vr;
455 struct mlxsw_sp_lpm_tree *lpm_tree;
456 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
457 struct mlxsw_sp_prefix_usage prefix_usage;
458 enum mlxsw_sp_l3proto proto;
462 u16 id; /* virtual router ID */
463 u32 tb_id; /* kernel fib table id */
464 unsigned int rif_count;
465 struct mlxsw_sp_fib *fib4;
466 struct mlxsw_sp_fib *fib6;
467 struct mlxsw_sp_mr_table *mr4_table;
470 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
472 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
473 enum mlxsw_sp_l3proto proto)
475 struct mlxsw_sp_fib *fib;
478 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
480 return ERR_PTR(-ENOMEM);
481 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
483 goto err_rhashtable_init;
484 INIT_LIST_HEAD(&fib->node_list);
494 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
496 WARN_ON(!list_empty(&fib->node_list));
497 WARN_ON(fib->lpm_tree);
498 rhashtable_destroy(&fib->ht);
502 static struct mlxsw_sp_lpm_tree *
503 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
505 static struct mlxsw_sp_lpm_tree *lpm_tree;
508 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
509 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
510 if (lpm_tree->ref_count == 0)
516 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
517 struct mlxsw_sp_lpm_tree *lpm_tree)
519 char ralta_pl[MLXSW_REG_RALTA_LEN];
521 mlxsw_reg_ralta_pack(ralta_pl, true,
522 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
524 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
527 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
528 struct mlxsw_sp_lpm_tree *lpm_tree)
530 char ralta_pl[MLXSW_REG_RALTA_LEN];
532 mlxsw_reg_ralta_pack(ralta_pl, false,
533 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
535 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
539 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
540 struct mlxsw_sp_prefix_usage *prefix_usage,
541 struct mlxsw_sp_lpm_tree *lpm_tree)
543 char ralst_pl[MLXSW_REG_RALST_LEN];
546 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
548 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
551 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
552 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
555 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
556 MLXSW_REG_RALST_BIN_NO_CHILD);
557 last_prefix = prefix;
559 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
562 static struct mlxsw_sp_lpm_tree *
563 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
564 struct mlxsw_sp_prefix_usage *prefix_usage,
565 enum mlxsw_sp_l3proto proto)
567 struct mlxsw_sp_lpm_tree *lpm_tree;
570 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
572 return ERR_PTR(-EBUSY);
573 lpm_tree->proto = proto;
574 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
578 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
581 goto err_left_struct_set;
582 memcpy(&lpm_tree->prefix_usage, prefix_usage,
583 sizeof(lpm_tree->prefix_usage));
587 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
591 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
592 struct mlxsw_sp_lpm_tree *lpm_tree)
594 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
597 static struct mlxsw_sp_lpm_tree *
598 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
599 struct mlxsw_sp_prefix_usage *prefix_usage,
600 enum mlxsw_sp_l3proto proto)
602 struct mlxsw_sp_lpm_tree *lpm_tree;
605 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
606 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
607 if (lpm_tree->ref_count != 0 &&
608 lpm_tree->proto == proto &&
609 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
613 return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
616 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
618 lpm_tree->ref_count++;
621 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
622 struct mlxsw_sp_lpm_tree *lpm_tree)
624 if (--lpm_tree->ref_count == 0)
625 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
628 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
630 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
632 struct mlxsw_sp_lpm_tree *lpm_tree;
636 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
639 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
640 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
641 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
642 sizeof(struct mlxsw_sp_lpm_tree),
644 if (!mlxsw_sp->router->lpm.trees)
647 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
648 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
649 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
655 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
657 kfree(mlxsw_sp->router->lpm.trees);
660 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
662 return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
665 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
667 struct mlxsw_sp_vr *vr;
670 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
671 vr = &mlxsw_sp->router->vrs[i];
672 if (!mlxsw_sp_vr_is_used(vr))
678 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
679 const struct mlxsw_sp_fib *fib, u8 tree_id)
681 char raltb_pl[MLXSW_REG_RALTB_LEN];
683 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
684 (enum mlxsw_reg_ralxx_protocol) fib->proto,
686 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
689 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
690 const struct mlxsw_sp_fib *fib)
692 char raltb_pl[MLXSW_REG_RALTB_LEN];
694 /* Bind to tree 0 which is default */
695 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
696 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
697 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
700 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
702 /* For our purpose, squash main, default and local tables into one */
703 if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
704 tb_id = RT_TABLE_MAIN;
708 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
711 struct mlxsw_sp_vr *vr;
714 tb_id = mlxsw_sp_fix_tb_id(tb_id);
716 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
717 vr = &mlxsw_sp->router->vrs[i];
718 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
724 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
725 enum mlxsw_sp_l3proto proto)
728 case MLXSW_SP_L3_PROTO_IPV4:
730 case MLXSW_SP_L3_PROTO_IPV6:
736 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
738 struct netlink_ext_ack *extack)
740 struct mlxsw_sp_vr *vr;
743 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
745 NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
746 return ERR_PTR(-EBUSY);
748 vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
749 if (IS_ERR(vr->fib4))
750 return ERR_CAST(vr->fib4);
751 vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
752 if (IS_ERR(vr->fib6)) {
753 err = PTR_ERR(vr->fib6);
754 goto err_fib6_create;
756 vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
757 MLXSW_SP_L3_PROTO_IPV4);
758 if (IS_ERR(vr->mr4_table)) {
759 err = PTR_ERR(vr->mr4_table);
760 goto err_mr_table_create;
766 mlxsw_sp_fib_destroy(vr->fib6);
769 mlxsw_sp_fib_destroy(vr->fib4);
774 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
776 mlxsw_sp_mr_table_destroy(vr->mr4_table);
777 vr->mr4_table = NULL;
778 mlxsw_sp_fib_destroy(vr->fib6);
780 mlxsw_sp_fib_destroy(vr->fib4);
784 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
785 struct netlink_ext_ack *extack)
787 struct mlxsw_sp_vr *vr;
789 tb_id = mlxsw_sp_fix_tb_id(tb_id);
790 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
792 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
796 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
798 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
799 list_empty(&vr->fib6->node_list) &&
800 mlxsw_sp_mr_table_empty(vr->mr4_table))
801 mlxsw_sp_vr_destroy(vr);
805 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
806 enum mlxsw_sp_l3proto proto, u8 tree_id)
808 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
810 if (!mlxsw_sp_vr_is_used(vr))
812 if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
817 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
818 struct mlxsw_sp_fib *fib,
819 struct mlxsw_sp_lpm_tree *new_tree)
821 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
824 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
827 fib->lpm_tree = new_tree;
828 mlxsw_sp_lpm_tree_hold(new_tree);
829 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
833 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
834 struct mlxsw_sp_fib *fib,
835 struct mlxsw_sp_lpm_tree *new_tree)
837 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
838 enum mlxsw_sp_l3proto proto = fib->proto;
839 u8 old_id, new_id = new_tree->id;
840 struct mlxsw_sp_vr *vr;
845 old_id = old_tree->id;
847 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
848 vr = &mlxsw_sp->router->vrs[i];
849 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
851 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
852 mlxsw_sp_vr_fib(vr, proto),
855 goto err_tree_replace;
861 for (i--; i >= 0; i--) {
862 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
864 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
865 mlxsw_sp_vr_fib(vr, proto),
871 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
874 fib->lpm_tree = new_tree;
875 mlxsw_sp_lpm_tree_hold(new_tree);
880 mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
881 enum mlxsw_sp_l3proto proto,
882 struct mlxsw_sp_prefix_usage *req_prefix_usage)
886 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
887 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
888 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
889 unsigned char prefix;
891 if (!mlxsw_sp_vr_is_used(vr))
893 mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
894 mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
898 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
900 struct mlxsw_sp_vr *vr;
904 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
907 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
908 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
910 if (!mlxsw_sp->router->vrs)
913 for (i = 0; i < max_vrs; i++) {
914 vr = &mlxsw_sp->router->vrs[i];
921 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
923 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
925 /* At this stage we're guaranteed not to have new incoming
926 * FIB notifications and the work queue is free from FIBs
927 * sitting on top of mlxsw netdevs. However, we can still
928 * have other FIBs queued. Flush the queue before flushing
929 * the device's tables. No need for locks, as we're the only
932 mlxsw_core_flush_owq();
933 mlxsw_sp_router_fib_flush(mlxsw_sp);
934 kfree(mlxsw_sp->router->vrs);
937 static struct net_device *
938 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
940 struct ip_tunnel *tun = netdev_priv(ol_dev);
941 struct net *net = dev_net(ol_dev);
943 return __dev_get_by_index(net, tun->parms.link);
946 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
948 struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
951 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
953 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
956 static struct mlxsw_sp_rif *
957 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
958 const struct mlxsw_sp_rif_params *params,
959 struct netlink_ext_ack *extack);
961 static struct mlxsw_sp_rif_ipip_lb *
962 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
963 enum mlxsw_sp_ipip_type ipipt,
964 struct net_device *ol_dev,
965 struct netlink_ext_ack *extack)
967 struct mlxsw_sp_rif_params_ipip_lb lb_params;
968 const struct mlxsw_sp_ipip_ops *ipip_ops;
969 struct mlxsw_sp_rif *rif;
971 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
972 lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
973 .common.dev = ol_dev,
975 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
978 rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
980 return ERR_CAST(rif);
981 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
984 static struct mlxsw_sp_ipip_entry *
985 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
986 enum mlxsw_sp_ipip_type ipipt,
987 struct net_device *ol_dev)
989 struct mlxsw_sp_ipip_entry *ipip_entry;
990 struct mlxsw_sp_ipip_entry *ret = NULL;
992 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
994 return ERR_PTR(-ENOMEM);
996 ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
998 if (IS_ERR(ipip_entry->ol_lb)) {
999 ret = ERR_CAST(ipip_entry->ol_lb);
1000 goto err_ol_ipip_lb_create;
1003 ipip_entry->ipipt = ipipt;
1004 ipip_entry->ol_dev = ol_dev;
1005 ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
1009 err_ol_ipip_lb_create:
1015 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1017 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1022 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1023 const enum mlxsw_sp_l3proto ul_proto,
1024 union mlxsw_sp_l3addr saddr,
1026 struct mlxsw_sp_ipip_entry *ipip_entry)
1028 u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1029 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1030 union mlxsw_sp_l3addr tun_saddr;
1032 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1035 tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1036 return tun_ul_tb_id == ul_tb_id &&
1037 mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1041 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1042 struct mlxsw_sp_fib_entry *fib_entry,
1043 struct mlxsw_sp_ipip_entry *ipip_entry)
1048 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1052 ipip_entry->decap_fib_entry = fib_entry;
1053 fib_entry->decap.ipip_entry = ipip_entry;
1054 fib_entry->decap.tunnel_index = tunnel_index;
1058 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1059 struct mlxsw_sp_fib_entry *fib_entry)
1061 /* Unlink this node from the IPIP entry that it's the decap entry of. */
1062 fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1063 fib_entry->decap.ipip_entry = NULL;
1064 mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1067 static struct mlxsw_sp_fib_node *
1068 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1069 size_t addr_len, unsigned char prefix_len);
1070 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1071 struct mlxsw_sp_fib_entry *fib_entry);
1074 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1075 struct mlxsw_sp_ipip_entry *ipip_entry)
1077 struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1079 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1080 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1082 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1086 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1087 struct mlxsw_sp_ipip_entry *ipip_entry,
1088 struct mlxsw_sp_fib_entry *decap_fib_entry)
1090 if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1093 decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1095 if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1096 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1099 /* Given an IPIP entry, find the corresponding decap route. */
1100 static struct mlxsw_sp_fib_entry *
1101 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1102 struct mlxsw_sp_ipip_entry *ipip_entry)
1104 static struct mlxsw_sp_fib_node *fib_node;
1105 const struct mlxsw_sp_ipip_ops *ipip_ops;
1106 struct mlxsw_sp_fib_entry *fib_entry;
1107 unsigned char saddr_prefix_len;
1108 union mlxsw_sp_l3addr saddr;
1109 struct mlxsw_sp_fib *ul_fib;
1110 struct mlxsw_sp_vr *ul_vr;
1116 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1118 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1119 ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1123 ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1124 saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1125 ipip_entry->ol_dev);
1127 switch (ipip_ops->ul_proto) {
1128 case MLXSW_SP_L3_PROTO_IPV4:
1129 saddr4 = be32_to_cpu(saddr.addr4);
1132 saddr_prefix_len = 32;
1134 case MLXSW_SP_L3_PROTO_IPV6:
1139 fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1141 if (!fib_node || list_empty(&fib_node->entry_list))
1144 fib_entry = list_first_entry(&fib_node->entry_list,
1145 struct mlxsw_sp_fib_entry, list);
1146 if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1152 static struct mlxsw_sp_ipip_entry *
1153 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1154 enum mlxsw_sp_ipip_type ipipt,
1155 struct net_device *ol_dev)
1157 struct mlxsw_sp_ipip_entry *ipip_entry;
1159 ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1160 if (IS_ERR(ipip_entry))
1163 list_add_tail(&ipip_entry->ipip_list_node,
1164 &mlxsw_sp->router->ipip_list);
1170 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1171 struct mlxsw_sp_ipip_entry *ipip_entry)
1173 list_del(&ipip_entry->ipip_list_node);
1174 mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1178 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1179 const struct net_device *ul_dev,
1180 enum mlxsw_sp_l3proto ul_proto,
1181 union mlxsw_sp_l3addr ul_dip,
1182 struct mlxsw_sp_ipip_entry *ipip_entry)
1184 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1185 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1186 struct net_device *ipip_ul_dev;
1188 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1191 ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1192 return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1193 ul_tb_id, ipip_entry) &&
1194 (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1197 /* Given decap parameters, find the corresponding IPIP entry. */
1198 static struct mlxsw_sp_ipip_entry *
1199 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1200 const struct net_device *ul_dev,
1201 enum mlxsw_sp_l3proto ul_proto,
1202 union mlxsw_sp_l3addr ul_dip)
1204 struct mlxsw_sp_ipip_entry *ipip_entry;
1206 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1208 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1216 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1217 const struct net_device *dev,
1218 enum mlxsw_sp_ipip_type *p_type)
1220 struct mlxsw_sp_router *router = mlxsw_sp->router;
1221 const struct mlxsw_sp_ipip_ops *ipip_ops;
1222 enum mlxsw_sp_ipip_type ipipt;
1224 for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1225 ipip_ops = router->ipip_ops_arr[ipipt];
1226 if (dev->type == ipip_ops->dev_type) {
1235 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1236 const struct net_device *dev)
1238 return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1241 static struct mlxsw_sp_ipip_entry *
1242 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1243 const struct net_device *ol_dev)
1245 struct mlxsw_sp_ipip_entry *ipip_entry;
1247 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1249 if (ipip_entry->ol_dev == ol_dev)
1255 static struct mlxsw_sp_ipip_entry *
1256 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1257 const struct net_device *ul_dev,
1258 struct mlxsw_sp_ipip_entry *start)
1260 struct mlxsw_sp_ipip_entry *ipip_entry;
1262 ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1264 list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1266 struct net_device *ipip_ul_dev =
1267 __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1269 if (ipip_ul_dev == ul_dev)
1276 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1277 const struct net_device *dev)
1279 return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
/* Ask the per-type ops (router->ipip_ops_arr[ipipt]) whether the tunnel on
 * @ol_dev can be offloaded. Returns true when ->can_offload() accepts the
 * device with either MLXSW_SP_L3_PROTO_IPV4 or MLXSW_SP_L3_PROTO_IPV6 as the
 * protocol argument.
 */
1282 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1283 const struct net_device *ol_dev,
1284 enum mlxsw_sp_ipip_type ipipt)
1286 const struct mlxsw_sp_ipip_ops *ops
1287 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1289 /* For deciding whether decap should be offloaded, we don't care about
1290 * overlay protocol, so ask whether either one is supported.
1292 return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1293 ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
/* Handle registration of an IPIP overlay netdevice.
 * When the tunnel type found for @ol_dev can be offloaded, the underlay table
 * ID, the underlay protocol and the tunnel source address are collected and
 * mlxsw_sp_ipip_demote_tunnel_by_saddr() is consulted; an IPIP entry is
 * created when that call returns false. Returns the PTR_ERR() of the entry if
 * creation fails.
 * NOTE(review): the return value of mlxsw_sp_netdev_ipip_type() is not checked
 * here, so callers are expected to pass only recognized tunnel devices -
 * confirm. The success return is not visible in this excerpt (presumably 0).
 */
1296 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1297 struct net_device *ol_dev)
1299 struct mlxsw_sp_ipip_entry *ipip_entry;
1300 enum mlxsw_sp_l3proto ul_proto;
1301 enum mlxsw_sp_ipip_type ipipt;
1302 union mlxsw_sp_l3addr saddr;
1305 mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1306 if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1307 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1308 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1309 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1310 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1313 ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1315 if (IS_ERR(ipip_entry))
1316 return PTR_ERR(ipip_entry);
/* Handle unregistration of an overlay netdevice: look up the IPIP entry that
 * belongs to @ol_dev and destroy it.
 * NOTE(review): a guard between the lookup and the destroy is presumably on a
 * line that is not visible in this excerpt.
 */
1323 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1324 struct net_device *ol_dev)
1326 struct mlxsw_sp_ipip_entry *ipip_entry;
1328 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1330 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
/* Overlay-up handling for an existing IPIP entry: search for the decap FIB
 * entry that belongs to @ipip_entry and, if one is found, promote it with
 * mlxsw_sp_ipip_entry_promote_decap().
 */
1334 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1335 struct mlxsw_sp_ipip_entry *ipip_entry)
1337 struct mlxsw_sp_fib_entry *decap_fib_entry;
1339 decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1340 if (decap_fib_entry)
1341 mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
/* Netdevice-level wrapper: find the IPIP entry of @ol_dev and forward to
 * mlxsw_sp_ipip_entry_ol_up_event().
 * NOTE(review): a check of the lookup result is presumably on a line that is
 * not visible in this excerpt.
 */
1345 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1346 struct net_device *ol_dev)
1348 struct mlxsw_sp_ipip_entry *ipip_entry;
1350 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1352 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
/* Overlay-down handling for an existing IPIP entry: if the entry currently
 * has a decap FIB entry attached (decap_fib_entry is set), demote it.
 */
1356 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1357 struct mlxsw_sp_ipip_entry *ipip_entry)
1359 if (ipip_entry->decap_fib_entry)
1360 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
/* Netdevice-level wrapper: find the IPIP entry of @ol_dev and forward to
 * mlxsw_sp_ipip_entry_ol_down_event().
 * NOTE(review): a check of the lookup result is presumably on a line that is
 * not visible in this excerpt.
 */
1363 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1364 struct net_device *ol_dev)
1366 struct mlxsw_sp_ipip_entry *ipip_entry;
1368 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1370 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1373 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1374 struct mlxsw_sp_rif *old_rif,
1375 struct mlxsw_sp_rif *new_rif);
/* Replace the loopback RIF of @ipip_entry.
 * A new loopback RIF is created first and stored in ipip_entry->ol_lb; next
 * hops can then be moved from the old RIF to the new one with
 * mlxsw_sp_nexthop_rif_migrate(), and finally the old RIF is destroyed.
 * Returns the PTR_ERR() of the new RIF when its creation fails.
 * NOTE(review): the third parameter and the condition guarding the migration
 * are on lines not visible here; the caller in this file passes its
 * keep_encap flag in that position.
 */
1377 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1378 struct mlxsw_sp_ipip_entry *ipip_entry,
1380 struct netlink_ext_ack *extack)
1382 struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1383 struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1385 new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1389 if (IS_ERR(new_lb_rif))
1390 return PTR_ERR(new_lb_rif);
1391 ipip_entry->ol_lb = new_lb_rif;
1394 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1395 &new_lb_rif->common);
1397 mlxsw_sp_rif_destroy(&old_lb_rif->common);
1402 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1403 struct mlxsw_sp_rif *rif);
/* Flow of the function below: demote the decap route if one is installed,
 * then either recreate the loopback RIF (@recreate_loopback) or refresh the
 * next hops on the existing loopback RIF (@update_nexthops), and finally
 * re-run the overlay-up handling when the overlay device has IFF_UP set.
 * NOTE(review): keep_encap is forwarded to mlxsw_sp_ipip_entry_ol_lb_update();
 * its parameter line, the err declaration and the error/return handling are
 * not visible in this excerpt.
 */
1406 * Update the offload related to an IPIP entry. This always updates decap, and
1407 * in addition to that it also:
1408 * @recreate_loopback: recreates the associated loopback RIF
1409 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1410 * relevant when recreate_loopback is true.
1411 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1412 * is only relevant when recreate_loopback is false.
1414 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1415 struct mlxsw_sp_ipip_entry *ipip_entry,
1416 bool recreate_loopback,
1418 bool update_nexthops,
1419 struct netlink_ext_ack *extack)
1423 /* RIFs can't be edited, so to update loopback, we need to destroy and
1424 * recreate it. That creates a window of opportunity where RALUE and
1425 * RATR registers end up referencing a RIF that's already gone. RATRs
1426 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1427 * of RALUE, demote the decap route back.
1429 if (ipip_entry->decap_fib_entry)
1430 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1432 if (recreate_loopback) {
1433 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1434 keep_encap, extack);
1437 } else if (update_nexthops) {
1438 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1439 &ipip_entry->ol_lb->common);
1442 if (ipip_entry->ol_dev->flags & IFF_UP)
1443 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
/* Handle an overlay device being moved to a different VRF.
 * Recomputes the underlay table ID, underlay protocol and source address for
 * the IPIP entry of @ol_dev and calls mlxsw_sp_ipip_demote_tunnel_by_saddr();
 * when that call returns true this entry is demoted as well. Otherwise the
 * tunnel is updated through __mlxsw_sp_ipip_entry_update_tunnel() with the
 * flags (true, false, false), i.e. the loopback RIF is recreated.
 * NOTE(review): the handling of a failed entry lookup and the return after
 * the demotion are on lines not visible in this excerpt.
 */
1448 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1449 struct net_device *ol_dev,
1450 struct netlink_ext_ack *extack)
1452 struct mlxsw_sp_ipip_entry *ipip_entry =
1453 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1454 enum mlxsw_sp_l3proto ul_proto;
1455 union mlxsw_sp_l3addr saddr;
1461 /* For flat configuration cases, moving overlay to a different VRF might
1462 * cause local address conflict, and the conflicting tunnels need to be
1465 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1466 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1467 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1468 if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1471 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1475 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1476 true, false, false, extack);
/* Handle a VRF change of the underlay device of @ipip_entry by updating the
 * tunnel with the flags (true, true, false): the loopback RIF is recreated
 * and the second flag (keep_encap in the callee documentation) is set.
 * @ul_dev is not referenced in the visible body.
 */
1480 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1481 struct mlxsw_sp_ipip_entry *ipip_entry,
1482 struct net_device *ul_dev,
1483 struct netlink_ext_ack *extack)
1485 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1486 true, true, false, extack);
/* Underlay device came up: refresh the next hops of @ipip_entry while keeping
 * the current loopback RIF (flags false, false, true). No extack is available
 * for this event, so NULL is passed. @ul_dev is not referenced in the visible
 * body.
 */
1490 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1491 struct mlxsw_sp_ipip_entry *ipip_entry,
1492 struct net_device *ul_dev)
1494 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1495 false, false, true, NULL);
/* Underlay device went down: issues the same tunnel update as the underlay-up
 * handler (flags false, false, true, no extack), which only refreshes next
 * hops. @ul_dev is not referenced in the visible body.
 */
1499 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1500 struct mlxsw_sp_ipip_entry *ipip_entry,
1501 struct net_device *ul_dev)
1503 /* A down underlay device causes encapsulated packets to not be
1504 * forwarded, but decap still works. So refresh next hops without
1505 * touching anything else.
1507 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1508 false, false, true, NULL);
/* Handle a configuration change of an overlay device.
 * If the tunnel can no longer be offloaded, its IPIP entry is demoted;
 * otherwise the change is handed to the type-specific ->ol_netdev_change()
 * callback and its result is stored in err.
 * NOTE(review): the handling of a missing entry and the return statements are
 * on lines not visible in this excerpt.
 */
1512 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1513 struct net_device *ol_dev,
1514 struct netlink_ext_ack *extack)
1516 const struct mlxsw_sp_ipip_ops *ipip_ops;
1517 struct mlxsw_sp_ipip_entry *ipip_entry;
1520 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1522 /* A change might make a tunnel eligible for offloading, but
1523 * that is currently not implemented. What falls to slow path
1528 /* A change might make a tunnel not eligible for offloading. */
1529 if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1530 ipip_entry->ipipt)) {
1531 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1535 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1536 err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
/* Stop offloading the tunnel described by @ipip_entry.
 * If the overlay device is up (IFF_UP), the overlay-down handling runs first
 * so the decap route is demoted; the entry is then destroyed, so callers must
 * not use @ipip_entry afterwards.
 */
1540 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1541 struct mlxsw_sp_ipip_entry *ipip_entry)
1543 struct net_device *ol_dev = ipip_entry->ol_dev;
1545 if (ol_dev->flags & IFF_UP)
1546 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1547 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
/* Implementation notes for the function below: router->ipip_list is walked
 * with list_for_each_entry_safe() because demoting a matching entry destroys
 * it. The @except entry is skipped by pointer comparison. Callers in this
 * file test the result as a boolean.
 * NOTE(review): the return type, the ul_tb_id parameter line and the return
 * statements are not visible in this excerpt.
 */
1550 /* The configuration where several tunnels have the same local address in the
1551 * same underlay table needs special treatment in the HW. That is currently not
1552 * implemented in the driver. This function finds and demotes the first tunnel
1553 * with a given source address, except the one passed in in the argument
1557 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1558 enum mlxsw_sp_l3proto ul_proto,
1559 union mlxsw_sp_l3addr saddr,
1561 const struct mlxsw_sp_ipip_entry *except)
1563 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1565 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1567 if (ipip_entry != except &&
1568 mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1569 ul_tb_id, ipip_entry)) {
1570 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Demote every IPIP entry whose underlay device is @ul_dev.
 * The safe list iterator is used because mlxsw_sp_ipip_entry_demote_tunnel()
 * destroys the entry it is given.
 */
1578 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1579 struct net_device *ul_dev)
1581 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1583 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1585 struct net_device *ipip_ul_dev =
1586 __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1588 if (ipip_ul_dev == ul_dev)
1589 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Dispatch a netdevice notifier @event for overlay device @ol_dev to the
 * matching overlay handler (register, unregister, up, down, VRF change,
 * configuration change). For NETDEV_CHANGEUPPER the VRF handler runs only
 * when the upper device is an L3 master. The extack for the VRF and change
 * handlers is taken from @info.
 * NOTE(review): several case labels and the fall-through return value are on
 * lines not visible in this excerpt.
 */
1593 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1594 struct net_device *ol_dev,
1595 unsigned long event,
1596 struct netdev_notifier_info *info)
1598 struct netdev_notifier_changeupper_info *chup;
1599 struct netlink_ext_ack *extack;
1602 case NETDEV_REGISTER:
1603 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1604 case NETDEV_UNREGISTER:
1605 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1608 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1611 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1613 case NETDEV_CHANGEUPPER:
1614 chup = container_of(info, typeof(*chup), info);
1615 extack = info->extack;
1616 if (netif_is_l3_master(chup->upper_dev))
1617 return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1622 extack = info->extack;
1623 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
/* Dispatch a netdevice notifier @event on underlay device @ul_dev for a single
 * IPIP entry: VRF change (NETDEV_CHANGEUPPER with an L3 master upper), up and
 * down are forwarded to the matching underlay handlers and their results are
 * returned.
 * NOTE(review): the up/down case labels and the default return are on lines
 * not visible in this excerpt.
 */
1630 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1631 struct mlxsw_sp_ipip_entry *ipip_entry,
1632 struct net_device *ul_dev,
1633 unsigned long event,
1634 struct netdev_notifier_info *info)
1636 struct netdev_notifier_changeupper_info *chup;
1637 struct netlink_ext_ack *extack;
1640 case NETDEV_CHANGEUPPER:
1641 chup = container_of(info, typeof(*chup), info);
1642 extack = info->extack;
1643 if (netif_is_l3_master(chup->upper_dev))
1644 return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1651 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1654 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
/* Deliver an underlay-device @event to every IPIP entry that uses @ul_dev.
 * Entries are fetched one at a time with mlxsw_sp_ipip_entry_find_by_ul_dev()
 * starting from a NULL cursor, and each one is passed to
 * __mlxsw_sp_netdevice_ipip_ul_event().
 * NOTE(review): mlxsw_sp_ipip_demote_tunnel_by_ul_netdev() is presumably
 * reached only when the per-entry handler fails; the check of err and the
 * return statements are on lines not visible in this excerpt.
 */
1662 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1663 struct net_device *ul_dev,
1664 unsigned long event,
1665 struct netdev_notifier_info *info)
1667 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1670 while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1673 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1674 ul_dev, event, info);
1676 mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
/* Hash key of a driver neighbour entry: the pointer to the kernel neighbour
 * it tracks. The whole struct is used as the rhashtable key.
 */
1685 struct mlxsw_sp_neigh_key {
1686 struct neighbour *n;
/* Driver-side state kept for one kernel neighbour.
 * rif_list_node links the entry on its RIF's neigh_list, ht_node links it in
 * router->neigh_ht under @key, ha holds a copy of the neighbour's hardware
 * address, nexthop_neighs_list_node links it on the router's
 * nexthop_neighs_list and counter_index identifies an optional flow counter.
 * NOTE(review): other members used elsewhere in this file (rif, connected,
 * counter_valid) are declared on lines not visible in this excerpt.
 */
1689 struct mlxsw_sp_neigh_entry {
1690 struct list_head rif_list_node;
1691 struct rhash_head ht_node;
1692 struct mlxsw_sp_neigh_key key;
1695 unsigned char ha[ETH_ALEN];
1696 struct list_head nexthop_list; /* list of nexthops using
1699 struct list_head nexthop_neighs_list_node;
1700 unsigned int counter_index;
/* rhashtable parameters for the neighbour hash table: entries are keyed by
 * the struct mlxsw_sp_neigh_key embedded in each entry and chained through
 * their ht_node member.
 */
1704 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1705 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1706 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1707 .key_len = sizeof(struct mlxsw_sp_neigh_key),
/* Iterator over the neighbour entries linked on rif->neigh_list.
 * The visible branches return either the first entry of the list or the
 * entry that follows @neigh_entry; an empty list and the last entry are
 * detected explicitly.
 * NOTE(review): the branch condition (presumably a NULL @neigh_entry meaning
 * start from the head) and the end-of-iteration return values are on lines
 * not visible in this excerpt.
 */
1710 struct mlxsw_sp_neigh_entry *
1711 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1712 struct mlxsw_sp_neigh_entry *neigh_entry)
1715 if (list_empty(&rif->neigh_list))
1718 return list_first_entry(&rif->neigh_list,
1719 typeof(*neigh_entry),
1722 if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1724 return list_next_entry(neigh_entry, rif_list_node);
/* Return the address family of the neighbour table that the tracked
 * neighbour belongs to (key.n->tbl->family).
 */
1727 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1729 return neigh_entry->key.n->tbl->family;
/* Accessor for the hardware address cached in the entry. The returned
 * pointer refers to storage inside @neigh_entry; no copy is made.
 */
1733 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1735 return neigh_entry->ha;
/* Return the IPv4 address of the tracked neighbour in host byte order: the
 * neighbour's primary_key is read as a __be32 and converted with ntohl().
 * Only meaningful for entries that track an IPv4 neighbour.
 */
1738 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1740 struct neighbour *n;
1742 n = neigh_entry->key.n;
1743 return ntohl(*((__be32 *) n->primary_key));
/* Return a pointer to the IPv6 address of the tracked neighbour. The pointer
 * aliases the neighbour's primary_key storage; no copy is made.
 */
1747 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1749 struct neighbour *n;
1751 n = neigh_entry->key.n;
1752 return (struct in6_addr *) &n->primary_key;
/* Read the flow counter attached to @neigh_entry.
 * Entries without a valid counter (counter_valid not set) are rejected up
 * front; otherwise the result of mlxsw_sp_flow_counter_get() for
 * counter_index is returned.
 * NOTE(review): the output parameter line and the error value for the
 * no-counter case are not visible in this excerpt.
 */
1755 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1756 struct mlxsw_sp_neigh_entry *neigh_entry,
1759 if (!neigh_entry->counter_valid)
1762 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
/* Allocate a zeroed neighbour entry with GFP_KERNEL and initialize its key
 * (the neighbour pointer), its rif member and its nexthop_list head.
 * NOTE(review): the third parameter (rif) is declared on a line not visible
 * here; the caller in this file passes a RIF index. The allocation-failure
 * check and the return statements are not visible either.
 */
1766 static struct mlxsw_sp_neigh_entry *
1767 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1770 struct mlxsw_sp_neigh_entry *neigh_entry;
1772 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1776 neigh_entry->key.n = n;
1777 neigh_entry->rif = rif;
1778 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
/* Counterpart of mlxsw_sp_neigh_entry_alloc(); called on the error path of
 * entry creation and at the end of entry destruction.
 * NOTE(review): the body is not visible in this excerpt.
 */
1783 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
/* Insert @neigh_entry into the router's neighbour hash table and return the
 * result of rhashtable_insert_fast().
 */
1789 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1790 struct mlxsw_sp_neigh_entry *neigh_entry)
1792 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1793 &neigh_entry->ht_node,
1794 mlxsw_sp_neigh_ht_params);
/* Remove @neigh_entry from the router's neighbour hash table. The result of
 * rhashtable_remove_fast() is not checked.
 */
1798 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1799 struct mlxsw_sp_neigh_entry *neigh_entry)
1801 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1802 &neigh_entry->ht_node,
1803 mlxsw_sp_neigh_ht_params);
/* Decide whether a flow counter should be allocated for @neigh_entry.
 * The dpipe host table name (HOST4 or HOST6) is selected from the entry's
 * address family, and devlink is asked whether counters are enabled on that
 * table.
 * NOTE(review): the case labels and the handling of other families are on
 * lines not visible in this excerpt.
 */
1807 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1808 struct mlxsw_sp_neigh_entry *neigh_entry)
1810 struct devlink *devlink;
1811 const char *table_name;
1813 switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1815 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1818 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1825 devlink = priv_to_devlink(mlxsw_sp->core);
1826 return devlink_dpipe_table_counter_enabled(devlink, table_name);
/* Best-effort allocation of a flow counter for @neigh_entry.
 * Nothing is reported to the caller: counter_valid is set only after
 * mlxsw_sp_neigh_counter_should_alloc() agreed and
 * mlxsw_sp_flow_counter_alloc() returned zero.
 * NOTE(review): the early returns after the two checks are on lines not
 * visible in this excerpt.
 */
1830 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1831 struct mlxsw_sp_neigh_entry *neigh_entry)
1833 if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1836 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1839 neigh_entry->counter_valid = true;
/* Release the flow counter of @neigh_entry, if it has one, and clear
 * counter_valid so that a repeated call does not free it again.
 */
1843 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1844 struct mlxsw_sp_neigh_entry *neigh_entry)
1846 if (!neigh_entry->counter_valid)
1848 mlxsw_sp_flow_counter_free(mlxsw_sp,
1849 neigh_entry->counter_index);
1850 neigh_entry->counter_valid = false;
/* Create the driver entry for kernel neighbour @n.
 * The RIF is looked up from n->dev, the entry is allocated and inserted into
 * the neighbour hash table, a flow counter is allocated on a best-effort
 * basis and the entry is linked on the RIF's neigh_list.
 * Returns an ERR_PTR() on failure: -EINVAL and -ENOMEM appear on the visible
 * error paths, and an insertion error is propagated after freeing the entry.
 * NOTE(review): the conditions guarding the error returns and the success
 * return are on lines not visible in this excerpt.
 */
1853 static struct mlxsw_sp_neigh_entry *
1854 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1856 struct mlxsw_sp_neigh_entry *neigh_entry;
1857 struct mlxsw_sp_rif *rif;
1860 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1862 return ERR_PTR(-EINVAL);
1864 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1866 return ERR_PTR(-ENOMEM);
1868 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1870 goto err_neigh_entry_insert;
1872 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1873 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1877 err_neigh_entry_insert:
1878 mlxsw_sp_neigh_entry_free(neigh_entry);
1879 return ERR_PTR(err);
/* Tear down a neighbour entry in the reverse order of its creation: unlink it
 * from the RIF list, release its flow counter, remove it from the hash table
 * and free it.
 */
1883 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1884 struct mlxsw_sp_neigh_entry *neigh_entry)
1886 list_del(&neigh_entry->rif_list_node);
1887 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1888 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1889 mlxsw_sp_neigh_entry_free(neigh_entry);
/* Look up the driver entry for kernel neighbour @n in the neighbour hash
 * table, using a key built on the stack. Returns whatever
 * rhashtable_lookup_fast() returns.
 * NOTE(review): the assignment that fills the key from @n is on a line not
 * visible in this excerpt.
 */
1892 static struct mlxsw_sp_neigh_entry *
1893 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1895 struct mlxsw_sp_neigh_key key;
1898 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1899 &key, mlxsw_sp_neigh_ht_params);
/* Initialize the neighbour activity polling interval from the default
 * DELAY_PROBE_TIME of the ARP table; when IPv6 is enabled the smaller of the
 * ARP and ND values is used. The value is converted from jiffies and stored
 * in milliseconds.
 */
1903 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1905 unsigned long interval;
1907 #if IS_ENABLED(CONFIG_IPV6)
1908 interval = min_t(unsigned long,
1909 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1910 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1912 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1914 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
/* Process one IPv4 entry of a RAUHTD dump.
 * The RIF index and destination IP are unpacked from the payload; the RIF
 * must exist in router->rifs, otherwise a rate-limited error is logged. The
 * matching kernel neighbour is looked up in arp_tbl on the RIF's netdevice
 * and, if found, poked with neigh_event_send().
 * NOTE(review): the conversion that produces dipn, the early returns and the
 * release of the neighbour reference are on lines not visible in this
 * excerpt.
 */
1917 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1921 struct net_device *dev;
1922 struct neighbour *n;
1927 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1929 if (!mlxsw_sp->router->rifs[rif]) {
1930 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1935 dev = mlxsw_sp->router->rifs[rif]->dev;
1936 n = neigh_lookup(&arp_tbl, &dipn, dev);
1938 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
1943 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1944 neigh_event_send(n, NULL);
/* Process one IPv6 record of a RAUHTD dump; IPv6 counterpart of the IPv4
 * entry handler.
 * With CONFIG_IPV6 enabled, the RIF index and destination address are
 * unpacked, the RIF is validated against router->rifs, the neighbour is
 * looked up in nd_tbl and, if found, poked with neigh_event_send(). A second
 * definition with the same name follows for the other branch of the
 * preprocessor conditional.
 * NOTE(review): the early returns, the neighbour release, the body of the
 * second definition and the preprocessor else/endif lines are not visible in
 * this excerpt.
 */
1948 #if IS_ENABLED(CONFIG_IPV6)
1949 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1953 struct net_device *dev;
1954 struct neighbour *n;
1955 struct in6_addr dip;
1958 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1961 if (!mlxsw_sp->router->rifs[rif]) {
1962 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1966 dev = mlxsw_sp->router->rifs[rif]->dev;
1967 n = neigh_lookup(&nd_tbl, &dip, dev);
1969 netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
1974 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1975 neigh_event_send(n, NULL);
1979 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
/* Process one IPv4 record of a RAUHTD dump. A record holds several neighbour
 * entries; each is handed to the per-entry handler using the flat index
 * rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i.
 * NOTE(review): the increment that compensates for the zero-based hardware
 * count is on a line not visible in this excerpt.
 */
1986 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1993 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1995 /* Hardware starts counting at 0, so add 1. */
1998 /* Each record consists of several neighbour entries. */
1999 for (i = 0; i < num_entries; i++) {
2002 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2003 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
/* Process one IPv6 record of a RAUHTD dump by forwarding it to the per-entry
 * IPv6 handler; unlike IPv4, a record carries a single entry.
 */
2009 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2013 /* One record contains one entry. */
2014 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Dispatch one RAUHTD record to the IPv4 or IPv6 record handler according to
 * the record type stored in the payload.
 */
2018 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2019 char *rauhtd_pl, int rec_index)
2021 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2022 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2023 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2026 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2027 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Decide whether a RAUHTD dump response is full; the dump loop in this file
 * repeats the query while this returns true.
 * Three things are examined: whether the record count reached
 * MLXSW_REG_RAUHTD_REC_MAX_NUM, whether the last record is an IPv6 record,
 * and, for an IPv4 last record, whether its entry count plus one equals
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC.
 * NOTE(review): the return statements attached to each check are on lines
 * not visible in this excerpt.
 */
2033 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2035 u8 num_rec, last_rec_index, num_entries;
2037 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2038 last_rec_index = num_rec - 1;
2040 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2042 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2043 MLXSW_REG_RAUHTD_TYPE_IPV6)
2046 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2048 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
/* Dump the device neighbour activity table for one record @type and process
 * every returned record. The query is repeated for as long as the response
 * is reported full by mlxsw_sp_router_rauhtd_is_full(). A failed query is
 * logged with a rate-limited error.
 * NOTE(review): the locking mentioned by the inner comment, the exit from the
 * loop on error and the return value are on lines not visible in this
 * excerpt.
 */
2054 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2056 enum mlxsw_reg_rauhtd_type type)
2061 /* Make sure the neighbour's netdev isn't removed in the
2066 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2067 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2070 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2073 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2074 for (i = 0; i < num_rec; i++)
2075 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2077 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
/* Run the neighbour activity dump for IPv4 and then for IPv6, sharing one
 * heap-allocated RAUHTD payload buffer of MLXSW_REG_RAUHTD_LEN bytes.
 * NOTE(review): the allocation-failure check, the handling of an IPv4 error
 * before the IPv6 pass, the freeing of the buffer and the return value are on
 * lines not visible in this excerpt.
 */
2083 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2085 enum mlxsw_reg_rauhtd_type type;
2089 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2093 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2094 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2098 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2099 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
/* Send a neighbour event for every entry on the router's nexthop_neighs_list,
 * i.e. for neighbours that are used by next hops.
 * NOTE(review): the RTNL lock/unlock calls referred to by the inner comment
 * are on lines not visible in this excerpt.
 */
2105 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2107 struct mlxsw_sp_neigh_entry *neigh_entry;
2109 /* Take RTNL mutex here to prevent lists from changes */
2111 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2112 nexthop_neighs_list_node)
2113 /* If this neigh have nexthops, make the kernel think this neigh
2114 * is active regardless of the traffic.
2116 neigh_event_send(neigh_entry->key.n, NULL);
/* Schedule the next run of the neighbour activity delayed work. The stored
 * interval is in milliseconds and is converted to jiffies here.
 */
2121 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2123 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2125 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2126 msecs_to_jiffies(interval));
/* Delayed-work handler for periodic neighbour activity updates: dump the
 * device activity table, refresh neighbours used by next hops and re-arm the
 * work.
 * NOTE(review): the error message is presumably printed only when the dump
 * fails; the check of err is on a line not visible in this excerpt. The
 * message has no trailing newline.
 */
2129 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2131 struct mlxsw_sp_router *router;
2134 router = container_of(work, struct mlxsw_sp_router,
2135 neighs_update.dw.work);
2136 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2138 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2140 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2142 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
/* Delayed-work handler that probes unresolved next hop neighbours: every
 * entry on router->nexthop_neighs_list that is not connected gets a
 * neighbour event, and the work re-arms itself with
 * MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL.
 * NOTE(review): the RTNL lock/unlock calls referred to by the inner comment
 * are on lines not visible in this excerpt.
 */
2145 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2147 struct mlxsw_sp_neigh_entry *neigh_entry;
2148 struct mlxsw_sp_router *router;
2150 router = container_of(work, struct mlxsw_sp_router,
2151 nexthop_probe_dw.work);
2152 /* Iterate over nexthop neighbours, find those who are unresolved and
2153 * send arp on them. This solves the chicken-egg problem when
2154 * the nexthop wouldn't get offloaded until the neighbor is resolved
2155 * but it wouldn't get resolved ever in case traffic is flowing in HW
2156 * using different nexthop.
2158 * Take RTNL mutex here to prevent lists from changes.
2161 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2162 nexthop_neighs_list_node)
2163 if (!neigh_entry->connected)
2164 neigh_event_send(neigh_entry->key.n, NULL);
2167 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2168 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2172 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2173 struct mlxsw_sp_neigh_entry *neigh_entry,
/* Map the add/delete intent to the RAUHT register operation: WRITE_ADD when
 * @adding is true, WRITE_DELETE otherwise.
 */
2176 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2178 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2179 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
/* Write an IPv4 neighbour to the device through the RAUHT register.
 * The payload carries the operation, the entry's RIF, its cached hardware
 * address and the neighbour IP in host byte order; the flow counter is
 * attached when the entry has a valid one. The result of mlxsw_reg_write() is
 * not checked.
 */
2183 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2184 struct mlxsw_sp_neigh_entry *neigh_entry,
2185 enum mlxsw_reg_rauht_op op)
2187 struct neighbour *n = neigh_entry->key.n;
2188 u32 dip = ntohl(*((__be32 *) n->primary_key));
2189 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2191 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2193 if (neigh_entry->counter_valid)
2194 mlxsw_reg_rauht_pack_counter(rauht_pl,
2195 neigh_entry->counter_index);
2196 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Write an IPv6 neighbour to the device through the RAUHT register.
 * Same flow as the IPv4 variant, except that the address is taken from the
 * neighbour's primary_key as a byte pointer. The result of mlxsw_reg_write()
 * is not checked.
 */
2200 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2201 struct mlxsw_sp_neigh_entry *neigh_entry,
2202 enum mlxsw_reg_rauht_op op)
2204 struct neighbour *n = neigh_entry->key.n;
2205 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2206 const char *dip = n->primary_key;
2208 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2210 if (neigh_entry->counter_valid)
2211 mlxsw_reg_rauht_pack_counter(rauht_pl,
2212 neigh_entry->counter_index);
2213 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Tell whether an IPv6 neighbour should be left out of the device neighbour
 * table. The test is whether the neighbour address has the link-local type
 * bit set.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt; presumably true for link-local addresses and false otherwise.
 */
2216 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2218 struct neighbour *n = neigh_entry->key.n;
2220 /* Packets with a link-local destination address are trapped
2221 * after LPM lookup and never reach the neighbour table, so
2222 * there is no need to program such neighbours to the device.
2224 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2225 IPV6_ADDR_LINKLOCAL)
/* Add the neighbour to, or delete it from, the device neighbour table.
 * A delete request for an entry that is not connected is filtered out by the
 * first check. Otherwise the connected flag is updated and the IPv4 or IPv6
 * register operation is issued according to the neighbour table family; IPv6
 * neighbours that mlxsw_sp_neigh_ipv6_ignore() flags are not programmed.
 * NOTE(review): the third parameter (adding), the early returns and the
 * handling of other families are on lines not visible in this excerpt.
 */
2231 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2232 struct mlxsw_sp_neigh_entry *neigh_entry,
2235 if (!adding && !neigh_entry->connected)
2237 neigh_entry->connected = adding;
2238 if (neigh_entry->key.n->tbl->family == AF_INET) {
2239 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2240 mlxsw_sp_rauht_op(adding));
2241 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2242 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2244 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2245 mlxsw_sp_rauht_op(adding));
/* Attach or detach the flow counter of @neigh_entry and rewrite the entry to
 * the device (update with adding=true) so the change takes effect.
 * NOTE(review): the third parameter and the if/else that selects between the
 * alloc and free calls are on lines not visible in this excerpt.
 */
2252 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2253 struct mlxsw_sp_neigh_entry *neigh_entry,
2257 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2259 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2260 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
/* Context for work deferred from the netevent notifier: the work item, the
 * owning mlxsw_sp instance and, for neighbour updates, the neighbour.
 */
2263 struct mlxsw_sp_netevent_work {
2264 struct work_struct work;
2265 struct mlxsw_sp *mlxsw_sp;
2266 struct neighbour *n;
/* Deferred handler for a neighbour update event.
 * The hardware address and NUD state are snapshotted under the neighbour's
 * lock. A neighbour counts as connected when its state has a NUD_VALID bit
 * and it is not dead. The driver entry is looked up, and created when needed;
 * nothing is done for a disconnected neighbour without an entry. The snapshot
 * address is then stored, the device table and the dependent next hops are
 * updated, and an entry that ended up disconnected with no next hops is
 * destroyed.
 * NOTE(review): the read of the dead flag, the RTNL locking, the out label
 * with the neighbour release and the freeing of the work item are on lines
 * not visible in this excerpt.
 */
2269 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2271 struct mlxsw_sp_netevent_work *net_work =
2272 container_of(work, struct mlxsw_sp_netevent_work, work);
2273 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2274 struct mlxsw_sp_neigh_entry *neigh_entry;
2275 struct neighbour *n = net_work->n;
2276 unsigned char ha[ETH_ALEN];
2277 bool entry_connected;
2280 /* If these parameters are changed after we release the lock,
2281 * then we are guaranteed to receive another event letting us
2284 read_lock_bh(&n->lock);
2285 memcpy(ha, n->ha, ETH_ALEN);
2286 nud_state = n->nud_state;
2288 read_unlock_bh(&n->lock);
2291 entry_connected = nud_state & NUD_VALID && !dead;
2292 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2293 if (!entry_connected && !neigh_entry)
2296 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2297 if (IS_ERR(neigh_entry))
2301 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2302 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2303 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2305 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2306 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2314 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
/* Deferred handler for a multipath hash update event: re-run
 * mlxsw_sp_mp_hash_init() for the instance stored in the work context. The
 * return value of the init call is not used in the visible lines.
 * NOTE(review): the freeing of the work item is presumably on a line not
 * visible in this excerpt.
 */
2316 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2318 struct mlxsw_sp_netevent_work *net_work =
2319 container_of(work, struct mlxsw_sp_netevent_work, work);
2320 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2322 mlxsw_sp_mp_hash_init(mlxsw_sp);
/* Netevent notifier callback of the router.
 * Three events are handled in the visible lines:
 *  - NETEVENT_DELAY_PROBE_TIME_UPDATE: for IPv4/IPv6 parms bound to a device
 *    above an mlxsw port, store the new DELAY_PROBE_TIME (in milliseconds) as
 *    the neighbour polling interval.
 *  - NETEVENT_NEIGH_UPDATE: for IPv4/IPv6 neighbours on such a device, queue
 *    mlxsw_sp_router_neigh_event_work().
 *  - NETEVENT_MULTIPATH_HASH_UPDATE: for init_net only, queue
 *    mlxsw_sp_router_mp_hash_event_work().
 * Work items are allocated with GFP_ATOMIC, and the port reference taken by
 * mlxsw_sp_port_lower_dev_hold() is dropped on every visible path.
 * NOTE(review): the assignments from @ptr, the neighbour reference taken for
 * the work item and the notifier return values are on lines not visible in
 * this excerpt.
 */
2326 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2327 unsigned long event, void *ptr)
2329 struct mlxsw_sp_netevent_work *net_work;
2330 struct mlxsw_sp_port *mlxsw_sp_port;
2331 struct mlxsw_sp_router *router;
2332 struct mlxsw_sp *mlxsw_sp;
2333 unsigned long interval;
2334 struct neigh_parms *p;
2335 struct neighbour *n;
2339 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2342 /* We don't care about changes in the default table. */
2343 if (!p->dev || (p->tbl->family != AF_INET &&
2344 p->tbl->family != AF_INET6))
2347 /* We are in atomic context and can't take RTNL mutex,
2348 * so use RCU variant to walk the device chain.
2350 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2354 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2355 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2356 mlxsw_sp->router->neighs_update.interval = interval;
2358 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2360 case NETEVENT_NEIGH_UPDATE:
2363 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2366 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2370 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2372 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2376 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2377 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2380 /* Take a reference to ensure the neighbour won't be
2381 * destructed until we drop the reference in delayed
2385 mlxsw_core_schedule_work(&net_work->work);
2386 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2388 case NETEVENT_MULTIPATH_HASH_UPDATE:
2391 if (!net_eq(net, &init_net))
2394 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2398 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2399 INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2400 net_work->mlxsw_sp = router->mlxsw_sp;
2401 mlxsw_core_schedule_work(&net_work->work);
/* Set up neighbour handling for the router: initialize the neighbour hash
 * table, derive the initial polling interval, initialize the two delayed
 * works (activity update and unresolved next hop probing) and schedule both
 * to run immediately (delay 0).
 * NOTE(review): the check of the rhashtable_init() result and the return
 * value are on lines not visible in this excerpt.
 */
2408 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2412 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2413 &mlxsw_sp_neigh_ht_params);
2417 /* Initialize the polling interval according to the default
2420 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2422 /* Create the delayed works for the activity_update */
2423 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2424 mlxsw_sp_router_neighs_update_work);
2425 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2426 mlxsw_sp_router_probe_unresolved_nexthops);
2427 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2428 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
/* Undo mlxsw_sp_neigh_init(): cancel both delayed works synchronously, so
 * neither is still running, and then destroy the neighbour hash table.
 */
2432 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2434 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2435 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2436 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
/* Flush all neighbour entries of a RIF that is going away: each entry on
 * rif->neigh_list is deleted from the device table and then destroyed. The
 * safe iterator is required because destroying an entry unlinks it from the
 * list being walked.
 */
2439 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2440 struct mlxsw_sp_rif *rif)
2442 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2444 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2446 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2447 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Kind of a next hop: a regular Ethernet next hop or one that goes through an
 * IPIP tunnel.
 */
2451 enum mlxsw_sp_nexthop_type {
2452 MLXSW_SP_NEXTHOP_TYPE_ETH,
2453 MLXSW_SP_NEXTHOP_TYPE_IPIP,
/* Lookup key of a next hop: the kernel fib_nh it mirrors. */
2456 struct mlxsw_sp_nexthop_key {
2457 struct fib_nh *fib_nh;
/* Driver-side state of a single next hop.
 * The structure carries list linkage (neighbour entry list, RIF list, router
 * list), a back pointer to its group, hash table linkage with its key, the
 * gateway address (sized for IPv6), the number of adjacency entries it
 * occupies, its RIF, the offload state bits, its type, and pointers to a
 * neighbour entry and an IPIP entry.
 * NOTE(review): the neighbour and IPIP pointers are presumably members of a
 * union selected by @type; the union lines and several other members
 * (including counter_valid, used elsewhere in this file) are not visible in
 * this excerpt.
 */
2460 struct mlxsw_sp_nexthop {
2461 struct list_head neigh_list_node; /* member of neigh entry list */
2462 struct list_head rif_list_node;
2463 struct list_head router_list_node;
2464 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2467 struct rhash_head ht_node;
2468 struct mlxsw_sp_nexthop_key key;
2469 unsigned char gw_addr[sizeof(struct in6_addr)];
2473 int num_adj_entries;
2474 struct mlxsw_sp_rif *rif;
2475 u8 should_offload:1, /* set indicates this neigh is connected and
2476 * should be put to KVD linear area of this group.
2478 offloaded:1, /* set in case the neigh is actually put into
2479 * KVD linear area of this group.
2481 update:1; /* set indicates that MAC of this neigh should be
2484 enum mlxsw_sp_nexthop_type type;
2486 struct mlxsw_sp_neigh_entry *neigh_entry;
2487 struct mlxsw_sp_ipip_entry *ipip_entry;
2489 unsigned int counter_index;
/* A group of next hops shared by FIB entries.
 * The next hops are stored inline in the trailing nexthops[] array; the
 * nh_rif macro is shorthand for the RIF of the first next hop.
 * NOTE(review): members used elsewhere in this file (priv, adj_index,
 * ecmp_size, count) are declared on lines not visible in this excerpt.
 */
2493 struct mlxsw_sp_nexthop_group {
2495 struct rhash_head ht_node;
2496 struct list_head fib_list; /* list of fib entries that use this group */
2497 struct neigh_table *neigh_tbl;
2498 u8 adj_index_valid:1,
2499 gateway:1; /* routes using the group use a gateway */
2503 int sum_norm_weight;
2504 struct mlxsw_sp_nexthop nexthops[0];
2505 #define nh_rif nexthops[0].rif
/* Best-effort allocation of a flow counter for next hop @nh.
 * counter_valid is set only after devlink reports counters enabled on the
 * adjacency dpipe table and mlxsw_sp_flow_counter_alloc() returned zero;
 * failures are not reported to the caller.
 * NOTE(review): the early returns after the two checks are on lines not
 * visible in this excerpt.
 */
2508 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2509 struct mlxsw_sp_nexthop *nh)
2511 struct devlink *devlink;
2513 devlink = priv_to_devlink(mlxsw_sp->core);
2514 if (!devlink_dpipe_table_counter_enabled(devlink,
2515 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2518 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2521 nh->counter_valid = true;
/* Release the flow counter of next hop @nh, if it has one, and clear
 * counter_valid so that a repeated call does not free it again.
 */
2524 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2525 struct mlxsw_sp_nexthop *nh)
2527 if (!nh->counter_valid)
2529 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2530 nh->counter_valid = false;
/* Read the flow counter of next hop @nh into @p_counter.
 * Next hops without a valid counter are rejected up front; otherwise the
 * result of mlxsw_sp_flow_counter_get() is returned.
 * NOTE(review): the error value for the no-counter case is on a line not
 * visible in this excerpt.
 */
2533 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2534 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2536 if (!nh->counter_valid)
2539 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2543 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2544 struct mlxsw_sp_nexthop *nh)
2547 if (list_empty(&router->nexthop_list))
2550 return list_first_entry(&router->nexthop_list,
2551 typeof(*nh), router_list_node);
2553 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2555 return list_next_entry(nh, router_list_node);
2558 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2560 return nh->offloaded;
2563 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2567 return nh->neigh_entry->ha;
2570 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2571 u32 *p_adj_size, u32 *p_adj_hash_index)
2573 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2574 u32 adj_hash_index = 0;
2577 if (!nh->offloaded || !nh_grp->adj_index_valid)
2580 *p_adj_index = nh_grp->adj_index;
2581 *p_adj_size = nh_grp->ecmp_size;
2583 for (i = 0; i < nh_grp->count; i++) {
2584 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2588 if (nh_iter->offloaded)
2589 adj_hash_index += nh_iter->num_adj_entries;
2592 *p_adj_hash_index = adj_hash_index;
2596 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2601 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2603 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2606 for (i = 0; i < nh_grp->count; i++) {
2607 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2609 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2615 static struct fib_info *
2616 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2618 return nh_grp->priv;
2621 struct mlxsw_sp_nexthop_group_cmp_arg {
2622 enum mlxsw_sp_l3proto proto;
2624 struct fib_info *fi;
2625 struct mlxsw_sp_fib6_entry *fib6_entry;
2630 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2631 const struct in6_addr *gw, int ifindex)
2635 for (i = 0; i < nh_grp->count; i++) {
2636 const struct mlxsw_sp_nexthop *nh;
2638 nh = &nh_grp->nexthops[i];
2639 if (nh->ifindex == ifindex &&
2640 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2648 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2649 const struct mlxsw_sp_fib6_entry *fib6_entry)
2651 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2653 if (nh_grp->count != fib6_entry->nrt6)
2656 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2657 struct in6_addr *gw;
2660 ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2661 gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2662 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2670 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2672 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2673 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2675 switch (cmp_arg->proto) {
2676 case MLXSW_SP_L3_PROTO_IPV4:
2677 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2678 case MLXSW_SP_L3_PROTO_IPV6:
2679 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2680 cmp_arg->fib6_entry);
2688 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2690 return nh_grp->neigh_tbl->family;
2693 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2695 const struct mlxsw_sp_nexthop_group *nh_grp = data;
2696 const struct mlxsw_sp_nexthop *nh;
2697 struct fib_info *fi;
2701 switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2703 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2704 return jhash(&fi, sizeof(fi), seed);
2706 val = nh_grp->count;
2707 for (i = 0; i < nh_grp->count; i++) {
2708 nh = &nh_grp->nexthops[i];
2711 return jhash(&val, sizeof(val), seed);
2719 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2721 unsigned int val = fib6_entry->nrt6;
2722 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2723 struct net_device *dev;
2725 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2726 dev = mlxsw_sp_rt6->rt->dst.dev;
2727 val ^= dev->ifindex;
2730 return jhash(&val, sizeof(val), seed);
2734 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2736 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2738 switch (cmp_arg->proto) {
2739 case MLXSW_SP_L3_PROTO_IPV4:
2740 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2741 case MLXSW_SP_L3_PROTO_IPV6:
2742 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2749 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2750 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2751 .hashfn = mlxsw_sp_nexthop_group_hash,
2752 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
2753 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
2756 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2757 struct mlxsw_sp_nexthop_group *nh_grp)
2759 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2763 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2765 mlxsw_sp_nexthop_group_ht_params);
2768 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2769 struct mlxsw_sp_nexthop_group *nh_grp)
2771 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2775 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2777 mlxsw_sp_nexthop_group_ht_params);
2780 static struct mlxsw_sp_nexthop_group *
2781 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2782 struct fib_info *fi)
2784 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2786 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2788 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2790 mlxsw_sp_nexthop_group_ht_params);
2793 static struct mlxsw_sp_nexthop_group *
2794 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2795 struct mlxsw_sp_fib6_entry *fib6_entry)
2797 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2799 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2800 cmp_arg.fib6_entry = fib6_entry;
2801 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2803 mlxsw_sp_nexthop_group_ht_params);
2806 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2807 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2808 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2809 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
2812 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2813 struct mlxsw_sp_nexthop *nh)
2815 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2816 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2819 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2820 struct mlxsw_sp_nexthop *nh)
2822 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2823 mlxsw_sp_nexthop_ht_params);
2826 static struct mlxsw_sp_nexthop *
2827 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2828 struct mlxsw_sp_nexthop_key key)
2830 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2831 mlxsw_sp_nexthop_ht_params);
2834 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2835 const struct mlxsw_sp_fib *fib,
2836 u32 adj_index, u16 ecmp_size,
2840 char raleu_pl[MLXSW_REG_RALEU_LEN];
2842 mlxsw_reg_raleu_pack(raleu_pl,
2843 (enum mlxsw_reg_ralxx_protocol) fib->proto,
2844 fib->vr->id, adj_index, ecmp_size, new_adj_index,
2846 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2849 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2850 struct mlxsw_sp_nexthop_group *nh_grp,
2851 u32 old_adj_index, u16 old_ecmp_size)
2853 struct mlxsw_sp_fib_entry *fib_entry;
2854 struct mlxsw_sp_fib *fib = NULL;
2857 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2858 if (fib == fib_entry->fib_node->fib)
2860 fib = fib_entry->fib_node->fib;
2861 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2872 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2873 struct mlxsw_sp_nexthop *nh)
2875 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2876 char ratr_pl[MLXSW_REG_RATR_LEN];
2878 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2879 true, MLXSW_REG_RATR_TYPE_ETHERNET,
2880 adj_index, neigh_entry->rif);
2881 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2882 if (nh->counter_valid)
2883 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2885 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2887 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2890 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2891 struct mlxsw_sp_nexthop *nh)
2895 for (i = 0; i < nh->num_adj_entries; i++) {
2898 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2906 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2908 struct mlxsw_sp_nexthop *nh)
2910 const struct mlxsw_sp_ipip_ops *ipip_ops;
2912 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2913 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2916 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2918 struct mlxsw_sp_nexthop *nh)
2922 for (i = 0; i < nh->num_adj_entries; i++) {
2925 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2935 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2936 struct mlxsw_sp_nexthop_group *nh_grp,
2939 u32 adj_index = nh_grp->adj_index; /* base */
2940 struct mlxsw_sp_nexthop *nh;
2944 for (i = 0; i < nh_grp->count; i++) {
2945 nh = &nh_grp->nexthops[i];
2947 if (!nh->should_offload) {
2952 if (nh->update || reallocate) {
2954 case MLXSW_SP_NEXTHOP_TYPE_ETH:
2955 err = mlxsw_sp_nexthop_update
2956 (mlxsw_sp, adj_index, nh);
2958 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2959 err = mlxsw_sp_nexthop_ipip_update
2960 (mlxsw_sp, adj_index, nh);
2968 adj_index += nh->num_adj_entries;
2974 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2975 const struct mlxsw_sp_fib_entry *fib_entry);
2978 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2979 struct mlxsw_sp_nexthop_group *nh_grp)
2981 struct mlxsw_sp_fib_entry *fib_entry;
2984 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2985 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2988 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* Forward declaration; needed by the nexthop group code below. */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);
3000 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3002 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3003 struct mlxsw_sp_fib_entry *fib_entry;
3005 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3006 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3009 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3013 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3015 /* Valid sizes for an adjacency group are:
3016 * 1-64, 512, 1024, 2048 and 4096.
3018 if (*p_adj_grp_size <= 64)
3020 else if (*p_adj_grp_size <= 512)
3021 *p_adj_grp_size = 512;
3022 else if (*p_adj_grp_size <= 1024)
3023 *p_adj_grp_size = 1024;
3024 else if (*p_adj_grp_size <= 2048)
3025 *p_adj_grp_size = 2048;
3027 *p_adj_grp_size = 4096;
3030 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3031 unsigned int alloc_size)
3033 if (alloc_size >= 4096)
3034 *p_adj_grp_size = 4096;
3035 else if (alloc_size >= 2048)
3036 *p_adj_grp_size = 2048;
3037 else if (alloc_size >= 1024)
3038 *p_adj_grp_size = 1024;
3039 else if (alloc_size >= 512)
3040 *p_adj_grp_size = 512;
3043 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3044 u16 *p_adj_grp_size)
3046 unsigned int alloc_size;
3049 /* Round up the requested group size to the next size supported
3050 * by the device and make sure the request can be satisfied.
3052 mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3053 err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3057 /* It is possible the allocation results in more allocated
3058 * entries than requested. Try to use as much of them as
3061 mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3067 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3069 int i, g = 0, sum_norm_weight = 0;
3070 struct mlxsw_sp_nexthop *nh;
3072 for (i = 0; i < nh_grp->count; i++) {
3073 nh = &nh_grp->nexthops[i];
3075 if (!nh->should_offload)
3078 g = gcd(nh->nh_weight, g);
3083 for (i = 0; i < nh_grp->count; i++) {
3084 nh = &nh_grp->nexthops[i];
3086 if (!nh->should_offload)
3088 nh->norm_nh_weight = nh->nh_weight / g;
3089 sum_norm_weight += nh->norm_nh_weight;
3092 nh_grp->sum_norm_weight = sum_norm_weight;
3096 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3098 int total = nh_grp->sum_norm_weight;
3099 u16 ecmp_size = nh_grp->ecmp_size;
3100 int i, weight = 0, lower_bound = 0;
3102 for (i = 0; i < nh_grp->count; i++) {
3103 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3106 if (!nh->should_offload)
3108 weight += nh->norm_nh_weight;
3109 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3110 nh->num_adj_entries = upper_bound - lower_bound;
3111 lower_bound = upper_bound;
3116 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3117 struct mlxsw_sp_nexthop_group *nh_grp)
3119 u16 ecmp_size, old_ecmp_size;
3120 struct mlxsw_sp_nexthop *nh;
3121 bool offload_change = false;
3123 bool old_adj_index_valid;
3128 if (!nh_grp->gateway) {
3129 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3133 for (i = 0; i < nh_grp->count; i++) {
3134 nh = &nh_grp->nexthops[i];
3136 if (nh->should_offload != nh->offloaded) {
3137 offload_change = true;
3138 if (nh->should_offload)
3142 if (!offload_change) {
3143 /* Nothing was added or removed, so no need to reallocate. Just
3144 * update MAC on existing adjacency indexes.
3146 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3148 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3153 mlxsw_sp_nexthop_group_normalize(nh_grp);
3154 if (!nh_grp->sum_norm_weight)
3155 /* No neigh of this group is connected so we just set
3156 * the trap and let everthing flow through kernel.
3160 ecmp_size = nh_grp->sum_norm_weight;
3161 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3163 /* No valid allocation size available. */
3166 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3168 /* We ran out of KVD linear space, just set the
3169 * trap and let everything flow through kernel.
3171 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3174 old_adj_index_valid = nh_grp->adj_index_valid;
3175 old_adj_index = nh_grp->adj_index;
3176 old_ecmp_size = nh_grp->ecmp_size;
3177 nh_grp->adj_index_valid = 1;
3178 nh_grp->adj_index = adj_index;
3179 nh_grp->ecmp_size = ecmp_size;
3180 mlxsw_sp_nexthop_group_rebalance(nh_grp);
3181 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3183 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3187 if (!old_adj_index_valid) {
3188 /* The trap was set for fib entries, so we have to call
3189 * fib entry update to unset it and use adjacency index.
3191 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3193 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3199 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3200 old_adj_index, old_ecmp_size);
3201 mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3203 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3207 /* Offload state within the group changed, so update the flags. */
3208 mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3213 old_adj_index_valid = nh_grp->adj_index_valid;
3214 nh_grp->adj_index_valid = 0;
3215 for (i = 0; i < nh_grp->count; i++) {
3216 nh = &nh_grp->nexthops[i];
3219 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3221 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3222 if (old_adj_index_valid)
3223 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3226 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3230 nh->should_offload = 1;
3231 else if (nh->offloaded)
3232 nh->should_offload = 0;
3237 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3238 struct mlxsw_sp_neigh_entry *neigh_entry,
3241 struct mlxsw_sp_nexthop *nh;
3243 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3245 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3246 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3250 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3251 struct mlxsw_sp_rif *rif)
3257 list_add(&nh->rif_list_node, &rif->nexthop_list);
3260 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3265 list_del(&nh->rif_list_node);
3269 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3270 struct mlxsw_sp_nexthop *nh)
3272 struct mlxsw_sp_neigh_entry *neigh_entry;
3273 struct neighbour *n;
3277 if (!nh->nh_grp->gateway || nh->neigh_entry)
3280 /* Take a reference of neigh here ensuring that neigh would
3281 * not be destructed before the nexthop entry is finished.
3282 * The reference is taken either in neigh_lookup() or
3283 * in neigh_create() in case n is not found.
3285 n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3287 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3291 neigh_event_send(n, NULL);
3293 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3295 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3296 if (IS_ERR(neigh_entry)) {
3298 goto err_neigh_entry_create;
3302 /* If that is the first nexthop connected to that neigh, add to
3303 * nexthop_neighs_list
3305 if (list_empty(&neigh_entry->nexthop_list))
3306 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3307 &mlxsw_sp->router->nexthop_neighs_list);
3309 nh->neigh_entry = neigh_entry;
3310 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3311 read_lock_bh(&n->lock);
3312 nud_state = n->nud_state;
3314 read_unlock_bh(&n->lock);
3315 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3319 err_neigh_entry_create:
3324 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3325 struct mlxsw_sp_nexthop *nh)
3327 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3328 struct neighbour *n;
3332 n = neigh_entry->key.n;
3334 __mlxsw_sp_nexthop_neigh_update(nh, true);
3335 list_del(&nh->neigh_list_node);
3336 nh->neigh_entry = NULL;
3338 /* If that is the last nexthop connected to that neigh, remove from
3339 * nexthop_neighs_list
3341 if (list_empty(&neigh_entry->nexthop_list))
3342 list_del(&neigh_entry->nexthop_neighs_list_node);
3344 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3345 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3350 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3352 struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3354 return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3357 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3358 struct mlxsw_sp_nexthop *nh,
3359 struct mlxsw_sp_ipip_entry *ipip_entry)
3363 if (!nh->nh_grp->gateway || nh->ipip_entry)
3366 nh->ipip_entry = ipip_entry;
3367 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3368 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3369 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3372 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3373 struct mlxsw_sp_nexthop *nh)
3375 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3380 __mlxsw_sp_nexthop_neigh_update(nh, true);
3381 nh->ipip_entry = NULL;
3384 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3385 const struct fib_nh *fib_nh,
3386 enum mlxsw_sp_ipip_type *p_ipipt)
3388 struct net_device *dev = fib_nh->nh_dev;
3391 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3392 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3395 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3396 struct mlxsw_sp_nexthop *nh)
3399 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3400 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3401 mlxsw_sp_nexthop_rif_fini(nh);
3403 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3404 mlxsw_sp_nexthop_rif_fini(nh);
3405 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3410 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3411 struct mlxsw_sp_nexthop *nh,
3412 struct fib_nh *fib_nh)
3414 const struct mlxsw_sp_ipip_ops *ipip_ops;
3415 struct net_device *dev = fib_nh->nh_dev;
3416 struct mlxsw_sp_ipip_entry *ipip_entry;
3417 struct mlxsw_sp_rif *rif;
3420 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3422 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3423 if (ipip_ops->can_offload(mlxsw_sp, dev,
3424 MLXSW_SP_L3_PROTO_IPV4)) {
3425 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3426 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3431 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3432 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3436 mlxsw_sp_nexthop_rif_init(nh, rif);
3437 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3439 goto err_neigh_init;
3444 mlxsw_sp_nexthop_rif_fini(nh);
/* IPv4 wrapper around mlxsw_sp_nexthop_type_fini(). */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3454 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3455 struct mlxsw_sp_nexthop_group *nh_grp,
3456 struct mlxsw_sp_nexthop *nh,
3457 struct fib_nh *fib_nh)
3459 struct net_device *dev = fib_nh->nh_dev;
3460 struct in_device *in_dev;
3463 nh->nh_grp = nh_grp;
3464 nh->key.fib_nh = fib_nh;
3465 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3466 nh->nh_weight = fib_nh->nh_weight;
3470 memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3471 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3475 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3476 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3481 in_dev = __in_dev_get_rtnl(dev);
3482 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3483 fib_nh->nh_flags & RTNH_F_LINKDOWN)
3486 err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3488 goto err_nexthop_neigh_init;
3492 err_nexthop_neigh_init:
3493 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3497 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3498 struct mlxsw_sp_nexthop *nh)
3500 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3501 list_del(&nh->router_list_node);
3502 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3503 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3506 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3507 unsigned long event, struct fib_nh *fib_nh)
3509 struct mlxsw_sp_nexthop_key key;
3510 struct mlxsw_sp_nexthop *nh;
3512 if (mlxsw_sp->router->aborted)
3515 key.fib_nh = fib_nh;
3516 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3517 if (WARN_ON_ONCE(!nh))
3521 case FIB_EVENT_NH_ADD:
3522 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3524 case FIB_EVENT_NH_DEL:
3525 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3529 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3532 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3533 struct mlxsw_sp_rif *rif)
3535 struct mlxsw_sp_nexthop *nh;
3538 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3540 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3543 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3544 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3551 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3552 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3556 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3557 struct mlxsw_sp_rif *old_rif,
3558 struct mlxsw_sp_rif *new_rif)
3560 struct mlxsw_sp_nexthop *nh;
3562 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3563 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3565 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3568 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3569 struct mlxsw_sp_rif *rif)
3571 struct mlxsw_sp_nexthop *nh, *tmp;
3573 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3574 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3575 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3579 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3580 const struct fib_info *fi)
3582 return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3583 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3586 static struct mlxsw_sp_nexthop_group *
3587 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3589 struct mlxsw_sp_nexthop_group *nh_grp;
3590 struct mlxsw_sp_nexthop *nh;
3591 struct fib_nh *fib_nh;
3596 alloc_size = sizeof(*nh_grp) +
3597 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3598 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3600 return ERR_PTR(-ENOMEM);
3602 INIT_LIST_HEAD(&nh_grp->fib_list);
3603 nh_grp->neigh_tbl = &arp_tbl;
3605 nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3606 nh_grp->count = fi->fib_nhs;
3608 for (i = 0; i < nh_grp->count; i++) {
3609 nh = &nh_grp->nexthops[i];
3610 fib_nh = &fi->fib_nh[i];
3611 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3613 goto err_nexthop4_init;
3615 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3617 goto err_nexthop_group_insert;
3618 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3621 err_nexthop_group_insert:
3623 for (i--; i >= 0; i--) {
3624 nh = &nh_grp->nexthops[i];
3625 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3629 return ERR_PTR(err);
3633 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3634 struct mlxsw_sp_nexthop_group *nh_grp)
3636 struct mlxsw_sp_nexthop *nh;
3639 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3640 for (i = 0; i < nh_grp->count; i++) {
3641 nh = &nh_grp->nexthops[i];
3642 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3644 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3645 WARN_ON_ONCE(nh_grp->adj_index_valid);
3646 fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3650 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3651 struct mlxsw_sp_fib_entry *fib_entry,
3652 struct fib_info *fi)
3654 struct mlxsw_sp_nexthop_group *nh_grp;
3656 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3658 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3660 return PTR_ERR(nh_grp);
3662 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3663 fib_entry->nh_group = nh_grp;
3667 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3668 struct mlxsw_sp_fib_entry *fib_entry)
3670 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3672 list_del(&fib_entry->nexthop_group_node);
3673 if (!list_empty(&nh_grp->fib_list))
3675 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3679 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3681 struct mlxsw_sp_fib4_entry *fib4_entry;
3683 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3685 return !fib4_entry->tos;
3689 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3691 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3693 switch (fib_entry->fib_node->fib->proto) {
3694 case MLXSW_SP_L3_PROTO_IPV4:
3695 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3698 case MLXSW_SP_L3_PROTO_IPV6:
3702 switch (fib_entry->type) {
3703 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3704 return !!nh_group->adj_index_valid;
3705 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3706 return !!nh_group->nh_rif;
3707 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3714 static struct mlxsw_sp_nexthop *
3715 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3716 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3720 for (i = 0; i < nh_grp->count; i++) {
3721 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3722 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3724 if (nh->rif && nh->rif->dev == rt->dst.dev &&
3725 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3735 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3737 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3740 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3741 fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3742 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3746 for (i = 0; i < nh_grp->count; i++) {
3747 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3750 nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3752 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3757 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3759 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3762 for (i = 0; i < nh_grp->count; i++) {
3763 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3765 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3770 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3772 struct mlxsw_sp_fib6_entry *fib6_entry;
3773 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3775 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3778 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3779 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3780 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3784 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3785 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3786 struct mlxsw_sp_nexthop *nh;
3788 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3789 if (nh && nh->offloaded)
3790 mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3792 mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3797 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3799 struct mlxsw_sp_fib6_entry *fib6_entry;
3800 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3802 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3804 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3805 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3807 rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3811 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3813 switch (fib_entry->fib_node->fib->proto) {
3814 case MLXSW_SP_L3_PROTO_IPV4:
3815 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3817 case MLXSW_SP_L3_PROTO_IPV6:
3818 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3824 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3826 switch (fib_entry->fib_node->fib->proto) {
3827 case MLXSW_SP_L3_PROTO_IPV4:
3828 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3830 case MLXSW_SP_L3_PROTO_IPV6:
3831 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3837 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3838 enum mlxsw_reg_ralue_op op, int err)
3841 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3842 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3843 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3846 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3847 mlxsw_sp_fib_entry_offload_set(fib_entry);
3849 mlxsw_sp_fib_entry_offload_unset(fib_entry);
3857 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3858 const struct mlxsw_sp_fib_entry *fib_entry,
3859 enum mlxsw_reg_ralue_op op)
3861 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3862 enum mlxsw_reg_ralxx_protocol proto;
3865 proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3867 switch (fib->proto) {
3868 case MLXSW_SP_L3_PROTO_IPV4:
3869 p_dip = (u32 *) fib_entry->fib_node->key.addr;
3870 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3871 fib_entry->fib_node->key.prefix_len,
3874 case MLXSW_SP_L3_PROTO_IPV6:
3875 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3876 fib_entry->fib_node->key.prefix_len,
3877 fib_entry->fib_node->key.addr);
3882 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3883 struct mlxsw_sp_fib_entry *fib_entry,
3884 enum mlxsw_reg_ralue_op op)
3886 char ralue_pl[MLXSW_REG_RALUE_LEN];
3887 enum mlxsw_reg_ralue_trap_action trap_action;
3889 u32 adjacency_index = 0;
3892 /* In case the nexthop group adjacency index is valid, use it
3893 * with provided ECMP size. Otherwise, setup trap and pass
3894 * traffic to kernel.
3896 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3897 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3898 adjacency_index = fib_entry->nh_group->adj_index;
3899 ecmp_size = fib_entry->nh_group->ecmp_size;
3901 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3902 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3905 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3906 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3907 adjacency_index, ecmp_size);
3908 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3911 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3912 struct mlxsw_sp_fib_entry *fib_entry,
3913 enum mlxsw_reg_ralue_op op)
3915 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3916 enum mlxsw_reg_ralue_trap_action trap_action;
3917 char ralue_pl[MLXSW_REG_RALUE_LEN];
3921 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3922 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3923 rif_index = rif->rif_index;
3925 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3926 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3929 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3930 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3932 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3935 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3936 struct mlxsw_sp_fib_entry *fib_entry,
3937 enum mlxsw_reg_ralue_op op)
3939 char ralue_pl[MLXSW_REG_RALUE_LEN];
3941 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3942 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3943 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3947 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3948 struct mlxsw_sp_fib_entry *fib_entry,
3949 enum mlxsw_reg_ralue_op op)
3951 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3952 const struct mlxsw_sp_ipip_ops *ipip_ops;
3954 if (WARN_ON(!ipip_entry))
3957 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3958 return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3959 fib_entry->decap.tunnel_index);
3962 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3963 struct mlxsw_sp_fib_entry *fib_entry,
3964 enum mlxsw_reg_ralue_op op)
3966 switch (fib_entry->type) {
3967 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3968 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3969 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3970 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3971 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3972 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3973 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3974 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3980 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3981 struct mlxsw_sp_fib_entry *fib_entry,
3982 enum mlxsw_reg_ralue_op op)
3984 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3986 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3991 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3992 struct mlxsw_sp_fib_entry *fib_entry)
3994 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3995 MLXSW_REG_RALUE_OP_WRITE_WRITE);
3998 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
3999 struct mlxsw_sp_fib_entry *fib_entry)
4001 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4002 MLXSW_REG_RALUE_OP_WRITE_DELETE);
4006 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4007 const struct fib_entry_notifier_info *fen_info,
4008 struct mlxsw_sp_fib_entry *fib_entry)
4010 union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4011 struct net_device *dev = fen_info->fi->fib_dev;
4012 struct mlxsw_sp_ipip_entry *ipip_entry;
4013 struct fib_info *fi = fen_info->fi;
4015 switch (fen_info->type) {
4017 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4018 MLXSW_SP_L3_PROTO_IPV4, dip);
4019 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4020 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4021 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4027 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4029 case RTN_UNREACHABLE: /* fall through */
4030 case RTN_BLACKHOLE: /* fall through */
4032 /* Packets hitting these routes need to be trapped, but
4033 * can do so with a lower priority than packets directed
4034 * at the host, so use action type local instead of trap.
4036 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4039 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4040 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4042 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4049 static struct mlxsw_sp_fib4_entry *
4050 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4051 struct mlxsw_sp_fib_node *fib_node,
4052 const struct fib_entry_notifier_info *fen_info)
4054 struct mlxsw_sp_fib4_entry *fib4_entry;
4055 struct mlxsw_sp_fib_entry *fib_entry;
4058 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4060 return ERR_PTR(-ENOMEM);
4061 fib_entry = &fib4_entry->common;
4063 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4065 goto err_fib4_entry_type_set;
4067 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4069 goto err_nexthop4_group_get;
4071 fib4_entry->prio = fen_info->fi->fib_priority;
4072 fib4_entry->tb_id = fen_info->tb_id;
4073 fib4_entry->type = fen_info->type;
4074 fib4_entry->tos = fen_info->tos;
4076 fib_entry->fib_node = fib_node;
4080 err_nexthop4_group_get:
4081 err_fib4_entry_type_set:
4083 return ERR_PTR(err);
4086 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4087 struct mlxsw_sp_fib4_entry *fib4_entry)
4089 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4093 static struct mlxsw_sp_fib4_entry *
4094 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4095 const struct fib_entry_notifier_info *fen_info)
4097 struct mlxsw_sp_fib4_entry *fib4_entry;
4098 struct mlxsw_sp_fib_node *fib_node;
4099 struct mlxsw_sp_fib *fib;
4100 struct mlxsw_sp_vr *vr;
4102 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4105 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4107 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4108 sizeof(fen_info->dst),
4113 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4114 if (fib4_entry->tb_id == fen_info->tb_id &&
4115 fib4_entry->tos == fen_info->tos &&
4116 fib4_entry->type == fen_info->type &&
4117 mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4126 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4127 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4128 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4129 .key_len = sizeof(struct mlxsw_sp_fib_key),
4130 .automatic_shrinking = true,
4133 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4134 struct mlxsw_sp_fib_node *fib_node)
4136 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4137 mlxsw_sp_fib_ht_params);
4140 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4141 struct mlxsw_sp_fib_node *fib_node)
4143 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4144 mlxsw_sp_fib_ht_params);
4147 static struct mlxsw_sp_fib_node *
4148 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4149 size_t addr_len, unsigned char prefix_len)
4151 struct mlxsw_sp_fib_key key;
4153 memset(&key, 0, sizeof(key));
4154 memcpy(key.addr, addr, addr_len);
4155 key.prefix_len = prefix_len;
4156 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4159 static struct mlxsw_sp_fib_node *
4160 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4161 size_t addr_len, unsigned char prefix_len)
4163 struct mlxsw_sp_fib_node *fib_node;
4165 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4169 INIT_LIST_HEAD(&fib_node->entry_list);
4170 list_add(&fib_node->list, &fib->node_list);
4171 memcpy(fib_node->key.addr, addr, addr_len);
4172 fib_node->key.prefix_len = prefix_len;
4177 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4179 list_del(&fib_node->list);
4180 WARN_ON(!list_empty(&fib_node->entry_list));
4185 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4186 const struct mlxsw_sp_fib_entry *fib_entry)
4188 return list_first_entry(&fib_node->entry_list,
4189 struct mlxsw_sp_fib_entry, list) == fib_entry;
4192 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4193 struct mlxsw_sp_fib *fib,
4194 struct mlxsw_sp_fib_node *fib_node)
4196 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
4197 struct mlxsw_sp_lpm_tree *lpm_tree;
4200 /* Since the tree is shared between all virtual routers we must
4201 * make sure it contains all the required prefix lengths. This
4202 * can be computed by either adding the new prefix length to the
4203 * existing prefix usage of a bound tree, or by aggregating the
4204 * prefix lengths across all virtual routers and adding the new
4208 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
4209 &fib->lpm_tree->prefix_usage);
4211 mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
4212 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4214 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4216 if (IS_ERR(lpm_tree))
4217 return PTR_ERR(lpm_tree);
4219 if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
4222 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4229 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4230 struct mlxsw_sp_fib *fib)
4232 if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
4234 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
4235 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
4236 fib->lpm_tree = NULL;
4239 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
4241 unsigned char prefix_len = fib_node->key.prefix_len;
4242 struct mlxsw_sp_fib *fib = fib_node->fib;
4244 if (fib->prefix_ref_count[prefix_len]++ == 0)
4245 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
4248 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
4250 unsigned char prefix_len = fib_node->key.prefix_len;
4251 struct mlxsw_sp_fib *fib = fib_node->fib;
4253 if (--fib->prefix_ref_count[prefix_len] == 0)
4254 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
4257 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4258 struct mlxsw_sp_fib_node *fib_node,
4259 struct mlxsw_sp_fib *fib)
4263 err = mlxsw_sp_fib_node_insert(fib, fib_node);
4266 fib_node->fib = fib;
4268 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
4270 goto err_fib_lpm_tree_link;
4272 mlxsw_sp_fib_node_prefix_inc(fib_node);
4276 err_fib_lpm_tree_link:
4277 fib_node->fib = NULL;
4278 mlxsw_sp_fib_node_remove(fib, fib_node);
4282 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4283 struct mlxsw_sp_fib_node *fib_node)
4285 struct mlxsw_sp_fib *fib = fib_node->fib;
4287 mlxsw_sp_fib_node_prefix_dec(fib_node);
4288 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
4289 fib_node->fib = NULL;
4290 mlxsw_sp_fib_node_remove(fib, fib_node);
4293 static struct mlxsw_sp_fib_node *
4294 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4295 size_t addr_len, unsigned char prefix_len,
4296 enum mlxsw_sp_l3proto proto)
4298 struct mlxsw_sp_fib_node *fib_node;
4299 struct mlxsw_sp_fib *fib;
4300 struct mlxsw_sp_vr *vr;
4303 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4305 return ERR_CAST(vr);
4306 fib = mlxsw_sp_vr_fib(vr, proto);
4308 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4312 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4315 goto err_fib_node_create;
4318 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4320 goto err_fib_node_init;
4325 mlxsw_sp_fib_node_destroy(fib_node);
4326 err_fib_node_create:
4327 mlxsw_sp_vr_put(vr);
4328 return ERR_PTR(err);
4331 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4332 struct mlxsw_sp_fib_node *fib_node)
4334 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4336 if (!list_empty(&fib_node->entry_list))
4338 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4339 mlxsw_sp_fib_node_destroy(fib_node);
4340 mlxsw_sp_vr_put(vr);
4343 static struct mlxsw_sp_fib4_entry *
4344 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4345 const struct mlxsw_sp_fib4_entry *new4_entry)
4347 struct mlxsw_sp_fib4_entry *fib4_entry;
4349 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4350 if (fib4_entry->tb_id > new4_entry->tb_id)
4352 if (fib4_entry->tb_id != new4_entry->tb_id)
4354 if (fib4_entry->tos > new4_entry->tos)
4356 if (fib4_entry->prio >= new4_entry->prio ||
4357 fib4_entry->tos < new4_entry->tos)
4365 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4366 struct mlxsw_sp_fib4_entry *new4_entry)
4368 struct mlxsw_sp_fib_node *fib_node;
4370 if (WARN_ON(!fib4_entry))
4373 fib_node = fib4_entry->common.fib_node;
4374 list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4376 if (fib4_entry->tb_id != new4_entry->tb_id ||
4377 fib4_entry->tos != new4_entry->tos ||
4378 fib4_entry->prio != new4_entry->prio)
4382 list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4387 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4388 bool replace, bool append)
4390 struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4391 struct mlxsw_sp_fib4_entry *fib4_entry;
4393 fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4396 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4397 if (replace && WARN_ON(!fib4_entry))
4400 /* Insert new entry before replaced one, so that we can later
4401 * remove the second.
4404 list_add_tail(&new4_entry->common.list,
4405 &fib4_entry->common.list);
4407 struct mlxsw_sp_fib4_entry *last;
4409 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4410 if (new4_entry->tb_id > last->tb_id)
4416 list_add(&new4_entry->common.list,
4417 &fib4_entry->common.list);
4419 list_add(&new4_entry->common.list,
4420 &fib_node->entry_list);
4427 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4429 list_del(&fib4_entry->common.list);
4432 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4433 struct mlxsw_sp_fib_entry *fib_entry)
4435 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4437 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4440 /* To prevent packet loss, overwrite the previously offloaded
4443 if (!list_is_singular(&fib_node->entry_list)) {
4444 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4445 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4447 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4450 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4453 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4454 struct mlxsw_sp_fib_entry *fib_entry)
4456 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4458 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4461 /* Promote the next entry by overwriting the deleted entry */
4462 if (!list_is_singular(&fib_node->entry_list)) {
4463 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4464 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4466 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4467 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4471 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4474 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4475 struct mlxsw_sp_fib4_entry *fib4_entry,
4476 bool replace, bool append)
4480 err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4484 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4486 goto err_fib_node_entry_add;
4490 err_fib_node_entry_add:
4491 mlxsw_sp_fib4_node_list_remove(fib4_entry);
4496 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4497 struct mlxsw_sp_fib4_entry *fib4_entry)
4499 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4500 mlxsw_sp_fib4_node_list_remove(fib4_entry);
4502 if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4503 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4506 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4507 struct mlxsw_sp_fib4_entry *fib4_entry,
4510 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4511 struct mlxsw_sp_fib4_entry *replaced;
4516 /* We inserted the new entry before replaced one */
4517 replaced = list_next_entry(fib4_entry, common.list);
4519 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4520 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4521 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4525 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4526 const struct fib_entry_notifier_info *fen_info,
4527 bool replace, bool append)
4529 struct mlxsw_sp_fib4_entry *fib4_entry;
4530 struct mlxsw_sp_fib_node *fib_node;
4533 if (mlxsw_sp->router->aborted)
4536 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4537 &fen_info->dst, sizeof(fen_info->dst),
4539 MLXSW_SP_L3_PROTO_IPV4);
4540 if (IS_ERR(fib_node)) {
4541 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4542 return PTR_ERR(fib_node);
4545 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4546 if (IS_ERR(fib4_entry)) {
4547 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4548 err = PTR_ERR(fib4_entry);
4549 goto err_fib4_entry_create;
4552 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4555 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4556 goto err_fib4_node_entry_link;
4559 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4563 err_fib4_node_entry_link:
4564 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4565 err_fib4_entry_create:
4566 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4570 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4571 struct fib_entry_notifier_info *fen_info)
4573 struct mlxsw_sp_fib4_entry *fib4_entry;
4574 struct mlxsw_sp_fib_node *fib_node;
4576 if (mlxsw_sp->router->aborted)
4579 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4580 if (WARN_ON(!fib4_entry))
4582 fib_node = fib4_entry->common.fib_node;
4584 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4585 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4586 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4589 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4591 /* Packets with link-local destination IP arriving to the router
4592 * are trapped to the CPU, so no need to program specific routes
4595 if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4598 /* Multicast routes aren't supported, so ignore them. Neighbour
4599 * Discovery packets are specifically trapped.
4601 if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4604 /* Cloned routes are irrelevant in the forwarding path. */
4605 if (rt->rt6i_flags & RTF_CACHE)
4611 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4613 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4615 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4617 return ERR_PTR(-ENOMEM);
4619 /* In case of route replace, replaced route is deleted with
4620 * no notification. Take reference to prevent accessing freed
4623 mlxsw_sp_rt6->rt = rt;
4626 return mlxsw_sp_rt6;
4629 #if IS_ENABLED(CONFIG_IPV6)
4630 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4635 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4640 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4642 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4643 kfree(mlxsw_sp_rt6);
4646 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4648 /* RTF_CACHE routes are ignored */
4649 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4652 static struct rt6_info *
4653 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4655 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4659 static struct mlxsw_sp_fib6_entry *
4660 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4661 const struct rt6_info *nrt, bool replace)
4663 struct mlxsw_sp_fib6_entry *fib6_entry;
4665 if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4668 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4669 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4671 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4674 if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4676 if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4678 if (rt->rt6i_metric < nrt->rt6i_metric)
4680 if (rt->rt6i_metric == nrt->rt6i_metric &&
4681 mlxsw_sp_fib6_rt_can_mp(rt))
4683 if (rt->rt6i_metric > nrt->rt6i_metric)
4690 static struct mlxsw_sp_rt6 *
4691 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4692 const struct rt6_info *rt)
4694 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4696 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4697 if (mlxsw_sp_rt6->rt == rt)
4698 return mlxsw_sp_rt6;
4704 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4705 const struct rt6_info *rt,
4706 enum mlxsw_sp_ipip_type *ret)
4708 return rt->dst.dev &&
4709 mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4712 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4713 struct mlxsw_sp_nexthop_group *nh_grp,
4714 struct mlxsw_sp_nexthop *nh,
4715 const struct rt6_info *rt)
4717 const struct mlxsw_sp_ipip_ops *ipip_ops;
4718 struct mlxsw_sp_ipip_entry *ipip_entry;
4719 struct net_device *dev = rt->dst.dev;
4720 struct mlxsw_sp_rif *rif;
4723 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4725 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4726 if (ipip_ops->can_offload(mlxsw_sp, dev,
4727 MLXSW_SP_L3_PROTO_IPV6)) {
4728 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4729 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4734 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4735 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4738 mlxsw_sp_nexthop_rif_init(nh, rif);
4740 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4742 goto err_nexthop_neigh_init;
4746 err_nexthop_neigh_init:
4747 mlxsw_sp_nexthop_rif_fini(nh);
/* Finalize the type-specific part of an IPv6 nexthop; this simply defers
 * to the common mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4757 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4758 struct mlxsw_sp_nexthop_group *nh_grp,
4759 struct mlxsw_sp_nexthop *nh,
4760 const struct rt6_info *rt)
4762 struct net_device *dev = rt->dst.dev;
4764 nh->nh_grp = nh_grp;
4766 memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4767 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4769 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4773 nh->ifindex = dev->ifindex;
4775 return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4778 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4779 struct mlxsw_sp_nexthop *nh)
4781 mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4782 list_del(&nh->router_list_node);
4783 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4786 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4787 const struct rt6_info *rt)
4789 return rt->rt6i_flags & RTF_GATEWAY ||
4790 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4793 static struct mlxsw_sp_nexthop_group *
4794 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4795 struct mlxsw_sp_fib6_entry *fib6_entry)
4797 struct mlxsw_sp_nexthop_group *nh_grp;
4798 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4799 struct mlxsw_sp_nexthop *nh;
4804 alloc_size = sizeof(*nh_grp) +
4805 fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4806 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4808 return ERR_PTR(-ENOMEM);
4809 INIT_LIST_HEAD(&nh_grp->fib_list);
4810 #if IS_ENABLED(CONFIG_IPV6)
4811 nh_grp->neigh_tbl = &nd_tbl;
4813 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4814 struct mlxsw_sp_rt6, list);
4815 nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4816 nh_grp->count = fib6_entry->nrt6;
4817 for (i = 0; i < nh_grp->count; i++) {
4818 struct rt6_info *rt = mlxsw_sp_rt6->rt;
4820 nh = &nh_grp->nexthops[i];
4821 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4823 goto err_nexthop6_init;
4824 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4827 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4829 goto err_nexthop_group_insert;
4831 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4834 err_nexthop_group_insert:
4836 for (i--; i >= 0; i--) {
4837 nh = &nh_grp->nexthops[i];
4838 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4841 return ERR_PTR(err);
4845 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4846 struct mlxsw_sp_nexthop_group *nh_grp)
4848 struct mlxsw_sp_nexthop *nh;
4849 int i = nh_grp->count;
4851 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4852 for (i--; i >= 0; i--) {
4853 nh = &nh_grp->nexthops[i];
4854 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4856 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4857 WARN_ON(nh_grp->adj_index_valid);
4861 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4862 struct mlxsw_sp_fib6_entry *fib6_entry)
4864 struct mlxsw_sp_nexthop_group *nh_grp;
4866 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4868 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4870 return PTR_ERR(nh_grp);
4873 list_add_tail(&fib6_entry->common.nexthop_group_node,
4875 fib6_entry->common.nh_group = nh_grp;
4880 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4881 struct mlxsw_sp_fib_entry *fib_entry)
4883 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4885 list_del(&fib_entry->nexthop_group_node);
4886 if (!list_empty(&nh_grp->fib_list))
4888 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4892 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4893 struct mlxsw_sp_fib6_entry *fib6_entry)
4895 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4898 fib6_entry->common.nh_group = NULL;
4899 list_del(&fib6_entry->common.nexthop_group_node);
4901 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4903 goto err_nexthop6_group_get;
4905 /* In case this entry is offloaded, then the adjacency index
4906 * currently associated with it in the device's table is that
4907 * of the old group. Start using the new one instead.
4909 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4911 goto err_fib_node_entry_add;
4913 if (list_empty(&old_nh_grp->fib_list))
4914 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4918 err_fib_node_entry_add:
4919 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4920 err_nexthop6_group_get:
4921 list_add_tail(&fib6_entry->common.nexthop_group_node,
4922 &old_nh_grp->fib_list);
4923 fib6_entry->common.nh_group = old_nh_grp;
4928 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4929 struct mlxsw_sp_fib6_entry *fib6_entry,
4930 struct rt6_info *rt)
4932 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4935 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4936 if (IS_ERR(mlxsw_sp_rt6))
4937 return PTR_ERR(mlxsw_sp_rt6);
4939 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4942 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4944 goto err_nexthop6_group_update;
4948 err_nexthop6_group_update:
4950 list_del(&mlxsw_sp_rt6->list);
4951 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4956 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4957 struct mlxsw_sp_fib6_entry *fib6_entry,
4958 struct rt6_info *rt)
4960 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4962 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4963 if (WARN_ON(!mlxsw_sp_rt6))
4967 list_del(&mlxsw_sp_rt6->list);
4968 mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4969 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4972 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4973 struct mlxsw_sp_fib_entry *fib_entry,
4974 const struct rt6_info *rt)
4976 /* Packets hitting RTF_REJECT routes need to be discarded by the
4977 * stack. We can rely on their destination device not having a
4978 * RIF (it's the loopback device) and can thus use action type
4979 * local, which will cause them to be trapped with a lower
4980 * priority than packets that need to be locally received.
4982 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4983 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4984 else if (rt->rt6i_flags & RTF_REJECT)
4985 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4986 else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4987 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4989 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4993 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4995 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4997 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5000 list_del(&mlxsw_sp_rt6->list);
5001 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5005 static struct mlxsw_sp_fib6_entry *
5006 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5007 struct mlxsw_sp_fib_node *fib_node,
5008 struct rt6_info *rt)
5010 struct mlxsw_sp_fib6_entry *fib6_entry;
5011 struct mlxsw_sp_fib_entry *fib_entry;
5012 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5015 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5017 return ERR_PTR(-ENOMEM);
5018 fib_entry = &fib6_entry->common;
5020 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5021 if (IS_ERR(mlxsw_sp_rt6)) {
5022 err = PTR_ERR(mlxsw_sp_rt6);
5023 goto err_rt6_create;
5026 mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5028 INIT_LIST_HEAD(&fib6_entry->rt6_list);
5029 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5030 fib6_entry->nrt6 = 1;
5031 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5033 goto err_nexthop6_group_get;
5035 fib_entry->fib_node = fib_node;
5039 err_nexthop6_group_get:
5040 list_del(&mlxsw_sp_rt6->list);
5041 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5044 return ERR_PTR(err);
5047 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5048 struct mlxsw_sp_fib6_entry *fib6_entry)
5050 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5051 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5052 WARN_ON(fib6_entry->nrt6);
5056 static struct mlxsw_sp_fib6_entry *
5057 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5058 const struct rt6_info *nrt, bool replace)
5060 struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5062 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5063 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5065 if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
5067 if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
5069 if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
5070 if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5071 mlxsw_sp_fib6_rt_can_mp(nrt))
5073 if (mlxsw_sp_fib6_rt_can_mp(nrt))
5074 fallback = fallback ?: fib6_entry;
5076 if (rt->rt6i_metric > nrt->rt6i_metric)
5077 return fallback ?: fib6_entry;
/* Insert @new6_entry into the entry list of its FIB node.
 *
 * The position is looked up with mlxsw_sp_fib6_node_entry_find(). In
 * replace mode a missing match triggers a warning. Three insertion paths
 * are visible: list_add_tail() relative to the found entry (which places
 * the new entry directly before it), list_add() after an entry, and
 * list_add() at the head of the node's list. A scan over the list that
 * compares table IDs precedes the latter two.
 *
 * NOTE(review): the conditions selecting between these paths and the
 * return values are not visible in this extract.
 */
5084 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5087 struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5088 struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5089 struct mlxsw_sp_fib6_entry *fib6_entry;
5091 fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5093 if (replace && WARN_ON(!fib6_entry))
5097 list_add_tail(&new6_entry->common.list,
5098 &fib6_entry->common.list);
5100 struct mlxsw_sp_fib6_entry *last;
5102 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5103 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5105 if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
5111 list_add(&new6_entry->common.list,
5112 &fib6_entry->common.list);
5114 list_add(&new6_entry->common.list,
5115 &fib_node->entry_list);
/* Unlink @fib6_entry from the entry list it is currently on. */
5122 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5124 list_del(&fib6_entry->common.list);
/* Link @fib6_entry to its FIB node: insert it into the node's entry list
 * and then call mlxsw_sp_fib_node_entry_add() for it. If that call fails,
 * the err_fib_node_entry_add label removes the entry from the list again.
 */
5127 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5128 struct mlxsw_sp_fib6_entry *fib6_entry,
5133 err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5137 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5139 goto err_fib_node_entry_add;
5143 err_fib_node_entry_add:
5144 mlxsw_sp_fib6_node_list_remove(fib6_entry);
/* Detach @fib6_entry from its FIB node: call mlxsw_sp_fib_node_entry_del()
 * for it and then remove it from the node's entry list.
 */
5149 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5150 struct mlxsw_sp_fib6_entry *fib6_entry)
5152 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5153 mlxsw_sp_fib6_node_list_remove(fib6_entry);
/* Find the FIB entry that holds route @rt.
 *
 * The virtual router is looked up by the route's table ID, its IPv6 FIB is
 * taken, and the FIB node is looked up by the route's destination address.
 * The node's entries are then scanned for one whose first route has the
 * same table ID and metric as @rt and for which
 * mlxsw_sp_fib6_entry_rt_find() locates @rt.
 *
 * NOTE(review): the failure checks after each lookup and the return
 * statements are not visible in this extract.
 */
5156 static struct mlxsw_sp_fib6_entry *
5157 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5158 const struct rt6_info *rt)
5160 struct mlxsw_sp_fib6_entry *fib6_entry;
5161 struct mlxsw_sp_fib_node *fib_node;
5162 struct mlxsw_sp_fib *fib;
5163 struct mlxsw_sp_vr *vr;
5165 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
5168 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5170 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
5171 sizeof(rt->rt6i_dst.addr),
5176 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5177 struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5179 if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
5180 rt->rt6i_metric == iter_rt->rt6i_metric &&
5181 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
/* Remove the entry superseded by @fib6_entry: the entry that follows
 * @fib6_entry in the node's list is unlinked and destroyed, and the FIB
 * node is put once on its behalf.
 *
 * NOTE(review): the third parameter and any early-return guard are not
 * visible in this extract - presumably nothing is done unless the caller
 * asked for a replace; confirm.
 */
5188 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5189 struct mlxsw_sp_fib6_entry *fib6_entry,
5192 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5193 struct mlxsw_sp_fib6_entry *replaced;
5198 replaced = list_next_entry(fib6_entry, common.list);
5200 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5201 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5202 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Offload IPv6 route @rt; @replace tells whether it replaces an existing
 * route.
 *
 * Three checks come first: the router's aborted flag, a non-zero source
 * prefix length on the route, and mlxsw_sp_fib6_rt_should_ignore(). A FIB
 * node is then obtained for the route's table and destination. The route
 * is either added as a nexthop to an existing multipath entry found on the
 * node, or a new entry is created, linked to the node and handed to
 * mlxsw_sp_fib6_entry_replace().
 *
 * The error labels unwind in reverse order: a linked-but-failed entry is
 * destroyed, and every failure path puts the FIB node.
 *
 * NOTE(review): the values returned by the three early checks and on
 * success are not visible in this extract.
 */
5205 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5206 struct rt6_info *rt, bool replace)
5208 struct mlxsw_sp_fib6_entry *fib6_entry;
5209 struct mlxsw_sp_fib_node *fib_node;
5212 if (mlxsw_sp->router->aborted)
5215 if (rt->rt6i_src.plen)
5218 if (mlxsw_sp_fib6_rt_should_ignore(rt))
5221 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
5223 sizeof(rt->rt6i_dst.addr),
5225 MLXSW_SP_L3_PROTO_IPV6);
5226 if (IS_ERR(fib_node))
5227 return PTR_ERR(fib_node);
5229 /* Before creating a new entry, try to append route to an existing
5232 fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5234 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5236 goto err_fib6_entry_nexthop_add;
5240 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5241 if (IS_ERR(fib6_entry)) {
5242 err = PTR_ERR(fib6_entry);
5243 goto err_fib6_entry_create;
5246 err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5248 goto err_fib6_node_entry_link;
5250 mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5254 err_fib6_node_entry_link:
5255 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5256 err_fib6_entry_create:
5257 err_fib6_entry_nexthop_add:
5258 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Remove IPv6 route @rt from the device.
 *
 * The router's aborted flag and mlxsw_sp_fib6_rt_should_ignore() are
 * checked first. The entry holding @rt is then looked up; not finding it
 * triggers a warning. If the entry's rt6_list holds more than one route,
 * only the nexthop belonging to @rt is removed from the entry. Otherwise
 * the entry is unlinked from its FIB node and destroyed, and the node is
 * put.
 */
5262 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5263 struct rt6_info *rt)
5265 struct mlxsw_sp_fib6_entry *fib6_entry;
5266 struct mlxsw_sp_fib_node *fib_node;
5268 if (mlxsw_sp->router->aborted)
5271 if (mlxsw_sp_fib6_rt_should_ignore(rt))
5274 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5275 if (WARN_ON(!fib6_entry))
5278 /* If route is part of a multipath entry, but not the last one
5279 * removed, then only reduce its nexthop group.
5281 if (!list_is_singular(&fib6_entry->rt6_list)) {
5282 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5286 fib_node = fib6_entry->common.fib_node;
5288 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5289 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5290 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Program the abort configuration for one protocol (@proto) using LPM tree
 * tree_id.
 *
 * Register writes, in order: RALTA packed with 'true' for the tree, RALST
 * packed with 0xff for the tree, and then for every virtual router index
 * below the MAX_VRS resource: RALTB (router ID, protocol, tree) followed by
 * a RALUE write operation with the ip2me action.
 *
 * NOTE(review): the error checks after each write and the return value
 * are not visible in this extract.
 */
5293 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5294 enum mlxsw_reg_ralxx_protocol proto,
5297 char ralta_pl[MLXSW_REG_RALTA_LEN];
5298 char ralst_pl[MLXSW_REG_RALST_LEN];
5301 mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5302 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5306 mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5307 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5311 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5312 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5313 char raltb_pl[MLXSW_REG_RALTB_LEN];
5314 char ralue_pl[MLXSW_REG_RALUE_LEN];
5316 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5317 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5322 mlxsw_reg_ralue_pack(ralue_pl, proto,
5323 MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5324 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5325 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
/* Add the multicast route men_info->mfc to the mr4 table of the virtual
 * router obtained (mlxsw_sp_vr_get()) for table men_info->tb_id. The
 * router's aborted flag is checked first.
 *
 * NOTE(review): no matching mlxsw_sp_vr_put() is visible for the case
 * where mlxsw_sp_mr_route4_add() fails - confirm the reference handling.
 */
5334 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5335 struct mfc_entry_notifier_info *men_info,
5338 struct mlxsw_sp_vr *vr;
5340 if (mlxsw_sp->router->aborted)
5343 vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5347 return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
/* Delete the multicast route men_info->mfc from the mr4 table of the
 * virtual router found for table men_info->tb_id, then put that virtual
 * router. The router's aborted flag is checked first.
 */
5350 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5351 struct mfc_entry_notifier_info *men_info)
5353 struct mlxsw_sp_vr *vr;
5355 if (mlxsw_sp->router->aborted)
5358 vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5362 mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5363 mlxsw_sp_vr_put(vr);
/* Add a multicast VIF to the mr4 table of the virtual router obtained for
 * table ven_info->tb_id. The RIF is looked up by ven_info->dev and passed
 * on together with the device, VIF index and VIF flags. The router's
 * aborted flag is checked first.
 *
 * NOTE(review): no matching mlxsw_sp_vr_put() is visible for the case
 * where mlxsw_sp_mr_vif_add() fails - confirm the reference handling.
 */
5367 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5368 struct vif_entry_notifier_info *ven_info)
5370 struct mlxsw_sp_rif *rif;
5371 struct mlxsw_sp_vr *vr;
5373 if (mlxsw_sp->router->aborted)
5376 vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5380 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5381 return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5382 ven_info->vif_index,
5383 ven_info->vif_flags, rif);
/* Delete the multicast VIF with index ven_info->vif_index from the mr4
 * table of the virtual router found for table ven_info->tb_id, then put
 * that virtual router. The router's aborted flag is checked first.
 */
5387 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5388 struct vif_entry_notifier_info *ven_info)
5390 struct mlxsw_sp_vr *vr;
5392 if (mlxsw_sp->router->aborted)
5395 vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5399 mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5400 mlxsw_sp_vr_put(vr);
/* Install the abort configuration for both unicast protocols: IPv4 on tree
 * MLXSW_SP_LPM_TREE_MIN, then IPv6 on tree MLXSW_SP_LPM_TREE_MIN + 1.
 * Returns the result of the IPv6 call.
 *
 * NOTE(review): the check of the IPv4 result is not visible in this
 * extract.
 */
5403 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5405 enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5408 err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5409 MLXSW_SP_LPM_TREE_MIN);
5413 /* The multicast router code does not need an abort trap as by default,
5414 * packets that don't match any routes are trapped to the CPU.
5417 proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5418 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5419 MLXSW_SP_LPM_TREE_MIN + 1);
/* Remove every IPv4 entry from @fib_node: each entry is unlinked and
 * destroyed, and the node is put once per entry.
 *
 * do_break is computed before the entry is torn down, from whether the
 * saved next element is the list head, i.e. whether this is the last
 * entry. It exists so that the loop can stop without touching the node
 * after the final put.
 */
5422 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5423 struct mlxsw_sp_fib_node *fib_node)
5425 struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5427 list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5429 bool do_break = &tmp->common.list == &fib_node->entry_list;
5431 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5432 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5433 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5434 /* Break when entry list is empty and node was freed.
5435 * Otherwise, we'll access freed memory in the next
/* Remove every IPv6 entry from @fib_node: each entry is unlinked and
 * destroyed, and the node is put once per entry.
 *
 * do_break records, before the entry is torn down, whether the saved next
 * element is the list head, i.e. whether this is the last entry.
 * NOTE(review): the use of do_break is not visible in this extract -
 * presumably the loop is left once it is set; confirm.
 */
5443 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5444 struct mlxsw_sp_fib_node *fib_node)
5446 struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5448 list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5450 bool do_break = &tmp->common.list == &fib_node->entry_list;
5452 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5453 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5454 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Flush @fib_node with the protocol specific helper, selected by the
 * protocol of the FIB the node belongs to.
 */
5460 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5461 struct mlxsw_sp_fib_node *fib_node)
5463 switch (fib_node->fib->proto) {
5464 case MLXSW_SP_L3_PROTO_IPV4:
5465 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5467 case MLXSW_SP_L3_PROTO_IPV6:
5468 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
/* Flush every node of the FIB that virtual router @vr keeps for @proto.
 *
 * do_break records, before a node is flushed, whether the saved next
 * element is the head of the FIB's node list, i.e. whether this is the
 * last node. NOTE(review): the use of do_break is not visible in this
 * extract - presumably the loop is left once it is set; confirm.
 */
5473 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5474 struct mlxsw_sp_vr *vr,
5475 enum mlxsw_sp_l3proto proto)
5477 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5478 struct mlxsw_sp_fib_node *fib_node, *tmp;
5480 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5481 bool do_break = &tmp->list == &fib->node_list;
5483 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
/* Flush all offloaded routes. For every virtual router index below the
 * MAX_VRS resource that is in use: flush its mr4 table, flush its IPv4
 * FIB, and - after checking again whether the router is still in use -
 * flush its IPv6 FIB.
 */
5489 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5493 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5494 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5496 if (!mlxsw_sp_vr_is_used(vr))
5499 mlxsw_sp_mr_table_flush(vr->mr4_table);
5500 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5502 /* If virtual router was only used for IPv4, then it's no
5505 if (!mlxsw_sp_vr_is_used(vr))
5507 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
/* Stop offloading FIB entries. Unless the router is already marked as
 * aborted, a warning is logged, all offloaded routes are flushed, the
 * aborted flag is set and the abort trap is installed. A second warning is
 * logged in connection with installing the trap (its guarding check is
 * not visible in this extract).
 */
5511 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5515 if (mlxsw_sp->router->aborted)
5517 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5518 mlxsw_sp_router_fib_flush(mlxsw_sp);
5519 mlxsw_sp->router->aborted = true;
5520 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5522 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
/* Work item used to process a FIB notification later, from a work queue.
 * Holds one notifier info member per info type (IPv6 entry, IPv4 entry,
 * rule, nexthop, multicast route, multicast VIF), the driver instance and
 * the event code.
 *
 * NOTE(review): the line(s) wrapping the info members are not visible in
 * this extract - presumably they form a union; confirm.
 */
5525 struct mlxsw_sp_fib_event_work {
5526 struct work_struct work;
5528 struct fib6_entry_notifier_info fen6_info;
5529 struct fib_entry_notifier_info fen_info;
5530 struct fib_rule_notifier_info fr_info;
5531 struct fib_nh_notifier_info fnh_info;
5532 struct mfc_entry_notifier_info men_info;
5533 struct vif_entry_notifier_info ven_info;
5535 struct mlxsw_sp *mlxsw_sp;
5536 unsigned long event;
/* Work handler for queued IPv4 FIB events.
 *
 * REPLACE / APPEND / ADD: add the route; the abort path is invoked in
 * connection with the result (the guarding check is not visible here);
 * the fib_info reference is then put.
 * DEL: delete the route and put the fib_info reference.
 * RULE_ADD: abort FIB offload.
 * NH_ADD / NH_DEL: pass the nexthop event on and put the reference on the
 * nexthop's parent fib_info.
 */
5539 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5541 struct mlxsw_sp_fib_event_work *fib_work =
5542 container_of(work, struct mlxsw_sp_fib_event_work, work);
5543 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5544 bool replace, append;
5547 /* Protect internal structures from changes */
5549 switch (fib_work->event) {
5550 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5551 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5552 case FIB_EVENT_ENTRY_ADD:
5553 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5554 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5555 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5558 mlxsw_sp_router_fib_abort(mlxsw_sp);
5559 fib_info_put(fib_work->fen_info.fi);
5561 case FIB_EVENT_ENTRY_DEL:
5562 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5563 fib_info_put(fib_work->fen_info.fi);
5565 case FIB_EVENT_RULE_ADD:
5566 /* if we get here, a rule was added that we do not support.
5567 * just do the fib_abort
5569 mlxsw_sp_router_fib_abort(mlxsw_sp);
5571 case FIB_EVENT_NH_ADD: /* fall through */
5572 case FIB_EVENT_NH_DEL:
5573 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5574 fib_work->fnh_info.fib_nh);
5575 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
/* Work handler for queued IPv6 FIB events.
 *
 * REPLACE / ADD: add the route; the abort path is invoked in connection
 * with the result (the guarding check is not visible here); the route is
 * then released.
 * DEL: delete the route and release it.
 * RULE_ADD: abort FIB offload.
 */
5582 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5584 struct mlxsw_sp_fib_event_work *fib_work =
5585 container_of(work, struct mlxsw_sp_fib_event_work, work);
5586 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5591 switch (fib_work->event) {
5592 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5593 case FIB_EVENT_ENTRY_ADD:
5594 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5595 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5596 fib_work->fen6_info.rt, replace);
5598 mlxsw_sp_router_fib_abort(mlxsw_sp);
5599 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5601 case FIB_EVENT_ENTRY_DEL:
5602 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5603 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5605 case FIB_EVENT_RULE_ADD:
5606 /* if we get here, a rule was added that we do not support.
5607 * just do the fib_abort
5609 mlxsw_sp_router_fib_abort(mlxsw_sp);
/* Work handler for queued multicast routing events.
 *
 * REPLACE / ADD: add the multicast route; the abort path is invoked in
 * connection with the result (the guarding check is not visible here);
 * the MFC cache reference is then put.
 * DEL: delete the multicast route and put the MFC cache reference.
 * VIF_ADD: add the VIF (with the same abort handling) and put the device
 * reference.
 * VIF_DEL: delete the VIF and put the device reference.
 * RULE_ADD: abort FIB offload.
 */
5616 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5618 struct mlxsw_sp_fib_event_work *fib_work =
5619 container_of(work, struct mlxsw_sp_fib_event_work, work);
5620 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5625 switch (fib_work->event) {
5626 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5627 case FIB_EVENT_ENTRY_ADD:
5628 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5630 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5633 mlxsw_sp_router_fib_abort(mlxsw_sp);
5634 ipmr_cache_put(fib_work->men_info.mfc);
5636 case FIB_EVENT_ENTRY_DEL:
5637 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5638 ipmr_cache_put(fib_work->men_info.mfc);
5640 case FIB_EVENT_VIF_ADD:
5641 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5642 &fib_work->ven_info);
5644 mlxsw_sp_router_fib_abort(mlxsw_sp);
5645 dev_put(fib_work->ven_info.dev);
5647 case FIB_EVENT_VIF_DEL:
5648 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5649 &fib_work->ven_info);
5650 dev_put(fib_work->ven_info.dev);
5652 case FIB_EVENT_RULE_ADD:
5653 /* if we get here, a rule was added that we do not support.
5654 * just do the fib_abort
5656 mlxsw_sp_router_fib_abort(mlxsw_sp);
/* Prepare @fib_work for an IPv4 notification: copy the notifier info into
 * the work item and take the reference that keeps the referenced object
 * alive while the work is queued. Entry events hold the fib_info; nexthop
 * events hold the parent fib_info of the nexthop. The work handler is
 * expected to put these references.
 */
5663 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5664 struct fib_notifier_info *info)
5666 struct fib_entry_notifier_info *fen_info;
5667 struct fib_nh_notifier_info *fnh_info;
5669 switch (fib_work->event) {
5670 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5671 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5672 case FIB_EVENT_ENTRY_ADD: /* fall through */
5673 case FIB_EVENT_ENTRY_DEL:
5674 fen_info = container_of(info, struct fib_entry_notifier_info,
5676 fib_work->fen_info = *fen_info;
5677 /* Take reference on fib_info to prevent it from being
5678 * freed while work is queued. Release it afterwards.
5680 fib_info_hold(fib_work->fen_info.fi);
5682 case FIB_EVENT_NH_ADD: /* fall through */
5683 case FIB_EVENT_NH_DEL:
5684 fnh_info = container_of(info, struct fib_nh_notifier_info,
5686 fib_work->fnh_info = *fnh_info;
5687 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
/* Prepare @fib_work for an IPv6 notification: for entry REPLACE / ADD /
 * DEL events, copy the notifier info into the work item and hold the
 * route (rt6_hold()) so it stays valid until the work has run.
 */
5692 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5693 struct fib_notifier_info *info)
5695 struct fib6_entry_notifier_info *fen6_info;
5697 switch (fib_work->event) {
5698 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5699 case FIB_EVENT_ENTRY_ADD: /* fall through */
5700 case FIB_EVENT_ENTRY_DEL:
5701 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5703 fib_work->fen6_info = *fen6_info;
5704 rt6_hold(fib_work->fen6_info.rt);
/* Prepare @fib_work for a multicast routing notification: copy the
 * notifier info into the work item and take a reference on the object it
 * points to - the MFC cache entry for entry events, the net device for VIF
 * events.
 */
5710 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5711 struct fib_notifier_info *info)
5713 switch (fib_work->event) {
5714 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5715 case FIB_EVENT_ENTRY_ADD: /* fall through */
5716 case FIB_EVENT_ENTRY_DEL:
5717 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5718 ipmr_cache_hold(fib_work->men_info.mfc);
5720 case FIB_EVENT_VIF_ADD: /* fall through */
5721 case FIB_EVENT_VIF_DEL:
5722 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5723 dev_hold(fib_work->ven_info.dev);
/* Vet a FIB rule notification.
 *
 * Rule deletion needs no handling, and an already aborted router is
 * checked for as well. For other events the rule is examined per address
 * family: the checks single out rules that are neither the family's
 * default rule nor an l3mdev rule. An extack message announces that FIB
 * rules are not supported and that offload is being aborted.
 *
 * NOTE(review): the return values, the statements guarded by the per
 * family checks and the condition under which the extack message is set
 * are not visible in this extract.
 */
5728 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5729 struct fib_notifier_info *info,
5730 struct mlxsw_sp *mlxsw_sp)
5732 struct netlink_ext_ack *extack = info->extack;
5733 struct fib_rule_notifier_info *fr_info;
5734 struct fib_rule *rule;
5737 /* nothing to do at the moment */
5738 if (event == FIB_EVENT_RULE_DEL)
5741 if (mlxsw_sp->router->aborted)
5744 fr_info = container_of(info, struct fib_rule_notifier_info, info);
5745 rule = fr_info->rule;
5747 switch (info->family) {
5749 if (!fib4_rule_default(rule) && !rule->l3mdev)
5753 if (!fib6_rule_default(rule) && !rule->l3mdev)
5756 case RTNL_FAMILY_IPMR:
5757 if (!ipmr_rule_default(rule) && !rule->l3mdev)
5763 NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
/* FIB notifier callback. Notifications from other network namespaces or
 * for families other than AF_INET, AF_INET6 and RTNL_FAMILY_IPMR are
 * filtered out first. Rule add / delete events are passed to
 * mlxsw_sp_router_fib_rule_event(). A work item is allocated with
 * GFP_ATOMIC, filled in by the family specific helper and scheduled, so
 * that the actual processing happens later in process context.
 *
 * NOTE(review): return values and the handling of the rule event result
 * are not visible in this extract.
 */
5768 /* Called with rcu_read_lock() */
5769 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5770 unsigned long event, void *ptr)
5772 struct mlxsw_sp_fib_event_work *fib_work;
5773 struct fib_notifier_info *info = ptr;
5774 struct mlxsw_sp_router *router;
5777 if (!net_eq(info->net, &init_net) ||
5778 (info->family != AF_INET && info->family != AF_INET6 &&
5779 info->family != RTNL_FAMILY_IPMR))
5782 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5785 case FIB_EVENT_RULE_ADD: /* fall through */
5786 case FIB_EVENT_RULE_DEL:
5787 err = mlxsw_sp_router_fib_rule_event(event, info,
5793 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5794 if (WARN_ON(!fib_work))
5797 fib_work->mlxsw_sp = router->mlxsw_sp;
5798 fib_work->event = event;
5800 switch (info->family) {
5802 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5803 mlxsw_sp_router_fib4_event(fib_work, info);
5806 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5807 mlxsw_sp_router_fib6_event(fib_work, info);
5809 case RTNL_FAMILY_IPMR:
5810 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5811 mlxsw_sp_router_fibmr_event(fib_work, info);
5815 mlxsw_core_schedule_work(&fib_work->work);
/* Linear search of the router's RIF array (MAX_RIFS slots) for a RIF whose
 * net device is @dev. Returns the first match.
 *
 * NOTE(review): the return for the no-match case is not visible in this
 * extract - presumably NULL, as callers test the result; confirm.
 */
5820 static struct mlxsw_sp_rif *
5821 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5822 const struct net_device *dev)
5826 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5827 if (mlxsw_sp->router->rifs[i] &&
5828 mlxsw_sp->router->rifs[i]->dev == dev)
5829 return mlxsw_sp->router->rifs[i];
/* Disable router interface @rif in hardware: query its current RITR
 * register contents, clear the enable field and write the register back.
 * A failing query triggers a one-time warning.
 */
5834 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5836 char ritr_pl[MLXSW_REG_RITR_LEN];
5839 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5840 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5841 if (WARN_ON_ONCE(err))
5844 mlxsw_reg_ritr_enable_set(ritr_pl, false);
5845 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Called when @rif is going away: disable the RIF in hardware first, then
 * let the nexthop and neighbour code handle the disappearance of the RIF.
 * The result of the disable operation is not checked.
 */
5848 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5849 struct mlxsw_sp_rif *rif)
5851 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5852 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5853 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
/* Decide whether address event @event on @dev requires the RIF of the
 * device to be configured.
 *
 * Visible part of the logic: the IPv4 address list and - only if that is
 * empty - the IPv6 address list of @dev are inspected to learn whether the
 * device has any address left. One of the evaluated conditions is an
 * existing @rif with no address left whose device is not an L3 slave.
 *
 * NOTE(review): the switch on @event and all return values are not
 * visible in this extract - confirm against the full file.
 */
5857 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5858 unsigned long event)
5860 struct inet6_dev *inet6_dev;
5861 bool addr_list_empty = true;
5862 struct in_device *idev;
5868 idev = __in_dev_get_rtnl(dev);
5869 if (idev && idev->ifa_list)
5870 addr_list_empty = false;
5872 inet6_dev = __in6_dev_get(dev);
5873 if (addr_list_empty && inet6_dev &&
5874 !list_empty(&inet6_dev->addr_list))
5875 addr_list_empty = false;
5877 if (rif && addr_list_empty &&
5878 !netif_is_l3_slave(rif->dev))
5880 /* It is possible we already removed the RIF ourselves
5881 * if it was assigned to a netdev that is now a bridge
/* Determine the RIF type to use for @dev.
 *
 * IP-in-IP devices get a loopback RIF. Otherwise a FID type is chosen:
 * 802.1Q for a VLAN device on top of a bridge and for a VLAN-aware bridge,
 * 802.1D for any other bridge, and rFID for everything else. The FID type
 * is then converted to a RIF type.
 */
5890 static enum mlxsw_sp_rif_type
5891 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5892 const struct net_device *dev)
5894 enum mlxsw_sp_fid_type type;
5896 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5897 return MLXSW_SP_RIF_TYPE_IPIP_LB;
5899 /* Otherwise RIF type is derived from the type of the underlying FID. */
5900 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5901 type = MLXSW_SP_FID_TYPE_8021Q;
5902 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5903 type = MLXSW_SP_FID_TYPE_8021Q;
5904 else if (netif_is_bridge_master(dev))
5905 type = MLXSW_SP_FID_TYPE_8021D;
5907 type = MLXSW_SP_FID_TYPE_RFID;
5909 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
/* Look for an unused slot in the router's RIF array (MAX_RIFS slots).
 *
 * NOTE(review): the store through @p_rif_index and the return values are
 * not visible in this extract - presumably the first free index is
 * reported and an error is returned when the array is full; confirm.
 */
5912 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5916 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5917 if (!mlxsw_sp->router->rifs[i]) {
/* Allocate a zeroed RIF object of @rif_size bytes (GFP_KERNEL) and
 * initialise its common part: empty nexthop and neighbour lists, MAC
 * address and MTU copied from @l3_dev, and the RIF index.
 *
 * NOTE(review): further field assignments, the allocation failure check
 * and the return are not visible in this extract.
 */
5926 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5928 struct net_device *l3_dev)
5930 struct mlxsw_sp_rif *rif;
5932 rif = kzalloc(rif_size, GFP_KERNEL);
5936 INIT_LIST_HEAD(&rif->nexthop_list);
5937 INIT_LIST_HEAD(&rif->neigh_list);
5938 ether_addr_copy(rif->addr, l3_dev->dev_addr);
5939 rif->mtu = l3_dev->mtu;
5942 rif->rif_index = rif_index;
/* Return the entry of the router's RIF array at index rif_index. The index
 * is not range checked here.
 */
5947 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5950 return mlxsw_sp->router->rifs[rif_index];
/* Return the index of @rif. */
5953 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5955 return rif->rif_index;
/* Return the RIF index stored in the common part of loopback RIF
 * @lb_rif.
 */
5958 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5960 return lb_rif->common.rif_index;
/* Return the ul_vr_id field of loopback RIF @lb_rif. */
5963 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5965 return lb_rif->ul_vr_id;
/* Return the ifindex of the net device @rif is bound to. */
5968 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5970 return rif->dev->ifindex;
/* Accessor returning a net device for @rif.
 * NOTE(review): the body is not visible in this extract - presumably it
 * returns rif->dev; confirm.
 */
5973 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
/* Create a router interface for params->dev.
 *
 * Visible steps, in order: determine the RIF type of the device and pick
 * the matching ops; get the virtual router for the device's l3mdev table
 * (RT_TABLE_MAIN when there is none); allocate a RIF index, reporting via
 * extack when that fails; allocate the RIF object; obtain a FID through
 * ops->fid_get(); run ops->setup() and ops->configure(); add the RIF to
 * the virtual router's mr4 table; allocate counters; and publish the RIF
 * in the router's RIF array.
 *
 * On failure the visible unwind steps are ops->deconfigure(), putting the
 * FID and putting the virtual router; an ERR_PTR is returned.
 *
 * NOTE(review): several checks, assignments and unwind labels are not
 * visible in this extract (including whether fid_get is optional).
 */
5978 static struct mlxsw_sp_rif *
5979 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
5980 const struct mlxsw_sp_rif_params *params,
5981 struct netlink_ext_ack *extack)
5983 u32 tb_id = l3mdev_fib_table(params->dev);
5984 const struct mlxsw_sp_rif_ops *ops;
5985 struct mlxsw_sp_fid *fid = NULL;
5986 enum mlxsw_sp_rif_type type;
5987 struct mlxsw_sp_rif *rif;
5988 struct mlxsw_sp_vr *vr;
5992 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
5993 ops = mlxsw_sp->router->rif_ops_arr[type];
5995 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
5997 return ERR_CAST(vr);
6000 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6002 NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
6003 goto err_rif_index_alloc;
6006 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6011 rif->mlxsw_sp = mlxsw_sp;
6015 fid = ops->fid_get(rif);
6024 ops->setup(rif, params);
6026 err = ops->configure(rif);
6030 err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
6032 goto err_mr_rif_add;
6034 mlxsw_sp_rif_counters_alloc(rif);
6035 mlxsw_sp->router->rifs[rif_index] = rif;
6040 ops->deconfigure(rif);
6043 mlxsw_sp_fid_put(fid);
6047 err_rif_index_alloc:
6049 mlxsw_sp_vr_put(vr);
6050 return ERR_PTR(err);
/* Destroy router interface @rif.
 *
 * Nexthops and neighbours using the RIF are synced first (which also
 * disables the RIF in hardware). The RIF is then removed from the router's
 * RIF array, its counters are freed, it is removed from the mr4 table of
 * its virtual router and deconfigured through its ops. Finally the FID and
 * the virtual router are put.
 *
 * NOTE(review): the FID put is conditional according to the comment next
 * to it (loopback RIFs have no FID), and freeing of the RIF object is not
 * visible in this extract.
 */
6053 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6055 const struct mlxsw_sp_rif_ops *ops = rif->ops;
6056 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6057 struct mlxsw_sp_fid *fid = rif->fid;
6058 struct mlxsw_sp_vr *vr;
6060 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6061 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6063 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6064 mlxsw_sp_rif_counters_free(rif);
6065 mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6066 ops->deconfigure(rif);
6068 /* Loopback RIFs are not associated with a FID. */
6069 mlxsw_sp_fid_put(fid);
6072 mlxsw_sp_vr_put(vr);
/* Fill the sub-port specific fields of @params from a {port, VLAN} pair:
 * the VID, whether the port is a LAG member, and the LAG ID or the local
 * port number.
 *
 * NOTE(review): the condition choosing between lag_id and system_port is
 * not visible in this extract - presumably it depends on params->lag;
 * confirm.
 */
6076 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6077 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6079 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6081 params->vid = mlxsw_sp_port_vlan->vid;
6082 params->lag = mlxsw_sp_port->lagged;
6084 params->lag_id = mlxsw_sp_port->lag_id;
6086 params->system_port = mlxsw_sp_port->local_port;
/* Make a {port, VLAN} pair a router port for @l3_dev.
 *
 * The RIF of @l3_dev is looked up, and a create path for it is visible
 * (sub-port parameters are initialised and mlxsw_sp_rif_create() is
 * called). A reference on the RIF's FID is then taken, the {port, VID} is
 * mapped to the FID, learning is disabled for the VID and its STP state
 * is set to forwarding. On success the FID is recorded in the port VLAN.
 *
 * The error labels undo the steps in reverse order: learning is turned
 * back on, the mapping is removed and the FID reference is put.
 *
 * NOTE(review): the token shown as a pilcrow followed by 'ms' in the two
 * calls below looks like a mis-encoded '&params' - verify the file
 * encoding. The condition guarding RIF creation is not visible in this
 * extract.
 */
6090 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6091 struct net_device *l3_dev,
6092 struct netlink_ext_ack *extack)
6094 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6095 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6096 u16 vid = mlxsw_sp_port_vlan->vid;
6097 struct mlxsw_sp_rif *rif;
6098 struct mlxsw_sp_fid *fid;
6101 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6103 struct mlxsw_sp_rif_params params = {
6107 mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan);
6108 rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack);
6110 return PTR_ERR(rif);
6113 /* FID was already created, just take a reference */
6114 fid = rif->ops->fid_get(rif);
6115 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6117 goto err_fid_port_vid_map;
6119 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6121 goto err_port_vid_learning_set;
6123 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6124 BR_STATE_FORWARDING);
6126 goto err_port_vid_stp_set;
6128 mlxsw_sp_port_vlan->fid = fid;
6132 err_port_vid_stp_set:
6133 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6134 err_port_vid_learning_set:
6135 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6136 err_fid_port_vid_map:
6137 mlxsw_sp_fid_put(fid);
/* Undo mlxsw_sp_port_vlan_router_join() for a {port, VLAN} pair.
 *
 * Warns if the FID recorded in the port VLAN is not an rFID. Otherwise
 * the FID pointer is cleared, the VID's STP state is set to blocking,
 * learning is re-enabled, the {port, VID} mapping to the FID is removed
 * and the FID reference is put.
 */
6142 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6144 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6145 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6146 u16 vid = mlxsw_sp_port_vlan->vid;
6148 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6151 mlxsw_sp_port_vlan->fid = NULL;
6152 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6153 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6154 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6155 /* If router port holds the last reference on the rFID, then the
6156 * associated Sub-port RIF will be destroyed.
6158 mlxsw_sp_fid_put(fid);
/* Handle an address event for @l3_dev that maps to VLAN @vid of port
 * @port_dev. The {port, VLAN} object is looked up (a miss triggers a
 * warning) and then either joined to or removed from the router.
 *
 * NOTE(review): the switch on @event is not visible in this extract -
 * presumably NETDEV_UP joins and NETDEV_DOWN leaves; confirm.
 */
6161 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6162 struct net_device *port_dev,
6163 unsigned long event, u16 vid,
6164 struct netlink_ext_ack *extack)
6166 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6167 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6169 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6170 if (WARN_ON(!mlxsw_sp_port_vlan))
6175 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6178 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
/* Handle an address event on a physical port netdev. Ports enslaved to a
 * bridge, LAG or OVS are checked for first; otherwise the event is
 * handled for VID 1 of the port, with the port itself as the L3 device.
 */
6185 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6186 unsigned long event,
6187 struct netlink_ext_ack *extack)
6189 if (netif_is_bridge_port(port_dev) ||
6190 netif_is_lag_port(port_dev) ||
6191 netif_is_ovs_port(port_dev))
6194 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
/* Handle an address event for @l3_dev on top of LAG @lag_dev: iterate over
 * the lower devices of the LAG and apply the {port, VLAN} event handler
 * to each one that is an mlxsw port.
 *
 * NOTE(review): the error handling inside the loop is not visible in
 * this extract.
 */
6198 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6199 struct net_device *lag_dev,
6200 unsigned long event, u16 vid,
6201 struct netlink_ext_ack *extack)
6203 struct net_device *port_dev;
6204 struct list_head *iter;
6207 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6208 if (mlxsw_sp_port_dev_check(port_dev)) {
6209 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
/* Handle an address event on a LAG netdev. A LAG that is a bridge port is
 * checked for first; otherwise the event is handled for VID 1 with the
 * LAG itself as the L3 device.
 */
6221 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6222 unsigned long event,
6223 struct netlink_ext_ack *extack)
6225 if (netif_is_bridge_port(lag_dev))
6228 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
/* Handle an address event on a bridge (or a VLAN device on top of one).
 * Two paths are visible: creating a RIF for @l3_dev, and looking up the
 * existing RIF of @l3_dev and destroying it.
 *
 * NOTE(review): the switch on @event is not visible in this extract. The
 * token shown as a pilcrow followed by 'ms' in the create call looks like
 * a mis-encoded '&params' - verify the file encoding.
 */
6232 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6233 unsigned long event,
6234 struct netlink_ext_ack *extack)
6236 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6237 struct mlxsw_sp_rif_params params = {
6240 struct mlxsw_sp_rif *rif;
6244 rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack);
6246 return PTR_ERR(rif);
6249 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6250 mlxsw_sp_rif_destroy(rif);
/* Handle an address event on a VLAN device. A VLAN device that is a bridge
 * port is checked for first. Otherwise the event is dispatched by the type
 * of the real device: an mlxsw port, a LAG master, or a VLAN-aware
 * bridge. The VLAN ID of the VLAN device is used as the VID.
 */
6257 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6258 unsigned long event,
6259 struct netlink_ext_ack *extack)
6261 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6262 u16 vid = vlan_dev_vlan_id(vlan_dev);
6264 if (netif_is_bridge_port(vlan_dev))
6267 if (mlxsw_sp_port_dev_check(real_dev))
6268 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6269 event, vid, extack);
6270 else if (netif_is_lag_master(real_dev))
6271 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6273 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6274 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
/* Dispatch an address event to the handler matching the type of @dev:
 * mlxsw port, LAG master, bridge master or VLAN device, tested in that
 * order.
 */
6279 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6280 unsigned long event,
6281 struct netlink_ext_ack *extack)
6283 if (mlxsw_sp_port_dev_check(dev))
6284 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6285 else if (netif_is_lag_master(dev))
6286 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6287 else if (netif_is_bridge_master(dev))
6288 return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6289 else if (is_vlan_dev(dev))
6290 return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
/* IPv4 address notifier callback. NETDEV_UP is checked for first because
 * it is handled by the validator callback. For other events the mlxsw
 * instance below the device and the RIF of the device are looked up,
 * mlxsw_sp_rif_should_config() is consulted, and the event is dispatched
 * without an extack. The result is converted with notifier_from_errno().
 */
6295 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6296 unsigned long event, void *ptr)
6298 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6299 struct net_device *dev = ifa->ifa_dev->dev;
6300 struct mlxsw_sp *mlxsw_sp;
6301 struct mlxsw_sp_rif *rif;
6304 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6305 if (event == NETDEV_UP)
6308 mlxsw_sp = mlxsw_sp_lower_get(dev);
6312 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6313 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6316 err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6318 return notifier_from_errno(err);
/* IPv4 address validator callback. The mlxsw instance below the device
 * and the RIF of the device are looked up, mlxsw_sp_rif_should_config()
 * is consulted, and the event is dispatched with the validator's extack
 * so that failures can be reported to user space. The result is converted
 * with notifier_from_errno().
 */
6321 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6322 unsigned long event, void *ptr)
6324 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6325 struct net_device *dev = ivi->ivi_dev->dev;
6326 struct mlxsw_sp *mlxsw_sp;
6327 struct mlxsw_sp_rif *rif;
6330 mlxsw_sp = mlxsw_sp_lower_get(dev);
6334 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6335 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6338 err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6340 return notifier_from_errno(err);
/* Work item used to process an IPv6 address notification later, from a
 * work queue: the affected net device and the event code.
 */
6343 struct mlxsw_sp_inet6addr_event_work {
6344 struct work_struct work;
6345 struct net_device *dev;
6346 unsigned long event;
/* Work handler for queued IPv6 address events. The mlxsw instance below
 * the device and the RIF of the device are looked up,
 * mlxsw_sp_rif_should_config() is consulted, and the event is dispatched
 * without an extack; its result is ignored. The work item is freed at the
 * end.
 *
 * NOTE(review): locking and any release of the device are not visible in
 * this extract.
 */
6349 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6351 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6352 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6353 struct net_device *dev = inet6addr_work->dev;
6354 unsigned long event = inet6addr_work->event;
6355 struct mlxsw_sp *mlxsw_sp;
6356 struct mlxsw_sp_rif *rif;
6359 mlxsw_sp = mlxsw_sp_lower_get(dev);
6363 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6364 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6367 __mlxsw_sp_inetaddr_event(dev, event, NULL);
6371 kfree(inet6addr_work);
/* IPv6 address notifier callback. NETDEV_UP is checked for first because
 * it is handled by the validator callback, and devices without an mlxsw
 * port below them are checked for next. A work item is allocated with
 * GFP_ATOMIC, filled with the device and event, and scheduled so that the
 * event is processed later in process context.
 *
 * NOTE(review): no reference on dev is visibly taken before the pointer
 * is stored in the work item - confirm that the full file pairs a
 * dev_hold() here with a dev_put() in the work handler.
 */
6374 /* Called with rcu_read_lock() */
6375 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6376 unsigned long event, void *ptr)
6378 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6379 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6380 struct net_device *dev = if6->idev->dev;
6382 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6383 if (event == NETDEV_UP)
6386 if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6389 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6390 if (!inet6addr_work)
6393 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6394 inet6addr_work->dev = dev;
6395 inet6addr_work->event = event;
6397 mlxsw_core_schedule_work(&inet6addr_work->work);
/* IPv6 address validator callback. The mlxsw instance below the device
 * and the RIF of the device are looked up, mlxsw_sp_rif_should_config()
 * is consulted, and the event is dispatched with the validator's extack
 * so that failures can be reported to user space. The result is converted
 * with notifier_from_errno().
 */
6402 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6403 unsigned long event, void *ptr)
6405 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6406 struct net_device *dev = i6vi->i6vi_dev->dev;
6407 struct mlxsw_sp *mlxsw_sp;
6408 struct mlxsw_sp_rif *rif;
6411 mlxsw_sp = mlxsw_sp_lower_get(dev);
6415 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6416 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6419 err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6421 return notifier_from_errno(err);
/* Update the MAC address and MTU of RIF @rif_index in hardware: query the
 * current RITR register contents, set the new MTU and MAC, set the
 * operation to RIF create and write the register back.
 */
6424 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6425 const char *mac, int mtu)
6427 char ritr_pl[MLXSW_REG_RITR_LEN];
6430 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6431 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6435 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6436 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6437 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6438 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Bring the RIF of a net device in line with the device's current MAC
 * address and MTU.
 *
 * @dev: net device whose RIF should be refreshed.
 *
 * Visible sequence:
 *  - mlxsw_sp_rif_fdb_op() on the RIF's cached address with last argument
 *    false (presumably removing the old FDB entry);
 *  - mlxsw_sp_rif_edit() with the device's current address;
 *  - mlxsw_sp_rif_fdb_op() on the device's current address with last
 *    argument true;
 *  - when the MTU differs, mlxsw_sp_mr_rif_mtu_update() on the mr4_table of
 *    the RIF's virtual router;
 *  - the new address and MTU are cached in the RIF.
 * The last two calls of the function re-apply the cached (old) address and
 * MTU, i.e. they form the error unwind.
 *
 * NOTE(review): the error checks, unwind labels, return statements and the
 * handling of a missing mlxsw_sp/RIF are not visible here.
 */
6441 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6443 struct mlxsw_sp *mlxsw_sp;
6444 struct mlxsw_sp_rif *rif;
6448 mlxsw_sp = mlxsw_sp_lower_get(dev);
6452 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6455 fid_index = mlxsw_sp_fid_index(rif->fid);
/* rif->addr still holds the address programmed earlier; dev->dev_addr is
 * the one to switch to.
 */
6457 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6461 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6466 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6468 goto err_rif_fdb_op;
6470 if (rif->mtu != dev->mtu) {
6471 struct mlxsw_sp_vr *vr;
6473 /* The RIF is relevant only to its mr_table instance, as unlike
6474 * unicast routing, in multicast routing a RIF cannot be shared
6475 * between several multicast routing tables.
6477 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6478 mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6481 ether_addr_copy(rif->addr, dev->dev_addr);
6482 rif->mtu = dev->mtu;
6484 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6489 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6491 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
/* Handle a netdev being linked to a VRF by replaying address events.
 *
 * @mlxsw_sp: driver instance used for the RIF lookup.
 * @l3_dev: the L3 net device joining the VRF.
 * @extack: extended ack passed through to __mlxsw_sp_inetaddr_event().
 *
 * A NETDEV_DOWN event is replayed for the device (per the comment below,
 * when it already has a RIF), followed by a NETDEV_UP event.
 *
 * Return: result of the NETDEV_UP replay.
 * NOTE(review): the condition guarding the NETDEV_DOWN replay is not
 * visible here.
 */
6495 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6496 struct net_device *l3_dev,
6497 struct netlink_ext_ack *extack)
6499 struct mlxsw_sp_rif *rif;
6501 /* If netdev is already associated with a RIF, then we need to
6502 * destroy it and create a new one with the new virtual router ID.
6504 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6506 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6508 return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
/* Handle a netdev being unlinked from a VRF.
 *
 * @mlxsw_sp: driver instance used for the RIF lookup.
 * @l3_dev: the L3 net device leaving the VRF.
 *
 * Replays a NETDEV_DOWN address event for the device, without an extack.
 * NOTE(review): how the looked-up RIF gates the replay is not visible here.
 */
6511 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6512 struct net_device *l3_dev)
6514 struct mlxsw_sp_rif *rif;
6516 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6519 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
/* Dispatch upper-change notifications for an L3 device and a VRF.
 *
 * @l3_dev: the L3 net device being linked or unlinked.
 * @event: netdev notifier event; NETDEV_PRECHANGEUPPER and
 *         NETDEV_CHANGEUPPER have visible cases.
 * @info: change-upper notifier info; ->linking selects join vs. leave and
 *        ->info supplies the extack.
 *
 * On NETDEV_CHANGEUPPER, a linking event calls mlxsw_sp_port_vrf_join()
 * with the notifier's extack; mlxsw_sp_port_vrf_leave() is the other arm.
 *
 * NOTE(review): the switch header, the NETDEV_PRECHANGEUPPER body, the
 * else line and the return are not visible here.
 */
6522 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6523 struct netdev_notifier_changeupper_info *info)
6525 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6532 case NETDEV_PRECHANGEUPPER:
6534 case NETDEV_CHANGEUPPER:
6535 if (info->linking) {
6536 struct netlink_ext_ack *extack;
6538 extack = netdev_notifier_info_to_extack(&info->info);
6539 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6541 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
/* Convert a generic RIF pointer to the sub-port RIF that embeds it as its
 * 'common' member. Only meaningful for RIFs that really are embedded in a
 * struct mlxsw_sp_rif_subport.
 */
6549 static struct mlxsw_sp_rif_subport *
6550 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6552 return container_of(rif, struct mlxsw_sp_rif_subport, common);
/* .setup callback for sub-port RIFs: copy the sub-port parameters into the
 * RIF.
 *
 * @rif: RIF embedded in a struct mlxsw_sp_rif_subport.
 * @params: creation parameters; vid, lag, lag_id and system_port are read.
 *
 * NOTE(review): lag_id and system_port are presumably assigned on
 * alternative branches of a params->lag test - the branch lines are not
 * visible here.
 */
6555 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6556 const struct mlxsw_sp_rif_params *params)
6558 struct mlxsw_sp_rif_subport *rif_subport;
6560 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6561 rif_subport->vid = params->vid;
6562 rif_subport->lag = params->lag;
6564 rif_subport->lag_id = params->lag_id;
6566 rif_subport->system_port = params->system_port;
/* Write the RITR record of a sub-port RIF.
 *
 * @rif: RIF embedded in a struct mlxsw_sp_rif_subport.
 * @enable: passed to mlxsw_reg_ritr_pack() as the enable flag.
 *
 * The record is packed as MLXSW_REG_RITR_SP_IF with the RIF index, virtual
 * router ID, and the device's MTU and MAC address. The sub-port part uses
 * the LAG ID when the RIF is on a LAG and the system port otherwise.
 *
 * Return: result of the RITR write.
 * NOTE(review): the last argument(s) of mlxsw_reg_ritr_sp_if_pack() are not
 * visible here.
 */
6569 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6571 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6572 struct mlxsw_sp_rif_subport *rif_subport;
6573 char ritr_pl[MLXSW_REG_RITR_LEN];
6575 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6576 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6577 rif->rif_index, rif->vr_id, rif->dev->mtu);
6578 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6579 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6580 rif_subport->lag ? rif_subport->lag_id :
6581 rif_subport->system_port,
6584 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* .configure callback for sub-port RIFs.
 *
 * @rif: the sub-port RIF to configure.
 *
 * Enables the RIF via mlxsw_sp_rif_subport_op(), calls
 * mlxsw_sp_rif_fdb_op() for the device's MAC address on the RIF's FID with
 * last argument true, and then links the FID to the RIF. The trailing
 * mlxsw_sp_rif_subport_op(rif, false) is the unwind for the FDB step.
 *
 * NOTE(review): error checks, the unwind label and the return statements
 * are not visible here.
 */
6587 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6591 err = mlxsw_sp_rif_subport_op(rif, true);
6595 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6596 mlxsw_sp_fid_index(rif->fid), true);
6598 goto err_rif_fdb_op;
6600 mlxsw_sp_fid_rif_set(rif->fid, rif);
6604 mlxsw_sp_rif_subport_op(rif, false);
/* .deconfigure callback for sub-port RIFs: undoes the configure steps in
 * reverse order - unlink the FID from the RIF, call mlxsw_sp_rif_fdb_op()
 * for the device's MAC address with last argument false, then disable the
 * RIF. Return values of the individual steps are ignored.
 *
 * @rif: the sub-port RIF to deconfigure.
 */
6608 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6610 struct mlxsw_sp_fid *fid = rif->fid;
6612 mlxsw_sp_fid_rif_set(fid, NULL);
6613 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6614 mlxsw_sp_fid_index(fid), false);
6615 mlxsw_sp_rif_subport_op(rif, false);
/* .fid_get callback for sub-port RIFs: obtain the rFID keyed by the RIF's
 * own index.
 *
 * @rif: the sub-port RIF.
 *
 * Return: whatever mlxsw_sp_fid_rfid_get() returns.
 */
6618 static struct mlxsw_sp_fid *
6619 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6621 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_SUBPORT. rif_size is the
 * size of the embedding struct mlxsw_sp_rif_subport rather than of the
 * common struct.
 */
6624 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6625 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
6626 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
6627 .setup = mlxsw_sp_rif_subport_setup,
6628 .configure = mlxsw_sp_rif_subport_configure,
6629 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
6630 .fid_get = mlxsw_sp_rif_subport_fid_get,
/* Write the RITR record shared by VLAN and FID RIFs.
 *
 * @rif: the RIF to program.
 * @type: RITR interface type; the callers in this file pass
 *        MLXSW_REG_RITR_VLAN_IF or MLXSW_REG_RITR_FID_IF.
 * @vid_fid: VID or FID index, written with mlxsw_reg_ritr_fid_set().
 * @enable: passed to mlxsw_reg_ritr_pack() as the enable flag.
 *
 * Return: result of the RITR write.
 * NOTE(review): the final argument line of mlxsw_reg_ritr_pack() is not
 * visible here.
 */
6633 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6634 enum mlxsw_reg_ritr_if_type type,
6635 u16 vid_fid, bool enable)
6637 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6638 char ritr_pl[MLXSW_REG_RITR_LEN];
6640 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6642 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6643 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6645 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Port number used for the router port: one past the core's maximum port
 * count.
 *
 * NOTE(review): the sum is returned as u8 - verify that
 * mlxsw_core_max_ports() + 1 always fits.
 */
6648 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6650 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
/* .configure callback for VLAN RIFs.
 *
 * @rif: the VLAN RIF to configure.
 *
 * Steps, in order:
 *  1. enable the RIF as MLXSW_REG_RITR_VLAN_IF for the FID's 802.1Q VID;
 *  2. set MC flooding on the FID for the router port;
 *  3. set BC flooding on the FID for the router port;
 *  4. call mlxsw_sp_rif_fdb_op() for the device's MAC address with last
 *     argument true;
 *  5. link the FID to the RIF.
 * The tail of the function undoes steps 3, 2 and 1 for the error paths.
 *
 * NOTE(review): error checks, the first unwind label and the return
 * statements are not visible here.
 */
6653 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6655 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6656 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6659 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6663 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6664 mlxsw_sp_router_port(mlxsw_sp), true);
6666 goto err_fid_mc_flood_set;
6668 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6669 mlxsw_sp_router_port(mlxsw_sp), true);
6671 goto err_fid_bc_flood_set;
6673 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6674 mlxsw_sp_fid_index(rif->fid), true);
6676 goto err_rif_fdb_op;
6678 mlxsw_sp_fid_rif_set(rif->fid, rif);
/* Error unwind: each label undoes the steps that succeeded before it. */
6682 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6683 mlxsw_sp_router_port(mlxsw_sp), false);
6684 err_fid_bc_flood_set:
6685 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6686 mlxsw_sp_router_port(mlxsw_sp), false);
6687 err_fid_mc_flood_set:
6688 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
/* .deconfigure callback for VLAN RIFs: mirrors the configure steps in
 * reverse - unlink the FID from the RIF, call mlxsw_sp_rif_fdb_op() for the
 * device's MAC address with last argument false, clear BC then MC flooding
 * for the router port, and finally disable the VLAN RIF. Return values of
 * the individual steps are ignored.
 *
 * @rif: the VLAN RIF to deconfigure.
 */
6692 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6694 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6695 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6696 struct mlxsw_sp_fid *fid = rif->fid;
6698 mlxsw_sp_fid_rif_set(fid, NULL);
6699 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6700 mlxsw_sp_fid_index(fid), false);
6701 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6702 mlxsw_sp_router_port(mlxsw_sp), false);
6703 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6704 mlxsw_sp_router_port(mlxsw_sp), false);
6705 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
/* .fid_get callback for VLAN RIFs: obtain the 802.1Q FID for the RIF's
 * device.
 *
 * @rif: the VLAN RIF.
 *
 * The VID is the device's VLAN ID when the device is a VLAN device, and 1
 * otherwise.
 *
 * Return: whatever mlxsw_sp_fid_8021q_get() returns.
 */
6708 static struct mlxsw_sp_fid *
6709 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6711 u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6713 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_VLAN. No .setup callback is
 * set and the plain struct mlxsw_sp_rif is used as the RIF object.
 */
6716 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6717 .type = MLXSW_SP_RIF_TYPE_VLAN,
6718 .rif_size = sizeof(struct mlxsw_sp_rif),
6719 .configure = mlxsw_sp_rif_vlan_configure,
6720 .deconfigure = mlxsw_sp_rif_vlan_deconfigure,
6721 .fid_get = mlxsw_sp_rif_vlan_fid_get,
/* .configure callback for FID RIFs.
 *
 * @rif: the FID RIF to configure.
 *
 * Same sequence as the VLAN variant, but the RIF is programmed as
 * MLXSW_REG_RITR_FID_IF and keyed by the FID index:
 *  1. enable the RIF;
 *  2. set MC flooding on the FID for the router port;
 *  3. set BC flooding on the FID for the router port;
 *  4. call mlxsw_sp_rif_fdb_op() for the device's MAC address with last
 *     argument true;
 *  5. link the FID to the RIF.
 * The tail of the function undoes steps 3, 2 and 1 for the error paths.
 *
 * NOTE(review): error checks, the first unwind label and the return
 * statements are not visible here.
 */
6724 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6726 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6727 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6730 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6735 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6736 mlxsw_sp_router_port(mlxsw_sp), true);
6738 goto err_fid_mc_flood_set;
6740 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6741 mlxsw_sp_router_port(mlxsw_sp), true);
6743 goto err_fid_bc_flood_set;
6745 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6746 mlxsw_sp_fid_index(rif->fid), true);
6748 goto err_rif_fdb_op;
6750 mlxsw_sp_fid_rif_set(rif->fid, rif);
/* Error unwind: each label undoes the steps that succeeded before it. */
6754 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6755 mlxsw_sp_router_port(mlxsw_sp), false);
6756 err_fid_bc_flood_set:
6757 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6758 mlxsw_sp_router_port(mlxsw_sp), false);
6759 err_fid_mc_flood_set:
6760 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* .deconfigure callback for FID RIFs: mirrors the configure steps in
 * reverse - unlink the FID from the RIF, call mlxsw_sp_rif_fdb_op() for the
 * device's MAC address with last argument false, clear BC then MC flooding
 * for the router port, and finally disable the FID RIF. Return values of
 * the individual steps are ignored.
 *
 * @rif: the FID RIF to deconfigure.
 */
6764 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6766 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6767 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6768 struct mlxsw_sp_fid *fid = rif->fid;
6770 mlxsw_sp_fid_rif_set(fid, NULL);
6771 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6772 mlxsw_sp_fid_index(fid), false);
6773 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6774 mlxsw_sp_router_port(mlxsw_sp), false);
6775 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6776 mlxsw_sp_router_port(mlxsw_sp), false);
6777 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* .fid_get callback for FID RIFs: obtain the 802.1D FID keyed by the
 * ifindex of the RIF's device.
 *
 * @rif: the FID RIF.
 *
 * Return: whatever mlxsw_sp_fid_8021d_get() returns.
 */
6780 static struct mlxsw_sp_fid *
6781 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6783 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_FID. No .setup callback is
 * set and the plain struct mlxsw_sp_rif is used as the RIF object.
 */
6786 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6787 .type = MLXSW_SP_RIF_TYPE_FID,
6788 .rif_size = sizeof(struct mlxsw_sp_rif),
6789 .configure = mlxsw_sp_rif_fid_configure,
6790 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
6791 .fid_get = mlxsw_sp_rif_fid_fid_get,
/* Convert a generic RIF pointer to the IPIP loopback RIF that embeds it as
 * its 'common' member. Only meaningful for RIFs that really are embedded in
 * a struct mlxsw_sp_rif_ipip_lb.
 */
6794 static struct mlxsw_sp_rif_ipip_lb *
6795 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6797 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
/* .setup callback for IPIP loopback RIFs: copy the loopback configuration
 * from the creation parameters into the RIF.
 *
 * @rif: RIF embedded in a struct mlxsw_sp_rif_ipip_lb.
 * @params: generic parameters embedded in a
 *          struct mlxsw_sp_rif_params_ipip_lb.
 *
 * NOTE(review): the return-type line and the member argument of the
 * container_of() are not visible here.
 */
6801 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6802 const struct mlxsw_sp_rif_params *params)
6804 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6805 struct mlxsw_sp_rif_ipip_lb *rif_lb;
6807 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6809 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6810 rif_lb->lb_config = params_lb->lb_config;
/* Write the RITR record of an IPIP loopback RIF.
 *
 * @lb_rif: the loopback RIF; its lb_config is copied to a local.
 * @ul_vr: underlay virtual router; its id goes into the loopback part of
 *         the record.
 * @enable: passed to mlxsw_reg_ritr_pack() as the enable flag.
 *
 * For an IPv4 underlay the record is packed as MLXSW_REG_RITR_LOOPBACK_IF
 * with the RIF index, virtual router ID and device MTU, plus the IPIP type,
 * the GRE-key-preset option, the underlay VR ID, the source address
 * converted to CPU byte order and the output key. An IPv6 underlay is
 * rejected with -EAFNOSUPPORT.
 *
 * Return: result of the RITR write, or -EAFNOSUPPORT.
 * NOTE(review): the return-type line, the saddr4 declaration and the switch
 * break/closing lines are not visible here.
 */
6814 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6815 struct mlxsw_sp_vr *ul_vr, bool enable)
6817 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6818 struct mlxsw_sp_rif *rif = &lb_rif->common;
6819 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6820 char ritr_pl[MLXSW_REG_RITR_LEN];
6823 switch (lb_cf.ul_protocol) {
6824 case MLXSW_SP_L3_PROTO_IPV4:
6825 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6826 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6827 rif->rif_index, rif->vr_id, rif->dev->mtu);
6828 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6829 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6830 ul_vr->id, saddr4, lb_cf.okey);
6833 case MLXSW_SP_L3_PROTO_IPV6:
6834 return -EAFNOSUPPORT;
6837 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* .configure callback for IPIP loopback RIFs.
 *
 * @rif: RIF embedded in a struct mlxsw_sp_rif_ipip_lb.
 *
 * Gets the virtual router for the tunnel device's underlay table ID,
 * enables the loopback RIF against it and records the VR's id in
 * lb_rif->ul_vr_id (the deconfigure path uses it to find the VR again).
 * If enabling fails, the VR reference is put back.
 *
 * Return: PTR_ERR() of the VR lookup on its failure path.
 * NOTE(review): the remaining returns, the return-type line, the IS_ERR
 * check and the unwind label are not visible here.
 */
6841 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6843 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6844 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6845 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6846 struct mlxsw_sp_vr *ul_vr;
6849 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6851 return PTR_ERR(ul_vr);
6853 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6855 goto err_loopback_op;
6857 lb_rif->ul_vr_id = ul_vr->id;
6862 mlxsw_sp_vr_put(ul_vr);
/* .deconfigure callback for IPIP loopback RIFs: look up the underlay
 * virtual router by the ID saved at configure time, disable the loopback
 * RIF and put the VR. The result of the disable operation is ignored.
 *
 * @rif: RIF embedded in a struct mlxsw_sp_rif_ipip_lb.
 *
 * NOTE(review): any statements between the disable and the put are not
 * visible here.
 */
6866 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6868 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6869 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6870 struct mlxsw_sp_vr *ul_vr;
6872 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6873 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6876 mlxsw_sp_vr_put(ul_vr);
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_IPIP_LB. rif_size is the
 * size of the embedding struct mlxsw_sp_rif_ipip_lb; no .fid_get callback
 * appears in the visible lines.
 */
6879 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6880 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
6881 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
6882 .setup = mlxsw_sp_rif_ipip_lb_setup,
6883 .configure = mlxsw_sp_rif_ipip_lb_configure,
6884 .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure,
/* RIF operations indexed by RIF type; installed as router->rif_ops_arr by
 * mlxsw_sp_rifs_init().
 */
6887 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6888 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
6889 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops,
6890 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
6891 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops,
/* Allocate the router's RIF pointer table and install the RIF ops array.
 *
 * @mlxsw_sp: driver instance; its router must already be allocated.
 *
 * The table has one zero-initialised pointer slot per RIF, sized by the
 * MAX_RIFS core resource.
 *
 * NOTE(review): the allocation flags line and the return statements are not
 * visible here.
 */
6894 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6896 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6898 mlxsw_sp->router->rifs = kcalloc(max_rifs,
6899 sizeof(struct mlxsw_sp_rif *),
6901 if (!mlxsw_sp->router->rifs)
6904 mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
/* Free the router's RIF pointer table.
 *
 * @mlxsw_sp: driver instance.
 *
 * Every slot is expected to be empty by now; a non-NULL slot triggers a
 * one-time warning but the table is freed regardless.
 */
6909 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6913 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6914 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6916 kfree(mlxsw_sp->router->rifs);
/* Program the TIGCR register, packed with the arguments (true, 0).
 *
 * @mlxsw_sp: driver instance whose core the register write goes through.
 *
 * Return: result of the register write.
 * NOTE(review): the meaning of the two pack arguments is defined by
 * mlxsw_reg_tigcr_pack(), which is not visible here; the return-type line
 * is not shown either.
 */
6920 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6922 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6924 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6925 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
/* Initialise the router's IPIP state: install the IPIP ops array, start
 * with an empty tunnel list and program TIGCR.
 *
 * @mlxsw_sp: driver instance; its router must already be allocated.
 *
 * Return: result of mlxsw_sp_ipip_config_tigcr().
 */
6928 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6930 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6931 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6932 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
/* Tear-down counterpart of mlxsw_sp_ipips_init(): only warns if the IPIP
 * list is not empty; nothing is freed here.
 *
 * @mlxsw_sp: driver instance.
 */
6935 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6937 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
/* Callback handed to register_fib_notifier() by mlxsw_sp_router_init().
 *
 * @nb: the FIB notifier block embedded in struct mlxsw_sp_router as fib_nb.
 *
 * Drains the driver's ordered work queue and then flushes the router's FIB
 * state for the owning mlxsw_sp instance.
 */
6940 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6942 struct mlxsw_sp_router *router;
6944 /* Flush pending FIB notifications and then flush the device's
6945 * table before requesting another dump. The FIB notification
6946 * block is unregistered, so no need to take RTNL.
6948 mlxsw_core_flush_owq();
6949 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6950 mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6953 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable one outer-header type in a RECR2 payload.
 *
 * @recr2_pl: RECR2 register payload being built.
 * @header: header-enable index to switch on.
 */
6954 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6956 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
/* Enable one outer-header field in a RECR2 payload.
 *
 * @recr2_pl: RECR2 register payload being built.
 * @field: field-enable index to switch on.
 */
6959 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6961 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
/* Add the IPv4 header and field enables to a RECR2 payload.
 *
 * @recr2_pl: RECR2 register payload being built.
 *
 * Both IPv4 header classes (non-TCP/UDP and TCP/UDP) and the source and
 * destination addresses are always enabled. The remaining lines add the
 * TCP/UDP-over-IPv4 header, the IPv4 protocol field and the L4 ports.
 *
 * NOTE(review): only_l3 is derived from init_net's
 * fib_multipath_hash_policy sysctl but is not used in the visible lines -
 * presumably it gates the L4 part; confirm against the full function.
 */
6964 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6966 bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
6968 mlxsw_sp_mp_hash_header_set(recr2_pl,
6969 MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
6970 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
6971 mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
6972 mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
6975 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
6976 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
6977 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
6978 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
/* Add the IPv6 header and field enables to a RECR2 payload: both IPv6
 * header classes (non-TCP/UDP and TCP/UDP), the source and destination
 * addresses, the flow label and the next-header field.
 *
 * @recr2_pl: RECR2 register payload being built.
 */
6981 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
6983 mlxsw_sp_mp_hash_header_set(recr2_pl,
6984 MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
6985 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
6986 mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
6987 mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
6988 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
6989 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
/* Program the RECR2 register from a random seed plus the IPv4 and IPv6
 * enables. This definition sits under #ifdef CONFIG_IP_ROUTE_MULTIPATH.
 *
 * @mlxsw_sp: driver instance whose core the register write goes through.
 *
 * Return: result of the RECR2 write.
 * NOTE(review): the declaration of seed is not visible here.
 */
6992 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
6994 char recr2_pl[MLXSW_REG_RECR2_LEN];
6997 get_random_bytes(&seed, sizeof(seed));
6998 mlxsw_reg_recr2_pack(recr2_pl, seed);
6999 mlxsw_sp_mp4_hash_init(recr2_pl);
7000 mlxsw_sp_mp6_hash_init(recr2_pl);
7002 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
/* Second definition of mlxsw_sp_mp_hash_init().
 * NOTE(review): presumably the variant for builds without
 * CONFIG_IP_ROUTE_MULTIPATH - the preprocessor directives around it and its
 * body are not visible here; confirm against the full source.
 */
7005 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
/* Program the RGCR register for router start-up.
 *
 * @mlxsw_sp: driver instance whose core the register write goes through.
 *
 * The MAX_RIFS core resource must be valid; its value is written as the
 * maximum number of router interfaces. RGCR is packed with (true, true).
 *
 * NOTE(review): the statement taken when MAX_RIFS is invalid, the
 * declarations of max_rifs/err and the returns are not visible here.
 */
7011 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7013 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7017 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7019 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7021 mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7022 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7023 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* Counterpart of __mlxsw_sp_router_init(): writes RGCR packed with
 * (false, false). The result of the register write is ignored.
 *
 * @mlxsw_sp: driver instance whose core the register write goes through.
 */
7029 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7031 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7033 mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7034 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* Allocate and bring up the router part of an mlxsw_sp instance.
 *
 * @mlxsw_sp: driver instance; ->router is set to the new context.
 *
 * Initialisation order, as visible below: RGCR (__mlxsw_sp_router_init),
 * RIF table, IPIP, nexthop hash table, nexthop-group hash table, LPM,
 * multicast routing, virtual routers, neighbours, netevent notifier,
 * multipath hash, FIB notifier. The tail of the function is the error
 * unwind, which releases the same pieces in the opposite order and finally
 * frees the router context.
 *
 * NOTE(review): the error checks, several unwind labels, the allocation
 * failure handling and the return statements are not visible here.
 */
7037 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7039 struct mlxsw_sp_router *router;
7042 router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7045 mlxsw_sp->router = router;
7046 router->mlxsw_sp = mlxsw_sp;
7048 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7049 err = __mlxsw_sp_router_init(mlxsw_sp);
7051 goto err_router_init;
7053 err = mlxsw_sp_rifs_init(mlxsw_sp);
7057 err = mlxsw_sp_ipips_init(mlxsw_sp);
7059 goto err_ipips_init;
7061 err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7062 &mlxsw_sp_nexthop_ht_params);
7064 goto err_nexthop_ht_init;
7066 err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7067 &mlxsw_sp_nexthop_group_ht_params);
7069 goto err_nexthop_group_ht_init;
7071 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7072 err = mlxsw_sp_lpm_init(mlxsw_sp);
7076 err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7080 err = mlxsw_sp_vrs_init(mlxsw_sp);
7084 err = mlxsw_sp_neigh_init(mlxsw_sp);
7086 goto err_neigh_init;
7088 mlxsw_sp->router->netevent_nb.notifier_call =
7089 mlxsw_sp_router_netevent_event;
7090 err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7092 goto err_register_netevent_notifier;
7094 err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7096 goto err_mp_hash_init;
/* The FIB notifier is registered last; mlxsw_sp_router_fib_dump_flush() is
 * passed as the callback argument of register_fib_notifier().
 */
7098 mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7099 err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7100 mlxsw_sp_router_fib_dump_flush);
7102 goto err_register_fib_notifier;
/* Error unwind, in reverse order of initialisation. */
7106 err_register_fib_notifier:
7108 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7109 err_register_netevent_notifier:
7110 mlxsw_sp_neigh_fini(mlxsw_sp);
7112 mlxsw_sp_vrs_fini(mlxsw_sp);
7114 mlxsw_sp_mr_fini(mlxsw_sp);
7116 mlxsw_sp_lpm_fini(mlxsw_sp);
7118 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7119 err_nexthop_group_ht_init:
7120 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7121 err_nexthop_ht_init:
7122 mlxsw_sp_ipips_fini(mlxsw_sp);
7124 mlxsw_sp_rifs_fini(mlxsw_sp);
7126 __mlxsw_sp_router_fini(mlxsw_sp);
7128 kfree(mlxsw_sp->router);
/* Tear down everything set up by mlxsw_sp_router_init(), in the reverse
 * order: notifiers first, then neighbours, virtual routers, multicast
 * routing, LPM, the two nexthop hash tables, IPIP, the RIF table and RGCR,
 * and finally free the router context.
 *
 * @mlxsw_sp: driver instance whose router is being destroyed.
 *
 * NOTE(review): mlxsw_sp->router is left pointing at freed memory by the
 * visible lines; confirm that no caller uses it afterwards.
 */
7132 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7134 unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7135 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7136 mlxsw_sp_neigh_fini(mlxsw_sp);
7137 mlxsw_sp_vrs_fini(mlxsw_sp);
7138 mlxsw_sp_mr_fini(mlxsw_sp);
7139 mlxsw_sp_lpm_fini(mlxsw_sp);
7140 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7141 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7142 mlxsw_sp_ipips_fini(mlxsw_sp);
7143 mlxsw_sp_rifs_fini(mlxsw_sp);
7144 __mlxsw_sp_router_fini(mlxsw_sp);
7145 kfree(mlxsw_sp->router);