/* drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_router.h"
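
/* This file implements the Spectrum router offload: router interfaces
 * (RIFs), virtual routers (VRs) bound to LPM trees, FIB entries and nexthop
 * groups programmed through the RALXX/RATR/RAUHT register set, and the
 * synchronization of the kernel neighbour and FIB tables with the device.
 */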
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif **rifs;
        struct mlxsw_sp_vr *vrs;
        struct rhashtable neigh_ht;
        struct rhashtable nexthop_group_ht;
        struct rhashtable nexthop_ht;
        struct {
                struct mlxsw_sp_lpm_tree *trees;
                unsigned int tree_count;
        } lpm;
        struct {
                struct delayed_work dw;
                unsigned long interval; /* ms */
        } neighs_update;
        struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
        struct list_head nexthop_neighs_list;
        bool aborted;
        struct notifier_block fib_nb;
        const struct mlxsw_sp_rif_ops **rif_ops_arr;
};

struct mlxsw_sp_rif {
        struct list_head nexthop_list;
        struct list_head neigh_list;
        struct net_device *dev;
        struct mlxsw_sp_fid *fid;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index;
        u16 vr_id;
        const struct mlxsw_sp_rif_ops *ops;
        struct mlxsw_sp *mlxsw_sp;

        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};
struct mlxsw_sp_rif_params {
        struct net_device *dev;
        u16 vid;
        bool lag;
};

struct mlxsw_sp_rif_subport {
        struct mlxsw_sp_rif common;
        u16 vid;
        bool lag;
};

struct mlxsw_sp_rif_ops {
        enum mlxsw_sp_rif_type type;
        size_t rif_size;

        void (*setup)(struct mlxsw_sp_rif *rif,
                      const struct mlxsw_sp_rif_params *params);
        int (*configure)(struct mlxsw_sp_rif *rif);
        void (*deconfigure)(struct mlxsw_sp_rif *rif);
        struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
                           enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return &rif->counter_egress;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return &rif->counter_ingress;
        }
        return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return rif->counter_egress_valid;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return rif->counter_ingress_valid;
        }
        return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir,
                               bool valid)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                rif->counter_egress_valid = valid;
                break;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                rif->counter_ingress_valid = valid;
                break;
        }
}
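
/* Bind or unbind an allocated counter to a RIF in the requested direction
 * by rewriting the RIF's RITR register entry.
 */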
184 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
185 unsigned int counter_index, bool enable,
186 enum mlxsw_sp_rif_counter_dir dir)
188 char ritr_pl[MLXSW_REG_RITR_LEN];
189 bool is_egress = false;
192 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
194 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
195 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
199 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
201 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
204 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
205 struct mlxsw_sp_rif *rif,
206 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
208 char ricnt_pl[MLXSW_REG_RICNT_LEN];
209 unsigned int *p_counter_index;
213 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
217 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
218 if (!p_counter_index)
220 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
221 MLXSW_REG_RICNT_OPCODE_NOP);
222 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
225 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
229 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
230 unsigned int counter_index)
232 char ricnt_pl[MLXSW_REG_RICNT_LEN];
234 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
235 MLXSW_REG_RICNT_OPCODE_CLEAR);
236 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
239 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
240 struct mlxsw_sp_rif *rif,
241 enum mlxsw_sp_rif_counter_dir dir)
243 unsigned int *p_counter_index;
246 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
247 if (!p_counter_index)
249 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
254 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
256 goto err_counter_clear;
258 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
259 *p_counter_index, true, dir);
261 goto err_counter_edit;
262 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
267 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
272 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
273 struct mlxsw_sp_rif *rif,
274 enum mlxsw_sp_rif_counter_dir dir)
276 unsigned int *p_counter_index;
278 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
281 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
282 if (WARN_ON(!p_counter_index))
284 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
285 *p_counter_index, false, dir);
286 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
288 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
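
/* RIF counters are only allocated when counters are enabled on the ERIF
 * dpipe table; only the egress counter is set up automatically when a RIF
 * is created.
 */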
291 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
293 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
294 struct devlink *devlink;
296 devlink = priv_to_devlink(mlxsw_sp->core);
297 if (!devlink_dpipe_table_counter_enabled(devlink,
298 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
300 mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
303 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
305 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
307 mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
310 static struct mlxsw_sp_rif *
311 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
312 const struct net_device *dev);
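
/* A prefix-usage bitmap has one bit per possible prefix length, /0 up to
 * /128, hence sizeof(struct in6_addr) * BITS_PER_BYTE + 1 bits. The set of
 * prefix lengths used by a FIB determines the structure of the LPM tree it
 * is bound to.
 */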
314 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
316 struct mlxsw_sp_prefix_usage {
317 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
320 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
321 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
324 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
325 struct mlxsw_sp_prefix_usage *prefix_usage2)
327 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
331 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
333 struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
335 return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
339 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
340 struct mlxsw_sp_prefix_usage *prefix_usage2)
342 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
346 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
347 unsigned char prefix_len)
349 set_bit(prefix_len, prefix_usage->b);
353 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
354 unsigned char prefix_len)
356 clear_bit(prefix_len, prefix_usage->b);
struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib4_entry {
        struct mlxsw_sp_fib_entry common;
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};

struct mlxsw_sp_fib6_entry {
        struct mlxsw_sp_fib_entry common;
        struct list_head rt6_list;
        unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
        struct list_head list;
        struct rt6_info *rt;
};

enum mlxsw_sp_l3proto {
        MLXSW_SP_L3_PROTO_IPV4,
        MLXSW_SP_L3_PROTO_IPV6,
};
struct mlxsw_sp_lpm_tree {
        u8 id; /* tree ID */
        unsigned int ref_count;
        enum mlxsw_sp_l3proto proto;
        struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        struct mlxsw_sp_vr *vr;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
        enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
        u16 id; /* virtual router ID */
        u32 tb_id; /* kernel fib table id */
        unsigned int rif_count;
        struct mlxsw_sp_fib *fib4;
        struct mlxsw_sp_fib *fib6;
};
438 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
440 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
441 enum mlxsw_sp_l3proto proto)
443 struct mlxsw_sp_fib *fib;
446 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
448 return ERR_PTR(-ENOMEM);
449 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
451 goto err_rhashtable_init;
452 INIT_LIST_HEAD(&fib->node_list);
462 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
464 WARN_ON(!list_empty(&fib->node_list));
465 WARN_ON(fib->lpm_tree);
466 rhashtable_destroy(&fib->ht);
470 static struct mlxsw_sp_lpm_tree *
471 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
473 static struct mlxsw_sp_lpm_tree *lpm_tree;
476 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
477 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
478 if (lpm_tree->ref_count == 0)
484 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
485 struct mlxsw_sp_lpm_tree *lpm_tree)
487 char ralta_pl[MLXSW_REG_RALTA_LEN];
489 mlxsw_reg_ralta_pack(ralta_pl, true,
490 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
492 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
495 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
496 struct mlxsw_sp_lpm_tree *lpm_tree)
498 char ralta_pl[MLXSW_REG_RALTA_LEN];
500 mlxsw_reg_ralta_pack(ralta_pl, false,
501 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
503 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
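
/* Program the tree structure: the bins for the prefix lengths in use are
 * linked into a chain via the RALST register, each bin pointing at the
 * previously programmed (shorter) prefix length as its left child, with
 * the longest used prefix length acting as the root bin.
 */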
507 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
508 struct mlxsw_sp_prefix_usage *prefix_usage,
509 struct mlxsw_sp_lpm_tree *lpm_tree)
511 char ralst_pl[MLXSW_REG_RALST_LEN];
514 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
516 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
519 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
520 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
523 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
524 MLXSW_REG_RALST_BIN_NO_CHILD);
525 last_prefix = prefix;
527 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
530 static struct mlxsw_sp_lpm_tree *
531 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
532 struct mlxsw_sp_prefix_usage *prefix_usage,
533 enum mlxsw_sp_l3proto proto)
535 struct mlxsw_sp_lpm_tree *lpm_tree;
538 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
540 return ERR_PTR(-EBUSY);
541 lpm_tree->proto = proto;
542 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
546 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
549 goto err_left_struct_set;
550 memcpy(&lpm_tree->prefix_usage, prefix_usage,
551 sizeof(lpm_tree->prefix_usage));
555 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
559 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
560 struct mlxsw_sp_lpm_tree *lpm_tree)
562 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
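
/* LPM trees are a scarce resource (tree 0 is reserved), so they are
 * shared: mlxsw_sp_lpm_tree_get() reuses an existing tree whose protocol
 * and prefix usage match and only creates a new one otherwise. Reference
 * counting via _hold()/_put() destroys a tree once its last user is gone.
 */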
565 static struct mlxsw_sp_lpm_tree *
566 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
567 struct mlxsw_sp_prefix_usage *prefix_usage,
568 enum mlxsw_sp_l3proto proto)
570 struct mlxsw_sp_lpm_tree *lpm_tree;
573 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
574 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
575 if (lpm_tree->ref_count != 0 &&
576 lpm_tree->proto == proto &&
577 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
581 return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
584 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
586 lpm_tree->ref_count++;
589 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
590 struct mlxsw_sp_lpm_tree *lpm_tree)
592 if (--lpm_tree->ref_count == 0)
593 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
596 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
598 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
600 struct mlxsw_sp_lpm_tree *lpm_tree;
604 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
607 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
608 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
609 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
610 sizeof(struct mlxsw_sp_lpm_tree),
612 if (!mlxsw_sp->router->lpm.trees)
615 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
616 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
617 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
623 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
625 kfree(mlxsw_sp->router->lpm.trees);
628 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
630 return !!vr->fib4 || !!vr->fib6;
633 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
635 struct mlxsw_sp_vr *vr;
638 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
639 vr = &mlxsw_sp->router->vrs[i];
640 if (!mlxsw_sp_vr_is_used(vr))
646 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
647 const struct mlxsw_sp_fib *fib, u8 tree_id)
649 char raltb_pl[MLXSW_REG_RALTB_LEN];
651 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
652 (enum mlxsw_reg_ralxx_protocol) fib->proto,
654 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
657 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
658 const struct mlxsw_sp_fib *fib)
660 char raltb_pl[MLXSW_REG_RALTB_LEN];
662 /* Bind to tree 0 which is default */
663 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
664 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
665 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
668 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
670 /* For our purpose, squash main and local table into one */
671 if (tb_id == RT_TABLE_LOCAL)
672 tb_id = RT_TABLE_MAIN;
676 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
679 struct mlxsw_sp_vr *vr;
682 tb_id = mlxsw_sp_fix_tb_id(tb_id);
684 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
685 vr = &mlxsw_sp->router->vrs[i];
686 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
692 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
693 enum mlxsw_sp_l3proto proto)
696 case MLXSW_SP_L3_PROTO_IPV4:
698 case MLXSW_SP_L3_PROTO_IPV6:
704 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
707 struct mlxsw_sp_vr *vr;
710 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
712 return ERR_PTR(-EBUSY);
713 vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
714 if (IS_ERR(vr->fib4))
715 return ERR_CAST(vr->fib4);
716 vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
717 if (IS_ERR(vr->fib6)) {
718 err = PTR_ERR(vr->fib6);
719 goto err_fib6_create;
725 mlxsw_sp_fib_destroy(vr->fib4);
730 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
732 mlxsw_sp_fib_destroy(vr->fib6);
734 mlxsw_sp_fib_destroy(vr->fib4);
738 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
740 struct mlxsw_sp_vr *vr;
742 tb_id = mlxsw_sp_fix_tb_id(tb_id);
743 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
745 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
749 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
751 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
752 list_empty(&vr->fib6->node_list))
753 mlxsw_sp_vr_destroy(vr);
757 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
758 enum mlxsw_sp_l3proto proto, u8 tree_id)
760 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
762 if (!mlxsw_sp_vr_is_used(vr))
764 if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
769 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
770 struct mlxsw_sp_fib *fib,
771 struct mlxsw_sp_lpm_tree *new_tree)
773 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
776 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
779 fib->lpm_tree = new_tree;
780 mlxsw_sp_lpm_tree_hold(new_tree);
781 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
785 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
786 struct mlxsw_sp_fib *fib,
787 struct mlxsw_sp_lpm_tree *new_tree)
789 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
790 enum mlxsw_sp_l3proto proto = fib->proto;
791 u8 old_id, new_id = new_tree->id;
792 struct mlxsw_sp_vr *vr;
797 old_id = old_tree->id;
799 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
800 vr = &mlxsw_sp->router->vrs[i];
801 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
803 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
804 mlxsw_sp_vr_fib(vr, proto),
807 goto err_tree_replace;
813 for (i--; i >= 0; i--) {
814 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
816 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
817 mlxsw_sp_vr_fib(vr, proto),
823 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
826 fib->lpm_tree = new_tree;
827 mlxsw_sp_lpm_tree_hold(new_tree);
832 mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
833 enum mlxsw_sp_l3proto proto,
834 struct mlxsw_sp_prefix_usage *req_prefix_usage)
838 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
839 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
840 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
841 unsigned char prefix;
843 if (!mlxsw_sp_vr_is_used(vr))
845 mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
846 mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
850 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
852 struct mlxsw_sp_vr *vr;
856 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
859 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
860 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
862 if (!mlxsw_sp->router->vrs)
865 for (i = 0; i < max_vrs; i++) {
866 vr = &mlxsw_sp->router->vrs[i];
873 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
875 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
        /* At this stage we're guaranteed not to have new incoming
         * FIB notifications and the work queue is free from FIBs
         * sitting on top of mlxsw netdevs. However, we can still
         * have other FIBs queued. Flush the queue before flushing
         * the device's tables. No need for locks, as we're the only
         * writer.
         */
884 mlxsw_core_flush_owq();
885 mlxsw_sp_router_fib_flush(mlxsw_sp);
886 kfree(mlxsw_sp->router->vrs);
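
/* Neighbour offload: entries from the kernel neighbour tables (ARP and
 * ndisc) are mirrored into the device via the RAUHT register, and hardware
 * activity is periodically dumped via RAUHTD so the kernel does not age
 * out entries that are still in use by offloaded traffic.
 */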
struct mlxsw_sp_neigh_key {
        struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
        struct list_head rif_list_node;
        struct rhash_head ht_node;
        struct mlxsw_sp_neigh_key key;
        u16 rif;
        bool connected;
        unsigned char ha[ETH_ALEN];
        struct list_head nexthop_list; /* list of nexthops using
                                        * this neigh entry
                                        */
        struct list_head nexthop_neighs_list_node;
        unsigned int counter_index;
        bool counter_valid;
};
908 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
909 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
910 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
911 .key_len = sizeof(struct mlxsw_sp_neigh_key),
914 struct mlxsw_sp_neigh_entry *
915 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
916 struct mlxsw_sp_neigh_entry *neigh_entry)
919 if (list_empty(&rif->neigh_list))
922 return list_first_entry(&rif->neigh_list,
923 typeof(*neigh_entry),
926 if (neigh_entry->rif_list_node.next == &rif->neigh_list)
928 return list_next_entry(neigh_entry, rif_list_node);
931 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
933 return neigh_entry->key.n->tbl->family;
937 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
939 return neigh_entry->ha;
942 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
946 n = neigh_entry->key.n;
947 return ntohl(*((__be32 *) n->primary_key));
950 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
951 struct mlxsw_sp_neigh_entry *neigh_entry,
954 if (!neigh_entry->counter_valid)
957 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
961 static struct mlxsw_sp_neigh_entry *
962 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
965 struct mlxsw_sp_neigh_entry *neigh_entry;
967 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
971 neigh_entry->key.n = n;
972 neigh_entry->rif = rif;
973 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
978 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
984 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
985 struct mlxsw_sp_neigh_entry *neigh_entry)
987 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
988 &neigh_entry->ht_node,
989 mlxsw_sp_neigh_ht_params);
993 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
994 struct mlxsw_sp_neigh_entry *neigh_entry)
996 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
997 &neigh_entry->ht_node,
998 mlxsw_sp_neigh_ht_params);
1002 mlxsw_sp_neigh4_counter_should_alloc(struct mlxsw_sp *mlxsw_sp)
1004 struct devlink *devlink;
1006 devlink = priv_to_devlink(mlxsw_sp->core);
1007 return devlink_dpipe_table_counter_enabled(devlink,
1008 MLXSW_SP_DPIPE_TABLE_NAME_HOST4);
1012 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1013 struct mlxsw_sp_neigh_entry *neigh_entry)
1015 if (mlxsw_sp_neigh_entry_type(neigh_entry) != AF_INET ||
1016 !mlxsw_sp_neigh4_counter_should_alloc(mlxsw_sp))
1019 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1022 neigh_entry->counter_valid = true;
1026 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1027 struct mlxsw_sp_neigh_entry *neigh_entry)
1029 if (!neigh_entry->counter_valid)
1031 mlxsw_sp_flow_counter_free(mlxsw_sp,
1032 neigh_entry->counter_index);
1033 neigh_entry->counter_valid = false;
1036 static struct mlxsw_sp_neigh_entry *
1037 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1039 struct mlxsw_sp_neigh_entry *neigh_entry;
1040 struct mlxsw_sp_rif *rif;
1043 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1045 return ERR_PTR(-EINVAL);
1047 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1049 return ERR_PTR(-ENOMEM);
1051 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1053 goto err_neigh_entry_insert;
1055 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1056 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1060 err_neigh_entry_insert:
1061 mlxsw_sp_neigh_entry_free(neigh_entry);
1062 return ERR_PTR(err);
1066 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1067 struct mlxsw_sp_neigh_entry *neigh_entry)
1069 list_del(&neigh_entry->rif_list_node);
1070 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1071 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1072 mlxsw_sp_neigh_entry_free(neigh_entry);
1075 static struct mlxsw_sp_neigh_entry *
1076 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1078 struct mlxsw_sp_neigh_key key;
1081 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1082 &key, mlxsw_sp_neigh_ht_params);
1086 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1088 unsigned long interval;
1090 #if IS_ENABLED(CONFIG_IPV6)
1091 interval = min_t(unsigned long,
1092 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1093 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1095 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1097 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1100 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1104 struct net_device *dev;
1105 struct neighbour *n;
1110 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1112 if (!mlxsw_sp->router->rifs[rif]) {
1113 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1118 dev = mlxsw_sp->router->rifs[rif]->dev;
1119 n = neigh_lookup(&arp_tbl, &dipn, dev);
1121 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
1126 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1127 neigh_event_send(n, NULL);
1131 #if IS_ENABLED(CONFIG_IPV6)
1132 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1136 struct net_device *dev;
1137 struct neighbour *n;
1138 struct in6_addr dip;
1141 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1144 if (!mlxsw_sp->router->rifs[rif]) {
1145 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1149 dev = mlxsw_sp->router->rifs[rif]->dev;
1150 n = neigh_lookup(&nd_tbl, &dip, dev);
1152 netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
1157 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1158 neigh_event_send(n, NULL);
1162 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1169 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1176 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1178 /* Hardware starts counting at 0, so add 1. */
1181 /* Each record consists of several neighbour entries. */
1182 for (i = 0; i < num_entries; i++) {
1185 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
1186 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
1192 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1196 /* One record contains one entry. */
1197 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
1201 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
1202 char *rauhtd_pl, int rec_index)
1204 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
1205 case MLXSW_REG_RAUHTD_TYPE_IPV4:
1206 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
1209 case MLXSW_REG_RAUHTD_TYPE_IPV6:
1210 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
1216 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
1218 u8 num_rec, last_rec_index, num_entries;
1220 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
1221 last_rec_index = num_rec - 1;
1223 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
1225 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
1226 MLXSW_REG_RAUHTD_TYPE_IPV6)
1229 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1231 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
1237 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
1239 enum mlxsw_reg_rauhtd_type type)
        /* Make sure the neighbour's netdev isn't removed in the
         * process.
         */
1249 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
1250 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
1256 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
1257 for (i = 0; i < num_rec; i++)
1258 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
1260 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
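
/* Dump all active neighbour entries from the device, IPv4 first and then
 * IPv6, and poke the kernel for each one so that it is not aged out.
 */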
1266 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
1268 enum mlxsw_reg_rauhtd_type type;
1272 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
1276 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
1277 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
1281 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
1282 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
1288 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
1290 struct mlxsw_sp_neigh_entry *neigh_entry;
1292 /* Take RTNL mutex here to prevent lists from changes */
1294 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
1295 nexthop_neighs_list_node)
                /* If this neigh has nexthops, make the kernel think it is
                 * active regardless of the traffic.
                 */
1299 neigh_event_send(neigh_entry->key.n, NULL);
1304 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
1306 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
1308 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
1309 msecs_to_jiffies(interval));
1312 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
1314 struct mlxsw_sp_router *router;
1317 router = container_of(work, struct mlxsw_sp_router,
1318 neighs_update.dw.work);
1319 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
1321 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
1323 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
1325 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
1328 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
1330 struct mlxsw_sp_neigh_entry *neigh_entry;
1331 struct mlxsw_sp_router *router;
1333 router = container_of(work, struct mlxsw_sp_router,
1334 nexthop_probe_dw.work);
        /* Iterate over nexthop neighbours, find those who are unresolved and
         * send ARP on them. This solves the chicken-and-egg problem where a
         * nexthop would not get offloaded until its neighbour is resolved,
         * but the neighbour would never get resolved if traffic is flowing
         * in hardware using a different nexthop.
         *
         * Take RTNL mutex here to prevent lists from changes.
         */
1344 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
1345 nexthop_neighs_list_node)
1346 if (!neigh_entry->connected)
1347 neigh_event_send(neigh_entry->key.n, NULL);
1350 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
1351 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
1355 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1356 struct mlxsw_sp_neigh_entry *neigh_entry,
1359 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
1361 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
1362 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
1366 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
1367 struct mlxsw_sp_neigh_entry *neigh_entry,
1368 enum mlxsw_reg_rauht_op op)
1370 struct neighbour *n = neigh_entry->key.n;
1371 u32 dip = ntohl(*((__be32 *) n->primary_key));
1372 char rauht_pl[MLXSW_REG_RAUHT_LEN];
1374 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1376 if (neigh_entry->counter_valid)
1377 mlxsw_reg_rauht_pack_counter(rauht_pl,
1378 neigh_entry->counter_index);
1379 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1383 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
1384 struct mlxsw_sp_neigh_entry *neigh_entry,
1385 enum mlxsw_reg_rauht_op op)
1387 struct neighbour *n = neigh_entry->key.n;
1388 char rauht_pl[MLXSW_REG_RAUHT_LEN];
1389 const char *dip = n->primary_key;
1391 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1393 if (neigh_entry->counter_valid)
1394 mlxsw_reg_rauht_pack_counter(rauht_pl,
1395 neigh_entry->counter_index);
1396 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1399 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
1401 struct neighbour *n = neigh_entry->key.n;
1403 /* Packets with a link-local destination address are trapped
1404 * after LPM lookup and never reach the neighbour table, so
1405 * there is no need to program such neighbours to the device.
1407 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
1408 IPV6_ADDR_LINKLOCAL)
1414 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
1415 struct mlxsw_sp_neigh_entry *neigh_entry,
1418 if (!adding && !neigh_entry->connected)
1420 neigh_entry->connected = adding;
1421 if (neigh_entry->key.n->tbl->family == AF_INET) {
1422 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
1423 mlxsw_sp_rauht_op(adding));
1424 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
1425 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
1427 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
1428 mlxsw_sp_rauht_op(adding));
1435 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
1436 struct mlxsw_sp_neigh_entry *neigh_entry,
1440 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1442 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1443 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
1446 struct mlxsw_sp_neigh_event_work {
1447 struct work_struct work;
1448 struct mlxsw_sp *mlxsw_sp;
1449 struct neighbour *n;
1452 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
1454 struct mlxsw_sp_neigh_event_work *neigh_work =
1455 container_of(work, struct mlxsw_sp_neigh_event_work, work);
1456 struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
1457 struct mlxsw_sp_neigh_entry *neigh_entry;
1458 struct neighbour *n = neigh_work->n;
1459 unsigned char ha[ETH_ALEN];
1460 bool entry_connected;
        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
1467 read_lock_bh(&n->lock);
1468 memcpy(ha, n->ha, ETH_ALEN);
1469 nud_state = n->nud_state;
1471 read_unlock_bh(&n->lock);
1474 entry_connected = nud_state & NUD_VALID && !dead;
1475 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1476 if (!entry_connected && !neigh_entry)
1479 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1480 if (IS_ERR(neigh_entry))
1484 memcpy(neigh_entry->ha, ha, ETH_ALEN);
1485 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
1486 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
1488 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1489 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
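
/* Netevent notifier. It runs in atomic context, so NEIGH_UPDATE events are
 * deferred to a work item that programs the entry to the device, while
 * DELAY_PROBE_TIME updates only refresh the RAUHTD polling interval.
 */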
1497 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
1498 unsigned long event, void *ptr)
1500 struct mlxsw_sp_neigh_event_work *neigh_work;
1501 struct mlxsw_sp_port *mlxsw_sp_port;
1502 struct mlxsw_sp *mlxsw_sp;
1503 unsigned long interval;
1504 struct neigh_parms *p;
1505 struct neighbour *n;
1508 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1511 /* We don't care about changes in the default table. */
1512 if (!p->dev || (p->tbl->family != AF_INET &&
1513 p->tbl->family != AF_INET6))
1516 /* We are in atomic context and can't take RTNL mutex,
1517 * so use RCU variant to walk the device chain.
1519 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1523 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1524 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1525 mlxsw_sp->router->neighs_update.interval = interval;
1527 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1529 case NETEVENT_NEIGH_UPDATE:
1532 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
1535 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1539 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1541 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1545 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1546 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1549 /* Take a reference to ensure the neighbour won't be
1550 * destructed until we drop the reference in delayed
1554 mlxsw_core_schedule_work(&neigh_work->work);
1555 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1562 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1566 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
1567 &mlxsw_sp_neigh_ht_params);
1571 /* Initialize the polling interval according to the default
1574 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1576 /* Create the delayed works for the activity_update */
1577 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
1578 mlxsw_sp_router_neighs_update_work);
1579 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
1580 mlxsw_sp_router_probe_unresolved_nexthops);
1581 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
1582 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
1586 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1588 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
1589 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
1590 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
1593 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1594 struct mlxsw_sp_rif *rif)
1596 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1598 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
1600 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
1601 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                 * this belongs to
                                                 */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        unsigned char gw_addr[sizeof(struct in6_addr)];
        int ifindex;
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        struct mlxsw_sp_neigh_entry *neigh_entry;
};
struct mlxsw_sp_nexthop_group {
        void *priv;
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
        struct neigh_table *neigh_tbl;
        u8 adj_index_valid:1,
           gateway:1; /* routes using the group use a gateway */
        u32 adj_index;
        u16 ecmp_size;
        u16 count;
        struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif nexthops[0].rif
};
1646 static struct fib_info *
1647 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
1649 return nh_grp->priv;
1652 struct mlxsw_sp_nexthop_group_cmp_arg {
1653 enum mlxsw_sp_l3proto proto;
1655 struct fib_info *fi;
1656 struct mlxsw_sp_fib6_entry *fib6_entry;
1661 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
1662 const struct in6_addr *gw, int ifindex)
1666 for (i = 0; i < nh_grp->count; i++) {
1667 const struct mlxsw_sp_nexthop *nh;
1669 nh = &nh_grp->nexthops[i];
1670 if (nh->ifindex == ifindex &&
1671 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
1679 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
1680 const struct mlxsw_sp_fib6_entry *fib6_entry)
1682 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
1684 if (nh_grp->count != fib6_entry->nrt6)
1687 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
1688 struct in6_addr *gw;
1691 ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
1692 gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
1693 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
1701 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
1703 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
1704 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
1706 switch (cmp_arg->proto) {
1707 case MLXSW_SP_L3_PROTO_IPV4:
1708 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
1709 case MLXSW_SP_L3_PROTO_IPV6:
1710 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
1711 cmp_arg->fib6_entry);
1719 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
1721 return nh_grp->neigh_tbl->family;
1724 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
1726 const struct mlxsw_sp_nexthop_group *nh_grp = data;
1727 const struct mlxsw_sp_nexthop *nh;
1728 struct fib_info *fi;
1732 switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
1734 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
1735 return jhash(&fi, sizeof(fi), seed);
1737 val = nh_grp->count;
1738 for (i = 0; i < nh_grp->count; i++) {
1739 nh = &nh_grp->nexthops[i];
1742 return jhash(&val, sizeof(val), seed);
1750 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
1752 unsigned int val = fib6_entry->nrt6;
1753 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
1754 struct net_device *dev;
1756 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
1757 dev = mlxsw_sp_rt6->rt->dst.dev;
1758 val ^= dev->ifindex;
1761 return jhash(&val, sizeof(val), seed);
1765 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
1767 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
1769 switch (cmp_arg->proto) {
1770 case MLXSW_SP_L3_PROTO_IPV4:
1771 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
1772 case MLXSW_SP_L3_PROTO_IPV6:
1773 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
1780 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1781 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1782 .hashfn = mlxsw_sp_nexthop_group_hash,
1783 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
1784 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
1787 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1788 struct mlxsw_sp_nexthop_group *nh_grp)
1790 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
1794 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
1796 mlxsw_sp_nexthop_group_ht_params);
1799 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1800 struct mlxsw_sp_nexthop_group *nh_grp)
1802 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
1806 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
1808 mlxsw_sp_nexthop_group_ht_params);
1811 static struct mlxsw_sp_nexthop_group *
1812 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
1813 struct fib_info *fi)
1815 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
1817 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
1819 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
1821 mlxsw_sp_nexthop_group_ht_params);
1824 static struct mlxsw_sp_nexthop_group *
1825 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
1826 struct mlxsw_sp_fib6_entry *fib6_entry)
1828 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
1830 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
1831 cmp_arg.fib6_entry = fib6_entry;
1832 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
1834 mlxsw_sp_nexthop_group_ht_params);
1837 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1838 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1839 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1840 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
1843 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1844 struct mlxsw_sp_nexthop *nh)
1846 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
1847 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1850 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1851 struct mlxsw_sp_nexthop *nh)
1853 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
1854 mlxsw_sp_nexthop_ht_params);
1857 static struct mlxsw_sp_nexthop *
1858 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1859 struct mlxsw_sp_nexthop_key key)
1861 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
1862 mlxsw_sp_nexthop_ht_params);
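
/* When an ECMP group is moved to a new block in the KVD linear area, all
 * routes pointing at the old adjacency index are switched to the new index
 * and size in one go using the RALEU register.
 */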
1865 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1866 const struct mlxsw_sp_fib *fib,
1867 u32 adj_index, u16 ecmp_size,
1871 char raleu_pl[MLXSW_REG_RALEU_LEN];
1873 mlxsw_reg_raleu_pack(raleu_pl,
1874 (enum mlxsw_reg_ralxx_protocol) fib->proto,
1875 fib->vr->id, adj_index, ecmp_size, new_adj_index,
1877 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1880 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1881 struct mlxsw_sp_nexthop_group *nh_grp,
1882 u32 old_adj_index, u16 old_ecmp_size)
1884 struct mlxsw_sp_fib_entry *fib_entry;
1885 struct mlxsw_sp_fib *fib = NULL;
1888 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1889 if (fib == fib_entry->fib_node->fib)
1891 fib = fib_entry->fib_node->fib;
1892 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
1903 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1904 struct mlxsw_sp_nexthop *nh)
1906 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1907 char ratr_pl[MLXSW_REG_RATR_LEN];
1909 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1910 true, adj_index, neigh_entry->rif);
1911 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1912 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1916 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1917 struct mlxsw_sp_nexthop_group *nh_grp,
1920 u32 adj_index = nh_grp->adj_index; /* base */
1921 struct mlxsw_sp_nexthop *nh;
1925 for (i = 0; i < nh_grp->count; i++) {
1926 nh = &nh_grp->nexthops[i];
1928 if (!nh->should_offload) {
1933 if (nh->update || reallocate) {
1934 err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1946 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1947 struct mlxsw_sp_fib_entry *fib_entry);
1950 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
1951 const struct mlxsw_sp_fib_entry *fib_entry);
1954 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1955 struct mlxsw_sp_nexthop_group *nh_grp)
1957 struct mlxsw_sp_fib_entry *fib_entry;
1960 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1961 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
1964 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1972 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1973 enum mlxsw_reg_ralue_op op, int err);
1976 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
1978 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
1979 struct mlxsw_sp_fib_entry *fib_entry;
1981 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1982 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
1985 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
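
/* Rebuild the adjacency entries of a nexthop group after a nexthop or
 * neighbour change: count the nexthops that can be offloaded, allocate a
 * fresh adjacency block in the KVD linear area, write the neighbour MACs,
 * repoint the using FIB entries (or mass-update the old index), and free
 * the old block. If nothing can be offloaded, or allocation fails, the
 * routes fall back to trapping packets to the kernel.
 */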
1990 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1991 struct mlxsw_sp_nexthop_group *nh_grp)
1993 struct mlxsw_sp_nexthop *nh;
1994 bool offload_change = false;
1997 bool old_adj_index_valid;
2003 if (!nh_grp->gateway) {
2004 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2008 for (i = 0; i < nh_grp->count; i++) {
2009 nh = &nh_grp->nexthops[i];
2011 if (nh->should_offload != nh->offloaded) {
2012 offload_change = true;
2013 if (nh->should_offload)
2016 if (nh->should_offload)
2019 if (!offload_change) {
2020 /* Nothing was added or removed, so no need to reallocate. Just
2021 * update MAC on existing adjacency indexes.
2023 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
2026 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
        /* No neigh of this group is connected so we just set
         * the trap and let everything flow through kernel.
         */
2037 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
                 */
2042 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
2045 old_adj_index_valid = nh_grp->adj_index_valid;
2046 old_adj_index = nh_grp->adj_index;
2047 old_ecmp_size = nh_grp->ecmp_size;
2048 nh_grp->adj_index_valid = 1;
2049 nh_grp->adj_index = adj_index;
2050 nh_grp->ecmp_size = ecmp_size;
2051 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
2053 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
2057 if (!old_adj_index_valid) {
2058 /* The trap was set for fib entries, so we have to call
2059 * fib entry update to unset it and use adjacency index.
2061 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2063 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
2069 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
2070 old_adj_index, old_ecmp_size);
2071 mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
2073 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
2077 /* Offload state within the group changed, so update the flags. */
2078 mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
2083 old_adj_index_valid = nh_grp->adj_index_valid;
2084 nh_grp->adj_index_valid = 0;
2085 for (i = 0; i < nh_grp->count; i++) {
2086 nh = &nh_grp->nexthops[i];
2089 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2091 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
2092 if (old_adj_index_valid)
2093 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
2096 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
2100 nh->should_offload = 1;
2101 else if (nh->offloaded)
2102 nh->should_offload = 0;
2107 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2108 struct mlxsw_sp_neigh_entry *neigh_entry,
2111 struct mlxsw_sp_nexthop *nh;
2113 list_for_each_entry(nh, &neigh_entry->nexthop_list,
2115 __mlxsw_sp_nexthop_neigh_update(nh, removing);
2116 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
2120 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
2121 struct mlxsw_sp_rif *rif)
2127 list_add(&nh->rif_list_node, &rif->nexthop_list);
2130 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
2135 list_del(&nh->rif_list_node);
2139 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
2140 struct mlxsw_sp_nexthop *nh)
2142 struct mlxsw_sp_neigh_entry *neigh_entry;
2143 struct neighbour *n;
2147 if (!nh->nh_grp->gateway || nh->neigh_entry)
        /* Take a reference on the neighbour to ensure it is not destroyed
         * before the nexthop entry is finished with it. The reference is
         * taken either in neigh_lookup() or in neigh_create() in case n
         * is not found.
         */
2155 n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
2157 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
2161 neigh_event_send(n, NULL);
2163 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2165 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2166 if (IS_ERR(neigh_entry)) {
2168 goto err_neigh_entry_create;
2172 /* If that is the first nexthop connected to that neigh, add to
2173 * nexthop_neighs_list
2175 if (list_empty(&neigh_entry->nexthop_list))
2176 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
2177 &mlxsw_sp->router->nexthop_neighs_list);
2179 nh->neigh_entry = neigh_entry;
2180 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
2181 read_lock_bh(&n->lock);
2182 nud_state = n->nud_state;
2184 read_unlock_bh(&n->lock);
2185 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
2189 err_neigh_entry_create:
2194 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
2195 struct mlxsw_sp_nexthop *nh)
2197 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2198 struct neighbour *n;
2202 n = neigh_entry->key.n;
2204 __mlxsw_sp_nexthop_neigh_update(nh, true);
2205 list_del(&nh->neigh_list_node);
2206 nh->neigh_entry = NULL;
2208 /* If that is the last nexthop connected to that neigh, remove from
2209 * nexthop_neighs_list
2211 if (list_empty(&neigh_entry->nexthop_list))
2212 list_del(&neigh_entry->nexthop_neighs_list_node);
2214 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2215 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2220 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
2221 struct mlxsw_sp_nexthop_group *nh_grp,
2222 struct mlxsw_sp_nexthop *nh,
2223 struct fib_nh *fib_nh)
2225 struct net_device *dev = fib_nh->nh_dev;
2226 struct in_device *in_dev;
2227 struct mlxsw_sp_rif *rif;
2230 nh->nh_grp = nh_grp;
2231 nh->key.fib_nh = fib_nh;
2232 memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
2233 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
2240 in_dev = __in_dev_get_rtnl(dev);
2241 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
2242 fib_nh->nh_flags & RTNH_F_LINKDOWN)
2245 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
2248 mlxsw_sp_nexthop_rif_init(nh, rif);
2250 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
2252 goto err_nexthop_neigh_init;
2256 err_nexthop_neigh_init:
2257 mlxsw_sp_nexthop_rif_fini(nh);
2258 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
2262 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
2263 struct mlxsw_sp_nexthop *nh)
2265 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
2266 mlxsw_sp_nexthop_rif_fini(nh);
2267 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
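
/* React to FIB_EVENT_NH_ADD / FIB_EVENT_NH_DEL for an IPv4 nexthop:
 * (dis)associate it with its RIF and neighbour entry and refresh the
 * owning nexthop group.
 */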
2270 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
2271 unsigned long event, struct fib_nh *fib_nh)
2273 struct mlxsw_sp_nexthop_key key;
2274 struct mlxsw_sp_nexthop *nh;
2275 struct mlxsw_sp_rif *rif;
2277 if (mlxsw_sp->router->aborted)
2280 key.fib_nh = fib_nh;
2281 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
2282 if (WARN_ON_ONCE(!nh))
2285 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
2290 case FIB_EVENT_NH_ADD:
2291 mlxsw_sp_nexthop_rif_init(nh, rif);
2292 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
2294 case FIB_EVENT_NH_DEL:
2295 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
2296 mlxsw_sp_nexthop_rif_fini(nh);
2300 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
2303 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2304 struct mlxsw_sp_rif *rif)
2306 struct mlxsw_sp_nexthop *nh, *tmp;
2308 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
2309 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
2310 mlxsw_sp_nexthop_rif_fini(nh);
2311 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
2315 static struct mlxsw_sp_nexthop_group *
2316 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
2318 struct mlxsw_sp_nexthop_group *nh_grp;
2319 struct mlxsw_sp_nexthop *nh;
2320 struct fib_nh *fib_nh;
2325 alloc_size = sizeof(*nh_grp) +
2326 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
2327 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
2329 return ERR_PTR(-ENOMEM);
2331 INIT_LIST_HEAD(&nh_grp->fib_list);
2332 nh_grp->neigh_tbl = &arp_tbl;
2334 nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
2335 nh_grp->count = fi->fib_nhs;
2337 for (i = 0; i < nh_grp->count; i++) {
2338 nh = &nh_grp->nexthops[i];
2339 fib_nh = &fi->fib_nh[i];
2340 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
2342 goto err_nexthop4_init;
2344 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
2346 goto err_nexthop_group_insert;
2347 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
2350 err_nexthop_group_insert:
2352 for (i--; i >= 0; i--) {
2353 nh = &nh_grp->nexthops[i];
2354 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
2358 return ERR_PTR(err);
2362 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
2363 struct mlxsw_sp_nexthop_group *nh_grp)
2365 struct mlxsw_sp_nexthop *nh;
2368 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
2369 for (i = 0; i < nh_grp->count; i++) {
2370 nh = &nh_grp->nexthops[i];
2371 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
2373 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
2374 WARN_ON_ONCE(nh_grp->adj_index_valid);
2375 fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
2379 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
2380 struct mlxsw_sp_fib_entry *fib_entry,
2381 struct fib_info *fi)
2383 struct mlxsw_sp_nexthop_group *nh_grp;
2385 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
2387 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
2389 return PTR_ERR(nh_grp);
2391 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
2392 fib_entry->nh_group = nh_grp;
2396 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
2397 struct mlxsw_sp_fib_entry *fib_entry)
2399 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
2401 list_del(&fib_entry->nexthop_group_node);
2402 if (!list_empty(&nh_grp->fib_list))
2404 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
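/* Only IPv4 entries with a TOS of zero are candidates for offload; routes
 * keyed on a non-default TOS are left to the kernel.
 */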
2408 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
2410 struct mlxsw_sp_fib4_entry *fib4_entry;
2412 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
2414 return !fib4_entry->tos;
2418 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
2420 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
2422 switch (fib_entry->fib_node->fib->proto) {
2423 case MLXSW_SP_L3_PROTO_IPV4:
2424 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
2427 case MLXSW_SP_L3_PROTO_IPV6:
2431 switch (fib_entry->type) {
2432 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
2433 return !!nh_group->adj_index_valid;
2434 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
2435 return !!nh_group->nh_rif;
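/* Find the nexthop in the group that corresponds to a given IPv6 route by
 * matching the route's egress device and gateway address.
 */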
2441 static struct mlxsw_sp_nexthop *
2442 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
2443 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
2447 for (i = 0; i < nh_grp->count; i++) {
2448 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
2449 struct rt6_info *rt = mlxsw_sp_rt6->rt;
2451 if (nh->rif && nh->rif->dev == rt->dst.dev &&
2452 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
2462 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
2464 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
2467 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
2468 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
2472 for (i = 0; i < nh_grp->count; i++) {
2473 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
2476 nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
2478 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
2483 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
2485 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
2488 for (i = 0; i < nh_grp->count; i++) {
2489 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
2491 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
2496 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
2498 struct mlxsw_sp_fib6_entry *fib6_entry;
2499 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2501 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
2504 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
2505 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
2506 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
2510 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2511 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
2512 struct mlxsw_sp_nexthop *nh;
2514 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
2515 if (nh && nh->offloaded)
2516 mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
2518 mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
2523 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
2525 struct mlxsw_sp_fib6_entry *fib6_entry;
2526 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2528 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
2530 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2531 struct rt6_info *rt = mlxsw_sp_rt6->rt;
2533 rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
2537 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
2539 switch (fib_entry->fib_node->fib->proto) {
2540 case MLXSW_SP_L3_PROTO_IPV4:
2541 mlxsw_sp_fib4_entry_offload_set(fib_entry);
2543 case MLXSW_SP_L3_PROTO_IPV6:
2544 mlxsw_sp_fib6_entry_offload_set(fib_entry);
2550 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
2552 switch (fib_entry->fib_node->fib->proto) {
2553 case MLXSW_SP_L3_PROTO_IPV4:
2554 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
2556 case MLXSW_SP_L3_PROTO_IPV6:
2557 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
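/* Reflect the outcome of a RALUE operation in the kernel's view of the
 * route: a delete clears the RTNH_F_OFFLOAD flags, while a write sets or
 * clears them depending on whether the entry is actually offloaded.
 */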
2563 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2564 enum mlxsw_reg_ralue_op op, int err)
2567 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
2568 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
2569 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
2572 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
2573 mlxsw_sp_fib_entry_offload_set(fib_entry);
2574 else
2575 mlxsw_sp_fib_entry_offload_unset(fib_entry);
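/* Pack the part of the RALUE register common to all operations: protocol,
 * operation, virtual router and destination prefix. IPv4 destinations are
 * passed as a u32, IPv6 destinations as a byte array.
 */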
2583 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
2584 const struct mlxsw_sp_fib_entry *fib_entry,
2585 enum mlxsw_reg_ralue_op op)
2587 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
2588 enum mlxsw_reg_ralxx_protocol proto;
2591 proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
2593 switch (fib->proto) {
2594 case MLXSW_SP_L3_PROTO_IPV4:
2595 p_dip = (u32 *) fib_entry->fib_node->key.addr;
2596 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
2597 fib_entry->fib_node->key.prefix_len,
2600 case MLXSW_SP_L3_PROTO_IPV6:
2601 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
2602 fib_entry->fib_node->key.prefix_len,
2603 fib_entry->fib_node->key.addr);
2608 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
2609 struct mlxsw_sp_fib_entry *fib_entry,
2610 enum mlxsw_reg_ralue_op op)
2612 char ralue_pl[MLXSW_REG_RALUE_LEN];
2613 enum mlxsw_reg_ralue_trap_action trap_action;
2615 u32 adjacency_index = 0;
2618 /* In case the nexthop group adjacency index is valid, use it
2619 * with the provided ECMP size. Otherwise, set up a trap and pass
2620 * traffic to the kernel.
2622 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
2623 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
2624 adjacency_index = fib_entry->nh_group->adj_index;
2625 ecmp_size = fib_entry->nh_group->ecmp_size;
2627 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
2628 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
2631 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
2632 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
2633 adjacency_index, ecmp_size);
2634 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2637 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
2638 struct mlxsw_sp_fib_entry *fib_entry,
2639 enum mlxsw_reg_ralue_op op)
2641 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
2642 enum mlxsw_reg_ralue_trap_action trap_action;
2643 char ralue_pl[MLXSW_REG_RALUE_LEN];
2647 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
2648 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
2649 rif_index = rif->rif_index;
2651 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
2652 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
2655 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
2656 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
2658 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2661 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
2662 struct mlxsw_sp_fib_entry *fib_entry,
2663 enum mlxsw_reg_ralue_op op)
2665 char ralue_pl[MLXSW_REG_RALUE_LEN];
2667 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
2668 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2669 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
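/* Dispatch the RALUE operation according to the entry type: remote entries
 * point at an adjacency group, local entries at a RIF and trap entries punt
 * packets to the CPU.
 */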
2672 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
2673 struct mlxsw_sp_fib_entry *fib_entry,
2674 enum mlxsw_reg_ralue_op op)
2676 switch (fib_entry->type) {
2677 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
2678 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
2679 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
2680 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
2681 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
2682 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
2687 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
2688 struct mlxsw_sp_fib_entry *fib_entry,
2689 enum mlxsw_reg_ralue_op op)
2691 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
2693 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
2698 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
2699 struct mlxsw_sp_fib_entry *fib_entry)
2701 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2702 MLXSW_REG_RALUE_OP_WRITE_WRITE);
2705 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
2706 struct mlxsw_sp_fib_entry *fib_entry)
2708 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2709 MLXSW_REG_RALUE_OP_WRITE_DELETE);
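/* Derive the hardware entry type from the kernel route type: broadcast
 * routes are trapped to the CPU, unreachable and blackhole routes use the
 * lower-priority local action, and unicast routes are remote when their
 * nexthop has link scope and local otherwise.
 */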
2713 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
2714 const struct fib_entry_notifier_info *fen_info,
2715 struct mlxsw_sp_fib_entry *fib_entry)
2717 struct fib_info *fi = fen_info->fi;
2719 switch (fen_info->type) {
2720 case RTN_BROADCAST: /* fall through */
2722 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2724 case RTN_UNREACHABLE: /* fall through */
2725 case RTN_BLACKHOLE: /* fall through */
2727 /* Packets hitting these routes need to be trapped, but
2728 * can do so with a lower priority than packets directed
2729 * at the host, so use action type local instead of trap.
2731 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2734 if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
2735 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2737 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
2744 static struct mlxsw_sp_fib4_entry *
2745 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
2746 struct mlxsw_sp_fib_node *fib_node,
2747 const struct fib_entry_notifier_info *fen_info)
2749 struct mlxsw_sp_fib4_entry *fib4_entry;
2750 struct mlxsw_sp_fib_entry *fib_entry;
2753 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
2755 return ERR_PTR(-ENOMEM);
2756 fib_entry = &fib4_entry->common;
2758 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
2760 goto err_fib4_entry_type_set;
2762 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
2764 goto err_nexthop4_group_get;
2766 fib4_entry->prio = fen_info->fi->fib_priority;
2767 fib4_entry->tb_id = fen_info->tb_id;
2768 fib4_entry->type = fen_info->type;
2769 fib4_entry->tos = fen_info->tos;
2771 fib_entry->fib_node = fib_node;
2775 err_nexthop4_group_get:
2776 err_fib4_entry_type_set:
2778 return ERR_PTR(err);
2781 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2782 struct mlxsw_sp_fib4_entry *fib4_entry)
2784 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
2788 static struct mlxsw_sp_fib_node *
2789 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2790 size_t addr_len, unsigned char prefix_len);
2792 static struct mlxsw_sp_fib4_entry *
2793 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
2794 const struct fib_entry_notifier_info *fen_info)
2796 struct mlxsw_sp_fib4_entry *fib4_entry;
2797 struct mlxsw_sp_fib_node *fib_node;
2798 struct mlxsw_sp_fib *fib;
2799 struct mlxsw_sp_vr *vr;
2801 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
2804 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
2806 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
2807 sizeof(fen_info->dst),
2812 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
2813 if (fib4_entry->tb_id == fen_info->tb_id &&
2814 fib4_entry->tos == fen_info->tos &&
2815 fib4_entry->type == fen_info->type &&
2816 mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
2825 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
2826 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
2827 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
2828 .key_len = sizeof(struct mlxsw_sp_fib_key),
2829 .automatic_shrinking = true,
2832 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
2833 struct mlxsw_sp_fib_node *fib_node)
2835 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2836 mlxsw_sp_fib_ht_params);
2839 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2840 struct mlxsw_sp_fib_node *fib_node)
2842 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2843 mlxsw_sp_fib_ht_params);
2846 static struct mlxsw_sp_fib_node *
2847 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2848 size_t addr_len, unsigned char prefix_len)
2850 struct mlxsw_sp_fib_key key;
2852 memset(&key, 0, sizeof(key));
2853 memcpy(key.addr, addr, addr_len);
2854 key.prefix_len = prefix_len;
2855 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2858 static struct mlxsw_sp_fib_node *
2859 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
2860 size_t addr_len, unsigned char prefix_len)
2862 struct mlxsw_sp_fib_node *fib_node;
2864 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2868 INIT_LIST_HEAD(&fib_node->entry_list);
2869 list_add(&fib_node->list, &fib->node_list);
2870 memcpy(fib_node->key.addr, addr, addr_len);
2871 fib_node->key.prefix_len = prefix_len;
2876 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2878 list_del(&fib_node->list);
2879 WARN_ON(!list_empty(&fib_node->entry_list));
2884 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2885 const struct mlxsw_sp_fib_entry *fib_entry)
2887 return list_first_entry(&fib_node->entry_list,
2888 struct mlxsw_sp_fib_entry, list) == fib_entry;
2891 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
2892 struct mlxsw_sp_fib *fib,
2893 struct mlxsw_sp_fib_node *fib_node)
2895 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
2896 struct mlxsw_sp_lpm_tree *lpm_tree;
2899 /* Since the tree is shared between all virtual routers we must
2900 * make sure it contains all the required prefix lengths. This
2901 * can be computed by either adding the new prefix length to the
2902 * existing prefix usage of a bound tree, or by aggregating the
2903 * prefix lengths across all virtual routers and adding the new one.
2907 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
2908 &fib->lpm_tree->prefix_usage);
2910 mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
2911 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
2913 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2915 if (IS_ERR(lpm_tree))
2916 return PTR_ERR(lpm_tree);
2918 if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
2921 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
2928 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
2929 struct mlxsw_sp_fib *fib)
2931 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
2932 struct mlxsw_sp_lpm_tree *lpm_tree;
2934 /* Aggregate prefix lengths across all virtual routers to make
2935 * sure we only have used prefix lengths in the LPM tree.
2937 mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
2938 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2940 if (IS_ERR(lpm_tree))
2942 mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
2945 if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
2947 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
2948 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
2949 fib->lpm_tree = NULL;
2952 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2954 unsigned char prefix_len = fib_node->key.prefix_len;
2955 struct mlxsw_sp_fib *fib = fib_node->fib;
2957 if (fib->prefix_ref_count[prefix_len]++ == 0)
2958 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2961 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2963 unsigned char prefix_len = fib_node->key.prefix_len;
2964 struct mlxsw_sp_fib *fib = fib_node->fib;
2966 if (--fib->prefix_ref_count[prefix_len] == 0)
2967 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2970 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
2971 struct mlxsw_sp_fib_node *fib_node,
2972 struct mlxsw_sp_fib *fib)
2976 err = mlxsw_sp_fib_node_insert(fib, fib_node);
2979 fib_node->fib = fib;
2981 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
2983 goto err_fib_lpm_tree_link;
2985 mlxsw_sp_fib_node_prefix_inc(fib_node);
2989 err_fib_lpm_tree_link:
2990 fib_node->fib = NULL;
2991 mlxsw_sp_fib_node_remove(fib, fib_node);
2995 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
2996 struct mlxsw_sp_fib_node *fib_node)
2998 struct mlxsw_sp_fib *fib = fib_node->fib;
3000 mlxsw_sp_fib_node_prefix_dec(fib_node);
3001 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
3002 fib_node->fib = NULL;
3003 mlxsw_sp_fib_node_remove(fib, fib_node);
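/* Get a FIB node for the given prefix, creating it and binding its virtual
 * router and LPM tree if it does not exist yet. Paired with
 * mlxsw_sp_fib_node_put().
 */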
3006 static struct mlxsw_sp_fib_node *
3007 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
3008 size_t addr_len, unsigned char prefix_len,
3009 enum mlxsw_sp_l3proto proto)
3011 struct mlxsw_sp_fib_node *fib_node;
3012 struct mlxsw_sp_fib *fib;
3013 struct mlxsw_sp_vr *vr;
3016 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
3018 return ERR_CAST(vr);
3019 fib = mlxsw_sp_vr_fib(vr, proto);
3021 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
3025 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
3028 goto err_fib_node_create;
3031 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
3033 goto err_fib_node_init;
3038 mlxsw_sp_fib_node_destroy(fib_node);
3039 err_fib_node_create:
3040 mlxsw_sp_vr_put(vr);
3041 return ERR_PTR(err);
3044 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
3045 struct mlxsw_sp_fib_node *fib_node)
3047 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
3049 if (!list_empty(&fib_node->entry_list))
3051 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
3052 mlxsw_sp_fib_node_destroy(fib_node);
3053 mlxsw_sp_vr_put(vr);
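/* Entries under a FIB node are kept ordered by table ID, TOS and priority.
 * Find the existing entry relative to which a new entry should be replaced,
 * appended or inserted.
 */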
3056 static struct mlxsw_sp_fib4_entry *
3057 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
3058 const struct mlxsw_sp_fib4_entry *new4_entry)
3060 struct mlxsw_sp_fib4_entry *fib4_entry;
3062 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
3063 if (fib4_entry->tb_id > new4_entry->tb_id)
3065 if (fib4_entry->tb_id != new4_entry->tb_id)
3067 if (fib4_entry->tos > new4_entry->tos)
3069 if (fib4_entry->prio >= new4_entry->prio ||
3070 fib4_entry->tos < new4_entry->tos)
3078 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
3079 struct mlxsw_sp_fib4_entry *new4_entry)
3081 struct mlxsw_sp_fib_node *fib_node;
3083 if (WARN_ON(!fib4_entry))
3086 fib_node = fib4_entry->common.fib_node;
3087 list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
3089 if (fib4_entry->tb_id != new4_entry->tb_id ||
3090 fib4_entry->tos != new4_entry->tos ||
3091 fib4_entry->prio != new4_entry->prio)
3095 list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
3100 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
3101 bool replace, bool append)
3103 struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
3104 struct mlxsw_sp_fib4_entry *fib4_entry;
3106 fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
3109 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
3110 if (replace && WARN_ON(!fib4_entry))
3113 /* Insert new entry before replaced one, so that we can later
3114 * remove the second.
3117 list_add_tail(&new4_entry->common.list,
3118 &fib4_entry->common.list);
3120 struct mlxsw_sp_fib4_entry *last;
3122 list_for_each_entry(last, &fib_node->entry_list, common.list) {
3123 if (new4_entry->tb_id > last->tb_id)
3129 list_add(&new4_entry->common.list,
3130 &fib4_entry->common.list);
3132 list_add(&new4_entry->common.list,
3133 &fib_node->entry_list);
3140 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
3142 list_del(&fib4_entry->common.list);
3145 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
3146 struct mlxsw_sp_fib_entry *fib_entry)
3148 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
3150 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
3153 /* To prevent packet loss, overwrite the previously offloaded entry.
3156 if (!list_is_singular(&fib_node->entry_list)) {
3157 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
3158 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
3160 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
3163 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3166 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
3167 struct mlxsw_sp_fib_entry *fib_entry)
3169 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
3171 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
3174 /* Promote the next entry by overwriting the deleted entry */
3175 if (!list_is_singular(&fib_node->entry_list)) {
3176 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
3177 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
3179 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
3180 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3184 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
3187 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
3188 struct mlxsw_sp_fib4_entry *fib4_entry,
3189 bool replace, bool append)
3193 err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
3197 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
3199 goto err_fib_node_entry_add;
3203 err_fib_node_entry_add:
3204 mlxsw_sp_fib4_node_list_remove(fib4_entry);
3209 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
3210 struct mlxsw_sp_fib4_entry *fib4_entry)
3212 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
3213 mlxsw_sp_fib4_node_list_remove(fib4_entry);
3216 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
3217 struct mlxsw_sp_fib4_entry *fib4_entry,
3220 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
3221 struct mlxsw_sp_fib4_entry *replaced;
3226 /* We inserted the new entry before replaced one */
3227 replaced = list_next_entry(fib4_entry, common.list);
3229 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
3230 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
3231 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
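/* Handle an IPv4 route addition: get the FIB node for the destination
 * prefix, create the entry and link it into the node. In the replace case
 * the previously programmed entry is then unlinked and destroyed.
 */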
3235 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
3236 const struct fib_entry_notifier_info *fen_info,
3237 bool replace, bool append)
3239 struct mlxsw_sp_fib4_entry *fib4_entry;
3240 struct mlxsw_sp_fib_node *fib_node;
3243 if (mlxsw_sp->router->aborted)
3246 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
3247 &fen_info->dst, sizeof(fen_info->dst),
3249 MLXSW_SP_L3_PROTO_IPV4);
3250 if (IS_ERR(fib_node)) {
3251 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
3252 return PTR_ERR(fib_node);
3255 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
3256 if (IS_ERR(fib4_entry)) {
3257 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
3258 err = PTR_ERR(fib4_entry);
3259 goto err_fib4_entry_create;
3262 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
3265 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
3266 goto err_fib4_node_entry_link;
3269 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
3273 err_fib4_node_entry_link:
3274 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
3275 err_fib4_entry_create:
3276 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
3280 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
3281 struct fib_entry_notifier_info *fen_info)
3283 struct mlxsw_sp_fib4_entry *fib4_entry;
3284 struct mlxsw_sp_fib_node *fib_node;
3286 if (mlxsw_sp->router->aborted)
3289 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
3290 if (WARN_ON(!fib4_entry))
3292 fib_node = fib4_entry->common.fib_node;
3294 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
3295 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
3296 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
3299 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
3301 /* Packets with link-local destination IP arriving to the router
3302 * are trapped to the CPU, so no need to program specific routes for them.
3305 if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
3308 /* Multicast routes aren't supported, so ignore them. Neighbour
3309 * Discovery packets are specifically trapped.
3311 if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
3314 /* Cloned routes are irrelevant in the forwarding path. */
3315 if (rt->rt6i_flags & RTF_CACHE)
3321 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
3323 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3325 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
3327 return ERR_PTR(-ENOMEM);
3329 /* In case of route replace, replaced route is deleted with
3330 * no notification. Take reference to prevent accessing freed memory.
3333 mlxsw_sp_rt6->rt = rt;
3336 return mlxsw_sp_rt6;
3339 #if IS_ENABLED(CONFIG_IPV6)
3340 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
3345 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
3350 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3352 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
3353 kfree(mlxsw_sp_rt6);
3356 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
3358 /* RTF_CACHE routes are ignored */
3359 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
3362 static struct rt6_info *
3363 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
3365 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list)->rt;
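/* Look for an existing multipath-capable entry in the same table that a new
 * gateway route with an equal metric can be appended to. Returns NULL for
 * replace operations and for routes that cannot be part of a multipath
 * entry.
 */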
3369 static struct mlxsw_sp_fib6_entry *
3370 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
3371 const struct rt6_info *nrt, bool replace)
3373 struct mlxsw_sp_fib6_entry *fib6_entry;
3375 if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
3378 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
3379 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
3381 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same virtual router.
3384 if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
3386 if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
3388 if (rt->rt6i_metric < nrt->rt6i_metric)
3390 if (rt->rt6i_metric == nrt->rt6i_metric &&
3391 mlxsw_sp_fib6_rt_can_mp(rt))
3393 if (rt->rt6i_metric > nrt->rt6i_metric)
3400 static struct mlxsw_sp_rt6 *
3401 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
3402 const struct rt6_info *rt)
3404 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3406 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3407 if (mlxsw_sp_rt6->rt == rt)
3408 return mlxsw_sp_rt6;
3414 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
3415 struct mlxsw_sp_nexthop_group *nh_grp,
3416 struct mlxsw_sp_nexthop *nh,
3417 const struct rt6_info *rt)
3419 struct net_device *dev = rt->dst.dev;
3420 struct mlxsw_sp_rif *rif;
3423 nh->nh_grp = nh_grp;
3424 memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
3428 nh->ifindex = dev->ifindex;
3430 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3433 mlxsw_sp_nexthop_rif_init(nh, rif);
3435 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3437 goto err_nexthop_neigh_init;
3441 err_nexthop_neigh_init:
3442 mlxsw_sp_nexthop_rif_fini(nh);
3446 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
3447 struct mlxsw_sp_nexthop *nh)
3449 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3450 mlxsw_sp_nexthop_rif_fini(nh);
3453 static struct mlxsw_sp_nexthop_group *
3454 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
3455 struct mlxsw_sp_fib6_entry *fib6_entry)
3457 struct mlxsw_sp_nexthop_group *nh_grp;
3458 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3459 struct mlxsw_sp_nexthop *nh;
3464 alloc_size = sizeof(*nh_grp) +
3465 fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
3466 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3468 return ERR_PTR(-ENOMEM);
3469 INIT_LIST_HEAD(&nh_grp->fib_list);
3470 #if IS_ENABLED(CONFIG_IPV6)
3471 nh_grp->neigh_tbl = &nd_tbl;
3473 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
3474 struct mlxsw_sp_rt6, list);
3475 nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
3476 nh_grp->count = fib6_entry->nrt6;
3477 for (i = 0; i < nh_grp->count; i++) {
3478 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3480 nh = &nh_grp->nexthops[i];
3481 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
3483 goto err_nexthop6_init;
3484 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
3487 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3489 goto err_nexthop_group_insert;
3491 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3494 err_nexthop_group_insert:
3496 for (i--; i >= 0; i--) {
3497 nh = &nh_grp->nexthops[i];
3498 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
3501 return ERR_PTR(err);
3505 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
3506 struct mlxsw_sp_nexthop_group *nh_grp)
3508 struct mlxsw_sp_nexthop *nh;
3509 int i = nh_grp->count;
3511 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3512 for (i--; i >= 0; i--) {
3513 nh = &nh_grp->nexthops[i];
3514 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
3516 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3517 WARN_ON(nh_grp->adj_index_valid);
3521 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
3522 struct mlxsw_sp_fib6_entry *fib6_entry)
3524 struct mlxsw_sp_nexthop_group *nh_grp;
3526 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
3528 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
3530 return PTR_ERR(nh_grp);
3533 list_add_tail(&fib6_entry->common.nexthop_group_node,
3535 fib6_entry->common.nh_group = nh_grp;
3540 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
3541 struct mlxsw_sp_fib_entry *fib_entry)
3543 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3545 list_del(&fib_entry->nexthop_group_node);
3546 if (!list_empty(&nh_grp->fib_list))
3548 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
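/* The set of routes in an IPv6 entry changed: detach the entry from its old
 * nexthop group, get (or create) a group matching the new route list and
 * rewrite the entry in the device so it uses the new adjacency entries. The
 * old group is destroyed once no entry references it.
 */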
3552 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
3553 struct mlxsw_sp_fib6_entry *fib6_entry)
3555 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
3558 fib6_entry->common.nh_group = NULL;
3559 list_del(&fib6_entry->common.nexthop_group_node);
3561 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
3563 goto err_nexthop6_group_get;
3565 /* In case this entry is offloaded, then the adjacency index
3566 * currently associated with it in the device's table is that
3567 * of the old group. Start using the new one instead.
3569 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
3571 goto err_fib_node_entry_add;
3573 if (list_empty(&old_nh_grp->fib_list))
3574 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
3578 err_fib_node_entry_add:
3579 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
3580 err_nexthop6_group_get:
3581 list_add_tail(&fib6_entry->common.nexthop_group_node,
3582 &old_nh_grp->fib_list);
3583 fib6_entry->common.nh_group = old_nh_grp;
3588 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
3589 struct mlxsw_sp_fib6_entry *fib6_entry,
3590 struct rt6_info *rt)
3592 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3595 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
3596 if (IS_ERR(mlxsw_sp_rt6))
3597 return PTR_ERR(mlxsw_sp_rt6);
3599 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
3602 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
3604 goto err_nexthop6_group_update;
3608 err_nexthop6_group_update:
3610 list_del(&mlxsw_sp_rt6->list);
3611 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
3616 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
3617 struct mlxsw_sp_fib6_entry *fib6_entry,
3618 struct rt6_info *rt)
3620 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3622 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
3623 if (WARN_ON(!mlxsw_sp_rt6))
3627 list_del(&mlxsw_sp_rt6->list);
3628 mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
3629 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
3632 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
3633 const struct rt6_info *rt)
3635 /* Packets hitting RTF_REJECT routes need to be discarded by the
3636 * stack. We can rely on their destination device not having a
3637 * RIF (it's the loopback device) and can thus use action type
3638 * local, which will cause them to be trapped with a lower
3639 * priority than packets that need to be locally received.
3641 if (rt->rt6i_flags & RTF_LOCAL)
3642 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
3643 else if (rt->rt6i_flags & RTF_REJECT)
3644 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3645 else if (rt->rt6i_flags & RTF_GATEWAY)
3646 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
3648 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3652 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
3654 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
3656 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
3659 list_del(&mlxsw_sp_rt6->list);
3660 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
3664 static struct mlxsw_sp_fib6_entry *
3665 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
3666 struct mlxsw_sp_fib_node *fib_node,
3667 struct rt6_info *rt)
3669 struct mlxsw_sp_fib6_entry *fib6_entry;
3670 struct mlxsw_sp_fib_entry *fib_entry;
3671 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3674 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
3676 return ERR_PTR(-ENOMEM);
3677 fib_entry = &fib6_entry->common;
3679 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
3680 if (IS_ERR(mlxsw_sp_rt6)) {
3681 err = PTR_ERR(mlxsw_sp_rt6);
3682 goto err_rt6_create;
3685 mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);
3687 INIT_LIST_HEAD(&fib6_entry->rt6_list);
3688 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
3689 fib6_entry->nrt6 = 1;
3690 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
3692 goto err_nexthop6_group_get;
3694 fib_entry->fib_node = fib_node;
3698 err_nexthop6_group_get:
3699 list_del(&mlxsw_sp_rt6->list);
3700 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
3703 return ERR_PTR(err);
3706 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
3707 struct mlxsw_sp_fib6_entry *fib6_entry)
3709 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
3710 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
3711 WARN_ON(fib6_entry->nrt6);
3715 static struct mlxsw_sp_fib6_entry *
3716 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
3717 const struct rt6_info *nrt, bool replace)
3719 struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
3721 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
3722 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
3724 if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
3726 if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
3728 if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
3729 if (mlxsw_sp_fib6_rt_can_mp(rt) ==
3730 mlxsw_sp_fib6_rt_can_mp(nrt))
3732 if (mlxsw_sp_fib6_rt_can_mp(nrt))
3733 fallback = fallback ?: fib6_entry;
3735 if (rt->rt6i_metric > nrt->rt6i_metric)
3736 return fallback ?: fib6_entry;
3743 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
3746 struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
3747 struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
3748 struct mlxsw_sp_fib6_entry *fib6_entry;
3750 fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
3752 if (replace && WARN_ON(!fib6_entry))
3756 list_add_tail(&new6_entry->common.list,
3757 &fib6_entry->common.list);
3759 struct mlxsw_sp_fib6_entry *last;
3761 list_for_each_entry(last, &fib_node->entry_list, common.list) {
3762 struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
3764 if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
3770 list_add(&new6_entry->common.list,
3771 &fib6_entry->common.list);
3773 list_add(&new6_entry->common.list,
3774 &fib_node->entry_list);
3781 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
3783 list_del(&fib6_entry->common.list);
3786 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
3787 struct mlxsw_sp_fib6_entry *fib6_entry,
3792 err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
3796 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
3798 goto err_fib_node_entry_add;
3802 err_fib_node_entry_add:
3803 mlxsw_sp_fib6_node_list_remove(fib6_entry);
3808 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
3809 struct mlxsw_sp_fib6_entry *fib6_entry)
3811 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
3812 mlxsw_sp_fib6_node_list_remove(fib6_entry);
3815 static struct mlxsw_sp_fib6_entry *
3816 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
3817 const struct rt6_info *rt)
3819 struct mlxsw_sp_fib6_entry *fib6_entry;
3820 struct mlxsw_sp_fib_node *fib_node;
3821 struct mlxsw_sp_fib *fib;
3822 struct mlxsw_sp_vr *vr;
3824 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
3827 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
3829 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
3830 sizeof(rt->rt6i_dst.addr),
3835 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
3836 struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
3838 if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
3839 rt->rt6i_metric == iter_rt->rt6i_metric &&
3840 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
3847 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
3848 struct mlxsw_sp_fib6_entry *fib6_entry,
3851 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
3852 struct mlxsw_sp_fib6_entry *replaced;
3857 replaced = list_next_entry(fib6_entry, common.list);
3859 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
3860 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
3861 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
3864 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
3865 struct rt6_info *rt, bool replace)
3867 struct mlxsw_sp_fib6_entry *fib6_entry;
3868 struct mlxsw_sp_fib_node *fib_node;
3871 if (mlxsw_sp->router->aborted)
3874 if (rt->rt6i_src.plen)
3877 if (mlxsw_sp_fib6_rt_should_ignore(rt))
3880 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
3882 sizeof(rt->rt6i_dst.addr),
3884 MLXSW_SP_L3_PROTO_IPV6);
3885 if (IS_ERR(fib_node))
3886 return PTR_ERR(fib_node);
3888 /* Before creating a new entry, try to append route to an existing multipath entry.
3891 fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
3893 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
3895 goto err_fib6_entry_nexthop_add;
3899 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
3900 if (IS_ERR(fib6_entry)) {
3901 err = PTR_ERR(fib6_entry);
3902 goto err_fib6_entry_create;
3905 err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
3907 goto err_fib6_node_entry_link;
3909 mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
3913 err_fib6_node_entry_link:
3914 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
3915 err_fib6_entry_create:
3916 err_fib6_entry_nexthop_add:
3917 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
3921 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
3922 struct rt6_info *rt)
3924 struct mlxsw_sp_fib6_entry *fib6_entry;
3925 struct mlxsw_sp_fib_node *fib_node;
3927 if (mlxsw_sp->router->aborted)
3930 if (mlxsw_sp_fib6_rt_should_ignore(rt))
3933 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
3934 if (WARN_ON(!fib6_entry))
3937 /* If route is part of a multipath entry, but not the last one
3938 * removed, then only reduce its nexthop group.
3940 if (!list_is_singular(&fib6_entry->rt6_list)) {
3941 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
3945 fib_node = fib6_entry->common.fib_node;
3947 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
3948 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
3949 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
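/* Allocate a minimal LPM tree, bind it to every in-use virtual router and
 * install a default (/0) route with an ip2me action, so that all routed
 * traffic is trapped to the CPU after abort.
 */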
3952 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
3953 enum mlxsw_reg_ralxx_protocol proto,
3956 char ralta_pl[MLXSW_REG_RALTA_LEN];
3957 char ralst_pl[MLXSW_REG_RALST_LEN];
3960 mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
3961 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
3965 mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
3966 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
3970 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
3971 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
3972 char raltb_pl[MLXSW_REG_RALTB_LEN];
3973 char ralue_pl[MLXSW_REG_RALUE_LEN];
3975 if (!mlxsw_sp_vr_is_used(vr))
3978 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
3979 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
3984 mlxsw_reg_ralue_pack(ralue_pl, proto,
3985 MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
3986 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3987 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
3996 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
3998 enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
4001 err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
4002 MLXSW_SP_LPM_TREE_MIN);
4006 proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
4007 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
4008 MLXSW_SP_LPM_TREE_MIN + 1);
4011 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
4012 struct mlxsw_sp_fib_node *fib_node)
4014 struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
4016 list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
4018 bool do_break = &tmp->common.list == &fib_node->entry_list;
4020 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4021 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4022 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4023 /* Break when entry list is empty and node was freed.
4024 * Otherwise, we'll access freed memory in the next iteration.
4032 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
4033 struct mlxsw_sp_fib_node *fib_node)
4035 struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
4037 list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
4039 bool do_break = &tmp->common.list == &fib_node->entry_list;
4041 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
4042 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4043 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4049 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
4050 struct mlxsw_sp_fib_node *fib_node)
4052 switch (fib_node->fib->proto) {
4053 case MLXSW_SP_L3_PROTO_IPV4:
4054 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
4056 case MLXSW_SP_L3_PROTO_IPV6:
4057 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
4062 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
4063 struct mlxsw_sp_vr *vr,
4064 enum mlxsw_sp_l3proto proto)
4066 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
4067 struct mlxsw_sp_fib_node *fib_node, *tmp;
4069 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
4070 bool do_break = &tmp->list == &fib->node_list;
4072 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
4078 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
4082 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
4083 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
4085 if (!mlxsw_sp_vr_is_used(vr))
4087 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
4089 /* If virtual router was only used for IPv4, then it's no longer used.
4092 if (!mlxsw_sp_vr_is_used(vr))
4094 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
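/* Abort FIB offloading: flush everything that was programmed to the device,
 * mark the router as aborted so subsequent FIB events are ignored, and
 * install the default traps so routing is done by the kernel instead.
 */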
4098 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
4102 if (mlxsw_sp->router->aborted)
4104 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
4105 mlxsw_sp_router_fib_flush(mlxsw_sp);
4106 mlxsw_sp->router->aborted = true;
4107 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
4109 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
4112 struct mlxsw_sp_fib_event_work {
4113 struct work_struct work;
4115 struct fib6_entry_notifier_info fen6_info;
4116 struct fib_entry_notifier_info fen_info;
4117 struct fib_rule_notifier_info fr_info;
4118 struct fib_nh_notifier_info fnh_info;
4120 struct mlxsw_sp *mlxsw_sp;
4121 unsigned long event;
4124 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
4126 struct mlxsw_sp_fib_event_work *fib_work =
4127 container_of(work, struct mlxsw_sp_fib_event_work, work);
4128 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
4129 struct fib_rule *rule;
4130 bool replace, append;
4133 /* Protect internal structures from changes */
4135 switch (fib_work->event) {
4136 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
4137 case FIB_EVENT_ENTRY_APPEND: /* fall through */
4138 case FIB_EVENT_ENTRY_ADD:
4139 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
4140 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
4141 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
4144 mlxsw_sp_router_fib_abort(mlxsw_sp);
4145 fib_info_put(fib_work->fen_info.fi);
4147 case FIB_EVENT_ENTRY_DEL:
4148 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
4149 fib_info_put(fib_work->fen_info.fi);
4151 case FIB_EVENT_RULE_ADD: /* fall through */
4152 case FIB_EVENT_RULE_DEL:
4153 rule = fib_work->fr_info.rule;
4154 if (!fib4_rule_default(rule) && !rule->l3mdev)
4155 mlxsw_sp_router_fib_abort(mlxsw_sp);
4158 case FIB_EVENT_NH_ADD: /* fall through */
4159 case FIB_EVENT_NH_DEL:
4160 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
4161 fib_work->fnh_info.fib_nh);
4162 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
4169 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
4171 struct mlxsw_sp_fib_event_work *fib_work =
4172 container_of(work, struct mlxsw_sp_fib_event_work, work);
4173 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
4174 struct fib_rule *rule;
4179 switch (fib_work->event) {
4180 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
4181 case FIB_EVENT_ENTRY_ADD:
4182 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
4183 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
4184 fib_work->fen6_info.rt, replace);
4186 mlxsw_sp_router_fib_abort(mlxsw_sp);
4187 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
4189 case FIB_EVENT_ENTRY_DEL:
4190 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
4191 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
4193 case FIB_EVENT_RULE_ADD: /* fall through */
4194 case FIB_EVENT_RULE_DEL:
4195 rule = fib_work->fr_info.rule;
4196 if (!fib6_rule_default(rule) && !rule->l3mdev)
4197 mlxsw_sp_router_fib_abort(mlxsw_sp);
4205 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
4206 struct fib_notifier_info *info)
4208 switch (fib_work->event) {
4209 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
4210 case FIB_EVENT_ENTRY_APPEND: /* fall through */
4211 case FIB_EVENT_ENTRY_ADD: /* fall through */
4212 case FIB_EVENT_ENTRY_DEL:
4213 memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
4214 /* Take reference on fib_info to prevent it from being
4215 * freed while work is queued. Release it afterwards.
4217 fib_info_hold(fib_work->fen_info.fi);
4219 case FIB_EVENT_RULE_ADD: /* fall through */
4220 case FIB_EVENT_RULE_DEL:
4221 memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
4222 fib_rule_get(fib_work->fr_info.rule);
4224 case FIB_EVENT_NH_ADD: /* fall through */
4225 case FIB_EVENT_NH_DEL:
4226 memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
4227 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
4232 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
4233 struct fib_notifier_info *info)
4235 switch (fib_work->event) {
4236 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
4237 case FIB_EVENT_ENTRY_ADD: /* fall through */
4238 case FIB_EVENT_ENTRY_DEL:
4239 memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
4240 rt6_hold(fib_work->fen6_info.rt);
4242 case FIB_EVENT_RULE_ADD: /* fall through */
4243 case FIB_EVENT_RULE_DEL:
4244 memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
4245 fib_rule_get(fib_work->fr_info.rule);
4250 /* Called with rcu_read_lock() */
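/* FIB notifier callback. Runs in atomic context, so the event is copied into
 * a work item, references are taken on the objects it points to, and the
 * actual processing is deferred to process context.
 */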
4251 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
4252 unsigned long event, void *ptr)
4254 struct mlxsw_sp_fib_event_work *fib_work;
4255 struct fib_notifier_info *info = ptr;
4256 struct mlxsw_sp_router *router;
4258 if (!net_eq(info->net, &init_net))
4261 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
4262 if (WARN_ON(!fib_work))
4265 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
4266 fib_work->mlxsw_sp = router->mlxsw_sp;
4267 fib_work->event = event;
4269 switch (info->family) {
4271 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
4272 mlxsw_sp_router_fib4_event(fib_work, info);
4275 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
4276 mlxsw_sp_router_fib6_event(fib_work, info);
4280 mlxsw_core_schedule_work(&fib_work->work);
4285 static struct mlxsw_sp_rif *
4286 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
4287 const struct net_device *dev)
4291 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
4292 if (mlxsw_sp->router->rifs[i] &&
4293 mlxsw_sp->router->rifs[i]->dev == dev)
4294 return mlxsw_sp->router->rifs[i];
4299 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
4301 char ritr_pl[MLXSW_REG_RITR_LEN];
4304 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
4305 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
4306 if (WARN_ON_ONCE(err))
4309 mlxsw_reg_ritr_enable_set(ritr_pl, false);
4310 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
4313 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4314 struct mlxsw_sp_rif *rif)
4316 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
4317 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
4318 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
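/* Decide whether an address event should change the RIF configuration of a
 * netdev. A RIF is only torn down once the netdev has neither IPv4 nor IPv6
 * addresses left and is not an L3 slave.
 */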
4322 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
4323 unsigned long event)
4325 struct inet6_dev *inet6_dev;
4326 bool addr_list_empty = true;
4327 struct in_device *idev;
4333 idev = __in_dev_get_rtnl(dev);
4334 if (idev && idev->ifa_list)
4335 addr_list_empty = false;
4337 inet6_dev = __in6_dev_get(dev);
4338 if (addr_list_empty && inet6_dev &&
4339 !list_empty(&inet6_dev->addr_list))
4340 addr_list_empty = false;
4342 if (rif && addr_list_empty &&
4343 !netif_is_l3_slave(rif->dev))
4345 /* It is possible we already removed the RIF ourselves
4346 * if it was assigned to a netdev that is now a bridged port.
4355 static enum mlxsw_sp_rif_type
4356 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
4357 const struct net_device *dev)
4359 enum mlxsw_sp_fid_type type;
4361 /* RIF type is derived from the type of the underlying FID */
4362 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
4363 type = MLXSW_SP_FID_TYPE_8021Q;
4364 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
4365 type = MLXSW_SP_FID_TYPE_8021Q;
4366 else if (netif_is_bridge_master(dev))
4367 type = MLXSW_SP_FID_TYPE_8021D;
4369 type = MLXSW_SP_FID_TYPE_RFID;
4371 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
4374 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
4378 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
4379 if (!mlxsw_sp->router->rifs[i]) {
4388 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
4390 struct net_device *l3_dev)
4392 struct mlxsw_sp_rif *rif;
4394 rif = kzalloc(rif_size, GFP_KERNEL);
4398 INIT_LIST_HEAD(&rif->nexthop_list);
4399 INIT_LIST_HEAD(&rif->neigh_list);
4400 ether_addr_copy(rif->addr, l3_dev->dev_addr);
4401 rif->mtu = l3_dev->mtu;
4404 rif->rif_index = rif_index;
4409 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
4412 return mlxsw_sp->router->rifs[rif_index];
4415 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
4417 return rif->rif_index;
4420 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
4422 return rif->dev->ifindex;
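/* Create a router interface (RIF) for a netdev: derive the RIF type from the
 * underlying FID, bind it to the virtual router of the netdev's FIB table
 * (or the main table), allocate a free RIF index, configure the RIF in the
 * device and program an FDB entry for the netdev's MAC so that traffic
 * addressed to it is routed.
 */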
4425 static struct mlxsw_sp_rif *
4426 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
4427 const struct mlxsw_sp_rif_params *params)
4429 u32 tb_id = l3mdev_fib_table(params->dev);
4430 const struct mlxsw_sp_rif_ops *ops;
4431 enum mlxsw_sp_rif_type type;
4432 struct mlxsw_sp_rif *rif;
4433 struct mlxsw_sp_fid *fid;
4434 struct mlxsw_sp_vr *vr;
4438 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
4439 ops = mlxsw_sp->router->rif_ops_arr[type];
4441 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
4443 return ERR_CAST(vr);
4445 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
4447 goto err_rif_index_alloc;
4449 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
4454 rif->mlxsw_sp = mlxsw_sp;
4457 fid = ops->fid_get(rif);
4465 ops->setup(rif, params);
4467 err = ops->configure(rif);
4471 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr,
4472 mlxsw_sp_fid_index(fid), true);
4474 goto err_rif_fdb_op;
4476 mlxsw_sp_rif_counters_alloc(rif);
4477 mlxsw_sp_fid_rif_set(fid, rif);
4478 mlxsw_sp->router->rifs[rif_index] = rif;
4484 ops->deconfigure(rif);
4486 mlxsw_sp_fid_put(fid);
4490 err_rif_index_alloc:
4491 mlxsw_sp_vr_put(vr);
4492 return ERR_PTR(err);
4495 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
4497 const struct mlxsw_sp_rif_ops *ops = rif->ops;
4498 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
4499 struct mlxsw_sp_fid *fid = rif->fid;
4500 struct mlxsw_sp_vr *vr;
4502 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
4503 vr = &mlxsw_sp->router->vrs[rif->vr_id];
4506 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
4507 mlxsw_sp_fid_rif_set(fid, NULL);
4508 mlxsw_sp_rif_counters_free(rif);
4509 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
4510 mlxsw_sp_fid_index(fid), false);
4511 ops->deconfigure(rif);
4512 mlxsw_sp_fid_put(fid);
4514 mlxsw_sp_vr_put(vr);
4518 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
4519 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
4521 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
4523 params->vid = mlxsw_sp_port_vlan->vid;
4524 params->lag = mlxsw_sp_port->lagged;
4526 params->lag_id = mlxsw_sp_port->lag_id;
4528 params->system_port = mlxsw_sp_port->local_port;
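/* Join a {port, VLAN} to the router: create or reuse the RIF for the L3
 * device, map the port and VID to the RIF's FID, disable learning and put
 * the VID in forwarding state, since traffic on it is now routed.
 */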
4532 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
4533 struct net_device *l3_dev)
4535 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
4536 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
4537 u16 vid = mlxsw_sp_port_vlan->vid;
4538 struct mlxsw_sp_rif *rif;
4539 struct mlxsw_sp_fid *fid;
4542 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
4544 struct mlxsw_sp_rif_params params = {
4548 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
4549 rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
4551 return PTR_ERR(rif);
4554 /* FID was already created, just take a reference */
4555 fid = rif->ops->fid_get(rif);
4556 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
4558 goto err_fid_port_vid_map;
4560 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
4562 goto err_port_vid_learning_set;
4564 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
4565 BR_STATE_FORWARDING);
4567 goto err_port_vid_stp_set;
4569 mlxsw_sp_port_vlan->fid = fid;
4573 err_port_vid_stp_set:
4574 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
4575 err_port_vid_learning_set:
4576 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
4577 err_fid_port_vid_map:
4578 mlxsw_sp_fid_put(fid);
4583 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
4585 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
4586 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
4587 u16 vid = mlxsw_sp_port_vlan->vid;
4589 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
4592 mlxsw_sp_port_vlan->fid = NULL;
4593 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
4594 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
4595 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
4596 /* If router port holds the last reference on the rFID, then the
4597 * associated Sub-port RIF will be destroyed.
4599 mlxsw_sp_fid_put(fid);
static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
                                             struct net_device *port_dev,
                                             unsigned long event, u16 vid)
{
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;

        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return -EINVAL;

        switch (event) {
        case NETDEV_UP:
                return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
                                                      l3_dev);
        case NETDEV_DOWN:
                mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
                break;
        }

        return 0;
}

static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
                                        unsigned long event)
{
        if (netif_is_bridge_port(port_dev) ||
            netif_is_lag_port(port_dev) ||
            netif_is_ovs_port(port_dev))
                return 0;

        return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1);
}

static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
                                         struct net_device *lag_dev,
                                         unsigned long event, u16 vid)
{
        struct net_device *port_dev;
        struct list_head *iter;
        int err;

        netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
                if (mlxsw_sp_port_dev_check(port_dev)) {
                        err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
                                                                port_dev,
                                                                event, vid);
                        if (err)
                                return err;
                }
        }

        return 0;
}

static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
                                       unsigned long event)
{
        if (netif_is_bridge_port(lag_dev))
                return 0;

        return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
}

static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
                                          unsigned long event)
{
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
        struct mlxsw_sp_rif_params params = {
                .dev = l3_dev,
        };
        struct mlxsw_sp_rif *rif;

        switch (event) {
        case NETDEV_UP:
                rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
                if (IS_ERR(rif))
                        return PTR_ERR(rif);
                break;
        case NETDEV_DOWN:
                rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
                mlxsw_sp_rif_destroy(rif);
                break;
        }

        return 0;
}

static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
                                        unsigned long event)
{
        struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
        u16 vid = vlan_dev_vlan_id(vlan_dev);

        if (netif_is_bridge_port(vlan_dev))
                return 0;

        if (mlxsw_sp_port_dev_check(real_dev))
                return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
                                                         event, vid);
        else if (netif_is_lag_master(real_dev))
                return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
                                                     vid);
        else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
                return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event);

        return 0;
}

static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
                                     unsigned long event)
{
        if (mlxsw_sp_port_dev_check(dev))
                return mlxsw_sp_inetaddr_port_event(dev, event);
        else if (netif_is_lag_master(dev))
                return mlxsw_sp_inetaddr_lag_event(dev, event);
        else if (netif_is_bridge_master(dev))
                return mlxsw_sp_inetaddr_bridge_event(dev, event);
        else if (is_vlan_dev(dev))
                return mlxsw_sp_inetaddr_vlan_event(dev, event);
        else
                return 0;
}
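
/* IPv4 address notifications are handled synchronously; the RIF is only
 * (de)configured when mlxsw_sp_rif_should_config() indicates that the
 * event actually changes whether the netdev needs a RIF.
 */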
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
                            unsigned long event, void *ptr)
{
        struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
        struct net_device *dev = ifa->ifa_dev->dev;
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif *rif;
        int err = 0;

        mlxsw_sp = mlxsw_sp_lower_get(dev);
        if (!mlxsw_sp)
                goto out;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
        if (!mlxsw_sp_rif_should_config(rif, dev, event))
                goto out;

        err = __mlxsw_sp_inetaddr_event(dev, event);
out:
        return notifier_from_errno(err);
}

struct mlxsw_sp_inet6addr_event_work {
        struct work_struct work;
        struct net_device *dev;
        unsigned long event;
};

static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
        struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
                container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
        struct net_device *dev = inet6addr_work->dev;
        unsigned long event = inet6addr_work->event;
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif *rif;

        rtnl_lock();
        mlxsw_sp = mlxsw_sp_lower_get(dev);
        if (!mlxsw_sp)
                goto out;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
        if (!mlxsw_sp_rif_should_config(rif, dev, event))
                goto out;

        __mlxsw_sp_inetaddr_event(dev, event);
out:
        rtnl_unlock();
        dev_put(dev);
        kfree(inet6addr_work);
}

/* Called with rcu_read_lock() */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
                             unsigned long event, void *ptr)
{
        struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
        struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
        struct net_device *dev = if6->idev->dev;

        if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
                return NOTIFY_DONE;

        inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
        if (!inet6addr_work)
                return NOTIFY_BAD;

        INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
        inet6addr_work->dev = dev;
        inet6addr_work->event = event;
        dev_hold(dev);
        mlxsw_core_schedule_work(&inet6addr_work->work);

        return NOTIFY_DONE;
}
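
/* Edit an existing RIF in-place: read back the current RITR register,
 * update the MTU and MAC and re-issue it with the create opcode so the
 * device keeps the same RIF index.
 */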
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
                             const char *mac, int mtu)
{
        char ritr_pl[MLXSW_REG_RITR_LEN];
        int err;

        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
        if (err)
                return err;

        mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
        mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
        mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
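
/* Called when the MAC or MTU of a netdev backing a RIF changes. The old
 * FDB entry is removed, the RIF is edited and a new FDB entry is added;
 * on failure the previous address and MTU are restored.
 */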
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif *rif;
        u16 fid_index;
        int err;

        mlxsw_sp = mlxsw_sp_lower_get(dev);
        if (!mlxsw_sp)
                return 0;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
        if (!rif)
                return 0;
        fid_index = mlxsw_sp_fid_index(rif->fid);

        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
        if (err)
                return err;

        err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
                                dev->mtu);
        if (err)
                goto err_rif_edit;

        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
        if (err)
                goto err_rif_fdb_op;

        ether_addr_copy(rif->addr, dev->dev_addr);
        rif->mtu = dev->mtu;

        netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

        return 0;

err_rif_fdb_op:
        mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
        mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
        return err;
}
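
/* VRF enslavement: joining is implemented by replaying a NETDEV_UP event
 * on the netdev so that a RIF is created in the VRF's virtual router,
 * and leaving by replaying NETDEV_DOWN.
 */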
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
                                  struct net_device *l3_dev)
{
        struct mlxsw_sp_rif *rif;

        /* If netdev is already associated with a RIF, then we need to
         * destroy it and create a new one with the new virtual router ID.
         */
        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
        if (rif)
                __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);

        return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
}

static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
                                    struct net_device *l3_dev)
{
        struct mlxsw_sp_rif *rif;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
        if (!rif)
                return;
        __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
}

int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
                                 struct netdev_notifier_changeupper_info *info)
{
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
        int err = 0;

        if (!mlxsw_sp)
                return 0;

        switch (event) {
        case NETDEV_PRECHANGEUPPER:
                return 0;
        case NETDEV_CHANGEUPPER:
                if (info->linking)
                        err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
                else
                        mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
                break;
        }

        return err;
}
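
/* Per-type RIF operations. Sub-port RIFs are bound directly to a
 * {system port / LAG, VID} pair and use an rFID allocated according to
 * the RIF index.
 */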
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
        return container_of(rif, struct mlxsw_sp_rif_subport, common);
}

static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
                                       const struct mlxsw_sp_rif_params *params)
{
        struct mlxsw_sp_rif_subport *rif_subport;

        rif_subport = mlxsw_sp_rif_subport_rif(rif);
        rif_subport->vid = params->vid;
        rif_subport->lag = params->lag;
        if (rif_subport->lag)
                rif_subport->lag_id = params->lag_id;
        else
                rif_subport->system_port = params->system_port;
}

static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        struct mlxsw_sp_rif_subport *rif_subport;
        char ritr_pl[MLXSW_REG_RITR_LEN];

        rif_subport = mlxsw_sp_rif_subport_rif(rif);
        mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
                            rif->rif_index, rif->vr_id, rif->dev->mtu,
                            rif->dev->dev_addr);
        mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
                                  rif_subport->lag ? rif_subport->lag_id :
                                                     rif_subport->system_port,
                                  rif_subport->vid);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
        return mlxsw_sp_rif_subport_op(rif, true);
}

static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
        mlxsw_sp_rif_subport_op(rif, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
        return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
        .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
        .setup                  = mlxsw_sp_rif_subport_setup,
        .configure              = mlxsw_sp_rif_subport_configure,
        .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
        .fid_get                = mlxsw_sp_rif_subport_fid_get,
};
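
/* VLAN and FID RIFs are programmed through the same RITR helper; only the
 * interface type (VLAN_IF / FID_IF) and the VID or FID carried in the
 * register differ.
 */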
static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
                                    enum mlxsw_reg_ritr_if_type type,
                                    u16 vid_fid, bool enable)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        char ritr_pl[MLXSW_REG_RITR_LEN];

        mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
                            rif->dev->mtu, rif->dev->dev_addr);
        mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
        return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
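
/* mlxsw_sp_router_port() returns the local port used to represent the
 * router in the FID's flood tables: one past the maximum port number.
 * Configuring a VLAN or FID RIF adds this port to the FID's MC and BC
 * flood tables so that flooded packets also reach the router.
 */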
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
        int err;

        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
        if (err)
                return err;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_mc_flood_set;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_bc_flood_set;

        return 0;

err_fid_bc_flood_set:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
        return err;
}

static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);

        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
{
        u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;

        return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_VLAN,
        .rif_size               = sizeof(struct mlxsw_sp_rif),
        .configure              = mlxsw_sp_rif_vlan_configure,
        .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
        .fid_get                = mlxsw_sp_rif_vlan_fid_get,
};

static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 fid_index = mlxsw_sp_fid_index(rif->fid);
        int err;

        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
                                       true);
        if (err)
                return err;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_mc_flood_set;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_bc_flood_set;

        return 0;

err_fid_bc_flood_set:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
        return err;
}

static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 fid_index = mlxsw_sp_fid_index(rif->fid);

        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
        return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_FID,
        .rif_size               = sizeof(struct mlxsw_sp_rif),
        .configure              = mlxsw_sp_rif_fid_configure,
        .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
        .fid_get                = mlxsw_sp_rif_fid_fid_get,
};

static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
        [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
        [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_ops,
        [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
};
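
/* The RIF array is sized according to the MAX_RIFS resource queried from
 * the device; a RIF's index is also its slot in this array.
 */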
static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
{
        u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

        mlxsw_sp->router->rifs = kcalloc(max_rifs,
                                         sizeof(struct mlxsw_sp_rif *),
                                         GFP_KERNEL);
        if (!mlxsw_sp->router->rifs)
                return -ENOMEM;

        mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;

        return 0;
}

static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
{
        int i;

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
                WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);

        kfree(mlxsw_sp->router->rifs);
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
        struct mlxsw_sp_router *router;

        /* Flush pending FIB notifications and then flush the device's
         * table before requesting another dump. The FIB notification
         * block is unregistered, so no need to take RTNL.
         */
        mlxsw_core_flush_owq();
        router = container_of(nb, struct mlxsw_sp_router, fib_nb);
        mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
        u64 max_rifs;
        int err;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
                return -EIO;
        max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

        mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
        mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
        if (err)
                return err;
        return 0;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
        char rgcr_pl[MLXSW_REG_RGCR_LEN];

        mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
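
/* Router initialization: the order below matters, as each step is rolled
 * back by the corresponding error label in reverse order, and
 * mlxsw_sp_router_fini() tears everything down in the same reverse order.
 */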
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_router *router;
        int err;

        router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
        if (!router)
                return -ENOMEM;
        mlxsw_sp->router = router;
        router->mlxsw_sp = mlxsw_sp;

        INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
        err = __mlxsw_sp_router_init(mlxsw_sp);
        if (err)
                goto err_router_init;

        err = mlxsw_sp_rifs_init(mlxsw_sp);
        if (err)
                goto err_rifs_init;

        err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
                              &mlxsw_sp_nexthop_ht_params);
        if (err)
                goto err_nexthop_ht_init;

        err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
                              &mlxsw_sp_nexthop_group_ht_params);
        if (err)
                goto err_nexthop_group_ht_init;

        err = mlxsw_sp_lpm_init(mlxsw_sp);
        if (err)
                goto err_lpm_init;

        err = mlxsw_sp_vrs_init(mlxsw_sp);
        if (err)
                goto err_vrs_init;

        err = mlxsw_sp_neigh_init(mlxsw_sp);
        if (err)
                goto err_neigh_init;

        mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
        err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
                                    mlxsw_sp_router_fib_dump_flush);
        if (err)
                goto err_register_fib_notifier;

        return 0;

err_register_fib_notifier:
        mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
        mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
        mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
        rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
        rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
        mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
        __mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
        kfree(mlxsw_sp->router);
        return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
        unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
        mlxsw_sp_neigh_fini(mlxsw_sp);
        mlxsw_sp_vrs_fini(mlxsw_sp);
        mlxsw_sp_lpm_fini(mlxsw_sp);
        rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
        rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
        mlxsw_sp_rifs_fini(mlxsw_sp);
        __mlxsw_sp_router_fini(mlxsw_sp);
        kfree(mlxsw_sp->router);
}