/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_router.h"
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct {
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	bool aborted;
	struct notifier_block fib_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
};
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}
static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}
static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}
static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}
static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
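/* RIF counter binding, as implemented below: a counter is taken from the
 * RIF counter sub-pool, cleared via RICNT so that no stale values are
 * ever reported, and only then bound to the RIF and direction with RITR.
 * mlxsw_sp_rif_counter_free() undoes the same steps in reverse order.
 */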
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}
void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);

#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
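/* A prefix usage is a bitmap with one bit per possible prefix length:
 * sizeof(struct in6_addr) * BITS_PER_BYTE + 1 = 129 bits, covering /0 up
 * to /128 (IPv4 uses only the first 33 of them). For example, a virtual
 * router holding just 192.0.2.0/24 and a default route has bits 24 and 0
 * set. LPM trees are shared between virtual routers whose prefix usage
 * is equal.
 */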
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
};
static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	WARN_ON(fib->lpm_tree);
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
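/* The tree's left structure describes one bin per used prefix length.
 * As implemented below, the root bin is the longest used prefix and each
 * bin names the next shorter used prefix as its left child, so a failed
 * lookup at a more specific prefix falls back to the less specific ones.
 */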
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
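/* Trees are reference counted and shared: a tree whose protocol and
 * prefix usage already match the request is reused, and a new one is
 * only programmed (via RALTA + RALST) when no such tree exists.
 */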
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			return lpm_tree;
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					      sizeof(struct mlxsw_sp_lpm_tree),
					      GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	return 0;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	kfree(mlxsw_sp->router->lpm.trees);
}
static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local tables into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->fib4))
		return ERR_CAST(vr->fib4);
	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(vr->fib6)) {
		err = PTR_ERR(vr->fib6);
		goto err_fib6_create;
	}
	vr->tb_id = tb_id;
	return vr;

err_fib6_create:
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_fib_destroy(vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
}
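/* Virtual router lifetime: a VR is considered in use as long as either
 * of its FIB tables exists. mlxsw_sp_vr_get() reuses the VR already
 * bound to the (squashed) kernel table ID or creates one, and
 * mlxsw_sp_vr_put() destroys the VR once no RIFs and no FIB nodes
 * reference it.
 */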
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list))
		mlxsw_sp_vr_destroy(vr);
}
static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
		return true;
	return false;
}
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		return err;
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;
}
static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	enum mlxsw_sp_l3proto proto = fib->proto;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	if (!old_tree)
		goto no_replace;
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;

no_replace:
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		return err;
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	return 0;
}
static void
mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
		      enum mlxsw_sp_l3proto proto,
		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
		unsigned char prefix;

		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
	}
}
static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;
	bool counter_valid;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (neigh_entry->rif_list_node.next == &rif->neigh_list)
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}
u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	}
}
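/* Heuristic for deciding whether the activity dump should be repeated:
 * if fewer than the maximum number of records came back, the table was
 * drained. Otherwise, a completely filled last IPv4 record, or a
 * trailing IPv6 record, suggests more entries may still be pending.
 */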
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_rauhtd_type type;
	char *rauhtd_pl;
	int err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
	if (err)
		goto out;

	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);

out:
	kfree(rauhtd_pl);
	return err;
}
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent the lists from changing */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router->neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
			       msecs_to_jiffies(interval));
}
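/* Periodic work: dump neighbour activity from the device and poke the
 * kernel for every active entry, so that the kernel's NUD state machine
 * does not age out neighbours whose traffic flows only in hardware.
 */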
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp_router *router;
	int err;

	router = container_of(work, struct mlxsw_sp_router,
			      neighs_update.dw.work);
	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");

	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
}
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find the unresolved ones and
	 * send neighbour probes for them. This solves the chicken-and-egg
	 * problem: a nexthop is not offloaded until its neighbour is
	 * resolved, but the neighbour might never be resolved if traffic
	 * is already flowing in hardware via a different nexthop.
	 *
	 * Take RTNL mutex here to prevent the lists from changing.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n = neigh_entry->key.n;

	/* Packets with a link-local destination address are trapped
	 * after LPM lookup and never reach the neighbour table, so
	 * there is no need to program such neighbours to the device.
	 */
	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
	    IPV6_ADDR_LINKLOCAL)
		return true;
	return false;
}
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl->family == AF_INET) {
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
			return;
		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else {
		WARN_ON_ONCE(1);
	}
}
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
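/* Netevent notifications arrive in atomic context, so the actual
 * neighbour programming is deferred to the work item below, which is
 * free to take the RTNL mutex and issue device writes.
 */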
struct mlxsw_sp_neigh_event_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;
};
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_neigh_event_work *neigh_work =
		container_of(work, struct mlxsw_sp_neigh_event_work, work);
	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = neigh_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * do the work.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);
	kfree(neigh_work);
}
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_event_work *neigh_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
		if (!neigh_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		neigh_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destroyed until we drop the reference in the work
		 * item.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&neigh_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	}

	return NOTIFY_DONE;
}
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity update and for
	 * probing of unresolved nexthops.
	 */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)];
	int ifindex;
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};
struct mlxsw_sp_nexthop_group {
	void *priv;
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl;
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif	nexthops[0].rif
};

static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};

static bool
mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
				    const struct in6_addr *gw, int ifindex)
{
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		const struct mlxsw_sp_nexthop *nh;

		nh = &nh_grp->nexthops[i];
		if (nh->ifindex == ifindex &&
		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
			return true;
	}

	return false;
}

static bool
mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
			    const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	if (nh_grp->count != fib6_entry->nrt6)
		return false;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct in6_addr *gw;
		int ifindex;

		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
			return false;
	}

	return true;
}
static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}

static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}

static u32
mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
{
	unsigned int val = fib6_entry->nrt6;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct net_device *dev;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		dev = mlxsw_sp_rt6->rt->dst.dev;
		val ^= dev->ifindex;
	}

	return jhash(&val, sizeof(val), seed);
}
static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
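/* The nexthop group table is keyed differently per protocol: IPv4
 * groups hash and compare by their struct fib_info pointer, while IPv6
 * groups hash the nexthop count XORed with the ifindices and compare
 * the full set of (ifindex, gateway) pairs, as IPv6 routes have no
 * shared fib_info-like object to key on. The obj_hashfn above must
 * therefore produce the same value as the key hashfn for both cases.
 */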
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
};
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return 0;

	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &nh_grp->ht_node,
				      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return;

	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
			       &nh_grp->ht_node,
			       mlxsw_sp_nexthop_group_ht_params);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
			       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;

	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
	cmp_arg.fi = fi;
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &cmp_arg,
				      mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;

	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
	cmp_arg.fib6_entry = fib6_entry;
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &cmp_arg,
				      mlxsw_sp_nexthop_group_ht_params);
}
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}
static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  bool reallocate)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update || reallocate) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);

static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
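/* Refresh a nexthop group after its composition or neighbour state has
 * changed. Roughly: count the offloadable nexthops, allocate a fresh
 * adjacency block of that size in the KVD linear area, write the
 * neighbour MACs into it, and then either update the FIB entries to use
 * the new adjacency index or mass-update the old one via RALEU. On any
 * failure the group falls back to trapping packets to the kernel
 * (set_trap).
 */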
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int i;
	int err;

	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
							false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing)
		nh->should_offload = 1;
	else if (nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
				      struct mlxsw_sp_rif *rif)
{
	if (nh->rif)
		return;

	nh->rif = rif;
	list_add(&nh->rif_list_node, &rif->nexthop_list);
}

static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
{
	if (!nh->rif)
		return;

	list_del(&nh->rif_list_node);
	nh->rif = NULL;
}
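/* Associate a nexthop with a neighbour entry. The neighbour is looked
 * up (or created) in the kernel table first and an event is kicked so
 * that resolution starts; the nexthop is then linked to the driver's
 * neigh entry and its initial offload state is derived from the current
 * NUD state.
 */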
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference on the neighbour, ensuring it is not
	 * destroyed before the nexthop entry is finished with it.
	 * The reference is taken either by neigh_lookup() or by
	 * neigh_create() in case n is not found.
	 */
	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router->nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	struct mlxsw_sp_rif *rif;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	if (!dev)
		return 0;

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_rif_fini(nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;
	struct mlxsw_sp_rif *rif;

	if (mlxsw_sp->router->aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
	if (!rif)
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop_rif_init(nh, rif);
		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
2379 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
2380 struct mlxsw_sp_nexthop_group *nh_grp)
2382 struct mlxsw_sp_nexthop *nh;
2385 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
2386 for (i = 0; i < nh_grp->count; i++) {
2387 nh = &nh_grp->nexthops[i];
2388 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
2390 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
2391 WARN_ON_ONCE(nh_grp->adj_index_valid);
2392 fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
2396 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
2397 struct mlxsw_sp_fib_entry *fib_entry,
2398 struct fib_info *fi)
2400 struct mlxsw_sp_nexthop_group *nh_grp;
2402 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
2404 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
2406 return PTR_ERR(nh_grp);
2408 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
2409 fib_entry->nh_group = nh_grp;
2413 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
2414 struct mlxsw_sp_fib_entry *fib_entry)
2416 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
2418 list_del(&fib_entry->nexthop_group_node);
2419 if (!list_empty(&nh_grp->fib_list))
2421 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
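/* IPv4 nexthop groups are reference counted and keyed by the kernel's
 * fib_info, so all routes sharing a fib_info also share one adjacency
 * group in the device. The last _group_put() destroys the group and
 * drops the fib_info reference taken at creation time.
 */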
static bool
mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	return !fib4_entry->tos;
}

static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	default:
		return false;
	}
}
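/* Whether an entry can be offloaded depends on its type: a gateway
 * (remote) route - e.g. one added with "ip route add 198.51.100.0/24
 * via 192.0.2.1" - requires a valid adjacency index, while a directly
 * connected (local) route only requires a RIF. IPv4 routes with a
 * non-zero TOS are never offloaded.
 */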
static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		if (nh->rif && nh->rif->dev == rt->dst.dev &&
		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
				    &rt->rt6i_gateway))
			return nh;
	}

	return NULL;
}
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_set(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_set(fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}

static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
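/* The RALUE write above either points the LPM entry at a range of
 * adjacency entries (trap action NOP plus a valid adjacency index and
 * ECMP size), or - when the entry cannot be offloaded - programs a
 * trap so matching packets are punted to the CPU and forwarded by the
 * kernel instead.
 */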
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);

	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);

	return err;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
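/* Both update and delete funnel through mlxsw_sp_fib_entry_op(), so the
 * RTNH_F_OFFLOAD flags reported to user space are refreshed after every
 * hardware write, taking the result of the write into account.
 */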
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_BROADCAST: /* fall through */
	case RTN_LOCAL:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		return 0;
	default:
		return -EINVAL;
	}
}
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi)
			return fib4_entry;
	}

	return NULL;
}
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
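/* FIB nodes are hashed by {address, prefix length}. The key is
 * zero-initialized before the copy so that unused address bytes and
 * structure padding cannot influence the hash or the comparison.
 */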
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}

static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib *fib,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	/* Since the tree is shared between all virtual routers we must
	 * make sure it contains all the required prefix lengths. This
	 * can be computed by either adding the new prefix length to the
	 * existing prefix usage of a bound tree, or by aggregating the
	 * prefix lengths across all virtual routers and adding the new
	 * one as well.
	 */
	if (fib->lpm_tree)
		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &fib->lpm_tree->prefix_usage);
	else
		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
		return 0;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		return err;

	return 0;
}

static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;

	/* Aggregate prefix lengths across all virtual routers to make
	 * sure we only have used prefix lengths in the LPM tree.
	 */
	mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		goto err_tree_get;
	mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);

err_tree_get:
	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
		return;
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	fib->lpm_tree = NULL;
}
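/* An LPM tree describes the set of prefix lengths a lookup may match,
 * and one tree can be bound to several virtual routers. For example, if
 * one VR holds /24 routes and another holds /32 routes, the shared tree
 * must describe both lengths; once a length stops being used anywhere,
 * the tree is recomputed from the aggregated usage so that it only
 * contains prefix lengths which are actually in use.
 */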
static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->fib;

	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
}

static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->fib;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
}

static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	mlxsw_sp_fib_node_prefix_inc(fib_node);

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}

static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_node_prefix_dec(fib_node);
	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(vr);
}
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}

static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}

static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
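/* Only the first entry in a FIB node's sorted entry list is programmed
 * into the device; the remaining entries merely shadow it. When the
 * first entry is added or removed, the node's LPM entry is overwritten
 * in place, so packets never miss the prefix during the transition.
 */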
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib4_entry *fib4_entry,
					 bool replace, bool append)
{
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib4_entry);
	return err;
}

static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);
}
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}

static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
{
	/* Packets with link-local destination IP arriving to the router
	 * are trapped to the CPU, so no need to program specific routes
	 * for them.
	 */
	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
		return true;

	/* Multicast routes aren't supported, so ignore them. Neighbour
	 * Discovery packets are specifically trapped.
	 */
	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
		return true;

	/* Cloned routes are irrelevant in the forwarding path. */
	if (rt->rt6i_flags & RTF_CACHE)
		return true;

	return false;
}

static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	rt6_hold(rt);

	return mlxsw_sp_rt6;
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
	rt6_release(rt);
}
#else
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
}
#endif

static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}

static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}

static struct rt6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (rt->rt6i_metric < nrt->rt6i_metric)
			continue;
		if (rt->rt6i_metric == nrt->rt6i_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		if (rt->rt6i_metric > nrt->rt6i_metric)
			break;
	}

	return NULL;
}

static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
			    const struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		if (mlxsw_sp_rt6->rt == rt)
			return mlxsw_sp_rt6;
	}

	return NULL;
}
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;
	struct mlxsw_sp_rif *rif;
	int err;

	nh->nh_grp = nh_grp;
	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));

	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}

static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_rif_fini(nh);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY);
	nh_grp->count = fib6_entry->nrt6;
	for (i = 0; i < nh_grp->count; i++) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
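/* Unlike IPv4, where groups are keyed by fib_info, an IPv6 nexthop
 * group is built from the rt6 list of a multipath entry and looked up
 * effectively by its set of nexthops. Appending or removing a sibling
 * route therefore rebuilds the group via
 * mlxsw_sp_nexthop6_group_update() rather than mutating it in place.
 */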
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i = nh_grp->count;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}

static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}

static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}

static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry,
					 const struct rt6_info *rt)
{
	/* Packets hitting RTF_REJECT routes need to be discarded by the
	 * stack. We can rely on their destination device not having a
	 * RIF (it's the loopback device) and can thus use action type
	 * local, which will cause them to be trapped with a lower
	 * priority than packets that need to be locally received.
	 */
	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->rt6i_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else if (rt->rt6i_flags & RTF_GATEWAY)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}

static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt);

	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}

static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib6_entry *fib6_entry,
					 bool replace)
{
	int err;

	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
	return err;
}

static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
					    sizeof(rt->rt6i_dst.addr),
					    rt->rt6i_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
		    rt->rt6i_metric == iter_rt->rt6i_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}

static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct rt6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->rt6i_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
					 &rt->rt6i_dst.addr,
					 sizeof(rt->rt6i_dst.addr),
					 rt->rt6i_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}

static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
	int err;

	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
					       MLXSW_SP_LPM_TREE_MIN);
	if (err)
		return err;

	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
						MLXSW_SP_LPM_TREE_MIN + 1);
}
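/* The abort path binds a minimal LPM tree to every virtual router and
 * installs a default-route (/0) entry with an ip2me action per VR and
 * protocol, so that once offloading is aborted all routed traffic is
 * trapped to the CPU and handled by the kernel.
 */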
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}

static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}

static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
		break;
	}
}
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}

static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	struct fib_rule *rule;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		rule = fib_work->fr_info.rule;
		if (!fib4_rule_default(rule) && !rule->l3mdev)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_rule_put(rule);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	struct fib_rule *rule;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		rule = fib_work->fr_info.rule;
		if (!fib6_rule_default(rule) && !rule->l3mdev)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_rule_put(rule);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
		fib_rule_get(fib_work->fr_info.rule);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}

static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
		rt6_hold(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
		fib_rule_get(fib_work->fr_info.rule);
		break;
	}
}
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;

	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
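/* The FIB notifier runs in atomic context, so the event is copied into
 * a work item and processed later under RTNL. References on fib_info,
 * FIB rules and rt6_info are taken by the _event() helpers above and
 * released by the work functions once the event has been handled.
 */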
static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		if (mlxsw_sp->router->rifs[i] &&
		    mlxsw_sp->router->rifs[i]->dev == dev)
			return mlxsw_sp->router->rifs[i];

	return NULL;
}

static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
static bool
mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
			   unsigned long event)
{
	struct inet6_dev *inet6_dev;
	bool addr_list_empty = true;
	struct in_device *idev;

	switch (event) {
	case NETDEV_UP:
		return rif == NULL;
	case NETDEV_DOWN:
		idev = __in_dev_get_rtnl(dev);
		if (idev && idev->ifa_list)
			addr_list_empty = false;

		inet6_dev = __in6_dev_get(dev);
		if (addr_list_empty && inet6_dev &&
		    !list_empty(&inet6_dev->addr_list))
			addr_list_empty = false;

		if (rif && addr_list_empty &&
		    !netif_is_l3_slave(rif->dev))
			return true;
		/* It is possible we already removed the RIF ourselves
		 * if it was assigned to a netdev that is now a bridge
		 * or LAG slave.
		 */
		return false;
	}

	return false;
}

static enum mlxsw_sp_rif_type
mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
		      const struct net_device *dev)
{
	enum mlxsw_sp_fid_type type;

	/* RIF type is derived from the type of the underlying FID */
	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
		type = MLXSW_SP_FID_TYPE_8021Q;
	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
		type = MLXSW_SP_FID_TYPE_8021Q;
	else if (netif_is_bridge_master(dev))
		type = MLXSW_SP_FID_TYPE_8021D;
	else
		type = MLXSW_SP_FID_TYPE_RFID;

	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
}
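/* Roughly, this maps to: a VLAN device on a bridge, or a VLAN-aware
 * bridge itself -> 802.1Q FID; a VLAN-unaware bridge -> 802.1D FID;
 * anything else (ports, LAGs, VLAN devices on top of them) -> router
 * FID (rFID).
 */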
static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
		if (!mlxsw_sp->router->rifs[i]) {
			*p_rif_index = i;
			return 0;
		}
	}

	return -ENOBUFS;
}

static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
					       u16 vr_id,
					       struct net_device *l3_dev)
{
	struct mlxsw_sp_rif *rif;

	rif = kzalloc(rif_size, GFP_KERNEL);
	if (!rif)
		return NULL;

	INIT_LIST_HEAD(&rif->nexthop_list);
	INIT_LIST_HEAD(&rif->neigh_list);
	ether_addr_copy(rif->addr, l3_dev->dev_addr);
	rif->mtu = l3_dev->mtu;
	rif->vr_id = vr_id;
	rif->dev = l3_dev;
	rif->rif_index = rif_index;

	return rif;
}

struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}

u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}

int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_fid *fid;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err)
		goto err_rif_index_alloc;

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	fid = ops->fid_get(rif);
	if (IS_ERR(fid)) {
		err = PTR_ERR(fid);
		goto err_fid_get;
	}
	rif->fid = fid;

	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr,
				  mlxsw_sp_fid_index(fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_rif_counters_alloc(rif);
	mlxsw_sp_fid_rif_set(fid, rif);
	mlxsw_sp->router->rifs[rif_index] = rif;
	vr->rif_count++;

	return rif;

err_rif_fdb_op:
	ops->deconfigure(rif);
err_configure:
	mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	mlxsw_sp_vr_put(vr);
	return ERR_PTR(err);
}

void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	vr->rif_count--;
	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_counters_free(rif);
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	ops->deconfigure(rif);
	mlxsw_sp_fid_put(fid);
	kfree(rif);
	mlxsw_sp_vr_put(vr);
}
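/* mlxsw_sp_rif_destroy() undoes mlxsw_sp_rif_create() in reverse order.
 * Nexthops and neighbour entries that depended on the RIF are
 * synchronized away first, so by the time the RIF is freed nothing in
 * the routing offload still references it.
 */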
4532 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
4533 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
4535 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
4537 params->vid = mlxsw_sp_port_vlan->vid;
4538 params->lag = mlxsw_sp_port->lagged;
4540 params->lag_id = mlxsw_sp_port->lag_id;
4542 params->system_port = mlxsw_sp_port->local_port;
4546 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
4547 struct net_device *l3_dev)
4549 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
4550 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
4551 u16 vid = mlxsw_sp_port_vlan->vid;
4552 struct mlxsw_sp_rif *rif;
4553 struct mlxsw_sp_fid *fid;
4556 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
4558 struct mlxsw_sp_rif_params params = {
4562 mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan);
4563 rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms);
4565 return PTR_ERR(rif);
4568 /* FID was already created, just take a reference */
4569 fid = rif->ops->fid_get(rif);
4570 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
4572 goto err_fid_port_vid_map;
4574 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
4576 goto err_port_vid_learning_set;
4578 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
4579 BR_STATE_FORWARDING);
4581 goto err_port_vid_stp_set;
4583 mlxsw_sp_port_vlan->fid = fid;
4587 err_port_vid_stp_set:
4588 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
4589 err_port_vid_learning_set:
4590 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
4591 err_fid_port_vid_map:
4592 mlxsw_sp_fid_put(fid);
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}

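/* Address events are dispatched by netdev type. For example, the first
 * IPv4 address configured on a physical port takes the following path
 * before a Sub-port RIF is instantiated on VID 1:
 *
 *	mlxsw_sp_inetaddr_event()
 *	  __mlxsw_sp_inetaddr_event()
 *	    mlxsw_sp_inetaddr_port_event()
 *	      mlxsw_sp_inetaddr_port_vlan_event(dev, dev, NETDEV_UP, 1)
 */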
static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
					     struct net_device *port_dev,
					     unsigned long event, u16 vid)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;

	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
	if (WARN_ON(!mlxsw_sp_port_vlan))
		return -EINVAL;

	switch (event) {
	case NETDEV_UP:
		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
						      l3_dev);
	case NETDEV_DOWN:
		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
		break;
	}

	return 0;
}

static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
					unsigned long event)
{
	if (netif_is_bridge_port(port_dev) ||
	    netif_is_lag_port(port_dev) ||
	    netif_is_ovs_port(port_dev))
		return 0;

	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1);
}

static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
					 struct net_device *lag_dev,
					 unsigned long event, u16 vid)
{
	struct net_device *port_dev;
	struct list_head *iter;
	int err;

	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
		if (mlxsw_sp_port_dev_check(port_dev)) {
			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
								port_dev,
								event, vid);
			if (err)
				return err;
		}
	}

	return 0;
}

static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
}

static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
					  unsigned long event)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	struct mlxsw_sp_rif_params params = {
		.dev = l3_dev,
	};
	struct mlxsw_sp_rif *rif;

	switch (event) {
	case NETDEV_UP:
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
		break;
	case NETDEV_DOWN:
		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
		mlxsw_sp_rif_destroy(rif);
		break;
	}

	return 0;
}

static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
					unsigned long event)
{
	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
	u16 vid = vlan_dev_vlan_id(vlan_dev);

	if (netif_is_bridge_port(vlan_dev))
		return 0;

	if (mlxsw_sp_port_dev_check(real_dev))
		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
							 event, vid);
	else if (netif_is_lag_master(real_dev))
		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
						     vid);
	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event);

	return 0;
}

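/* Dispatch an address event according to the netdev type. This helper is
 * shared by the IPv4 notifier, which runs it directly, and the IPv6
 * notifier, which defers to it from process context. Netdevs that cannot
 * host a RIF are silently ignored.
 */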
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event);
	else if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event);
	else if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event);
	else if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event);
	else
		return 0;
}

int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event);
out:
	return notifier_from_errno(err);
}

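/* Unlike its IPv4 counterpart, the inet6addr notifier is called in atomic
 * context, so the event is recorded in a work item and handled from
 * process context, where RTNL can be taken.
 */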
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct net_device *dev;
	unsigned long event;
};

static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	rtnl_lock();
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(dev, event);
out:
	rtnl_unlock();
	dev_put(dev);
	kfree(inet6addr_work);
}

/* Called with rcu_read_lock() */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
			     unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;

	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
		return NOTIFY_DONE;

	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	dev_hold(dev);
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}

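/* A RIF's MAC and MTU are edited by querying the current RITR entry for
 * the RIF index, patching the two fields and writing the entry back.
 */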
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}

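/* A netdev's FIB table is dictated by its L3 master device, so moving a
 * netdev in or out of a VRF amounts to destroying its current RIF (if any)
 * and creating a new one bound to the right virtual router.
 */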
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);

	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
}

static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
				    struct net_device *l3_dev)
{
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif)
		return;
	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
}

int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
				 struct netdev_notifier_changeupper_info *info)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	int err = 0;

	if (!mlxsw_sp)
		return 0;

	switch (event) {
	case NETDEV_PRECHANGEUPPER:
		return 0;
	case NETDEV_CHANGEUPPER:
		if (info->upper_dev)
			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
		else
			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
		break;
	}

	return err;
}

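/* Each RIF type embeds struct mlxsw_sp_rif ("common") at the start of a
 * type-specific structure, which the per-type callbacks recover with
 * container_of(). The rif_size field of the ops tells mlxsw_sp_rif_alloc()
 * how much memory the variant needs.
 */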
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}

static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
				       const struct mlxsw_sp_rif_params *params)
{
	struct mlxsw_sp_rif_subport *rif_subport;

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	rif_subport->vid = params->vid;
	rif_subport->lag = params->lag;
	if (params->lag)
		rif_subport->lag_id = params->lag_id;
	else
		rif_subport->system_port = params->system_port;
}

static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_rif_subport_op(rif, true);
}

static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_rif_subport_op(rif, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};

static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

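/* The router is modelled as an additional, internal port located one past
 * the last possible front-panel port. Making it a member of a FID's MC and
 * BC flood tables is what allows flooded packets in that FID to reach the
 * router.
 */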
static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}

static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	return 0;

err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}

static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);

	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
{
	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;

	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};

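/* FID RIFs serve VLAN-unaware (802.1D) bridges. Their flow mirrors the
 * VLAN RIF one above, except that the RITR entry is keyed by FID index
 * instead of VID.
 */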
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	return 0;

err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}

static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);

	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};

static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
};

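/* The RIF table is a flat array indexed by RIF index, sized according to
 * the MAX_RIFS resource queried from the device.
 */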
static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
{
	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_sp->router->rifs = kcalloc(max_rifs,
					 sizeof(struct mlxsw_sp_rif *),
					 GFP_KERNEL);
	if (!mlxsw_sp->router->rifs)
		return -ENOMEM;

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;

	return 0;
}

static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);

	kfree(mlxsw_sp->router->rifs);
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}

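/* Enable IPv4 and IPv6 routing in the device and cap the number of
 * router interfaces, both via the RGCR register.
 */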
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		return err;

	return 0;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	kfree(mlxsw_sp->router);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}