/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_router.h"

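/* Router Interface (RIF): the router-facing representation of a netdev.
 * Tracks the nexthops and neigh entries that egress through the
 * interface, along with optional per-direction packet counters.
 */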
struct mlxsw_sp_rif {
        struct list_head nexthop_list;
        struct list_head neigh_list;
        struct net_device *dev;
        struct mlxsw_sp_fid *f;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index;
        u16 vr_id;
        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
                           enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return &rif->counter_egress;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return &rif->counter_ingress;
        }
        return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                return rif->counter_egress_valid;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                return rif->counter_ingress_valid;
        }
        return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir,
                               bool valid)
{
        switch (dir) {
        case MLXSW_SP_RIF_COUNTER_EGRESS:
                rif->counter_egress_valid = valid;
                break;
        case MLXSW_SP_RIF_COUNTER_INGRESS:
                rif->counter_ingress_valid = valid;
                break;
        }
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
                                     unsigned int counter_index, bool enable,
                                     enum mlxsw_sp_rif_counter_dir dir)
{
        char ritr_pl[MLXSW_REG_RITR_LEN];
        bool is_egress = false;
        int err;

        if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
                is_egress = true;
        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
        if (err)
                return err;

        mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
                                    is_egress);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

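/* Read the current value of a RIF counter. Fails with -EINVAL if no
 * counter was allocated for the given direction. Note that only the
 * good unicast packet count is read from the RICNT register.
 */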
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_rif *rif,
                                   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
        char ricnt_pl[MLXSW_REG_RICNT_LEN];
        unsigned int *p_counter_index;
        bool valid;
        int err;

        valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
        if (!valid)
                return -EINVAL;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (!p_counter_index)
                return -EINVAL;
        mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
                             MLXSW_REG_RICNT_OPCODE_NOP);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
        if (err)
                return err;
        *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
        return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
                                      unsigned int counter_index)
{
        char ricnt_pl[MLXSW_REG_RICNT_LEN];

        mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
                             MLXSW_REG_RICNT_OPCODE_CLEAR);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

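/* Allocate a counter from the RIF sub-pool, clear it and bind it to the
 * RIF in the requested direction. On any failure the counter is
 * returned to the pool, leaving the RIF state unchanged.
 */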
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        unsigned int *p_counter_index;
        int err;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (!p_counter_index)
                return -EINVAL;
        err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                                     p_counter_index);
        if (err)
                return err;

        err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
        if (err)
                goto err_counter_clear;

        err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
                                        *p_counter_index, true, dir);
        if (err)
                goto err_counter_edit;
        mlxsw_sp_rif_counter_valid_set(rif, dir, true);
        return 0;

err_counter_edit:
err_counter_clear:
        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                              *p_counter_index);
        return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_rif *rif,
                               enum mlxsw_sp_rif_counter_dir dir)
{
        unsigned int *p_counter_index;

        if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
                return;

        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
        if (WARN_ON(!p_counter_index))
                return;
        mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
                                  *p_counter_index, false, dir);
        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
                              *p_counter_index);
        mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                         const struct net_device *dev);

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
                             struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        unsigned char prefix;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
                if (!test_bit(prefix, prefix_usage2->b))
                        return false;
        }
        return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
                         struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
        struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

        return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
                          struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
                          unsigned char prefix_len)
{
        set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
                            unsigned char prefix_len)
{
        clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_params {
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};

struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_params params;
        bool offloaded;
};

struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        struct mlxsw_sp_vr *vr;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
        enum mlxsw_sp_l3proto proto;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

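/* Allocate a FIB table for a virtual router. Nodes are kept both in a
 * rhashtable for lookup by key and on node_list for ordered traversal.
 */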
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
                                                enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_fib *fib;
        int err;

        fib = kzalloc(sizeof(*fib), GFP_KERNEL);
        if (!fib)
                return ERR_PTR(-ENOMEM);
        err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
        if (err)
                goto err_rhashtable_init;
        INIT_LIST_HEAD(&fib->node_list);
        fib->proto = proto;
        fib->vr = vr;
        return fib;

err_rhashtable_init:
        kfree(fib);
        return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
        WARN_ON(!list_empty(&fib->node_list));
        WARN_ON(fib->lpm_tree);
        rhashtable_destroy(&fib->ht);
        kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                if (lpm_tree->ref_count == 0)
                        return lpm_tree;
        }
        return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, true,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, false,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_prefix_usage *prefix_usage,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralst_pl[MLXSW_REG_RALST_LEN];
        u8 root_bin = 0;
        u8 prefix;
        u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
                root_bin = prefix;

        mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
                if (prefix == 0)
                        continue;
                mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
                                         MLXSW_REG_RALST_BIN_NO_CHILD);
                last_prefix = prefix;
        }
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_prefix_usage *prefix_usage,
                         enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int err;

        lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
        if (!lpm_tree)
                return ERR_PTR(-EBUSY);
        lpm_tree->proto = proto;
        err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
        if (err)
                return ERR_PTR(err);

        err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
                                                lpm_tree);
        if (err)
                goto err_left_struct_set;
        memcpy(&lpm_tree->prefix_usage, prefix_usage,
               sizeof(lpm_tree->prefix_usage));
        return lpm_tree;

err_left_struct_set:
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
        return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_lpm_tree *lpm_tree)
{
        return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

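/* Get a reference on an LPM tree matching the given prefix usage and
 * protocol, reusing an existing tree when one matches exactly and
 * creating a new one otherwise. Paired with mlxsw_sp_lpm_tree_put().
 */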
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
                      struct mlxsw_sp_prefix_usage *prefix_usage,
                      enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                if (lpm_tree->ref_count != 0 &&
                    lpm_tree->proto == proto &&
                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
                                             prefix_usage))
                        goto inc_ref_count;
        }
        lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
                                            proto);
        if (IS_ERR(lpm_tree))
                return lpm_tree;

inc_ref_count:
        lpm_tree->ref_count++;
        return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_lpm_tree *lpm_tree)
{
        if (--lpm_tree->ref_count == 0)
                return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
        return 0;
}

#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */

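/* Allocate the LPM tree array based on the MAX_LPM_TREES device
 * resource. Tree IDs below MLXSW_SP_LPM_TREE_MIN are reserved, so the
 * usable trees are numbered starting from it.
 */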
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        u64 max_trees;
        int i;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
                return -EIO;

        max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
        mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
        mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count,
                                             sizeof(struct mlxsw_sp_lpm_tree),
                                             GFP_KERNEL);
        if (!mlxsw_sp->router.lpm.trees)
                return -ENOMEM;

        for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
                lpm_tree = &mlxsw_sp->router.lpm.trees[i];
                lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
        }

        return 0;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
        kfree(mlxsw_sp->router.lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
        return !!vr->fib4;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        int i;

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (!mlxsw_sp_vr_is_used(vr))
                        return vr;
        }
        return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
                                     const struct mlxsw_sp_fib *fib)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
                             fib->lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
                                       const struct mlxsw_sp_fib *fib)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        /* Bind to tree 0, which is the default */
        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
        /* For our purpose, squash main and local table into one */
        if (tb_id == RT_TABLE_LOCAL)
                tb_id = RT_TABLE_MAIN;
        return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
                                            u32 tb_id)
{
        struct mlxsw_sp_vr *vr;
        int i;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
                        return vr;
        }
        return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
                                            enum mlxsw_sp_l3proto proto)
{
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                return vr->fib4;
        case MLXSW_SP_L3_PROTO_IPV6:
                BUG_ON(1);
        }
        return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
                                              u32 tb_id)
{
        struct mlxsw_sp_vr *vr;

        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr)
                return ERR_PTR(-EBUSY);
        vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
        if (IS_ERR(vr->fib4))
                return ERR_CAST(vr->fib4);
        vr->tb_id = tb_id;
        return vr;
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
        mlxsw_sp_fib_destroy(vr->fib4);
        vr->fib4 = NULL;
}

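/* Make sure the FIB is bound to an LPM tree that covers the required
 * prefix usage; rebind to a new tree if the current one does not. The
 * new tree is bound over the existing binding, so packets keep flowing
 * during the switch-over.
 */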
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
                           struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
        struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
        struct mlxsw_sp_lpm_tree *new_tree;
        int err;

        if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
                return 0;

        new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
                                         fib->proto);
        if (IS_ERR(new_tree)) {
                /* We failed to get a tree according to the required
                 * prefix usage. However, the current tree might be still good
                 * for us if our requirement is subset of the prefixes used
                 * in the tree.
                 */
                if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
                                                 &lpm_tree->prefix_usage))
                        return 0;
                return PTR_ERR(new_tree);
        }

        /* Prevent packet loss by overwriting existing binding */
        fib->lpm_tree = new_tree;
        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
        if (err)
                goto err_tree_bind;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

        return 0;

err_tree_bind:
        fib->lpm_tree = lpm_tree;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
        return err;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
{
        struct mlxsw_sp_vr *vr;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);
        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
        if (!vr)
                vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
        return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
        if (!vr->rif_count && list_empty(&vr->fib4->node_list))
                mlxsw_sp_vr_destroy(vr);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        u64 max_vrs;
        int i;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
                return -EIO;

        max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
                                       GFP_KERNEL);
        if (!mlxsw_sp->router.vrs)
                return -ENOMEM;

        for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router.vrs[i];
                vr->id = i;
        }

        return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
        /* At this stage we're guaranteed not to have new incoming
         * FIB notifications and the work queue is free from FIBs
         * sitting on top of mlxsw netdevs. However, we can still
         * have other FIBs queued. Flush the queue before flushing
         * the device's tables. No need for locks, as we're the only
         * writer.
         */
        mlxsw_core_flush_owq();
        mlxsw_sp_router_fib_flush(mlxsw_sp);
        kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
        struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
        struct list_head rif_list_node;
        struct rhash_head ht_node;
        struct mlxsw_sp_neigh_key key;
        u16 rif;
        bool connected;
        unsigned char ha[ETH_ALEN];
        struct list_head nexthop_list; /* list of nexthops using
                                        * this neigh entry
                                        */
        struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
        .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
        .key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
                           u16 rif)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
        if (!neigh_entry)
                return NULL;

        neigh_entry->key.n = n;
        neigh_entry->rif = rif;
        INIT_LIST_HEAD(&neigh_entry->nexthop_list);

        return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
        kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
                                      &neigh_entry->ht_node,
                                      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
                               &neigh_entry->ht_node,
                               mlxsw_sp_neigh_ht_params);
}

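/* Create a neigh entry for a kernel neighbour, hash it for lookup and
 * link it to the RIF resolved from the neighbour's netdev; creation
 * fails with -EINVAL when the netdev has no RIF.
 */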
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp_rif *rif;
        int err;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (!rif)
                return ERR_PTR(-EINVAL);

        neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
        if (!neigh_entry)
                return ERR_PTR(-ENOMEM);

        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
        if (err)
                goto err_neigh_entry_insert;

        list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

        return neigh_entry;

err_neigh_entry_insert:
        mlxsw_sp_neigh_entry_free(neigh_entry);
        return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
                             struct mlxsw_sp_neigh_entry *neigh_entry)
{
        list_del(&neigh_entry->rif_list_node);
        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
        mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_key key;

        key.n = n;
        return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
                                      &key, mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

        mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int ent_index)
{
        struct net_device *dev;
        struct neighbour *n;
        __be32 dipn;
        u32 dip;
        u16 rif;

        mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

        if (!mlxsw_sp->rifs[rif]) {
                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
                return;
        }

        dipn = htonl(dip);
        dev = mlxsw_sp->rifs[rif]->dev;
        n = neigh_lookup(&arp_tbl, &dipn, dev);
        if (!n) {
                netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
                           &dip);
                return;
        }

        netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
        neigh_event_send(n, NULL);
        neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        u8 num_entries;
        int i;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                rec_index);
        /* Hardware starts counting at 0, so add 1. */
        num_entries++;

        /* Each record consists of several neighbour entries. */
        for (i = 0; i < num_entries; i++) {
                int ent_index;

                ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
                mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       ent_index);
        }
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                                              char *rauhtd_pl, int rec_index)
{
        switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
        case MLXSW_REG_RAUHTD_TYPE_IPV4:
                mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       rec_index);
                break;
        case MLXSW_REG_RAUHTD_TYPE_IPV6:
                WARN_ON_ONCE(1);
                break;
        }
}

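/* A full RAUHTD response may mean more activity records are pending in
 * the device, in which case the dump is repeated. The response is
 * considered full when the maximum number of records was returned and
 * the last record has no room left for further entries.
 */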
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
        u8 num_rec, last_rec_index, num_entries;

        num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
        last_rec_index = num_rec - 1;

        if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
                return false;
        if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
            MLXSW_REG_RAUHTD_TYPE_IPV6)
                return true;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                last_rec_index);
        if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
                return true;
        return false;
}

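/* Dump the activity records from the device's neighbour table (RAUHTD)
 * and feed every active entry back to the kernel, so that neighbours
 * used only by hardware-forwarded traffic are kept reachable.
 */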
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
        char *rauhtd_pl;
        u8 num_rec;
        int i, err;

        rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
        if (!rauhtd_pl)
                return -ENOMEM;

        /* Make sure the neighbour's netdev isn't removed in the
         * process.
         */
        rtnl_lock();
        do {
                mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
                                      rauhtd_pl);
                if (err) {
                        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
                        break;
                }
                num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
                for (i = 0; i < num_rec; i++)
                        mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
                                                          i);
        } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
        rtnl_unlock();

        kfree(rauhtd_pl);
        return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        /* Take the RTNL mutex here to prevent the lists from changing */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                /* If this neigh has nexthops, make the kernel think it
                 * is active regardless of the traffic.
                 */
                neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = mlxsw_sp->router.neighs_update.interval;

        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
                               msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.neighs_update.dw.work);
        int err;

        err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
        if (err)
                dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

        mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

        mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.nexthop_probe_dw.work);

        /* Iterate over the nexthop neighbours and send ARP on those that
         * are unresolved. This solves the chicken-and-egg problem where
         * a nexthop is not offloaded until its neighbour is resolved, but
         * the neighbour may never be resolved if traffic flows in HW via
         * a different nexthop.
         *
         * Take the RTNL mutex here to prevent the lists from changing.
         */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                if (!neigh_entry->connected)
                        neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();

        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
                               MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_neigh_entry *neigh_entry,
                              bool removing);

static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
        return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
                        MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}

static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_neigh_entry *neigh_entry,
                                enum mlxsw_reg_rauht_op op)
{
        struct neighbour *n = neigh_entry->key.n;
        u32 dip = ntohl(*((__be32 *) n->primary_key));
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
                              dip);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry,
                            bool adding)
{
        if (!adding && !neigh_entry->connected)
                return;
        neigh_entry->connected = adding;
        if (neigh_entry->key.n->tbl == &arp_tbl)
                mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
                                                mlxsw_sp_rauht_op(adding));
        else
                WARN_ON_ONCE(1);
}

struct mlxsw_sp_neigh_event_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n;
};

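/* Process a NETEVENT_NEIGH_UPDATE in process context. The neighbour's
 * state is snapshotted under its lock, the hardware entry is updated
 * under RTNL, and entries that are no longer connected and unused by
 * nexthops are destroyed.
 */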
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_neigh_event_work *neigh_work =
                container_of(work, struct mlxsw_sp_neigh_event_work, work);
        struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = neigh_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        neigh_release(n);
        kfree(neigh_work);
}

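/* Netevent notifier, called in atomic context. DELAY_PROBE_TIME updates
 * are applied directly, while NEIGH_UPDATE events are handed off to a
 * work item, since handling them requires taking the RTNL mutex.
 */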
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                                   unsigned long event, void *ptr)
{
        struct mlxsw_sp_neigh_event_work *neigh_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || p->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router.neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                if (n->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
                if (!neigh_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
                neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                neigh_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destructed until we drop the reference in delayed
                 * work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&neigh_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        }

        return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
        int err;

        err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
                              &mlxsw_sp_neigh_ht_params);
        if (err)
                return err;

        /* Initialize the polling interval according to the default
         * table.
         */
        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

        /* Create the delayed works for the neighbour activity update
         * and unresolved nexthop probing mechanisms.
         */
        INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
                          mlxsw_sp_router_neighs_update_work);
        INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
                          mlxsw_sp_router_probe_unresolved_nexthops);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
        return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
        cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
        cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
                                    const struct mlxsw_sp_rif *rif)
{
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
                             rif->rif_index, rif->addr);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

        mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
                                 rif_list_node)
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
}

struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};

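/* A nexthop is a member of exactly one nexthop group and, when
 * resolved, points at a neigh entry. Groups are keyed by their
 * fib_info, so FIB entries with the same set of nexthops share one
 * group.
 */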
struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this belongs to
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group_key {
        struct fib_info *fi;
};

struct mlxsw_sp_nexthop_group {
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
        struct mlxsw_sp_nexthop_group_key key;
        u8 adj_index_valid:1,
           gateway:1; /* routes using the group use a gateway */
        u32 adj_index;
        u16 ecmp_size;
        u16 count;
        struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif  nexthops[0].rif
};

static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};

static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_nexthop_group *nh_grp)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
                                      &nh_grp->ht_node,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
                               &nh_grp->ht_node,
                               mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_nexthop_group_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_nexthop *nh)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
                                      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop *nh)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
                               mlxsw_sp_nexthop_ht_params);
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
                        struct mlxsw_sp_nexthop_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
                                      mlxsw_sp_nexthop_ht_params);
}

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
                                             const struct mlxsw_sp_fib *fib,
                                             u32 adj_index, u16 ecmp_size,
                                             u32 new_adj_index,
                                             u16 new_ecmp_size)
{
        char raleu_pl[MLXSW_REG_RALEU_LEN];

        mlxsw_reg_raleu_pack(raleu_pl,
                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
                             fib->vr->id, adj_index, ecmp_size, new_adj_index,
                             new_ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp,
                                          u32 old_adj_index, u16 old_ecmp_size)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_fib *fib = NULL;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                if (fib == fib_entry->fib_node->fib)
                        continue;
                fib = fib_entry->fib_node->fib;
                err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
                                                        old_adj_index,
                                                        old_ecmp_size,
                                                        nh_grp->adj_index,
                                                        nh_grp->ecmp_size);
                if (err)
                        return err;
        }
        return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
                                       struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
        char ratr_pl[MLXSW_REG_RATR_LEN];

        mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
                            true, adj_index, neigh_entry->rif);
        mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop_group *nh_grp,
                                  bool reallocate)
{
        u32 adj_index = nh_grp->adj_index; /* base */
        struct mlxsw_sp_nexthop *nh;
        int i;
        int err;

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (!nh->should_offload) {
                        nh->offloaded = 0;
                        continue;
                }

                if (nh->update || reallocate) {
                        err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
                                                          adj_index, nh);
                        if (err)
                                return err;
                        nh->update = 0;
                        nh->offloaded = 1;
                }
                adj_index++;
        }
        return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
                if (err)
                        return err;
        }
        return 0;
}

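/* Re-sync a nexthop group with the device after membership changes.
 * When the set of offloadable nexthops changed, a new block of ECMP
 * adjacency entries is allocated in the KVD linear area and the routes
 * using the group are switched over to it. If no nexthop can be
 * offloaded, the group falls back to trapping packets to the kernel.
 */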
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_nexthop *nh;
        bool offload_change = false;
        u32 adj_index;
        u16 ecmp_size = 0;
        bool old_adj_index_valid;
        u32 old_adj_index;
        u16 old_ecmp_size;
        int i;
        int err;

        if (!nh_grp->gateway) {
                mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                return;
        }

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (nh->should_offload ^ nh->offloaded) {
                        offload_change = true;
                        if (nh->should_offload)
                                nh->update = 1;
                }
                if (nh->should_offload)
                        ecmp_size++;
        }
        if (!offload_change) {
                /* Nothing was added or removed, so no need to reallocate. Just
                 * update MAC on existing adjacency indexes.
                 */
                err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
                                                        false);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                        goto set_trap;
                }
                return;
        }
        if (!ecmp_size)
1464                 /* No neigh of this group is connected, so just set
1465                  * the trap and let everything flow through the kernel.
1466                  */
1467                 goto set_trap;
1468
1469         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
1470         if (err) {
1471                 /* We ran out of KVD linear space, just set the
1472                  * trap and let everything flow through kernel.
1473                  */
1474                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1475                 goto set_trap;
1476         }
1477         old_adj_index_valid = nh_grp->adj_index_valid;
1478         old_adj_index = nh_grp->adj_index;
1479         old_ecmp_size = nh_grp->ecmp_size;
1480         nh_grp->adj_index_valid = 1;
1481         nh_grp->adj_index = adj_index;
1482         nh_grp->ecmp_size = ecmp_size;
1483         err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1484         if (err) {
1485                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1486                 goto set_trap;
1487         }
1488
1489         if (!old_adj_index_valid) {
1490                 /* The trap was set for fib entries, so we have to call
1491                  * fib entry update to unset it and use the adjacency index.
1492                  */
1493                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1494                 if (err) {
1495                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1496                         goto set_trap;
1497                 }
1498                 return;
1499         }
1500
1501         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1502                                              old_adj_index, old_ecmp_size);
1503         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1504         if (err) {
1505                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1506                 goto set_trap;
1507         }
1508         return;
1509
1510 set_trap:
1511         old_adj_index_valid = nh_grp->adj_index_valid;
1512         nh_grp->adj_index_valid = 0;
1513         for (i = 0; i < nh_grp->count; i++) {
1514                 nh = &nh_grp->nexthops[i];
1515                 nh->offloaded = 0;
1516         }
1517         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1518         if (err)
1519                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1520         if (old_adj_index_valid)
1521                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1522 }
1523
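/* Adjust a single nexthop's offload intent according to its
 * neighbour's state: a usable neighbour makes the nexthop eligible for
 * offload, a removed one revokes it. The nexthop is marked for update
 * either way; the group refresh does the actual work.
 */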
1524 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1525                                             bool removing)
1526 {
1527         if (!removing && !nh->should_offload)
1528                 nh->should_offload = 1;
1529         else if (removing && nh->offloaded)
1530                 nh->should_offload = 0;
1531         nh->update = 1;
1532 }
1533
1534 static void
1535 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1536                               struct mlxsw_sp_neigh_entry *neigh_entry,
1537                               bool removing)
1538 {
1539         struct mlxsw_sp_nexthop *nh;
1540
1541         list_for_each_entry(nh, &neigh_entry->nexthop_list,
1542                             neigh_list_node) {
1543                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1544                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1545         }
1546 }
1547
1548 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1549                                       struct mlxsw_sp_rif *rif)
1550 {
1551         if (nh->rif)
1552                 return;
1553
1554         nh->rif = rif;
1555         list_add(&nh->rif_list_node, &rif->nexthop_list);
1556 }
1557
1558 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1559 {
1560         if (!nh->rif)
1561                 return;
1562
1563         list_del(&nh->rif_list_node);
1564         nh->rif = NULL;
1565 }
1566
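/* Resolve the nexthop's gateway to a neighbour entry and link the
 * nexthop to it, creating the neighbour and kicking off its resolution
 * if it does not exist yet. The nexthop's initial offload intent is
 * derived from the current neighbour state. For non-gateway groups, or
 * if the nexthop is already linked, this is a no-op.
 */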
1567 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1568                                        struct mlxsw_sp_nexthop *nh)
1569 {
1570         struct mlxsw_sp_neigh_entry *neigh_entry;
1571         struct fib_nh *fib_nh = nh->key.fib_nh;
1572         struct neighbour *n;
1573         u8 nud_state, dead;
1574         int err;
1575
1576         if (!nh->nh_grp->gateway || nh->neigh_entry)
1577                 return 0;
1578
1579         /* Take a reference on the neigh here to ensure that it is
1580          * not destroyed before the nexthop entry is finished with it.
1581          * The reference is taken either in neigh_lookup() or
1582          * in neigh_create() in case n is not found.
1583          */
1584         n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1585         if (!n) {
1586                 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1587                 if (IS_ERR(n))
1588                         return PTR_ERR(n);
1589                 neigh_event_send(n, NULL);
1590         }
1591         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1592         if (!neigh_entry) {
1593                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1594                 if (IS_ERR(neigh_entry)) {
1595                         err = PTR_ERR(neigh_entry);
1596                         goto err_neigh_entry_create;
1597                 }
1598         }
1599
1600         /* If this is the first nexthop connected to that neigh, add it
1601          * to nexthop_neighs_list
1602          */
1603         if (list_empty(&neigh_entry->nexthop_list))
1604                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1605                               &mlxsw_sp->router.nexthop_neighs_list);
1606
1607         nh->neigh_entry = neigh_entry;
1608         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1609         read_lock_bh(&n->lock);
1610         nud_state = n->nud_state;
1611         dead = n->dead;
1612         read_unlock_bh(&n->lock);
1613         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1614
1615         return 0;
1616
1617 err_neigh_entry_create:
1618         neigh_release(n);
1619         return err;
1620 }
1621
1622 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1623                                         struct mlxsw_sp_nexthop *nh)
1624 {
1625         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1626         struct neighbour *n;
1627
1628         if (!neigh_entry)
1629                 return;
1630         n = neigh_entry->key.n;
1631
1632         __mlxsw_sp_nexthop_neigh_update(nh, true);
1633         list_del(&nh->neigh_list_node);
1634         nh->neigh_entry = NULL;
1635
1636         /* If this is the last nexthop connected to that neigh, remove it
1637          * from nexthop_neighs_list
1638          */
1639         if (list_empty(&neigh_entry->nexthop_list))
1640                 list_del(&neigh_entry->nexthop_neighs_list_node);
1641
1642         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1643                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1644
1645         neigh_release(n);
1646 }
1647
1648 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1649                                  struct mlxsw_sp_nexthop_group *nh_grp,
1650                                  struct mlxsw_sp_nexthop *nh,
1651                                  struct fib_nh *fib_nh)
1652 {
1653         struct net_device *dev = fib_nh->nh_dev;
1654         struct in_device *in_dev;
1655         struct mlxsw_sp_rif *rif;
1656         int err;
1657
1658         nh->nh_grp = nh_grp;
1659         nh->key.fib_nh = fib_nh;
1660         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1661         if (err)
1662                 return err;
1663
1664         if (!dev)
1665                 return 0;
1666
1667         in_dev = __in_dev_get_rtnl(dev);
1668         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1669             fib_nh->nh_flags & RTNH_F_LINKDOWN)
1670                 return 0;
1671
1672         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1673         if (!rif)
1674                 return 0;
1675         mlxsw_sp_nexthop_rif_init(nh, rif);
1676
1677         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1678         if (err)
1679                 goto err_nexthop_neigh_init;
1680
1681         return 0;
1682
1683 err_nexthop_neigh_init:
1684         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1685         return err;
1686 }
1687
1688 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1689                                   struct mlxsw_sp_nexthop *nh)
1690 {
1691         mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1692         mlxsw_sp_nexthop_rif_fini(nh);
1693         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1694 }
1695
1696 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1697                                    unsigned long event, struct fib_nh *fib_nh)
1698 {
1699         struct mlxsw_sp_nexthop_key key;
1700         struct mlxsw_sp_nexthop *nh;
1701         struct mlxsw_sp_rif *rif;
1702
1703         if (mlxsw_sp->router.aborted)
1704                 return;
1705
1706         key.fib_nh = fib_nh;
1707         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1708         if (WARN_ON_ONCE(!nh))
1709                 return;
1710
1711         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1712         if (!rif)
1713                 return;
1714
1715         switch (event) {
1716         case FIB_EVENT_NH_ADD:
1717                 mlxsw_sp_nexthop_rif_init(nh, rif);
1718                 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1719                 break;
1720         case FIB_EVENT_NH_DEL:
1721                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1722                 mlxsw_sp_nexthop_rif_fini(nh);
1723                 break;
1724         }
1725
1726         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1727 }
1728
1729 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1730                                            struct mlxsw_sp_rif *rif)
1731 {
1732         struct mlxsw_sp_nexthop *nh, *tmp;
1733
1734         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
1735                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1736                 mlxsw_sp_nexthop_rif_fini(nh);
1737                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1738         }
1739 }
1740
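/* Create a nexthop group mirroring the kernel's fib_info: one nexthop
 * per fib_nh, with gateway groups identified by a link-scoped first
 * nexthop. The group is inserted into the group hash table and
 * refreshed once, so its adjacency entries are programmed before any
 * FIB entry starts using it.
 */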
1741 static struct mlxsw_sp_nexthop_group *
1742 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1743 {
1744         struct mlxsw_sp_nexthop_group *nh_grp;
1745         struct mlxsw_sp_nexthop *nh;
1746         struct fib_nh *fib_nh;
1747         size_t alloc_size;
1748         int i;
1749         int err;
1750
1751         alloc_size = sizeof(*nh_grp) +
1752                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1753         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1754         if (!nh_grp)
1755                 return ERR_PTR(-ENOMEM);
1756         INIT_LIST_HEAD(&nh_grp->fib_list);
1757         nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1758         nh_grp->count = fi->fib_nhs;
1759         nh_grp->key.fi = fi;
1760         for (i = 0; i < nh_grp->count; i++) {
1761                 nh = &nh_grp->nexthops[i];
1762                 fib_nh = &fi->fib_nh[i];
1763                 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1764                 if (err)
1765                         goto err_nexthop_init;
1766         }
1767         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1768         if (err)
1769                 goto err_nexthop_group_insert;
1770         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1771         return nh_grp;
1772
1773 err_nexthop_group_insert:
1774 err_nexthop_init:
1775         for (i--; i >= 0; i--) {
1776                 nh = &nh_grp->nexthops[i];
1777                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1778         }
1779         kfree(nh_grp);
1780         return ERR_PTR(err);
1781 }
1782
1783 static void
1784 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1785                                struct mlxsw_sp_nexthop_group *nh_grp)
1786 {
1787         struct mlxsw_sp_nexthop *nh;
1788         int i;
1789
1790         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1791         for (i = 0; i < nh_grp->count; i++) {
1792                 nh = &nh_grp->nexthops[i];
1793                 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1794         }
1795         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1796         WARN_ON_ONCE(nh_grp->adj_index_valid);
1797         kfree(nh_grp);
1798 }
1799
1800 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1801                                       struct mlxsw_sp_fib_entry *fib_entry,
1802                                       struct fib_info *fi)
1803 {
1804         struct mlxsw_sp_nexthop_group_key key;
1805         struct mlxsw_sp_nexthop_group *nh_grp;
1806
1807         key.fi = fi;
1808         nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1809         if (!nh_grp) {
1810                 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1811                 if (IS_ERR(nh_grp))
1812                         return PTR_ERR(nh_grp);
1813         }
1814         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1815         fib_entry->nh_group = nh_grp;
1816         return 0;
1817 }
1818
1819 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1820                                        struct mlxsw_sp_fib_entry *fib_entry)
1821 {
1822         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1823
1824         list_del(&fib_entry->nexthop_group_node);
1825         if (!list_empty(&nh_grp->fib_list))
1826                 return;
1827         mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1828 }
1829
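/* A FIB entry can only be offloaded when the device has somewhere to
 * send matching packets: a valid adjacency group for remote (gateway)
 * entries, or a RIF for directly connected (local) ones. TOS-specific
 * routes are never offloaded and are always trapped to the kernel.
 */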
1830 static bool
1831 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1832 {
1833         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1834
1835         if (fib_entry->params.tos)
1836                 return false;
1837
1838         switch (fib_entry->type) {
1839         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1840                 return !!nh_group->adj_index_valid;
1841         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1842                 return !!nh_group->nh_rif;
1843         default:
1844                 return false;
1845         }
1846 }
1847
1848 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1849 {
1850         fib_entry->offloaded = true;
1851
1852         switch (fib_entry->fib_node->fib->proto) {
1853         case MLXSW_SP_L3_PROTO_IPV4:
1854                 fib_info_offload_inc(fib_entry->nh_group->key.fi);
1855                 break;
1856         case MLXSW_SP_L3_PROTO_IPV6:
1857                 WARN_ON_ONCE(1);
1858         }
1859 }
1860
1861 static void
1862 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1863 {
1864         switch (fib_entry->fib_node->fib->proto) {
1865         case MLXSW_SP_L3_PROTO_IPV4:
1866                 fib_info_offload_dec(fib_entry->nh_group->key.fi);
1867                 break;
1868         case MLXSW_SP_L3_PROTO_IPV6:
1869                 WARN_ON_ONCE(1);
1870         }
1871
1872         fib_entry->offloaded = false;
1873 }
1874
1875 static void
1876 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1877                                    enum mlxsw_reg_ralue_op op, int err)
1878 {
1879         switch (op) {
1880         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1881                 if (!fib_entry->offloaded)
1882                         return;
1883                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1884         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1885                 if (err)
1886                         return;
1887                 if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1888                     !fib_entry->offloaded)
1889                         mlxsw_sp_fib_entry_offload_set(fib_entry);
1890                 else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1891                          fib_entry->offloaded)
1892                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
1893                 return;
1894         default:
1895                 return;
1896         }
1897 }
1898
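/* Program a remote (gateway) route: point the prefix at the nexthop
 * group's adjacency index and ECMP size when the group is offloadable,
 * otherwise install a trap so that matching packets are forwarded by
 * the kernel instead.
 */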
1899 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1900                                          struct mlxsw_sp_fib_entry *fib_entry,
1901                                          enum mlxsw_reg_ralue_op op)
1902 {
1903         char ralue_pl[MLXSW_REG_RALUE_LEN];
1904         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1905         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1906         enum mlxsw_reg_ralue_trap_action trap_action;
1907         u16 trap_id = 0;
1908         u32 adjacency_index = 0;
1909         u16 ecmp_size = 0;
1910
1911         /* In case the nexthop group adjacency index is valid, use it
1912          * with the provided ECMP size. Otherwise, set up a trap and pass
1913          * traffic to the kernel.
1914          */
1915         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1916                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1917                 adjacency_index = fib_entry->nh_group->adj_index;
1918                 ecmp_size = fib_entry->nh_group->ecmp_size;
1919         } else {
1920                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1921                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1922         }
1923
1924         mlxsw_reg_ralue_pack4(ralue_pl,
1925                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1926                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1927                               *p_dip);
1928         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1929                                         adjacency_index, ecmp_size);
1930         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1931 }
1932
1933 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1934                                         struct mlxsw_sp_fib_entry *fib_entry,
1935                                         enum mlxsw_reg_ralue_op op)
1936 {
1937         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
1938         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1939         enum mlxsw_reg_ralue_trap_action trap_action;
1940         char ralue_pl[MLXSW_REG_RALUE_LEN];
1941         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1942         u16 trap_id = 0;
1943         u16 rif_index = 0;
1944
1945         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1946                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1947                 rif_index = rif->rif_index;
1948         } else {
1949                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1950                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1951         }
1952
1953         mlxsw_reg_ralue_pack4(ralue_pl,
1954                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1955                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1956                               *p_dip);
1957         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
1958                                        rif_index);
1959         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1960 }
1961
1962 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1963                                        struct mlxsw_sp_fib_entry *fib_entry,
1964                                        enum mlxsw_reg_ralue_op op)
1965 {
1966         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1967         char ralue_pl[MLXSW_REG_RALUE_LEN];
1968         u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1969
1970         mlxsw_reg_ralue_pack4(ralue_pl,
1971                               (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1972                               fib->vr->id, fib_entry->fib_node->key.prefix_len,
1973                               *p_dip);
1974         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1975         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1976 }
1977
1978 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1979                                   struct mlxsw_sp_fib_entry *fib_entry,
1980                                   enum mlxsw_reg_ralue_op op)
1981 {
1982         switch (fib_entry->type) {
1983         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1984                 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1985         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1986                 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1987         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1988                 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1989         }
1990         return -EINVAL;
1991 }
1992
1993 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1994                                  struct mlxsw_sp_fib_entry *fib_entry,
1995                                  enum mlxsw_reg_ralue_op op)
1996 {
1997         int err = -EINVAL;
1998
1999         switch (fib_entry->fib_node->fib->proto) {
2000         case MLXSW_SP_L3_PROTO_IPV4:
2001                 err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
2002                 break;
2003         case MLXSW_SP_L3_PROTO_IPV6:
2004                 return err;
2005         }
2006         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
2007         return err;
2008 }
2009
2010 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
2011                                      struct mlxsw_sp_fib_entry *fib_entry)
2012 {
2013         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2014                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
2015 }
2016
2017 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
2018                                   struct mlxsw_sp_fib_entry *fib_entry)
2019 {
2020         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2021                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
2022 }
2023
2024 static int
2025 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
2026                              const struct fib_entry_notifier_info *fen_info,
2027                              struct mlxsw_sp_fib_entry *fib_entry)
2028 {
2029         struct fib_info *fi = fen_info->fi;
2030
2031         switch (fen_info->type) {
2032         case RTN_BROADCAST: /* fall through */
2033         case RTN_LOCAL:
2034                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2035                 return 0;
2036         case RTN_UNREACHABLE: /* fall through */
2037         case RTN_BLACKHOLE: /* fall through */
2038         case RTN_PROHIBIT:
2039                 /* Packets hitting these routes need to be trapped, but
2040                  * can do so with a lower priority than packets directed
2041                  * at the host, so use action type local instead of trap.
2042                  */
2043                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2044                 return 0;
2045         case RTN_UNICAST:
2046                 if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
2047                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2048                 else
2049                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
2050                 return 0;
2051         default:
2052                 return -EINVAL;
2053         }
2054 }
2055
2056 static struct mlxsw_sp_fib_entry *
2057 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
2058                            struct mlxsw_sp_fib_node *fib_node,
2059                            const struct fib_entry_notifier_info *fen_info)
2060 {
2061         struct mlxsw_sp_fib_entry *fib_entry;
2062         int err;
2063
2064         fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
2065         if (!fib_entry) {
2066                 err = -ENOMEM;
2067                 goto err_fib_entry_alloc;
2068         }
2069
2070         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
2071         if (err)
2072                 goto err_fib4_entry_type_set;
2073
2074         err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
2075         if (err)
2076                 goto err_nexthop_group_get;
2077
2078         fib_entry->params.prio = fen_info->fi->fib_priority;
2079         fib_entry->params.tb_id = fen_info->tb_id;
2080         fib_entry->params.type = fen_info->type;
2081         fib_entry->params.tos = fen_info->tos;
2082
2083         fib_entry->fib_node = fib_node;
2084
2085         return fib_entry;
2086
2087 err_nexthop_group_get:
2088 err_fib4_entry_type_set:
2089         kfree(fib_entry);
2090 err_fib_entry_alloc:
2091         return ERR_PTR(err);
2092 }
2093
2094 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2095                                         struct mlxsw_sp_fib_entry *fib_entry)
2096 {
2097         mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
2098         kfree(fib_entry);
2099 }
2100
2101 static struct mlxsw_sp_fib_node *
2102 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2103                        const struct fib_entry_notifier_info *fen_info);
2104
2105 static struct mlxsw_sp_fib_entry *
2106 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
2107                            const struct fib_entry_notifier_info *fen_info)
2108 {
2109         struct mlxsw_sp_fib_entry *fib_entry;
2110         struct mlxsw_sp_fib_node *fib_node;
2111
2112         fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2113         if (IS_ERR(fib_node))
2114                 return NULL;
2115
2116         list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2117                 if (fib_entry->params.tb_id == fen_info->tb_id &&
2118                     fib_entry->params.tos == fen_info->tos &&
2119                     fib_entry->params.type == fen_info->type &&
2120                     fib_entry->nh_group->key.fi == fen_info->fi) {
2121                         return fib_entry;
2122                 }
2123         }
2124
2125         return NULL;
2126 }
2127
2128 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
2129         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
2130         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
2131         .key_len = sizeof(struct mlxsw_sp_fib_key),
2132         .automatic_shrinking = true,
2133 };
2134
2135 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
2136                                     struct mlxsw_sp_fib_node *fib_node)
2137 {
2138         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2139                                       mlxsw_sp_fib_ht_params);
2140 }
2141
2142 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2143                                      struct mlxsw_sp_fib_node *fib_node)
2144 {
2145         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2146                                mlxsw_sp_fib_ht_params);
2147 }
2148
2149 static struct mlxsw_sp_fib_node *
2150 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2151                          size_t addr_len, unsigned char prefix_len)
2152 {
2153         struct mlxsw_sp_fib_key key;
2154
2155         memset(&key, 0, sizeof(key));
2156         memcpy(key.addr, addr, addr_len);
2157         key.prefix_len = prefix_len;
2158         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2159 }
2160
2161 static struct mlxsw_sp_fib_node *
2162 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
2163                          size_t addr_len, unsigned char prefix_len)
2164 {
2165         struct mlxsw_sp_fib_node *fib_node;
2166
2167         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2168         if (!fib_node)
2169                 return NULL;
2170
2171         INIT_LIST_HEAD(&fib_node->entry_list);
2172         list_add(&fib_node->list, &fib->node_list);
2173         memcpy(fib_node->key.addr, addr, addr_len);
2174         fib_node->key.prefix_len = prefix_len;
2175
2176         return fib_node;
2177 }
2178
2179 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2180 {
2181         list_del(&fib_node->list);
2182         WARN_ON(!list_empty(&fib_node->entry_list));
2183         kfree(fib_node);
2184 }
2185
2186 static bool
2187 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2188                                  const struct mlxsw_sp_fib_entry *fib_entry)
2189 {
2190         return list_first_entry(&fib_node->entry_list,
2191                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
2192 }
2193
2194 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2195 {
2196         unsigned char prefix_len = fib_node->key.prefix_len;
2197         struct mlxsw_sp_fib *fib = fib_node->fib;
2198
2199         if (fib->prefix_ref_count[prefix_len]++ == 0)
2200                 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2201 }
2202
2203 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2204 {
2205         unsigned char prefix_len = fib_node->key.prefix_len;
2206         struct mlxsw_sp_fib *fib = fib_node->fib;
2207
2208         if (--fib->prefix_ref_count[prefix_len] == 0)
2209                 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2210 }
2211
2212 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
2213                                   struct mlxsw_sp_fib_node *fib_node,
2214                                   struct mlxsw_sp_fib *fib)
2215 {
2216         struct mlxsw_sp_prefix_usage req_prefix_usage;
2217         struct mlxsw_sp_lpm_tree *lpm_tree;
2218         int err;
2219
2220         err = mlxsw_sp_fib_node_insert(fib, fib_node);
2221         if (err)
2222                 return err;
2223         fib_node->fib = fib;
2224
2225         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
2226         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
2227
2228         if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2229                 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
2230                                                  &req_prefix_usage);
2231                 if (err)
2232                         goto err_tree_check;
2233         } else {
2234                 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2235                                                  fib->proto);
2236                 if (IS_ERR(lpm_tree))
2237                         return PTR_ERR(lpm_tree);
2238                 fib->lpm_tree = lpm_tree;
2239                 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
2240                 if (err)
2241                         goto err_tree_bind;
2242         }
2243
2244         mlxsw_sp_fib_node_prefix_inc(fib_node);
2245
2246         return 0;
2247
2248 err_tree_bind:
2249         fib->lpm_tree = NULL;
2250         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2251 err_tree_check:
2252         fib_node->fib = NULL;
2253         mlxsw_sp_fib_node_remove(fib, fib_node);
2254         return err;
2255 }
2256
2257 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
2258                                    struct mlxsw_sp_fib_node *fib_node)
2259 {
2260         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
2261         struct mlxsw_sp_fib *fib = fib_node->fib;
2262
2263         mlxsw_sp_fib_node_prefix_dec(fib_node);
2264
2265         if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2266                 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
2267                 fib->lpm_tree = NULL;
2268                 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2269         } else {
2270                 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
2271         }
2272
2273         fib_node->fib = NULL;
2274         mlxsw_sp_fib_node_remove(fib, fib_node);
2275 }
2276
2277 static struct mlxsw_sp_fib_node *
2278 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2279                        const struct fib_entry_notifier_info *fen_info)
2280 {
2281         struct mlxsw_sp_fib_node *fib_node;
2282         struct mlxsw_sp_fib *fib;
2283         struct mlxsw_sp_vr *vr;
2284         int err;
2285
2286         vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
2287         if (IS_ERR(vr))
2288                 return ERR_CAST(vr);
2289         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
2290
2291         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
2292                                             sizeof(fen_info->dst),
2293                                             fen_info->dst_len);
2294         if (fib_node)
2295                 return fib_node;
2296
2297         fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
2298                                             sizeof(fen_info->dst),
2299                                             fen_info->dst_len);
2300         if (!fib_node) {
2301                 err = -ENOMEM;
2302                 goto err_fib_node_create;
2303         }
2304
2305         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
2306         if (err)
2307                 goto err_fib_node_init;
2308
2309         return fib_node;
2310
2311 err_fib_node_init:
2312         mlxsw_sp_fib_node_destroy(fib_node);
2313 err_fib_node_create:
2314         mlxsw_sp_vr_put(vr);
2315         return ERR_PTR(err);
2316 }
2317
2318 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2319                                    struct mlxsw_sp_fib_node *fib_node)
2320 {
2321         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
2322
2323         if (!list_empty(&fib_node->entry_list))
2324                 return;
2325         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
2326         mlxsw_sp_fib_node_destroy(fib_node);
2327         mlxsw_sp_vr_put(vr);
2328 }
2329
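/* Find the first entry in the node's list that a new entry with the
 * given parameters should be inserted before. The list is kept sorted
 * by table ID (descending), then TOS (descending), then priority
 * (ascending), matching the order in which the kernel considers
 * overlapping routes. Returns NULL if no such entry exists.
 */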
2330 static struct mlxsw_sp_fib_entry *
2331 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2332                               const struct mlxsw_sp_fib_entry_params *params)
2333 {
2334         struct mlxsw_sp_fib_entry *fib_entry;
2335
2336         list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2337                 if (fib_entry->params.tb_id > params->tb_id)
2338                         continue;
2339                 if (fib_entry->params.tb_id != params->tb_id)
2340                         break;
2341                 if (fib_entry->params.tos > params->tos)
2342                         continue;
2343                 if (fib_entry->params.prio >= params->prio ||
2344                     fib_entry->params.tos < params->tos)
2345                         return fib_entry;
2346         }
2347
2348         return NULL;
2349 }
2350
2351 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2352                                           struct mlxsw_sp_fib_entry *new_entry)
2353 {
2354         struct mlxsw_sp_fib_node *fib_node;
2355
2356         if (WARN_ON(!fib_entry))
2357                 return -EINVAL;
2358
2359         fib_node = fib_entry->fib_node;
2360         list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2361                 if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2362                     fib_entry->params.tos != new_entry->params.tos ||
2363                     fib_entry->params.prio != new_entry->params.prio)
2364                         break;
2365         }
2366
2367         list_add_tail(&new_entry->list, &fib_entry->list);
2368         return 0;
2369 }
2370
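/* Insert a new entry into the node's sorted entry list. On append, the
 * entry is placed after all entries it is equivalent to (same table,
 * TOS and priority); on replace, it is placed directly before the
 * entry being replaced, which mlxsw_sp_fib4_entry_replace() then
 * removes.
 */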
2371 static int
2372 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2373                                struct mlxsw_sp_fib_entry *new_entry,
2374                                bool replace, bool append)
2375 {
2376         struct mlxsw_sp_fib_entry *fib_entry;
2377
2378         fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2379
2380         if (append)
2381                 return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2382         if (replace && WARN_ON(!fib_entry))
2383                 return -EINVAL;
2384
2385         /* Insert the new entry before the replaced one, so that we can
2386          * later remove the latter.
2387          */
2388         if (fib_entry) {
2389                 list_add_tail(&new_entry->list, &fib_entry->list);
2390         } else {
2391                 struct mlxsw_sp_fib_entry *last;
2392
2393                 list_for_each_entry(last, &fib_node->entry_list, list) {
2394                         if (new_entry->params.tb_id > last->params.tb_id)
2395                                 break;
2396                         fib_entry = last;
2397                 }
2398
2399                 if (fib_entry)
2400                         list_add(&new_entry->list, &fib_entry->list);
2401                 else
2402                         list_add(&new_entry->list, &fib_node->entry_list);
2403         }
2404
2405         return 0;
2406 }
2407
2408 static void
2409 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2410 {
2411         list_del(&fib_entry->list);
2412 }
2413
2414 static int
2415 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2416                              const struct mlxsw_sp_fib_node *fib_node,
2417                              struct mlxsw_sp_fib_entry *fib_entry)
2418 {
2419         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2420                 return 0;
2421
2422         /* To prevent packet loss, overwrite the previously offloaded
2423          * entry.
2424          */
2425         if (!list_is_singular(&fib_node->entry_list)) {
2426                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2427                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2428
2429                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2430         }
2431
2432         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2433 }
2434
2435 static void
2436 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2437                              const struct mlxsw_sp_fib_node *fib_node,
2438                              struct mlxsw_sp_fib_entry *fib_entry)
2439 {
2440         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2441                 return;
2442
2443         /* Promote the next entry by overwriting the deleted entry */
2444         if (!list_is_singular(&fib_node->entry_list)) {
2445                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2446                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2447
2448                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2449                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2450                 return;
2451         }
2452
2453         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2454 }
2455
2456 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2457                                          struct mlxsw_sp_fib_entry *fib_entry,
2458                                          bool replace, bool append)
2459 {
2460         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2461         int err;
2462
2463         err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2464                                              append);
2465         if (err)
2466                 return err;
2467
2468         err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2469         if (err)
2470                 goto err_fib4_node_entry_add;
2471
2472         return 0;
2473
2474 err_fib4_node_entry_add:
2475         mlxsw_sp_fib4_node_list_remove(fib_entry);
2476         return err;
2477 }
2478
2479 static void
2480 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2481                                 struct mlxsw_sp_fib_entry *fib_entry)
2482 {
2483         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2484
2485         mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2486         mlxsw_sp_fib4_node_list_remove(fib_entry);
2487 }
2488
2489 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2490                                         struct mlxsw_sp_fib_entry *fib_entry,
2491                                         bool replace)
2492 {
2493         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2494         struct mlxsw_sp_fib_entry *replaced;
2495
2496         if (!replace)
2497                 return;
2498
2499         /* We inserted the new entry before the replaced one */
2500         replaced = list_next_entry(fib_entry, list);
2501
2502         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2503         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2504         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2505 }
2506
2507 static int
2508 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2509                          const struct fib_entry_notifier_info *fen_info,
2510                          bool replace, bool append)
2511 {
2512         struct mlxsw_sp_fib_entry *fib_entry;
2513         struct mlxsw_sp_fib_node *fib_node;
2514         int err;
2515
2516         if (mlxsw_sp->router.aborted)
2517                 return 0;
2518
2519         fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2520         if (IS_ERR(fib_node)) {
2521                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2522                 return PTR_ERR(fib_node);
2523         }
2524
2525         fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2526         if (IS_ERR(fib_entry)) {
2527                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2528                 err = PTR_ERR(fib_entry);
2529                 goto err_fib4_entry_create;
2530         }
2531
2532         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2533                                             append);
2534         if (err) {
2535                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2536                 goto err_fib4_node_entry_link;
2537         }
2538
2539         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2540
2541         return 0;
2542
2543 err_fib4_node_entry_link:
2544         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2545 err_fib4_entry_create:
2546         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2547         return err;
2548 }
2549
2550 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2551                                      struct fib_entry_notifier_info *fen_info)
2552 {
2553         struct mlxsw_sp_fib_entry *fib_entry;
2554         struct mlxsw_sp_fib_node *fib_node;
2555
2556         if (mlxsw_sp->router.aborted)
2557                 return;
2558
2559         fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2560         if (WARN_ON(!fib_entry))
2561                 return;
2562         fib_node = fib_entry->fib_node;
2563
2564         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2565         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2566         mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2567 }
2568
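/* After an abort, make every used virtual router send all IPv4 traffic
 * to the CPU: bind each one to the minimal LPM tree and install a
 * default (/0) catch-all route whose action is ip2me, trapping packets
 * to the kernel for software forwarding.
 */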
2569 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2570 {
2571         char ralta_pl[MLXSW_REG_RALTA_LEN];
2572         char ralst_pl[MLXSW_REG_RALST_LEN];
2573         int i, err;
2574
2575         mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2576                              MLXSW_SP_LPM_TREE_MIN);
2577         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2578         if (err)
2579                 return err;
2580
2581         mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2582         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2583         if (err)
2584                 return err;
2585
2586         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2587                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2588                 char raltb_pl[MLXSW_REG_RALTB_LEN];
2589                 char ralue_pl[MLXSW_REG_RALUE_LEN];
2590
2591                 if (!mlxsw_sp_vr_is_used(vr))
2592                         continue;
2593
2594                 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
2595                                      MLXSW_REG_RALXX_PROTOCOL_IPV4,
2596                                      MLXSW_SP_LPM_TREE_MIN);
2597                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
2598                                       raltb_pl);
2599                 if (err)
2600                         return err;
2601
2602                 mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2603                                       MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
2604                                       0);
2605                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2606                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
2607                                       ralue_pl);
2608                 if (err)
2609                         return err;
2610         }
2611
2612         return 0;
2613 }
2614
2615 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2616                                      struct mlxsw_sp_fib_node *fib_node)
2617 {
2618         struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2619
2620         list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2621                 bool do_break = &tmp->list == &fib_node->entry_list;
2622
2623                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2624                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2625                 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2626                 /* Break when entry list is empty and node was freed.
2627                  * Otherwise, we'll access freed memory in the next
2628                  * iteration.
2629                  */
2630                 if (do_break)
2631                         break;
2632         }
2633 }
2634
2635 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2636                                     struct mlxsw_sp_fib_node *fib_node)
2637 {
2638         switch (fib_node->fib->proto) {
2639         case MLXSW_SP_L3_PROTO_IPV4:
2640                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2641                 break;
2642         case MLXSW_SP_L3_PROTO_IPV6:
2643                 WARN_ON_ONCE(1);
2644                 break;
2645         }
2646 }
2647
2648 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
2649                                   struct mlxsw_sp_vr *vr,
2650                                   enum mlxsw_sp_l3proto proto)
2651 {
2652         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
2653         struct mlxsw_sp_fib_node *fib_node, *tmp;
2654
2655         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
2656                 bool do_break = &tmp->list == &fib->node_list;
2657
2658                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2659                 if (do_break)
2660                         break;
2661         }
2662 }
2663
2664 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2665 {
2666         int i;
2667
2668         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2669                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2670
2671                 if (!mlxsw_sp_vr_is_used(vr))
2672                         continue;
2673                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
2674         }
2675 }
2676
2677 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2678 {
2679         int err;
2680
2681         if (mlxsw_sp->router.aborted)
2682                 return;
2683         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2684         mlxsw_sp_router_fib_flush(mlxsw_sp);
2685         mlxsw_sp->router.aborted = true;
2686         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2687         if (err)
2688                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2689 }
2690
2691 struct mlxsw_sp_fib_event_work {
2692         struct work_struct work;
2693         union {
2694                 struct fib_entry_notifier_info fen_info;
2695                 struct fib_rule_notifier_info fr_info;
2696                 struct fib_nh_notifier_info fnh_info;
2697         };
2698         struct mlxsw_sp *mlxsw_sp;
2699         unsigned long event;
2700 };
2701
2702 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2703 {
2704         struct mlxsw_sp_fib_event_work *fib_work =
2705                 container_of(work, struct mlxsw_sp_fib_event_work, work);
2706         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2707         struct fib_rule *rule;
2708         bool replace, append;
2709         int err;
2710
2711         /* Protect internal structures from changes */
2712         rtnl_lock();
2713         switch (fib_work->event) {
2714         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2715         case FIB_EVENT_ENTRY_APPEND: /* fall through */
2716         case FIB_EVENT_ENTRY_ADD:
2717                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2718                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2719                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2720                                                replace, append);
2721                 if (err)
2722                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2723                 fib_info_put(fib_work->fen_info.fi);
2724                 break;
2725         case FIB_EVENT_ENTRY_DEL:
2726                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2727                 fib_info_put(fib_work->fen_info.fi);
2728                 break;
2729         case FIB_EVENT_RULE_ADD: /* fall through */
2730         case FIB_EVENT_RULE_DEL:
2731                 rule = fib_work->fr_info.rule;
2732                 if (!fib4_rule_default(rule) && !rule->l3mdev)
2733                         mlxsw_sp_router_fib4_abort(mlxsw_sp);
2734                 fib_rule_put(rule);
2735                 break;
2736         case FIB_EVENT_NH_ADD: /* fall through */
2737         case FIB_EVENT_NH_DEL:
2738                 mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2739                                        fib_work->fnh_info.fib_nh);
2740                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2741                 break;
2742         }
2743         rtnl_unlock();
2744         kfree(fib_work);
2745 }
2746
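/* FIB notifications arrive in atomic context, so the event data is
 * copied, references are taken on any objects that must stay alive,
 * and the actual processing is deferred to process context, where the
 * work item can take RTNL and safely modify the driver's structures.
 * The resulting call flow is roughly:
 *
 *	mlxsw_sp_router_fib_event()            (atomic, copies + refs)
 *	  -> mlxsw_core_schedule_work()
 *	    -> mlxsw_sp_router_fib_event_work() (under rtnl_lock())
 */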
2747 /* Called with rcu_read_lock() */
2748 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2749                                      unsigned long event, void *ptr)
2750 {
2751         struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2752         struct mlxsw_sp_fib_event_work *fib_work;
2753         struct fib_notifier_info *info = ptr;
2754
2755         if (!net_eq(info->net, &init_net))
2756                 return NOTIFY_DONE;
2757
2758         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2759         if (WARN_ON(!fib_work))
2760                 return NOTIFY_BAD;
2761
2762         INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2763         fib_work->mlxsw_sp = mlxsw_sp;
2764         fib_work->event = event;
2765
2766         switch (event) {
2767         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2768         case FIB_EVENT_ENTRY_APPEND: /* fall through */
2769         case FIB_EVENT_ENTRY_ADD: /* fall through */
2770         case FIB_EVENT_ENTRY_DEL:
2771                 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2772                 /* Take a reference on the fib_info to prevent it from being
2773                  * freed while work is queued. Release it afterwards.
2774                  */
2775                 fib_info_hold(fib_work->fen_info.fi);
2776                 break;
2777         case FIB_EVENT_RULE_ADD: /* fall through */
2778         case FIB_EVENT_RULE_DEL:
2779                 memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
2780                 fib_rule_get(fib_work->fr_info.rule);
2781                 break;
2782         case FIB_EVENT_NH_ADD: /* fall through */
2783         case FIB_EVENT_NH_DEL:
2784                 memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2785                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2786                 break;
2787         }
2788
2789         mlxsw_core_schedule_work(&fib_work->work);
2790
2791         return NOTIFY_DONE;
2792 }
2793
2794 static struct mlxsw_sp_rif *
2795 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
2796                          const struct net_device *dev)
2797 {
2798         int i;
2799
2800         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2801                 if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
2802                         return mlxsw_sp->rifs[i];
2803
2804         return NULL;
2805 }
2806
2807 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2808 {
2809         char ritr_pl[MLXSW_REG_RITR_LEN];
2810         int err;
2811
2812         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2813         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2814         if (WARN_ON_ONCE(err))
2815                 return err;
2816
2817         mlxsw_reg_ritr_enable_set(ritr_pl, false);
2818         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2819 }
2820
2821 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2822                                           struct mlxsw_sp_rif *rif)
2823 {
2824         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
2825         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
2826         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
2827 }
2828
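/* Decide whether an inetaddr event should change the RIF configuration
 * of a netdev: configure a RIF when the first address appears on a
 * device without one, and remove it once the last address is gone.
 * l3mdev (VRF) slaves are skipped on NETDEV_DOWN, since their RIF
 * lifetime follows the enslavement rather than the addresses.
 */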
2829 static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
2830                                        const struct in_device *in_dev,
2831                                        unsigned long event)
2832 {
2833         switch (event) {
2834         case NETDEV_UP:
2835                 if (!rif)
2836                         return true;
2837                 return false;
2838         case NETDEV_DOWN:
2839                 if (rif && !in_dev->ifa_list &&
2840                     !netif_is_l3_slave(rif->dev))
2841                         return true;
2842                 /* It is possible we already removed the RIF ourselves
2843                  * if it was assigned to a netdev that is now a bridge
2844                  * or LAG slave.
2845                  */
2846                 return false;
2847         }
2848
2849         return false;
2850 }
2851
2852 #define MLXSW_SP_INVALID_INDEX_RIF 0xffff
2853 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
2854 {
2855         int i;
2856
2857         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2858                 if (!mlxsw_sp->rifs[i])
2859                         return i;
2860
2861         return MLXSW_SP_INVALID_INDEX_RIF;
2862 }
2863
2864 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
2865                                            bool *p_lagged, u16 *p_system_port)
2866 {
2867         u8 local_port = mlxsw_sp_vport->local_port;
2868
2869         *p_lagged = mlxsw_sp_vport->lagged;
2870         *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
2871 }
2872
static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
				    u16 vr_id, struct net_device *l3_dev,
				    u16 rif_index, bool create)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
	bool lagged = mlxsw_sp_vport->lagged;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u16 system_port;

	mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index,
			    vr_id, l3_dev->mtu, l3_dev->dev_addr);

	mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
				  mlxsw_sp_vport_vid_get(mlxsw_sp_vport));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);

static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index)
{
	return MLXSW_SP_RFID_BASE + rif_index;
}

static struct mlxsw_sp_fid *
mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
{
	struct mlxsw_sp_fid *f;

	f = kzalloc(sizeof(*f), GFP_KERNEL);
	if (!f)
		return NULL;

	f->leave = mlxsw_sp_vport_rif_sp_leave;
	f->ref_count = 0;
	f->dev = l3_dev;
	f->fid = fid;

	return f;
}

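/* Allocate and initialize a RIF structure mirroring the L3 netdev's
 * address and MTU; does not program the hardware.
 */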
static struct mlxsw_sp_rif *
mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev,
		   struct mlxsw_sp_fid *f)
{
	struct mlxsw_sp_rif *rif;

	rif = kzalloc(sizeof(*rif), GFP_KERNEL);
	if (!rif)
		return NULL;

	INIT_LIST_HEAD(&rif->nexthop_list);
	INIT_LIST_HEAD(&rif->neigh_list);
	ether_addr_copy(rif->addr, l3_dev->dev_addr);
	rif->mtu = l3_dev->mtu;
	rif->vr_id = vr_id;
	rif->dev = l3_dev;
	rif->rif_index = rif_index;
	rif->f = f;

	return rif;
}

u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}

int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}

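/* Create a Sub-port RIF for a vPort: reserve a RIF index, bind it to the
 * netdev's virtual router, program the RITR register, install an FDB
 * entry for the router FID and allocate the backing rFID. An egress
 * counter is also allocated when the erif dpipe table has counters
 * enabled; failure to allocate it is only logged, not fatal.
 */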
static struct mlxsw_sp_rif *
mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
			     struct net_device *l3_dev)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
	u32 tb_id = l3mdev_fib_table(l3_dev);
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_fid *f;
	struct mlxsw_sp_rif *rif;
	u16 fid, rif_index;
	int err;

	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
		return ERR_PTR(-ERANGE);

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev,
				       rif_index, true);
	if (err)
		goto err_vport_rif_sp_op;

	fid = mlxsw_sp_rif_sp_to_fid(rif_index);
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
	if (err)
		goto err_rif_fdb_op;

	f = mlxsw_sp_rfid_alloc(fid, l3_dev);
	if (!f) {
		err = -ENOMEM;
		goto err_rfid_alloc;
	}

	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}

	if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core),
						MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) {
		err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
						 MLXSW_SP_RIF_COUNTER_EGRESS);
		if (err)
			netdev_dbg(mlxsw_sp_vport->dev,
				   "Counter alloc failed, err=%d\n", err);
	}

	f->rif = rif;
	mlxsw_sp->rifs[rif_index] = rif;
	vr->rif_count++;

	return rif;

err_rif_alloc:
	kfree(f);
err_rfid_alloc:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
err_rif_fdb_op:
	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
				 false);
err_vport_rif_sp_op:
	mlxsw_sp_vr_put(vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
					  struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
	struct net_device *l3_dev = rif->dev;
	struct mlxsw_sp_fid *f = rif->f;
	u16 rif_index = rif->rif_index;
	u16 fid = f->fid;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS);

	vr->rif_count--;
	mlxsw_sp->rifs[rif_index] = NULL;
	f->rif = NULL;

	kfree(rif);

	kfree(f);

	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);

	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
				 false);
	mlxsw_sp_vr_put(vr);
}

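/* Join a vPort to the RIF of an L3 netdev, creating the RIF on first
 * use; the rFID's reference count tracks the number of joined vPorts.
 */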
static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
				      struct net_device *l3_dev)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif) {
		rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
	}

	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f);
	rif->f->ref_count++;

	netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid);

	return 0;
}

static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
{
	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);

	netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);

	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
	if (--f->ref_count == 0)
		mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif);
}

static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
					 struct net_device *port_dev,
					 unsigned long event, u16 vid)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
	struct mlxsw_sp_port *mlxsw_sp_vport;

	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
	if (WARN_ON(!mlxsw_sp_vport))
		return -EINVAL;

	switch (event) {
	case NETDEV_UP:
		return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
	case NETDEV_DOWN:
		mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
		break;
	}

	return 0;
}

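/* Port netdevs that are bridge, LAG or OVS slaves do not get a RIF of
 * their own; their L3 configuration is handled via the upper device.
 */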
static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
					unsigned long event)
{
	if (netif_is_bridge_port(port_dev) ||
	    netif_is_lag_port(port_dev) ||
	    netif_is_ovs_port(port_dev))
		return 0;

	return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
}

static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
					 struct net_device *lag_dev,
					 unsigned long event, u16 vid)
{
	struct net_device *port_dev;
	struct list_head *iter;
	int err;

	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
		if (mlxsw_sp_port_dev_check(port_dev)) {
			err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
							    event, vid);
			if (err)
				return err;
		}
	}

	return 0;
}

static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
}

static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
						    struct net_device *l3_dev)
{
	u16 fid;

	if (is_vlan_dev(l3_dev))
		fid = vlan_dev_vlan_id(l3_dev);
	else if (mlxsw_sp->master_bridge.dev == l3_dev)
		fid = 1;
	else
		return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);

	return mlxsw_sp_fid_find(mlxsw_sp, fid);
}

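/* The router port is a virtual port that represents the router in the
 * flood tables; it is numbered one past the highest possible local port.
 */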
static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}

static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
{
	return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
	       MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
}

static u16 mlxsw_sp_flood_table_index_get(u16 fid)
{
	return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
}

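/* Add or remove the router port in the FID's broadcast flood table, so
 * that packets flooded in the FID do (or no longer) reach the router.
 */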
static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
					  bool set)
{
	u8 router_port = mlxsw_sp_router_port(mlxsw_sp);
	enum mlxsw_flood_table_type table_type;
	char *sftr_pl;
	u16 index;
	int err;

	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
	if (!sftr_pl)
		return -ENOMEM;

	table_type = mlxsw_sp_flood_table_type_get(fid);
	index = mlxsw_sp_flood_table_index_get(fid);
	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
			    1, router_port, set);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);

	kfree(sftr_pl);
	return err;
}

static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
{
	if (mlxsw_sp_fid_is_vfid(fid))
		return MLXSW_REG_RITR_FID_IF;
	else
		return MLXSW_REG_RITR_VLAN_IF;
}

static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
				  struct net_device *l3_dev,
				  u16 fid, u16 rif,
				  bool create)
{
	enum mlxsw_reg_ritr_if_type rif_type;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_type = mlxsw_sp_rif_type_get(fid);
	mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu,
			    l3_dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

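/* Create a RIF for a bridge (or VLAN) FID: reserve a RIF index, bind the
 * virtual router, enable router-port flooding for the FID, program the
 * RITR register and install an FDB entry for the netdev's MAC.
 */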
static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
				      struct net_device *l3_dev,
				      struct mlxsw_sp_fid *f)
{
	u32 tb_id = l3mdev_fib_table(l3_dev);
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
		return -ERANGE;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
	if (err)
		goto err_port_flood_set;

	err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid,
				     rif_index, true);
	if (err)
		goto err_rif_bridge_op;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
	if (err)
		goto err_rif_fdb_op;

	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}

	f->rif = rif;
	mlxsw_sp->rifs[rif_index] = rif;
	vr->rif_count++;

	netdev_dbg(l3_dev, "RIF=%d created\n", rif_index);

	return 0;

err_rif_alloc:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
err_rif_fdb_op:
	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
			       false);
err_rif_bridge_op:
	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
err_port_flood_set:
	mlxsw_sp_vr_put(vr);
	return err;
}

void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
	struct net_device *l3_dev = rif->dev;
	struct mlxsw_sp_fid *f = rif->f;
	u16 rif_index = rif->rif_index;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);

	vr->rif_count--;
	mlxsw_sp->rifs[rif_index] = NULL;
	f->rif = NULL;

	kfree(rif);

	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);

	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
			       false);

	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);

	mlxsw_sp_vr_put(vr);

	netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index);
}

static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
					  struct net_device *br_dev,
					  unsigned long event)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	struct mlxsw_sp_fid *f;

	/* FID can either be an actual FID if the L3 device is the
	 * VLAN-aware bridge or a VLAN device on top. Otherwise, the
	 * L3 device is a VLAN-unaware bridge and we get a vFID.
	 */
	f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
	if (WARN_ON(!f))
		return -EINVAL;

	switch (event) {
	case NETDEV_UP:
		return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
	case NETDEV_DOWN:
		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
		break;
	}

	return 0;
}

static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
					unsigned long event)
{
	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
	u16 vid = vlan_dev_vlan_id(vlan_dev);

	if (netif_is_bridge_port(vlan_dev))
		return 0;

	if (mlxsw_sp_port_dev_check(real_dev))
		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
						     vid);
	else if (netif_is_lag_master(real_dev))
		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
						     vid);
	else if (netif_is_bridge_master(real_dev) &&
		 mlxsw_sp->master_bridge.dev == real_dev)
		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
						      event);

	return 0;
}

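/* Dispatch an inetaddr event according to the netdev type; unrecognized
 * netdevs are silently ignored.
 */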
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event);
	else if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event);
	else if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
	else if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event);
	else
		return 0;
}

int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event);
out:
	return notifier_from_errno(err);
}

static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true);
	if (err)
		goto err_rif_fdb_op;

	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true);
	return err;
}

static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);

	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
}

static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
				    struct net_device *l3_dev)
{
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif)
		return;
	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
}

int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
				 struct netdev_notifier_changeupper_info *info)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	int err = 0;

	if (!mlxsw_sp)
		return 0;

	switch (event) {
	case NETDEV_PRECHANGEUPPER:
		return 0;
	case NETDEV_CHANGEUPPER:
		if (info->linking)
			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
		else
			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
		break;
	}

	return err;
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}

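/* Enable the router in hardware via the RGCR register and allocate the
 * RIF table according to the MAX_RIFS resource.
 */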
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
err_nexthop_ht_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
	__mlxsw_sp_router_fini(mlxsw_sp);
}