// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"
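/* Multipath LAG: when an IPv4 ECMP route spans both ports of the same HCA,
 * the two PFs are bonded so offloaded traffic can follow the kernel FIB.
 * Route and nexthop updates arrive through a FIB notifier and are turned
 * into port-affinity changes below.
 */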
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
        if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;

        return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                         ldev->pf[MLX5_LAG_P2].dev);
}
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        ldev = mlx5_lag_dev_get(dev);
        res = ldev && __mlx5_lag_is_multipath(ldev);

        return res;
}
/**
 * Set lag port affinity
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
                                       enum mlx5_lag_port_affinity port)
{
        struct lag_tracker tracker;

        if (!__mlx5_lag_is_multipath(ldev))
                return;

        switch (port) {
        case MLX5_LAG_NORMAL_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        case MLX5_LAG_P1_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P2].link_up = false;
                break;
        case MLX5_LAG_P2_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P1].link_up = false;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        default:
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Invalid affinity port %d", port);
                return;
        }

        if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        mlx5_modify_lag(ldev, &tracker);
}
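/* Flush callback handed to register_fib_notifier(); it lets the FIB
 * notifier core wait until all the work we have queued has finished.
 */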
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

        flush_workqueue(mp->wq);
}
struct mlx5_fib_event_work {
        struct work_struct work;
        struct mlx5_lag *ldev;
        unsigned long event;
        union {
                struct fib_entry_notifier_info fen_info;
                struct fib_nh_notifier_info fnh_info;
        };
};
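/* Route add/replace/del handler: a delete stops tracking, a single-path
 * route pins affinity to its nexthop port, and a two-path route over both
 * ports of the HCA activates multipath LAG and starts tracking the
 * fib_info.
 */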
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                                     unsigned long event,
                                     struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;
        struct fib_nh *fib_nh0, *fib_nh1;
        unsigned int nhs;

        /* Handle delete event */
        if (event == FIB_EVENT_ENTRY_DEL) {
                /* stop track */
                if (mp->mfi == fi)
                        mp->mfi = NULL;
                return;
        }

        /* Handle add/replace event */
        nhs = fib_info_num_path(fi);
        if (nhs == 1) {
                if (__mlx5_lag_is_active(ldev)) {
                        struct fib_nh *nh = fib_info_nh(fi, 0);
                        struct net_device *nh_dev = nh->fib_nh_dev;
                        int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

                        mlx5_lag_set_port_affinity(ldev, ++i);
                }
                return;
        }

        if (nhs != 2)
                return;

        /* Verify next hops are ports of the same hca */
        fib_nh0 = fib_info_nh(fi, 0);
        fib_nh1 = fib_info_nh(fi, 1);
        if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
            !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Multipath offload requires two ports of the same HCA\n");
                return;
        }

        /* First time we see multipath route */
        if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
                struct lag_tracker tracker;

                tracker = ldev->tracker;
                mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
        }

        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        mp->mfi = fi;
}
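/* A nexthop of the tracked route was added or removed: on removal shift
 * affinity to the surviving peer port, and when the second path comes back
 * restore normal (both ports) affinity.
 */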
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
                                       unsigned long event,
                                       struct fib_nh *fib_nh,
                                       struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;

        /* Check the nh event is related to the route */
        if (!mp->mfi || mp->mfi != fi)
                return;

        /* nh added/removed */
        if (event == FIB_EVENT_NH_DEL) {
                int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

                if (i >= 0) {
                        i = (i + 1) % 2 + 1; /* peer port */
                        mlx5_lag_set_port_affinity(ldev, i);
                }
        } else if (event == FIB_EVENT_NH_ADD &&
                   fib_info_num_path(fi) == 2) {
                mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        }
}
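/* Deferred-work handler: FIB events queued by the notifier are processed
 * here under the RTNL lock.
 */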
static void mlx5_lag_fib_update(struct work_struct *work)
{
        struct mlx5_fib_event_work *fib_work =
                container_of(work, struct mlx5_fib_event_work, work);
        struct mlx5_lag *ldev = fib_work->ldev;
        struct fib_nh *fib_nh;

        /* Protect internal structures from changes */
        rtnl_lock();
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_DEL:
                mlx5_lag_fib_route_event(ldev, fib_work->event,
                                         fib_work->fen_info.fi);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                fib_nh = fib_work->fnh_info.fib_nh;
                mlx5_lag_fib_nexthop_event(ldev,
                                           fib_work->event,
                                           fib_work->fnh_info.fib_nh,
                                           fib_nh->nh_parent);
                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }

        rtnl_unlock();
        kfree(fib_work);
}
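/* Allocated with GFP_ATOMIC because the caller runs from the FIB notifier
 * chain, where sleeping is not allowed.
 */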
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
        struct mlx5_fib_event_work *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
        fib_work->ldev = ldev;
        fib_work->event = event;

        return fib_work;
}
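/* FIB notifier callback: runs in atomic context, so it only validates the
 * event, copies the notifier info, takes a reference on the fib_info and
 * defers the real processing to the workqueue.
 */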
static int mlx5_lag_fib_event(struct notifier_block *nb,
                              unsigned long event,
                              void *ptr)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
        struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
        struct fib_notifier_info *info = ptr;
        struct mlx5_fib_event_work *fib_work;
        struct fib_entry_notifier_info *fen_info;
        struct fib_nh_notifier_info *fnh_info;
        struct net_device *fib_dev;
        struct fib_info *fi;

        if (info->family != AF_INET)
                return NOTIFY_DONE;

        if (!mlx5_lag_multipath_check_prereq(ldev))
                return NOTIFY_DONE;

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_DEL:
                fen_info = container_of(info, struct fib_entry_notifier_info,
                                        info);
                fi = fen_info->fi;
                if (fi->nh) {
                        NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
                        return notifier_from_errno(-EINVAL);
                }
                fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
                if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
                    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
                        return NOTIFY_DONE;
                }
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fen_info = *fen_info;
                /* Take reference on fib_info to prevent it from being
                 * freed while work is queued. Release it afterwards.
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
                fnh_info = container_of(info, struct fib_nh_notifier_info,
                                        info);
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fnh_info = *fnh_info;
                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        default:
                return NOTIFY_DONE;
        }

        queue_work(mp->wq, &fib_work->work);

        return NOTIFY_DONE;
}
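/* Called once per lag device; idempotent, since the registered
 * notifier_call doubles as an "already initialized" flag.
 */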
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;
        int err;

        if (mp->fib_nb.notifier_call)
                return 0;

        mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
        if (!mp->wq)
                return -ENOMEM;

        mp->fib_nb.notifier_call = mlx5_lag_fib_event;
        err = register_fib_notifier(&init_net, &mp->fib_nb,
                                    mlx5_lag_fib_event_flush, NULL);
        if (err) {
                destroy_workqueue(mp->wq);
                mp->fib_nb.notifier_call = NULL;
        }

        return err;
}
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;

        if (!mp->fib_nb.notifier_call)
                return;

        unregister_fib_notifier(&init_net, &mp->fib_nb);
        destroy_workqueue(mp->wq);
        mp->fib_nb.notifier_call = NULL;
}