1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 // Copyright (c) 2018 Mellanox Technologies
4 #include <linux/mlx5/driver.h>
/* Per-handler notifier wrapper used by this file.
 * The member list is not shown in this listing. The code below accesses two
 * members: `nb`, which mlx5_events_start() fills from events_nbs_ref[] and
 * which handlers use with mlx5_nb_cof() to get back to the wrapper, and
 * `ctx`, which mlx5_events_start() points at the owning events context.
 */
10 struct mlx5_event_nb {
15 /* General event handlers for the low-level mlx5_core driver.
17 * Other major feature-specific events, such as
18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, by
19 * separate notifier callbacks owned by those specific mlx5 components.
/* Forward declarations of the notifier callbacks. They are needed because
 * the events_nbs_ref[] table below takes their addresses before the
 * definitions appear. All share the same (notifier block, event, data)
 * signature.
 */
21 static int any_notifier(struct notifier_block *, unsigned long, void *);
22 static int port_change(struct notifier_block *, unsigned long, void *);
23 static int general_event(struct notifier_block *, unsigned long, void *);
24 static int temp_warn(struct notifier_block *, unsigned long, void *);
25 static int port_module(struct notifier_block *, unsigned long, void *);
27 /* handler which forwards the event to events->nh, driver notifiers */
28 static int forward_event(struct notifier_block *, unsigned long, void *);
/* Reference table pairing each handler with the event type it is registered
 * for. mlx5_events_start() copies every entry into the per-device
 * notifiers[] array and registers the copy, so this table is a template
 * and is not registered itself.
 * MLX5_EVENT_TYPE_PORT_CHANGE is listed twice: once for port_change() and
 * once for forward_event().
 */
30 static struct mlx5_nb events_nbs_ref[] = {
31 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
32 {.nb.notifier_call = port_change, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
33 {.nb.notifier_call = general_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
34 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
35 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
37 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
/* Members of the per-device events context. The struct header line is not
 * shown in this listing; presumably this is struct mlx5_events, since the
 * rest of the file reaches these fields through `struct mlx5_events *`.
 */
/* device this context belongs to; handlers use it for logging and dispatch */
42 struct mlx5_core_dev *dev;
/* one slot per events_nbs_ref[] entry, filled in by mlx5_events_start() */
43 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
44 /* driver notifier chain */
45 struct atomic_notifier_head nh;
46 /* port module events stats */
47 struct mlx5_pme_stats pme_stats;
50 static const char *eqe_type_str(u8 type)
53 case MLX5_EVENT_TYPE_COMP:
54 return "MLX5_EVENT_TYPE_COMP";
55 case MLX5_EVENT_TYPE_PATH_MIG:
56 return "MLX5_EVENT_TYPE_PATH_MIG";
57 case MLX5_EVENT_TYPE_COMM_EST:
58 return "MLX5_EVENT_TYPE_COMM_EST";
59 case MLX5_EVENT_TYPE_SQ_DRAINED:
60 return "MLX5_EVENT_TYPE_SQ_DRAINED";
61 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
62 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
63 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
64 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
65 case MLX5_EVENT_TYPE_CQ_ERROR:
66 return "MLX5_EVENT_TYPE_CQ_ERROR";
67 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
68 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
69 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
70 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
71 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
72 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
73 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
74 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
75 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
76 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
77 case MLX5_EVENT_TYPE_INTERNAL_ERROR:
78 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
79 case MLX5_EVENT_TYPE_PORT_CHANGE:
80 return "MLX5_EVENT_TYPE_PORT_CHANGE";
81 case MLX5_EVENT_TYPE_GPIO_EVENT:
82 return "MLX5_EVENT_TYPE_GPIO_EVENT";
83 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
84 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
85 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
86 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
87 case MLX5_EVENT_TYPE_REMOTE_CONFIG:
88 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
89 case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
90 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
91 case MLX5_EVENT_TYPE_STALL_EVENT:
92 return "MLX5_EVENT_TYPE_STALL_EVENT";
93 case MLX5_EVENT_TYPE_CMD:
94 return "MLX5_EVENT_TYPE_CMD";
95 case MLX5_EVENT_TYPE_PAGE_REQUEST:
96 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
97 case MLX5_EVENT_TYPE_PAGE_FAULT:
98 return "MLX5_EVENT_TYPE_PAGE_FAULT";
99 case MLX5_EVENT_TYPE_PPS_EVENT:
100 return "MLX5_EVENT_TYPE_PPS_EVENT";
101 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
102 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
103 case MLX5_EVENT_TYPE_FPGA_ERROR:
104 return "MLX5_EVENT_TYPE_FPGA_ERROR";
105 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
106 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
107 case MLX5_EVENT_TYPE_GENERAL_EVENT:
108 return "MLX5_EVENT_TYPE_GENERAL_EVENT";
109 case MLX5_EVENT_TYPE_DEVICE_TRACER:
110 return "MLX5_EVENT_TYPE_DEVICE_TRACER";
112 return "Unrecognized event";
116 /* handles all FW events, type == eqe->type */
/* Debug-logs the type name and subtype of the EQE it is handed.
 * It is the handler listed for MLX5_EVENT_TYPE_NOTIFY_ANY in
 * events_nbs_ref[].
 *
 * @nb:   notifier block embedded in a struct mlx5_event_nb; used to get
 *        back to the wrapper and, through its ctx, to the events context
 * @type: event type; not read in the lines shown here
 * @data: the struct mlx5_eqe describing the event
 *
 * The return statement is not shown in this listing.
 */
117 static int any_notifier(struct notifier_block *nb,
118 unsigned long type, void *data)
120 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
121 struct mlx5_events *events = event_nb->ctx;
122 struct mlx5_eqe *eqe = data;
124 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
125 eqe_type_str(eqe->type), eqe->sub_type);
/* Translate a PORT_CHANGE EQE subtype into the matching mlx5_dev_event code.
 *
 * @subtype: MLX5_PORT_CHANGE_SUBTYPE_* value taken from the EQE
 *
 * The value returned for a subtype that is not listed is not shown in this
 * listing. The only caller visible here, port_change(), passes listed
 * subtypes only.
 */
129 static enum mlx5_dev_event port_subtype2dev(u8 subtype)
132 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
133 return MLX5_DEV_EVENT_PORT_DOWN;
134 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
135 return MLX5_DEV_EVENT_PORT_UP;
136 case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
137 return MLX5_DEV_EVENT_PORT_INITIALIZED;
138 case MLX5_PORT_CHANGE_SUBTYPE_LID:
139 return MLX5_DEV_EVENT_LID_CHANGE;
140 case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
141 return MLX5_DEV_EVENT_PKEY_CHANGE;
142 case MLX5_PORT_CHANGE_SUBTYPE_GUID:
143 return MLX5_DEV_EVENT_GUID_CHANGE;
144 case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
145 return MLX5_DEV_EVENT_CLIENT_REREG;
150 /* type == MLX5_EVENT_TYPE_PORT_CHANGE */
/* Handle a port change EQE. Recognized subtypes are translated with
 * port_subtype2dev() and passed to the device's event callback together
 * with the port number. Any other subtype is reported with a warning and
 * not dispatched.
 *
 * @nb:   notifier block embedded in a struct mlx5_event_nb
 * @type: event type; not read in the lines shown here
 * @data: the struct mlx5_eqe describing the event
 *
 * The return statement is not shown in this listing.
 */
151 static int port_change(struct notifier_block *nb,
152 unsigned long type, void *data)
154 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
155 struct mlx5_events *events = event_nb->ctx;
156 struct mlx5_core_dev *dev = events->dev;
158 bool dev_event_dispatch = false;
159 enum mlx5_dev_event dev_event;
160 unsigned long dev_event_data;
161 struct mlx5_eqe *eqe = data;
/* the port number is taken from the upper nibble of the EQE port byte */
162 u8 port = (eqe->data.port.port >> 4) & 0xf;
164 switch (eqe->sub_type) {
165 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
166 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
167 case MLX5_PORT_CHANGE_SUBTYPE_LID:
168 case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
169 case MLX5_PORT_CHANGE_SUBTYPE_GUID:
170 case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
171 case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
172 dev_event = port_subtype2dev(eqe->sub_type);
173 dev_event_data = (unsigned long)port;
174 dev_event_dispatch = true;
/* presumably the default branch; its label is not shown in this listing */
177 mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
178 port, eqe->sub_type);
/* dev_event and dev_event_data are assigned only when the subtype was
 * recognized, which is why the call is gated on dev_event_dispatch as well
 * as on the callback being set
 */
181 if (dev->event && dev_event_dispatch)
182 dev->event(dev, dev_event, dev_event_data);
187 /* type == MLX5_EVENT_TYPE_GENERAL_EVENT */
/* Handle a general EQE. The delay-drop-timeout subtype is passed to the
 * device's event callback as MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT. Other
 * subtypes are only debug-logged.
 *
 * @nb:   notifier block embedded in a struct mlx5_event_nb
 * @type: event type; not read in the lines shown here
 * @data: the struct mlx5_eqe describing the event
 *
 * The return statement is not shown in this listing.
 */
188 static int general_event(struct notifier_block *nb, unsigned long type, void *data)
190 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
191 struct mlx5_events *events = event_nb->ctx;
192 struct mlx5_core_dev *dev = events->dev;
194 bool dev_event_dispatch = false;
195 enum mlx5_dev_event dev_event;
196 unsigned long dev_event_data;
197 struct mlx5_eqe *eqe = data;
199 switch (eqe->sub_type) {
200 case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
201 dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT;
/* NOTE(review): dev_event_data is passed to dev->event() below, but no
 * assignment to it is visible in this listing (a line appears to be missing
 * here). Confirm it is initialized before the dispatch.
 */
203 dev_event_dispatch = true;
/* presumably the default branch; the remaining argument of this call is not
 * shown in this listing
 */
206 mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
/* dispatch only when a callback is set and the subtype was recognized */
210 if (dev->event && dev_event_dispatch)
211 dev->event(dev, dev_event, dev_event_data);
216 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
/* Handle a temperature warning EQE by logging the two sensor-warning words
 * it carries as hex bitmasks, most significant word first.
 *
 * @nb:   notifier block embedded in a struct mlx5_event_nb
 * @type: event type; not read in the lines shown here
 * @data: the struct mlx5_eqe describing the event
 *
 * The declarations of value_lsb/value_msb and the return statement are not
 * shown in this listing.
 */
217 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
219 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
220 struct mlx5_events *events = event_nb->ctx;
221 struct mlx5_eqe *eqe = data;
/* the EQE fields are big-endian; convert before printing */
225 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
226 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
/* NOTE(review): unlike the other log messages in this file, this format
 * string does not end in "\n". Consider adding it.
 */
228 mlx5_core_warn(events->dev,
229 "High temperature on sensors with bit set %llx %llx",
230 value_msb, value_lsb);
235 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
/* Text for each port module status. port_module() indexes this table with
 * (module_status - 1), so entry 0 corresponds to status value 0x1.
 */
236 static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
237 "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
238 "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
239 "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
/* Text for each port module error type. port_module() indexes this table
 * directly with error_type, after clamping unknown values to
 * MLX5_MODULE_EVENT_ERROR_UNKNOWN.
 * NOTE(review): one entry appears to be missing from this listing between
 * "Unknown identifier" and "Bad or shorted cable/module", so the position
 * of the later entries cannot be checked from here.
 */
242 static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
243 "Power budget exceeded",
244 "Long Range for non MLNX cable",
245 "Bus stuck(I2C or data shorted)",
246 "No EEPROM/retry timeout",
247 "Enforce part number list",
248 "Unknown identifier",
250 "Bad or shorted cable/module",
254 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
/* Handle a port module EQE: update the per-device port module statistics,
 * then log the event subject to printk rate limiting.
 *
 * @nb:   notifier block embedded in a struct mlx5_event_nb
 * @type: event type; not read in the lines shown here
 * @data: the struct mlx5_eqe describing the event
 *
 * A status below MLX5_MODULE_STATUS_ERROR bumps a status counter. A status
 * equal to it bumps an error counter. A status above it is neither counted
 * nor logged.
 * The declaration of module_num and the return statements are not shown in
 * this listing.
 */
255 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
257 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
258 struct mlx5_events *events = event_nb->ctx;
259 struct mlx5_eqe *eqe = data;
261 enum port_module_event_status_type module_status;
262 enum port_module_event_error_type error_type;
263 struct mlx5_eqe_port_module *module_event_eqe;
266 module_event_eqe = &eqe->data.port_module;
267 module_num = module_event_eqe->module;
/* only the masked bits of the status and error bytes are used */
268 module_status = module_event_eqe->module_status &
269 PORT_MODULE_EVENT_MODULE_STATUS_MASK;
270 error_type = module_event_eqe->error_type &
271 PORT_MODULE_EVENT_ERROR_TYPE_MASK;
/* NOTE(review): status values start at 0x1 according to the
 * mlx5_pme_status[] comments. A masked status of 0 would also pass this
 * test and index status_counters[] (and mlx5_pme_status[] in the log path
 * below) at module_status - 1, which is outside the array. Confirm firmware
 * never reports 0, or add a lower-bound check.
 */
272 if (module_status < MLX5_MODULE_STATUS_ERROR) {
273 events->pme_stats.status_counters[module_status - 1]++;
274 } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
275 if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
276 /* Unknown error type */
277 error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
278 events->pme_stats.error_counters[error_type]++;
/* Counters above are updated before this check, so they do not depend on
 * the rate limit. The statement guarded by this check is not shown in this
 * listing; presumably it returns without logging.
 */
281 if (!printk_ratelimit())
284 if (module_status < MLX5_MODULE_STATUS_ERROR)
285 mlx5_core_info(events->dev,
286 "Port module event: module %u, %s\n",
287 module_num, mlx5_pme_status[module_status - 1]);
/* error_type was clamped above, so it is at most
 * MLX5_MODULE_EVENT_ERROR_UNKNOWN when used as an index here
 */
289 else if (module_status == MLX5_MODULE_STATUS_ERROR)
290 mlx5_core_info(events->dev,
291 "Port module event[error]: module %u, %s, %s\n",
292 module_num, mlx5_pme_status[module_status - 1],
293 mlx5_pme_error[error_type]);
298 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
300 *stats = dev->priv.events->pme_stats;
303 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
/* Re-issue the event, unchanged, on the driver notifier chain (events->nh).
 * It is the second handler listed for MLX5_EVENT_TYPE_PORT_CHANGE in
 * events_nbs_ref[].
 *
 * @nb:    notifier block embedded in a struct mlx5_event_nb
 * @event: event code, passed through to the chain
 * @data:  event payload, passed through to the chain
 *
 * The value returned by the chain call is not used in the lines shown, and
 * the return statement is not shown in this listing.
 */
304 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
306 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
307 struct mlx5_events *events = event_nb->ctx;
309 atomic_notifier_call_chain(&events->nh, event, data);
/* Allocate and set up the per-device events context.
 *
 * @dev: core device; on the path shown, dev->priv.events is set to the new
 *       context
 *
 * The context is zero-allocated with GFP_KERNEL and its driver notifier
 * chain head is initialized.
 * NOTE(review): several lines of this function are missing from the
 * listing. The allocation-failure check, the return statements, and the
 * assignment of events->dev (which the handlers read) are not visible.
 * Confirm them against the full source.
 */
313 int mlx5_events_init(struct mlx5_core_dev *dev)
315 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
320 ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
322 dev->priv.events = events;
326 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
328 kvfree(dev->priv.events);
331 void mlx5_events_start(struct mlx5_core_dev *dev)
333 struct mlx5_events *events = dev->priv.events;
336 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
337 events->notifiers[i].nb = events_nbs_ref[i];
338 events->notifiers[i].ctx = events;
339 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
343 void mlx5_events_stop(struct mlx5_core_dev *dev)
345 struct mlx5_events *events = dev->priv.events;
348 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
349 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
352 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
354 struct mlx5_events *events = dev->priv.events;
356 return atomic_notifier_chain_register(&events->nh, nb);
358 EXPORT_SYMBOL(mlx5_notifier_register);
360 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
362 struct mlx5_events *events = dev->priv.events;
364 return atomic_notifier_chain_unregister(&events->nh, nb);
366 EXPORT_SYMBOL(mlx5_notifier_unregister);
368 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
370 return atomic_notifier_call_chain(&events->nh, event, data);