1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 // Copyright (c) 2018 Mellanox Technologies
4 #include <linux/mlx5/driver.h>
/* Notifier entry that carries a back-pointer to its owning context.
 * As used further down in this file: the nb member is filled from an
 * events_nbs_ref[] row in mlx5_events_start(), and the handlers read the
 * ctx member back as a struct mlx5_events pointer.
 * NOTE(review): the member declarations and the closing brace are not
 * present in this copy of the file - confirm against the original.
 */
10 struct mlx5_event_nb {
15 /* General event handlers for the low-level mlx5_core driver.
17 * Other major feature-specific events, such as
18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere by
19 * separate notifier callbacks owned by those mlx5 components.
 */
/* Forward declarations of the handlers referenced by the events_nbs_ref[]
 * table; each takes the notifier block, the event type and the event payload.
 */
21 static int any_notifier(struct notifier_block *, unsigned long, void *);
22 static int temp_warn(struct notifier_block *, unsigned long, void *);
23 static int port_module(struct notifier_block *, unsigned long, void *);
24 static int pcie_core(struct notifier_block *, unsigned long, void *);
26 /* handler which forwards the event to events->fw_nh, driver notifiers */
27 static int forward_event(struct notifier_block *, unsigned long, void *);
/* Template table pairing each handled event type with its handler.
 * mlx5_events_start() copies every row into the per-device notifiers[]
 * array and registers that copy.
 * MLX5_EVENT_TYPE_GENERAL_EVENT is listed twice: once for pcie_core and
 * once for forward_event.
 */
29 static struct mlx5_nb events_nbs_ref[] = {
30 /* Events to be processed by mlx5_core */
31 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
32 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
33 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
34 {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
36 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
37 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
39 /* QP/WQ resource events to forward */
40 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
41 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
47 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
50 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
51 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
/* Members of the per-device events context, referred to below as
 * struct mlx5_events (allocated in mlx5_events_init() and published through
 * dev->priv.events).
 * NOTE(review): the struct opener and closing brace are not present in this
 * copy of the file - confirm against the original.
 */
/* device handle the handlers pass to the mlx5_core_* logging helpers */
55 struct mlx5_core_dev *dev;
/* created in mlx5_events_init(); receives pcie_core_work and any work
 * passed to mlx5_events_work_enqueue()
 */
56 struct workqueue_struct *wq;
/* one per-device copy of each events_nbs_ref[] row */
57 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
58 /* driver notifier chain for fw events */
59 struct atomic_notifier_head fw_nh;
60 /* port module events stats */
61 struct mlx5_pme_stats pme_stats;
/* work item bound to mlx5_pcie_event() by mlx5_events_init() */
63 struct work_struct pcie_core_work;
64 /* driver notifier chain for sw events */
65 struct blocking_notifier_head sw_nh;
/* Translate an EQE event type code into the name of its enumerator, for use
 * in the debug messages emitted by any_notifier() and forward_event().
 * The returned pointers are string literals, so callers must not free them.
 * NOTE(review): the switch header and default label are missing from this
 * copy; presumably "Unrecognized event" is the fallback for unlisted types.
 */
68 static const char *eqe_type_str(u8 type)
71 case MLX5_EVENT_TYPE_COMP:
72 return "MLX5_EVENT_TYPE_COMP";
73 case MLX5_EVENT_TYPE_PATH_MIG:
74 return "MLX5_EVENT_TYPE_PATH_MIG";
75 case MLX5_EVENT_TYPE_COMM_EST:
76 return "MLX5_EVENT_TYPE_COMM_EST";
77 case MLX5_EVENT_TYPE_SQ_DRAINED:
78 return "MLX5_EVENT_TYPE_SQ_DRAINED";
79 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
80 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
81 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
82 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
83 case MLX5_EVENT_TYPE_CQ_ERROR:
84 return "MLX5_EVENT_TYPE_CQ_ERROR";
85 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
86 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
87 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
88 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
89 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
90 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
91 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
92 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
93 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
94 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
95 case MLX5_EVENT_TYPE_INTERNAL_ERROR:
96 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
97 case MLX5_EVENT_TYPE_PORT_CHANGE:
98 return "MLX5_EVENT_TYPE_PORT_CHANGE";
99 case MLX5_EVENT_TYPE_GPIO_EVENT:
100 return "MLX5_EVENT_TYPE_GPIO_EVENT";
101 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
102 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
103 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
104 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
105 case MLX5_EVENT_TYPE_REMOTE_CONFIG:
106 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
107 case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
108 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
109 case MLX5_EVENT_TYPE_STALL_EVENT:
110 return "MLX5_EVENT_TYPE_STALL_EVENT";
111 case MLX5_EVENT_TYPE_CMD:
112 return "MLX5_EVENT_TYPE_CMD";
113 case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
114 return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
115 case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
116 return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
117 case MLX5_EVENT_TYPE_PAGE_REQUEST:
118 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
119 case MLX5_EVENT_TYPE_PAGE_FAULT:
120 return "MLX5_EVENT_TYPE_PAGE_FAULT";
121 case MLX5_EVENT_TYPE_PPS_EVENT:
122 return "MLX5_EVENT_TYPE_PPS_EVENT";
123 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
124 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
125 case MLX5_EVENT_TYPE_FPGA_ERROR:
126 return "MLX5_EVENT_TYPE_FPGA_ERROR";
127 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
128 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
129 case MLX5_EVENT_TYPE_GENERAL_EVENT:
130 return "MLX5_EVENT_TYPE_GENERAL_EVENT";
131 case MLX5_EVENT_TYPE_MONITOR_COUNTER:
132 return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
133 case MLX5_EVENT_TYPE_DEVICE_TRACER:
134 return "MLX5_EVENT_TYPE_DEVICE_TRACER";
136 return "Unrecognized event";
140 /* handles all FW events, type == eqe->type */
/* Registered in events_nbs_ref[] for MLX5_EVENT_TYPE_NOTIFY_ANY.
 * Recovers the events context from the enclosing mlx5_event_nb and emits a
 * debug message with the event type name and subtype; the type argument is
 * not used, the type is read from the EQE itself.
 * NOTE(review): the return statement is not present in this copy.
 */
141 static int any_notifier(struct notifier_block *nb,
142 unsigned long type, void *data)
144 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
145 struct mlx5_events *events = event_nb->ctx;
146 struct mlx5_eqe *eqe = data;
148 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
149 eqe_type_str(eqe->type), eqe->sub_type);
153 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
/* Warn that one or more sensors reported a high temperature.
 * The EQE carries the sensor bitmap as two big-endian 64-bit halves
 * (msb/lsb); both are converted to CPU byte order and printed in hex.
 * NOTE(review): the declarations of value_lsb/value_msb and the return
 * statement are not present in this copy of the file.
 */
154 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
156 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
157 struct mlx5_events *events = event_nb->ctx;
158 struct mlx5_eqe *eqe = data;
162 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
163 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
/* the message ends with '\n' like every other log message in this file, so
 * the record is complete on its own
 */
165 mlx5_core_warn(events->dev,
166 "High temperature on sensors with bit set %llx %llx\n",
167 value_msb, value_lsb);
172 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
/* Map a port module status code to the text used in the port_module() log
 * messages. Returned pointers are string literals.
 * NOTE(review): the switch header and default label are missing from this
 * copy; presumably "Unknown status" is the fallback for unlisted values.
 */
173 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
176 case MLX5_MODULE_STATUS_PLUGGED:
177 return "Cable plugged";
178 case MLX5_MODULE_STATUS_UNPLUGGED:
179 return "Cable unplugged";
180 case MLX5_MODULE_STATUS_ERROR:
181 return "Cable error";
182 case MLX5_MODULE_STATUS_DISABLED:
183 return "Cable disabled";
185 return "Unknown status";
/* Map a port module error type to the text appended to the error log line
 * in port_module(). Returned pointers are string literals.
 * NOTE(review): the switch header and default label are missing from this
 * copy; presumably "Unknown error" is the fallback for unlisted values.
 */
189 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
192 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
193 return "Power budget exceeded";
194 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
195 return "Long Range for non MLNX cable";
196 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
197 return "Bus stuck (I2C or data shorted)";
198 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
199 return "No EEPROM/retry timeout";
200 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
201 return "Enforce part number list";
202 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
203 return "Unknown identifier";
204 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
205 return "High Temperature";
206 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
207 return "Bad or shorted cable/module";
208 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
209 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
211 return "Unknown error";
215 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
/* Account for and log a port module event.
 * Extracts the status and error-type fields from the EQE with their masks,
 * bumps the matching pme_stats counter when the value is inside the counter
 * array, and then logs the event: at error level with the error text when
 * the status is MLX5_MODULE_STATUS_ERROR, at info level otherwise.
 * The counters are updated before the printk_ratelimit() check, so they are
 * counted regardless of its outcome.
 * NOTE(review): the module_num declaration, the statement guarded by the
 * printk_ratelimit() check (presumably an early return), the else line and
 * the final return are not present in this copy of the file.
 */
216 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
218 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
219 struct mlx5_events *events = event_nb->ctx;
220 struct mlx5_eqe *eqe = data;
222 enum port_module_event_status_type module_status;
223 enum port_module_event_error_type error_type;
224 struct mlx5_eqe_port_module *module_event_eqe;
225 const char *status_str;
228 module_event_eqe = &eqe->data.port_module;
229 module_status = module_event_eqe->module_status &
230 PORT_MODULE_EVENT_MODULE_STATUS_MASK;
231 error_type = module_event_eqe->error_type &
232 PORT_MODULE_EVENT_ERROR_TYPE_MASK;
/* bounds checks keep out-of-range values from indexing past the counters */
234 if (module_status < MLX5_MODULE_STATUS_NUM)
235 events->pme_stats.status_counters[module_status]++;
/* error counters only advance for events whose status is ERROR */
237 if (module_status == MLX5_MODULE_STATUS_ERROR)
238 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
239 events->pme_stats.error_counters[error_type]++;
241 if (!printk_ratelimit())
244 module_num = module_event_eqe->module;
245 status_str = mlx5_pme_status_to_string(module_status);
246 if (module_status == MLX5_MODULE_STATUS_ERROR) {
247 const char *error_str = mlx5_pme_error_to_string(error_type);
249 mlx5_core_err(events->dev,
250 "Port module event[error]: module %u, %s, %s\n",
251 module_num, status_str, error_str);
253 mlx5_core_info(events->dev,
254 "Port module event: module %u, %s\n",
255 module_num, status_str);
/* Values that mlx5_pcie_event() compares against the pwr_status field it
 * reads from the MPEIN register.
 * NOTE(review): the enclosing enum opener and closing brace are not present
 * in this copy of the file.
 */
262 MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
263 MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
264 MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
/* Work handler bound to pcie_core_work by mlx5_events_init(): queries the
 * MPEIN register and logs the PCIe slot power status through the
 * rate-limited logging helpers.
 * The register query is placed after the pci_status_and_power MCAM
 * capability check.
 * NOTE(review): the result of mlx5_core_access_reg() is not checked. out[]
 * is zero-initialised, so after a failed read power_status would presumably
 * decode as MLX5_PCI_POWER_COULD_NOT_BE_READ (0x0) and be logged as such -
 * confirm that this is intended.
 * NOTE(review): the power_status/pci_power declarations, the assignment of
 * dev, the statement guarded by the capability check, the pci_power
 * arguments of the two (%uW) messages and the break statements are not
 * present in this copy of the file.
 */
267 static void mlx5_pcie_event(struct work_struct *work)
269 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
270 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
271 struct mlx5_events *events;
272 struct mlx5_core_dev *dev;
/* the work item is embedded in the events context; step back to its owner */
276 events = container_of(work, struct mlx5_events, pcie_core_work);
279 if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
282 mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
283 MLX5_REG_MPEIN, 0, 0);
284 power_status = MLX5_GET(mpein_reg, out, pwr_status);
285 pci_power = MLX5_GET(mpein_reg, out, pci_power);
287 switch (power_status) {
288 case MLX5_PCI_POWER_COULD_NOT_BE_READ:
289 mlx5_core_info_rl(dev,
290 "PCIe slot power capability was not advertised.\n");
292 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
293 mlx5_core_warn_rl(dev,
294 "Detected insufficient power on the PCIe slot (%uW).\n",
297 case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
298 mlx5_core_info_rl(dev,
299 "PCIe slot advertised sufficient power (%uW).\n",
/* Registered in events_nbs_ref[] for MLX5_EVENT_TYPE_GENERAL_EVENT.
 * When the EQE subtype is MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT it
 * queues pcie_core_work on the events workqueue, so the register access in
 * mlx5_pcie_event() runs from the workqueue instead of from this callback.
 * NOTE(review): handling of other subtypes and the return statements are
 * not present in this copy of the file.
 */
305 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
307 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb,
308 struct mlx5_event_nb,
310 struct mlx5_events *events = event_nb->ctx;
311 struct mlx5_eqe *eqe = data;
313 switch (eqe->sub_type) {
314 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
315 queue_work(events->wq, &events->pcie_core_work);
/* Copy the device's port module event counters into *stats.
 * The whole structure is copied by value; no locking is visible here, so the
 * snapshot may presumably race with port_module() updating the counters.
 */
324 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
326 *stats = dev->priv.events->pme_stats;
329 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
/* Logs the event at debug level and then passes the event code and payload,
 * unchanged, to every notifier registered on the fw_nh atomic chain (see
 * mlx5_notifier_register()). The chain's result is not used here.
 * NOTE(review): the return statement is not present in this copy.
 */
330 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
332 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
333 struct mlx5_events *events = event_nb->ctx;
334 struct mlx5_eqe *eqe = data;
336 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
337 eqe_type_str(eqe->type), eqe->sub_type);
338 atomic_notifier_call_chain(&events->fw_nh, event, data);
/* Allocate and set up the per-device events context.
 * Steps visible here: zeroed allocation, fw_nh atomic chain init, publishing
 * the context in dev->priv.events, creating the single-threaded
 * "mlx5_events" workqueue, binding pcie_core_work to mlx5_pcie_event() and
 * initialising the sw_nh blocking chain.
 * NOTE(review): no handling of allocation or workqueue-creation failure, no
 * assignment of events->dev and no return statement are present in this
 * copy of the file - confirm against the original.
 */
342 int mlx5_events_init(struct mlx5_core_dev *dev)
344 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
349 ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
351 dev->priv.events = events;
352 events->wq = create_singlethread_workqueue("mlx5_events");
357 INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
358 BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
/* Tear down what mlx5_events_init() created: destroy the workqueue first,
 * then free the context that owns it.
 * dev->priv.events is left pointing at the freed memory.
 * NOTE(review): the context is allocated with kzalloc() but released with
 * kvfree(); confirm the pairing is intentional.
 */
363 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
365 destroy_workqueue(dev->priv.events->wq);
366 kvfree(dev->priv.events);
/* Register every handler listed in events_nbs_ref[] for this device.
 * Each template row is copied into the per-device notifiers[] slot, given a
 * back-pointer to the events context (read back by the handlers as
 * event_nb->ctx), and the copy is registered with the EQ layer.
 * NOTE(review): the declaration of i is not present in this copy.
 */
369 void mlx5_events_start(struct mlx5_core_dev *dev)
371 struct mlx5_events *events = dev->priv.events;
374 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
375 events->notifiers[i].nb = events_nbs_ref[i];
376 events->notifiers[i].ctx = events;
377 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
/* Undo mlx5_events_start(): unregister the per-device notifiers in the
 * reverse of their registration order, then flush the events workqueue so
 * work queued by the handlers has finished before returning.
 * NOTE(review): the declaration of i is not present in this copy; the
 * "i >= 0" loop condition only terminates if i is a signed type.
 */
381 void mlx5_events_stop(struct mlx5_core_dev *dev)
383 struct mlx5_events *events = dev->priv.events;
386 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
387 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
388 flush_workqueue(events->wq);
391 /* This API is used only for processing and forwarding firmware
392 * events to mlx5 consumers.
 */
/* Add nb to the device's fw_nh atomic notifier chain, the chain that
 * forward_event() and mlx5_notifier_call_chain() invoke.
 * Returns the result of atomic_notifier_chain_register().
 */
394 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
396 struct mlx5_events *events = dev->priv.events;
398 return atomic_notifier_chain_register(&events->fw_nh, nb);
400 EXPORT_SYMBOL(mlx5_notifier_register);
/* Remove nb from the device's fw_nh atomic notifier chain.
 * Returns the result of atomic_notifier_chain_unregister().
 */
402 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
404 struct mlx5_events *events = dev->priv.events;
406 return atomic_notifier_chain_unregister(&events->fw_nh, nb);
408 EXPORT_SYMBOL(mlx5_notifier_unregister);
/* Invoke the fw_nh atomic notifier chain with the given event and payload.
 * Unlike the register/unregister helpers this takes the events context
 * directly rather than the device. Returns the chain's result.
 */
410 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
412 return atomic_notifier_call_chain(&events->fw_nh, event, data);
415 /* This API is used only for processing and forwarding driver-specific
416 * events to mlx5 consumers.
 */
/* Add nb to the device's sw_nh blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
418 int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
420 struct mlx5_events *events = dev->priv.events;
422 return blocking_notifier_chain_register(&events->sw_nh, nb);
/* Remove nb from the device's sw_nh blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
425 int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
427 struct mlx5_events *events = dev->priv.events;
429 return blocking_notifier_chain_unregister(&events->sw_nh, nb);
/* Invoke the device's sw_nh blocking notifier chain with the given event and
 * payload, returning the chain's result.
 * NOTE(review): the continuation of the parameter list (the data parameter
 * used below) is not present in this copy of the file.
 */
432 int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
435 struct mlx5_events *events = dev->priv.events;
437 return blocking_notifier_call_chain(&events->sw_nh, event, data);
/* Queue a caller-supplied work item on the device's events workqueue (the
 * same queue that runs pcie_core_work). The queue_work() result is ignored.
 */
440 void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
442 queue_work(dev->priv.events->wq, work);