/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "diag/fw_tracer.h"

enum {
        MLX5_EQE_SIZE           = sizeof(struct mlx5_eqe),
        MLX5_EQE_OWNER_INIT_VAL = 0x1,
};

enum {
        MLX5_EQ_STATE_ARMED             = 0x9,
        MLX5_EQ_STATE_FIRED             = 0xa,
        MLX5_EQ_STATE_ALWAYS_ARMED      = 0xb,
};

enum {
        MLX5_NUM_SPARE_EQE      = 0x80,
        MLX5_NUM_ASYNC_EQE      = 0x1000,
        MLX5_NUM_CMD_EQE        = 32,
        MLX5_NUM_PF_DRAIN       = 64,
};

enum {
        MLX5_EQ_DOORBEL_OFFSET  = 0x40,
};

#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)           | \
                               (1ull << MLX5_EVENT_TYPE_COMM_EST)           | \
                               (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)         | \
                               (1ull << MLX5_EVENT_TYPE_CQ_ERROR)           | \
                               (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)     | \
                               (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
                               (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
                               (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
                               (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)        | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))

struct map_eq_in {
        u64     mask;
        u32     reserved;
        u32     unmap_eqn;
};

struct cre_des_eq {
        u8      reserved[15];
        u8      eqn;
};

static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
        u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};

        MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
        MLX5_SET(destroy_eq_in, in, eq_number, eqn);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
{
        return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
}

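/* Return the next software-owned EQE, or NULL if the queue is empty.
 * Ownership is determined by comparing the EQE owner bit against the
 * wrap-count parity of the consumer index.
 */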
static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
{
        struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));

        return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
}

static const char *eqe_type_str(u8 type)
{
        switch (type) {
        case MLX5_EVENT_TYPE_COMP:
                return "MLX5_EVENT_TYPE_COMP";
        case MLX5_EVENT_TYPE_PATH_MIG:
                return "MLX5_EVENT_TYPE_PATH_MIG";
        case MLX5_EVENT_TYPE_COMM_EST:
                return "MLX5_EVENT_TYPE_COMM_EST";
        case MLX5_EVENT_TYPE_SQ_DRAINED:
                return "MLX5_EVENT_TYPE_SQ_DRAINED";
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
                return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
                return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
        case MLX5_EVENT_TYPE_CQ_ERROR:
                return "MLX5_EVENT_TYPE_CQ_ERROR";
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
                return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
                return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
        case MLX5_EVENT_TYPE_INTERNAL_ERROR:
                return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
        case MLX5_EVENT_TYPE_PORT_CHANGE:
                return "MLX5_EVENT_TYPE_PORT_CHANGE";
        case MLX5_EVENT_TYPE_GPIO_EVENT:
                return "MLX5_EVENT_TYPE_GPIO_EVENT";
        case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
                return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
        case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
                return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
        case MLX5_EVENT_TYPE_REMOTE_CONFIG:
                return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
        case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
                return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
        case MLX5_EVENT_TYPE_STALL_EVENT:
                return "MLX5_EVENT_TYPE_STALL_EVENT";
        case MLX5_EVENT_TYPE_CMD:
                return "MLX5_EVENT_TYPE_CMD";
        case MLX5_EVENT_TYPE_PAGE_REQUEST:
                return "MLX5_EVENT_TYPE_PAGE_REQUEST";
        case MLX5_EVENT_TYPE_PAGE_FAULT:
                return "MLX5_EVENT_TYPE_PAGE_FAULT";
        case MLX5_EVENT_TYPE_PPS_EVENT:
                return "MLX5_EVENT_TYPE_PPS_EVENT";
        case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
                return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
        case MLX5_EVENT_TYPE_FPGA_ERROR:
                return "MLX5_EVENT_TYPE_FPGA_ERROR";
        case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
                return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
        case MLX5_EVENT_TYPE_GENERAL_EVENT:
                return "MLX5_EVENT_TYPE_GENERAL_EVENT";
        case MLX5_EVENT_TYPE_DEVICE_TRACER:
                return "MLX5_EVENT_TYPE_DEVICE_TRACER";
        default:
                return "Unrecognized event";
        }
}

static enum mlx5_dev_event port_subtype_event(u8 subtype)
{
        switch (subtype) {
        case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
                return MLX5_DEV_EVENT_PORT_DOWN;
        case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
                return MLX5_DEV_EVENT_PORT_UP;
        case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
                return MLX5_DEV_EVENT_PORT_INITIALIZED;
        case MLX5_PORT_CHANGE_SUBTYPE_LID:
                return MLX5_DEV_EVENT_LID_CHANGE;
        case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
                return MLX5_DEV_EVENT_PKEY_CHANGE;
        case MLX5_PORT_CHANGE_SUBTYPE_GUID:
                return MLX5_DEV_EVENT_GUID_CHANGE;
        case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
                return MLX5_DEV_EVENT_CLIENT_REREG;
        }
        return -1;
}

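/* Write the consumer index to the EQ doorbell; when @arm is set, write to
 * the arming offset as well so the HCA raises an interrupt for new events.
 */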
static void eq_update_ci(struct mlx5_eq *eq, int arm)
{
        __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
        u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);

        __raw_writel((__force u32)cpu_to_be32(val), addr);
        /* We still want ordering, just not swabbing, so add a barrier */
        mb();
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void eqe_pf_action(struct work_struct *work)
{
        struct mlx5_pagefault *pfault = container_of(work,
                                                     struct mlx5_pagefault,
                                                     work);
        struct mlx5_eq *eq = pfault->eq;

        mlx5_core_page_fault(eq->dev, pfault);
        mempool_free(pfault, eq->pf_ctx.pool);
}

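/* Drain page-fault EQEs: decode each RDMA or WQE fault into a mlx5_pagefault
 * taken from the drain mempool and queue it on the ordered workqueue so it
 * can be resolved in process context.
 */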
static void eq_pf_process(struct mlx5_eq *eq)
{
        struct mlx5_core_dev *dev = eq->dev;
        struct mlx5_eqe_page_fault *pf_eqe;
        struct mlx5_pagefault *pfault;
        struct mlx5_eqe *eqe;
        int set_ci = 0;

        while ((eqe = next_eqe_sw(eq))) {
                pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
                if (!pfault) {
                        schedule_work(&eq->pf_ctx.work);
                        break;
                }

                dma_rmb();
                pf_eqe = &eqe->data.page_fault;
                pfault->event_subtype = eqe->sub_type;
                pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);

                mlx5_core_dbg(dev,
                              "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
                              eqe->sub_type, pfault->bytes_committed);

                switch (eqe->sub_type) {
                case MLX5_PFAULT_SUBTYPE_RDMA:
                        /* RDMA based event */
                        pfault->type =
                                be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
                        pfault->token =
                                be32_to_cpu(pf_eqe->rdma.pftype_token) &
                                MLX5_24BIT_MASK;
                        pfault->rdma.r_key =
                                be32_to_cpu(pf_eqe->rdma.r_key);
                        pfault->rdma.packet_size =
                                be16_to_cpu(pf_eqe->rdma.packet_length);
                        pfault->rdma.rdma_op_len =
                                be32_to_cpu(pf_eqe->rdma.rdma_op_len);
                        pfault->rdma.rdma_va =
                                be64_to_cpu(pf_eqe->rdma.rdma_va);
                        mlx5_core_dbg(dev,
                                      "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
                                      pfault->type, pfault->token,
                                      pfault->rdma.r_key);
                        mlx5_core_dbg(dev,
                                      "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
                                      pfault->rdma.rdma_op_len,
                                      pfault->rdma.rdma_va);
                        break;

                case MLX5_PFAULT_SUBTYPE_WQE:
                        /* WQE based event */
                        pfault->type =
                                be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24;
                        pfault->token =
                                be32_to_cpu(pf_eqe->wqe.token);
                        pfault->wqe.wq_num =
                                be32_to_cpu(pf_eqe->wqe.pftype_wq) &
                                MLX5_24BIT_MASK;
                        pfault->wqe.wqe_index =
                                be16_to_cpu(pf_eqe->wqe.wqe_index);
                        pfault->wqe.packet_size =
                                be16_to_cpu(pf_eqe->wqe.packet_length);
                        mlx5_core_dbg(dev,
                                      "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
                                      pfault->type, pfault->token,
                                      pfault->wqe.wq_num,
                                      pfault->wqe.wqe_index);
                        break;

                default:
                        mlx5_core_warn(dev,
                                       "Unsupported page fault event sub-type: 0x%02hhx\n",
                                       eqe->sub_type);
                        /* Unsupported page faults should still be
                         * resolved by the page fault handler
                         */
                }

                pfault->eq = eq;
                INIT_WORK(&pfault->work, eqe_pf_action);
                queue_work(eq->pf_ctx.wq, &pfault->work);

                ++eq->cons_index;
                ++set_ci;

                if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
                        eq_update_ci(eq, 0);
                        set_ci = 0;
                }
        }

        eq_update_ci(eq, 1);
}

static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
{
        struct mlx5_eq *eq = eq_ptr;
        unsigned long flags;

        if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
                eq_pf_process(eq);
                spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
        } else {
                schedule_work(&eq->pf_ctx.work);
        }

        return IRQ_HANDLED;
}

/* mempool_refill() was proposed but unfortunately wasn't accepted
 * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
 * Cheap workaround.
 */
static void mempool_refill(mempool_t *pool)
{
        while (pool->curr_nr < pool->min_nr)
                mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
}

static void eq_pf_action(struct work_struct *work)
{
        struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);

        mempool_refill(eq->pf_ctx.pool);

        spin_lock_irq(&eq->pf_ctx.lock);
        eq_pf_process(eq);
        spin_unlock_irq(&eq->pf_ctx.lock);
}

static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
{
        spin_lock_init(&pf_ctx->lock);
        INIT_WORK(&pf_ctx->work, eq_pf_action);

        pf_ctx->wq = alloc_ordered_workqueue(name,
                                             WQ_MEM_RECLAIM);
        if (!pf_ctx->wq)
                return -ENOMEM;

        pf_ctx->pool = mempool_create_kmalloc_pool
                (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
        if (!pf_ctx->pool)
                goto err_wq;

        return 0;
err_wq:
        destroy_workqueue(pf_ctx->wq);
        return -ENOMEM;
}

int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
                                u32 wq_num, u8 type, int error)
{
        u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};

        MLX5_SET(page_fault_resume_in, in, opcode,
                 MLX5_CMD_OP_PAGE_FAULT_RESUME);
        MLX5_SET(page_fault_resume_in, in, error, !!error);
        MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
        MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
        MLX5_SET(page_fault_resume_in, in, token, token);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif

static void general_event_handler(struct mlx5_core_dev *dev,
                                  struct mlx5_eqe *eqe)
{
        switch (eqe->sub_type) {
        case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
                if (dev->event)
                        dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
                break;
        default:
                mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
                              eqe->sub_type);
        }
}

static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
                                    struct mlx5_eqe *eqe)
{
        u64 value_lsb;
        u64 value_msb;

        value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
        value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);

        mlx5_core_warn(dev,
                       "High temperature on sensors with bit set %llx %llx",
                       value_msb, value_lsb);
}

/* caller must eventually call mlx5_cq_put on the returned cq */
static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *cq = NULL;

        spin_lock(&table->lock);
        cq = radix_tree_lookup(&table->tree, cqn);
        if (likely(cq))
                mlx5_cq_hold(cq);
        spin_unlock(&table->lock);

        return cq;
}

static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
{
        struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);

        if (unlikely(!cq)) {
                mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
                return;
        }

        ++cq->arm_sn;

        cq->comp(cq);

        mlx5_cq_put(cq);
}

static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
{
        struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);

        if (unlikely(!cq)) {
                mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
                return;
        }

        cq->event(cq, event_type);

        mlx5_cq_put(cq);
}

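/* Generic EQ interrupt handler: walk the software-owned EQEs, dispatch each
 * event to its handler (completions, async QP/SRQ/CQ events, command
 * completions, port and firmware events), then update the consumer index
 * and re-arm the EQ.
 */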
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
        struct mlx5_eq *eq = eq_ptr;
        struct mlx5_core_dev *dev = eq->dev;
        struct mlx5_eqe *eqe;
        int set_ci = 0;
        u32 cqn = -1;
        u32 rsn;
        u8 port;

        while ((eqe = next_eqe_sw(eq))) {
                /*
                 * Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
                dma_rmb();

                mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
                              eq->eqn, eqe_type_str(eqe->type));
                switch (eqe->type) {
                case MLX5_EVENT_TYPE_COMP:
                        cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
                        mlx5_eq_cq_completion(eq, cqn);
                        break;
                case MLX5_EVENT_TYPE_DCT_DRAINED:
                        rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
                        rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
                        mlx5_rsc_event(dev, rsn, eqe->type);
                        break;
                case MLX5_EVENT_TYPE_PATH_MIG:
                case MLX5_EVENT_TYPE_COMM_EST:
                case MLX5_EVENT_TYPE_SQ_DRAINED:
                case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
                case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
                case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
                case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                        rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
                        rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
                        mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
                                      eqe_type_str(eqe->type), eqe->type, rsn);
                        mlx5_rsc_event(dev, rsn, eqe->type);
                        break;

                case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
                case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                        rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
                        mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
                                      eqe_type_str(eqe->type), eqe->type, rsn);
                        mlx5_srq_event(dev, rsn, eqe->type);
                        break;

                case MLX5_EVENT_TYPE_CMD:
                        mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
                        break;

                case MLX5_EVENT_TYPE_PORT_CHANGE:
                        port = (eqe->data.port.port >> 4) & 0xf;
                        switch (eqe->sub_type) {
                        case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
                        case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
                        case MLX5_PORT_CHANGE_SUBTYPE_LID:
                        case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
                        case MLX5_PORT_CHANGE_SUBTYPE_GUID:
                        case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
                        case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
                                if (dev->event)
                                        dev->event(dev, port_subtype_event(eqe->sub_type),
                                                   (unsigned long)port);
                                break;
                        default:
                                mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
                                               port, eqe->sub_type);
                        }
                        break;
                case MLX5_EVENT_TYPE_CQ_ERROR:
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
                        mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
                        mlx5_eq_cq_event(eq, cqn, eqe->type);
                        break;

                case MLX5_EVENT_TYPE_PAGE_REQUEST:
                        {
                                u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
                                s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);

                                mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
                                              func_id, npages);
                                mlx5_core_req_pages_handler(dev, func_id, npages);
                        }
                        break;

                case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
                        mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
                        break;

                case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
                        mlx5_port_module_event(dev, eqe);
                        break;

                case MLX5_EVENT_TYPE_PPS_EVENT:
                        mlx5_pps_event(dev, eqe);
                        break;

                case MLX5_EVENT_TYPE_FPGA_ERROR:
                case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
                        mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
                        break;

                case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
                        mlx5_temp_warning_event(dev, eqe);
                        break;

                case MLX5_EVENT_TYPE_GENERAL_EVENT:
                        general_event_handler(dev, eqe);
                        break;

                case MLX5_EVENT_TYPE_DEVICE_TRACER:
                        mlx5_fw_tracer_event(dev, eqe);
                        break;

                default:
                        mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
                                       eqe->type, eq->eqn);
                        break;
                }

                ++eq->cons_index;
                ++set_ci;

                /* The HCA will think the queue has overflowed if we
                 * don't tell it we've been processing events.  We
                 * create our EQs with MLX5_NUM_SPARE_EQE extra
                 * entries, so we must update our consumer index at
                 * least that often.
                 */
                if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
                        eq_update_ci(eq, 0);
                        set_ci = 0;
                }
        }

        eq_update_ci(eq, 1);

        if (cqn != -1)
                tasklet_schedule(&eq->tasklet_ctx.task);

        return IRQ_HANDLED;
}

/* Some architectures don't latch interrupts when they are disabled, so using
 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
 * avoid losing them.  It is not recommended to use it, unless this is the last
 * resort.
 */
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
{
        u32 count_eqe;

        disable_irq(eq->irqn);
        count_eqe = eq->cons_index;
        mlx5_eq_int(eq->irqn, eq);
        count_eqe = eq->cons_index - count_eqe;
        enable_irq(eq->irqn);

        return count_eqe;
}

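/* Set the initial owner bit on every EQE so all entries start out as not yet
 * valid for software; next_eqe_sw() will not return them until the HCA has
 * written them.
 */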
static void init_eq_buf(struct mlx5_eq *eq)
{
        struct mlx5_eqe *eqe;
        int i;

        for (i = 0; i < eq->nent; i++) {
                eqe = get_eqe(eq, i);
                eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
        }
}

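/* Allocate the EQ buffer, create the EQ in firmware with the requested event
 * mask, hook up its interrupt handler and, depending on the EQ type,
 * initialize either the completion tasklet or the page-fault context.
 */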
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       int nent, u64 mask, const char *name,
                       enum mlx5_eq_type type)
{
        struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
        irq_handler_t handler;
        __be64 *pas;
        void *eqc;
        int inlen;
        u32 *in;
        int err;

        /* Init CQ table */
        memset(cq_table, 0, sizeof(*cq_table));
        spin_lock_init(&cq_table->lock);
        INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);

        eq->type = type;
        eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
        eq->cons_index = 0;
        err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
        if (err)
                return err;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (type == MLX5_EQ_TYPE_PF)
                handler = mlx5_eq_pf_int;
        else
#endif
                handler = mlx5_eq_int;

        init_eq_buf(eq);

        inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
                MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;

        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_buf;
        }

        pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
        mlx5_fill_page_array(&eq->buf, pas);

        MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
        MLX5_SET64(create_eq_in, in, event_bitmask, mask);

        eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
        MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
        MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
        MLX5_SET(eqc, eqc, intr, vecidx);
        MLX5_SET(eqc, eqc, log_page_size,
                 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

        err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (err)
                goto err_in;

        snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
                 name, pci_name(dev->pdev));

        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = pci_irq_vector(dev->pdev, vecidx);
        eq->dev = dev;
        eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
        err = request_irq(eq->irqn, handler, 0,
                          priv->irq_info[vecidx].name, eq);
        if (err)
                goto err_eq;

        err = mlx5_debug_eq_add(dev, eq);
        if (err)
                goto err_irq;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (type == MLX5_EQ_TYPE_PF) {
                err = init_pf_ctx(&eq->pf_ctx, name);
                if (err)
                        goto err_irq;
        } else
#endif
        {
                INIT_LIST_HEAD(&eq->tasklet_ctx.list);
                INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
                spin_lock_init(&eq->tasklet_ctx.lock);
                tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
                             (unsigned long)&eq->tasklet_ctx);
        }

        /* EQs are created in ARMED state
         */
        eq_update_ci(eq, 1);

        kvfree(in);
        return 0;

err_irq:
        free_irq(eq->irqn, eq);

err_eq:
        mlx5_cmd_destroy_eq(dev, eq->eqn);

err_in:
        kvfree(in);

err_buf:
        mlx5_buf_free(dev, &eq->buf);
        return err;
}

int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
        int err;

        mlx5_debug_eq_remove(dev, eq);
        free_irq(eq->irqn, eq);
        err = mlx5_cmd_destroy_eq(dev, eq->eqn);
        if (err)
                mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
                               eq->eqn);
        synchronize_irq(eq->irqn);

        if (eq->type == MLX5_EQ_TYPE_COMP) {
                tasklet_disable(&eq->tasklet_ctx.task);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        } else if (eq->type == MLX5_EQ_TYPE_PF) {
                cancel_work_sync(&eq->pf_ctx.work);
                destroy_workqueue(eq->pf_ctx.wq);
                mempool_destroy(eq->pf_ctx.pool);
#endif
        }
        mlx5_buf_free(dev, &eq->buf);

        return err;
}

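/* Register a CQ in this EQ's radix tree so completion and async CQ events
 * can be routed back to it from the interrupt handler.
 */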
int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        int err;

        spin_lock_irq(&table->lock);
        err = radix_tree_insert(&table->tree, cq->cqn, cq);
        spin_unlock_irq(&table->lock);

        return err;
}

int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *tmp;

        spin_lock_irq(&table->lock);
        tmp = radix_tree_delete(&table->tree, cq->cqn);
        spin_unlock_irq(&table->lock);

        if (!tmp) {
                mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", cq->cqn, eq->eqn);
                return -ENOENT;
        }

        if (tmp != cq) {
                mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", cq->cqn, eq->eqn);
                return -EINVAL;
        }

        return 0;
}

int mlx5_eq_init(struct mlx5_core_dev *dev)
{
        int err;

        spin_lock_init(&dev->priv.eq_table.lock);

        err = mlx5_eq_debugfs_init(dev);

        return err;
}

void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
{
        mlx5_eq_debugfs_cleanup(dev);
}

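/* Create the command, async and pages EQs (plus the page-fault EQ when
 * on-demand paging is supported), and switch the command interface from
 * polling to event-driven completions.
 */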
int mlx5_start_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
        int err;

        if (MLX5_VPORT_MANAGER(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);

        if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
            MLX5_CAP_GEN(dev, general_notification_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);

        if (MLX5_CAP_GEN(dev, port_module_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
        else
                mlx5_core_dbg(dev, "port_module_event is not set\n");

        if (MLX5_PPS_CAP(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);

        if (MLX5_CAP_GEN(dev, fpga))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
                                    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
        if (MLX5_CAP_GEN_MAX(dev, dct))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);

        if (MLX5_CAP_GEN(dev, temp_warn_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);

        if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);

        err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
                                 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
                return err;
        }

        mlx5_cmd_use_events(dev);

        err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
                                 MLX5_NUM_ASYNC_EQE, async_event_mask,
                                 "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
                goto err1;
        }

        err = mlx5_create_map_eq(dev, &table->pages_eq,
                                 MLX5_EQ_VEC_PAGES,
                                 /* TODO: sriov max_vf + */ 1,
                                 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
                                 MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
                goto err2;
        }

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_create_map_eq(dev, &table->pfault_eq,
                                         MLX5_EQ_VEC_PFAULT,
                                         MLX5_NUM_ASYNC_EQE,
                                         1 << MLX5_EVENT_TYPE_PAGE_FAULT,
                                         "mlx5_page_fault_eq",
                                         MLX5_EQ_TYPE_PF);
                if (err) {
                        mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
                                       err);
                        goto err3;
                }
        }

        return err;
err3:
        mlx5_destroy_unmap_eq(dev, &table->pages_eq);
#else
        return err;
#endif

err2:
        mlx5_destroy_unmap_eq(dev, &table->async_eq);

err1:
        mlx5_cmd_use_polling(dev);
        mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        return err;
}

void mlx5_stop_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        int err;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
                if (err)
                        mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
                                      err);
        }
#endif

        err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
                              err);

        err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
                              err);
        mlx5_cmd_use_polling(dev);

        err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
                              err);
}

int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                       u32 *out, int outlen)
{
        u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};

        MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
        MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}

/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq;

#ifdef CONFIG_RFS_ACCEL
        if (dev->rmap) {
                free_irq_cpu_rmap(dev->rmap);
                dev->rmap = NULL;
        }
#endif
        list_for_each_entry(eq, &table->comp_eqs_list, list)
                free_irq(eq->irqn, eq);

        free_irq(table->pages_eq.irqn, &table->pages_eq);
        free_irq(table->async_eq.irqn, &table->async_eq);
        free_irq(table->cmd_eq.irqn, &table->cmd_eq);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(dev, pg))
                free_irq(table->pfault_eq.irqn, &table->pfault_eq);
#endif
        pci_free_irq_vectors(dev->pdev);
}