7c74abeee864cc39758c86b2e2b956a5c08adb6f
[sfrench/cifs-2.6.git] / drivers / infiniband / hw / erdma / erdma_main.c
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6
7 #include <linux/module.h>
8 #include <net/addrconf.h>
9 #include <rdma/erdma-abi.h>
10
11 #include "erdma.h"
12 #include "erdma_cm.h"
13 #include "erdma_verbs.h"
14
15 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
16 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
17 MODULE_LICENSE("Dual BSD/GPL");
18
19 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
20                               void *arg)
21 {
22         struct net_device *netdev = netdev_notifier_info_to_dev(arg);
23         struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
24
25         if (dev->netdev == NULL || dev->netdev != netdev)
26                 goto done;
27
28         switch (event) {
29         case NETDEV_UP:
30                 dev->state = IB_PORT_ACTIVE;
31                 erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
32                 break;
33         case NETDEV_DOWN:
34                 dev->state = IB_PORT_DOWN;
35                 erdma_port_event(dev, IB_EVENT_PORT_ERR);
36                 break;
37         case NETDEV_CHANGEMTU:
38                 if (dev->mtu != netdev->mtu) {
39                         erdma_set_mtu(dev, netdev->mtu);
40                         dev->mtu = netdev->mtu;
41                 }
42                 break;
43         case NETDEV_REGISTER:
44         case NETDEV_UNREGISTER:
45         case NETDEV_CHANGEADDR:
46         case NETDEV_GOING_DOWN:
47         case NETDEV_CHANGE:
48         default:
49                 break;
50         }
51
52 done:
53         return NOTIFY_OK;
54 }
55
56 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
57 {
58         struct net_device *netdev;
59         int ret = -EPROBE_DEFER;
60
61         /* Already binded to a net_device, so we skip. */
62         if (dev->netdev)
63                 return 0;
64
65         rtnl_lock();
66         for_each_netdev(&init_net, netdev) {
67                 /*
68                  * In erdma, the paired netdev and ibdev should have the same
69                  * MAC address. erdma can get the value from its PCIe bar
70                  * registers. Since erdma can not get the paired netdev
71                  * reference directly, we do a traverse here to get the paired
72                  * netdev.
73                  */
74                 if (ether_addr_equal_unaligned(netdev->perm_addr,
75                                                dev->attrs.peer_addr)) {
76                         ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
77                         if (ret) {
78                                 rtnl_unlock();
79                                 ibdev_warn(&dev->ibdev,
80                                            "failed (%d) to link netdev", ret);
81                                 return ret;
82                         }
83
84                         dev->netdev = netdev;
85                         break;
86                 }
87         }
88
89         rtnl_unlock();
90
91         return ret;
92 }
93
94 static int erdma_device_register(struct erdma_dev *dev)
95 {
96         struct ib_device *ibdev = &dev->ibdev;
97         int ret;
98
99         ret = erdma_enum_and_get_netdev(dev);
100         if (ret)
101                 return ret;
102
103         dev->mtu = dev->netdev->mtu;
104         addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
105
106         ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
107         if (ret) {
108                 dev_err(&dev->pdev->dev,
109                         "ib_register_device failed: ret = %d\n", ret);
110                 return ret;
111         }
112
113         dev->netdev_nb.notifier_call = erdma_netdev_event;
114         ret = register_netdevice_notifier(&dev->netdev_nb);
115         if (ret) {
116                 ibdev_err(&dev->ibdev, "failed to register notifier.\n");
117                 ib_unregister_device(ibdev);
118         }
119
120         return ret;
121 }
122
123 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
124 {
125         struct erdma_dev *dev = data;
126
127         erdma_cmdq_completion_handler(&dev->cmdq);
128         erdma_aeq_event_handler(dev);
129
130         return IRQ_HANDLED;
131 }
132
133 static void erdma_dwqe_resource_init(struct erdma_dev *dev)
134 {
135         int total_pages, type0, type1;
136
137         dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
138
139         if (dev->attrs.grp_num < 4)
140                 dev->attrs.disable_dwqe = true;
141         else
142                 dev->attrs.disable_dwqe = false;
143
144         /* One page contains 4 goups. */
145         total_pages = dev->attrs.grp_num * 4;
146
147         if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
148                 dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
149                 type0 = ERDMA_DWQE_TYPE0_CNT;
150                 type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
151         } else {
152                 type1 = total_pages / 3;
153                 type0 = total_pages - type1 - 1;
154         }
155
156         dev->attrs.dwqe_pages = type0;
157         dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
158 }
159
160 static int erdma_request_vectors(struct erdma_dev *dev)
161 {
162         int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
163         int ret;
164
165         ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
166         if (ret < 0) {
167                 dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
168                         ret);
169                 return ret;
170         }
171         dev->attrs.irq_num = ret;
172
173         return 0;
174 }
175
176 static int erdma_comm_irq_init(struct erdma_dev *dev)
177 {
178         snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
179                  pci_name(dev->pdev));
180         dev->comm_irq.msix_vector =
181                 pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
182
183         cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
184                         &dev->comm_irq.affinity_hint_mask);
185         irq_set_affinity_hint(dev->comm_irq.msix_vector,
186                               &dev->comm_irq.affinity_hint_mask);
187
188         return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
189                            dev->comm_irq.name, dev);
190 }
191
192 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
193 {
194         irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
195         free_irq(dev->comm_irq.msix_vector, dev);
196 }
197
198 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
199 {
200         int ret;
201
202         erdma_dwqe_resource_init(dev);
203
204         ret = dma_set_mask_and_coherent(&pdev->dev,
205                                         DMA_BIT_MASK(ERDMA_PCI_WIDTH));
206         if (ret)
207                 return ret;
208
209         dma_set_max_seg_size(&pdev->dev, UINT_MAX);
210
211         return 0;
212 }
213
214 static void erdma_hw_reset(struct erdma_dev *dev)
215 {
216         u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
217
218         erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
219 }
220
221 static int erdma_wait_hw_init_done(struct erdma_dev *dev)
222 {
223         int i;
224
225         erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
226                           FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
227
228         for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
229                 if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
230                                            ERDMA_REG_DEV_ST_INIT_DONE_MASK))
231                         break;
232
233                 msleep(ERDMA_REG_ACCESS_WAIT_MS);
234         }
235
236         if (i == ERDMA_WAIT_DEV_DONE_CNT) {
237                 dev_err(&dev->pdev->dev, "wait init done failed.\n");
238                 return -ETIMEDOUT;
239         }
240
241         return 0;
242 }
243
244 static const struct pci_device_id erdma_pci_tbl[] = {
245         { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
246         {}
247 };
248
249 static int erdma_probe_dev(struct pci_dev *pdev)
250 {
251         struct erdma_dev *dev;
252         int bars, err;
253         u32 version;
254
255         err = pci_enable_device(pdev);
256         if (err) {
257                 dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
258                 return err;
259         }
260
261         pci_set_master(pdev);
262
263         dev = ib_alloc_device(erdma_dev, ibdev);
264         if (!dev) {
265                 dev_err(&pdev->dev, "ib_alloc_device failed\n");
266                 err = -ENOMEM;
267                 goto err_disable_device;
268         }
269
270         pci_set_drvdata(pdev, dev);
271         dev->pdev = pdev;
272         dev->attrs.numa_node = dev_to_node(&pdev->dev);
273
274         bars = pci_select_bars(pdev, IORESOURCE_MEM);
275         err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
276         if (bars != ERDMA_BAR_MASK || err) {
277                 err = err ? err : -EINVAL;
278                 goto err_ib_device_release;
279         }
280
281         dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
282         dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
283
284         dev->func_bar =
285                 devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
286         if (!dev->func_bar) {
287                 dev_err(&pdev->dev, "devm_ioremap failed.\n");
288                 err = -EFAULT;
289                 goto err_release_bars;
290         }
291
292         version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
293         if (version == 0) {
294                 /* we knows that it is a non-functional function. */
295                 err = -ENODEV;
296                 goto err_iounmap_func_bar;
297         }
298
299         err = erdma_device_init(dev, pdev);
300         if (err)
301                 goto err_iounmap_func_bar;
302
303         err = erdma_request_vectors(dev);
304         if (err)
305                 goto err_iounmap_func_bar;
306
307         err = erdma_comm_irq_init(dev);
308         if (err)
309                 goto err_free_vectors;
310
311         err = erdma_aeq_init(dev);
312         if (err)
313                 goto err_uninit_comm_irq;
314
315         err = erdma_cmdq_init(dev);
316         if (err)
317                 goto err_uninit_aeq;
318
319         err = erdma_wait_hw_init_done(dev);
320         if (err)
321                 goto err_uninit_cmdq;
322
323         err = erdma_ceqs_init(dev);
324         if (err)
325                 goto err_reset_hw;
326
327         erdma_finish_cmdq_init(dev);
328
329         return 0;
330
331 err_reset_hw:
332         erdma_hw_reset(dev);
333
334 err_uninit_cmdq:
335         erdma_cmdq_destroy(dev);
336
337 err_uninit_aeq:
338         erdma_aeq_destroy(dev);
339
340 err_uninit_comm_irq:
341         erdma_comm_irq_uninit(dev);
342
343 err_free_vectors:
344         pci_free_irq_vectors(dev->pdev);
345
346 err_iounmap_func_bar:
347         devm_iounmap(&pdev->dev, dev->func_bar);
348
349 err_release_bars:
350         pci_release_selected_regions(pdev, bars);
351
352 err_ib_device_release:
353         ib_dealloc_device(&dev->ibdev);
354
355 err_disable_device:
356         pci_disable_device(pdev);
357
358         return err;
359 }
360
361 static void erdma_remove_dev(struct pci_dev *pdev)
362 {
363         struct erdma_dev *dev = pci_get_drvdata(pdev);
364
365         erdma_ceqs_uninit(dev);
366         erdma_hw_reset(dev);
367         erdma_cmdq_destroy(dev);
368         erdma_aeq_destroy(dev);
369         erdma_comm_irq_uninit(dev);
370         pci_free_irq_vectors(dev->pdev);
371
372         devm_iounmap(&pdev->dev, dev->func_bar);
373         pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
374
375         ib_dealloc_device(&dev->ibdev);
376
377         pci_disable_device(pdev);
378 }
379
380 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
381
382 static int erdma_dev_attrs_init(struct erdma_dev *dev)
383 {
384         int err;
385         u64 req_hdr, cap0, cap1;
386
387         erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
388                                 CMDQ_OPCODE_QUERY_DEVICE);
389
390         err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
391                                   &cap1);
392         if (err)
393                 return err;
394
395         dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
396         dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
397         dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
398         dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
399         dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
400         dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
401         dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
402         dev->attrs.max_mr = dev->attrs.max_qp << 1;
403         dev->attrs.max_cq = dev->attrs.max_qp << 1;
404         dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
405
406         dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
407         dev->attrs.max_ord = ERDMA_MAX_ORD;
408         dev->attrs.max_ird = ERDMA_MAX_IRD;
409         dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
410         dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
411         dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
412         dev->attrs.max_pd = ERDMA_MAX_PD;
413
414         dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
415         dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
416
417         erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
418                                 CMDQ_OPCODE_QUERY_FW_INFO);
419
420         err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
421                                   &cap1);
422         if (!err)
423                 dev->attrs.fw_version =
424                         FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
425
426         return err;
427 }
428
429 static int erdma_res_cb_init(struct erdma_dev *dev)
430 {
431         int i, j;
432
433         for (i = 0; i < ERDMA_RES_CNT; i++) {
434                 dev->res_cb[i].next_alloc_idx = 1;
435                 spin_lock_init(&dev->res_cb[i].lock);
436                 dev->res_cb[i].bitmap =
437                         bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
438                 if (!dev->res_cb[i].bitmap)
439                         goto err;
440         }
441
442         return 0;
443
444 err:
445         for (j = 0; j < i; j++)
446                 bitmap_free(dev->res_cb[j].bitmap);
447
448         return -ENOMEM;
449 }
450
451 static void erdma_res_cb_free(struct erdma_dev *dev)
452 {
453         int i;
454
455         for (i = 0; i < ERDMA_RES_CNT; i++)
456                 bitmap_free(dev->res_cb[i].bitmap);
457 }
458
459 static const struct ib_device_ops erdma_device_ops = {
460         .owner = THIS_MODULE,
461         .driver_id = RDMA_DRIVER_ERDMA,
462         .uverbs_abi_ver = ERDMA_ABI_VERSION,
463
464         .alloc_mr = erdma_ib_alloc_mr,
465         .alloc_pd = erdma_alloc_pd,
466         .alloc_ucontext = erdma_alloc_ucontext,
467         .create_cq = erdma_create_cq,
468         .create_qp = erdma_create_qp,
469         .dealloc_pd = erdma_dealloc_pd,
470         .dealloc_ucontext = erdma_dealloc_ucontext,
471         .dereg_mr = erdma_dereg_mr,
472         .destroy_cq = erdma_destroy_cq,
473         .destroy_qp = erdma_destroy_qp,
474         .get_dma_mr = erdma_get_dma_mr,
475         .get_port_immutable = erdma_get_port_immutable,
476         .iw_accept = erdma_accept,
477         .iw_add_ref = erdma_qp_get_ref,
478         .iw_connect = erdma_connect,
479         .iw_create_listen = erdma_create_listen,
480         .iw_destroy_listen = erdma_destroy_listen,
481         .iw_get_qp = erdma_get_ibqp,
482         .iw_reject = erdma_reject,
483         .iw_rem_ref = erdma_qp_put_ref,
484         .map_mr_sg = erdma_map_mr_sg,
485         .mmap = erdma_mmap,
486         .mmap_free = erdma_mmap_free,
487         .modify_qp = erdma_modify_qp,
488         .post_recv = erdma_post_recv,
489         .post_send = erdma_post_send,
490         .poll_cq = erdma_poll_cq,
491         .query_device = erdma_query_device,
492         .query_gid = erdma_query_gid,
493         .query_port = erdma_query_port,
494         .query_qp = erdma_query_qp,
495         .req_notify_cq = erdma_req_notify_cq,
496         .reg_user_mr = erdma_reg_user_mr,
497
498         INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
499         INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
500         INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
501         INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
502 };
503
504 static int erdma_ib_device_add(struct pci_dev *pdev)
505 {
506         struct erdma_dev *dev = pci_get_drvdata(pdev);
507         struct ib_device *ibdev = &dev->ibdev;
508         u64 mac;
509         int ret;
510
511         ret = erdma_dev_attrs_init(dev);
512         if (ret)
513                 return ret;
514
515         ibdev->node_type = RDMA_NODE_RNIC;
516         memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
517
518         /*
519          * Current model (one-to-one device association):
520          * One ERDMA device per net_device or, equivalently,
521          * per physical port.
522          */
523         ibdev->phys_port_cnt = 1;
524         ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
525
526         ib_set_device_ops(ibdev, &erdma_device_ops);
527
528         INIT_LIST_HEAD(&dev->cep_list);
529
530         spin_lock_init(&dev->lock);
531         xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
532         xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
533         dev->next_alloc_cqn = 1;
534         dev->next_alloc_qpn = 1;
535
536         ret = erdma_res_cb_init(dev);
537         if (ret)
538                 return ret;
539
540         spin_lock_init(&dev->db_bitmap_lock);
541         bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
542         bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
543
544         atomic_set(&dev->num_ctx, 0);
545
546         mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
547         mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
548
549         u64_to_ether_addr(mac, dev->attrs.peer_addr);
550
551         dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
552                                           WQ_UNBOUND_MAX_ACTIVE);
553         if (!dev->reflush_wq) {
554                 ret = -ENOMEM;
555                 goto err_alloc_workqueue;
556         }
557
558         ret = erdma_device_register(dev);
559         if (ret)
560                 goto err_register;
561
562         return 0;
563
564 err_register:
565         destroy_workqueue(dev->reflush_wq);
566 err_alloc_workqueue:
567         xa_destroy(&dev->qp_xa);
568         xa_destroy(&dev->cq_xa);
569
570         erdma_res_cb_free(dev);
571
572         return ret;
573 }
574
575 static void erdma_ib_device_remove(struct pci_dev *pdev)
576 {
577         struct erdma_dev *dev = pci_get_drvdata(pdev);
578
579         unregister_netdevice_notifier(&dev->netdev_nb);
580         ib_unregister_device(&dev->ibdev);
581
582         destroy_workqueue(dev->reflush_wq);
583         erdma_res_cb_free(dev);
584         xa_destroy(&dev->qp_xa);
585         xa_destroy(&dev->cq_xa);
586 }
587
/*
 * PCI probe entry point: bring up the PCI function first, then the RDMA
 * device on top of it. If the RDMA part fails, the PCI-level setup is
 * undone again. Returns 0 on success or a negative errno.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int ret;

        ret = erdma_probe_dev(pdev);
        if (ret)
                return ret;

        ret = erdma_ib_device_add(pdev);
        if (ret)
                erdma_remove_dev(pdev);

        return ret;
}
604
/*
 * PCI remove entry point: tear down the RDMA device before the PCI-level
 * resources it sits on, the reverse of erdma_probe().
 */
static void erdma_remove(struct pci_dev *pdev)
{
        erdma_ib_device_remove(pdev);

        erdma_remove_dev(pdev);
}
610
611 static struct pci_driver erdma_pci_driver = {
612         .name = DRV_MODULE_NAME,
613         .id_table = erdma_pci_tbl,
614         .probe = erdma_probe,
615         .remove = erdma_remove
616 };
617
618 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
619
620 static __init int erdma_init_module(void)
621 {
622         int ret;
623
624         ret = erdma_cm_init();
625         if (ret)
626                 return ret;
627
628         ret = pci_register_driver(&erdma_pci_driver);
629         if (ret)
630                 erdma_cm_exit();
631
632         return ret;
633 }
634
635 static void __exit erdma_exit_module(void)
636 {
637         pci_unregister_driver(&erdma_pci_driver);
638
639         erdma_cm_exit();
640 }
641
642 module_init(erdma_init_module);
643 module_exit(erdma_exit_module);