1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /* Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
7 #include <linux/module.h>
8 #include <net/addrconf.h>
9 #include <rdma/erdma-abi.h>
13 #include "erdma_verbs.h"
/* Module metadata: author, one-line description and licence string. */
15 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
16 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
17 MODULE_LICENSE("Dual BSD/GPL");
/*
 * erdma_netdev_event - netdevice notifier callback of an erdma device.
 *
 * The owning erdma_dev is recovered from @nb with container_of() (the
 * notifier block is the netdev_nb member), and the net_device the event
 * refers to is taken from the notifier info.
 *
 * NOTE(review): the embedded line numbers skip (19 -> 22, 25 -> 30, 40 -> 44,
 * ...), so the tail of the parameter list, the switch header, the case
 * labels of the two port-state branches and the return path are not visible
 * in this listing. Restore them from the upstream file before editing.
 */
19 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
22 struct net_device *netdev = netdev_notifier_info_to_dev(arg);
23 struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
/*
 * Filter: the event must refer to the netdev stored in dev->netdev.
 * The action taken on a mismatch is not visible here.
 */
25 if (dev->netdev == NULL || dev->netdev != netdev)
/* Record the port as active and raise IB_EVENT_PORT_ACTIVE. */
30 dev->state = IB_PORT_ACTIVE;
31 erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
/* Record the port as down and raise IB_EVENT_PORT_ERR. */
34 dev->state = IB_PORT_DOWN;
35 erdma_port_event(dev, IB_EVENT_PORT_ERR);
/* On an MTU change, hand the new value to erdma_set_mtu() and cache it. */
37 case NETDEV_CHANGEMTU:
38 if (dev->mtu != netdev->mtu) {
39 erdma_set_mtu(dev, netdev->mtu);
40 dev->mtu = netdev->mtu;
/* These three events share one branch; its body is not visible here. */
44 case NETDEV_UNREGISTER:
45 case NETDEV_CHANGEADDR:
46 case NETDEV_GOING_DOWN:
/*
 * erdma_enum_and_get_netdev - find the net_device paired with this erdma
 * device and attach it to the ib_device.
 *
 * Walks every netdev in init_net and compares its permanent MAC address with
 * dev->attrs.peer_addr. The result starts as -EPROBE_DEFER, so that value is
 * what remains if the loop never assigns ret (presumably when no netdev
 * matches - the return statement is not visible in this listing).
 *
 * NOTE(review): lines are missing from this listing (embedded numbers skip),
 * including the early-exit check after the first comment, the opening and
 * closing of the multi-line comment inside the loop, and the return path.
 */
56 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
58 struct net_device *netdev;
59 int ret = -EPROBE_DEFER;
61 /* Already bound to a net_device, so we skip. */
66 for_each_netdev(&init_net, netdev) {
68 * In erdma, the paired netdev and ibdev should have the same
69 * MAC address. erdma can get the value from its PCIe bar
70 * registers. Since erdma can not get the paired netdev
71 * reference directly, we do a traverse here to get the paired
/*
 * On a MAC match, bind the netdev to the ib_device (third argument 1 is
 * passed to ib_device_set_netdev()) and warn if that call fails.
 */
74 if (ether_addr_equal_unaligned(netdev->perm_addr,
75 dev->attrs.peer_addr)) {
76 ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
79 ibdev_warn(&dev->ibdev,
80 "failed (%d) to link netdev", ret);
/*
 * erdma_device_register - bind the paired netdev, register the ib_device
 * and install the netdevice notifier.
 *
 * Visible sequence: erdma_enum_and_get_netdev(), cache the netdev MTU, derive
 * the node GUID from the netdev MAC, ib_register_device() with the name
 * pattern "erdma_%d", then register_netdevice_notifier().
 *
 * NOTE(review): the `if (ret)` checks and return statements between these
 * calls are not visible in this listing (embedded numbers skip).
 */
94 static int erdma_device_register(struct erdma_dev *dev)
96 struct ib_device *ibdev = &dev->ibdev;
99 ret = erdma_enum_and_get_netdev(dev);
/* dev->netdev is dereferenced here, so it must have been set by this point. */
103 dev->mtu = dev->netdev->mtu;
/* Fill node_guid from the netdev's current MAC address (EUI-48 helper). */
104 addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
106 ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
108 dev_err(&dev->pdev->dev,
109 "ib_register_device failed: ret = %d\n", ret);
/* Hook erdma_netdev_event() into the netdevice notifier chain. */
113 dev->netdev_nb.notifier_call = erdma_netdev_event;
114 ret = register_netdevice_notifier(&dev->netdev_nb);
/* Notifier registration failure path: undo the ib_device registration. */
116 ibdev_err(&dev->ibdev, "failed to register notifier.\n");
117 ib_unregister_device(ibdev);
/*
 * erdma_comm_irq_handler - handler of the common interrupt.
 * @data is the erdma_dev passed at request_irq() time. Both the command
 * queue completion handler and the AEQ event handler are run on every
 * invocation. The return statement is not visible in this listing.
 */
123 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
125 struct erdma_dev *dev = data;
127 erdma_cmdq_completion_handler(&dev->cmdq);
128 erdma_aeq_event_handler(dev);
/*
 * erdma_dwqe_resource_init - size the direct-WQE resources from the group
 * count reported by the device.
 *
 * Reads ERDMA_REGS_GRP_NUM_REG into attrs.grp_num, sets attrs.disable_dwqe
 * (true when fewer than 4 groups are reported) and fills attrs.dwqe_pages /
 * attrs.dwqe_entries from the type0/type1 split computed below.
 *
 * NOTE(review): the `else` lines pairing the two branches are not visible in
 * this listing (embedded numbers skip 140 -> 142 and 150 -> 152).
 */
133 static void erdma_dwqe_resource_init(struct erdma_dev *dev)
135 int total_pages, type0, type1;
137 dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
139 if (dev->attrs.grp_num < 4)
140 dev->attrs.disable_dwqe = true;
142 dev->attrs.disable_dwqe = false;
/*
 * NOTE(review): the comment below says a page holds 4 groups, yet the code
 * multiplies grp_num by 4 to get total_pages - confirm which is intended.
 */
144 /* One page contains 4 groups. */
145 total_pages = dev->attrs.grp_num * 4;
/* At or above the maximum group count: clamp grp_num and use the fixed counts. */
147 if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
148 dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
149 type0 = ERDMA_DWQE_TYPE0_CNT;
150 type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
/* Otherwise: a third of the pages go to type1, the rest minus one to type0. */
152 type1 = total_pages / 3;
153 type0 = total_pages - type1 - 1;
/* type0 is counted in pages, type1 is converted from pages to entries. */
156 dev->attrs.dwqe_pages = type0;
157 dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
/*
 * erdma_request_vectors - allocate MSI-X vectors for the device.
 *
 * Asks for between 1 and min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC)
 * vectors and stores the value returned by pci_alloc_irq_vectors() in
 * attrs.irq_num. The failure check around the dev_err() call and the return
 * statements are not visible in this listing.
 */
160 static int erdma_request_vectors(struct erdma_dev *dev)
162 int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
165 ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
167 dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
171 dev->attrs.irq_num = ret;
/*
 * erdma_comm_irq_init - set up the common interrupt.
 *
 * Names the interrupt "erdma-common@pci:<pci name>", looks up the Linux IRQ
 * number of MSI-X vector ERDMA_MSIX_VECTOR_CMDQ, publishes an affinity hint
 * and requests the IRQ with erdma_comm_irq_handler() and @dev as cookie.
 *
 * Return: the result of request_irq().
 */
176 static int erdma_comm_irq_init(struct erdma_dev *dev)
178 snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
179 pci_name(dev->pdev));
180 dev->comm_irq.msix_vector =
181 pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
/* Hint: the first CPU in the cpumask associated with the device's PCI bus. */
183 cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
184 &dev->comm_irq.affinity_hint_mask);
185 irq_set_affinity_hint(dev->comm_irq.msix_vector,
186 &dev->comm_irq.affinity_hint_mask);
/*
 * NOTE(review): the affinity hint is set before request_irq(); nothing
 * visible here clears it if request_irq() fails - confirm whether needed.
 */
188 return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
189 dev->comm_irq.name, dev);
/*
 * erdma_comm_irq_uninit - counterpart of erdma_comm_irq_init(): clear the
 * affinity hint, then free the IRQ using the same @dev cookie.
 */
192 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
194 irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
195 free_irq(dev->comm_irq.msix_vector, dev);
/*
 * erdma_device_init - early per-device setup: direct-WQE sizing and DMA
 * configuration.
 *
 * Sets the streaming and coherent DMA masks to ERDMA_PCI_WIDTH bits and the
 * maximum DMA segment size to UINT_MAX. The check of @ret and the return
 * statements are not visible in this listing.
 */
198 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
202 erdma_dwqe_resource_init(dev);
204 ret = dma_set_mask_and_coherent(&pdev->dev,
205 DMA_BIT_MASK(ERDMA_PCI_WIDTH));
209 dma_set_max_seg_size(&pdev->dev, UINT_MAX);
/*
 * erdma_hw_reset - write the device control register with only the reset
 * field set to 1.
 */
214 static void erdma_hw_reset(struct erdma_dev *dev)
216 u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
218 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
/*
 * erdma_wait_hw_init_done - start device initialisation and poll for
 * completion.
 *
 * Writes the init field of the device control register, then polls the
 * init-done field of the status register up to ERDMA_WAIT_DEV_DONE_CNT
 * times, sleeping ERDMA_REG_ACCESS_WAIT_MS milliseconds between reads.
 * Exhausting the loop is reported as a failure via dev_err().
 *
 * NOTE(review): the statement executed when the done bit is seen (presumably
 * a break) and the return values are not visible in this listing.
 */
221 static int erdma_wait_hw_init_done(struct erdma_dev *dev)
225 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
226 FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
228 for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
229 if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
230 ERDMA_REG_DEV_ST_INIT_DONE_MASK))
233 msleep(ERDMA_REG_ACCESS_WAIT_MS);
/* i reaches the limit only when every poll failed to see the done bit. */
236 if (i == ERDMA_WAIT_DEV_DONE_CNT) {
237 dev_err(&dev->pdev->dev, "wait init done failed.\n");
/*
 * PCI IDs handled by this driver: Alibaba vendor ID, device ID 0x107f.
 * The terminating entry and closing brace are not visible in this listing.
 */
244 static const struct pci_device_id erdma_pci_tbl[] = {
245 { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
/*
 * erdma_probe_dev - PCI-level bring-up of an erdma function.
 *
 * Visible order of acquisition: enable the PCI device and bus mastering,
 * allocate the ib_device-embedding erdma_dev, request the memory BARs, map
 * the function BAR, read the version register, erdma_device_init(), MSI-X
 * vectors, common IRQ, AEQ, command queue, wait for hardware init, CEQs,
 * and finally erdma_finish_cmdq_init(). The tail of the function releases
 * the same resources in reverse order for the error paths.
 *
 * NOTE(review): this listing has lines missing (embedded numbers skip):
 * most `if (err)` checks, several goto statements and error labels, the
 * assignment target of devm_ioremap() and the success return are not
 * visible. Do not edit the unwind order without the complete source.
 */
249 static int erdma_probe_dev(struct pci_dev *pdev)
251 struct erdma_dev *dev;
255 err = pci_enable_device(pdev);
257 dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
261 pci_set_master(pdev);
263 dev = ib_alloc_device(erdma_dev, ibdev);
265 dev_err(&pdev->dev, "ib_alloc_device failed\n");
267 goto err_disable_device;
/* Later callbacks fetch the erdma_dev back with pci_get_drvdata(). */
270 pci_set_drvdata(pdev, dev);
272 dev->attrs.numa_node = dev_to_node(&pdev->dev);
/*
 * Request every memory BAR; the set of memory BARs must be exactly
 * ERDMA_BAR_MASK, otherwise the probe fails with -EINVAL (or with the
 * request error if there was one).
 */
274 bars = pci_select_bars(pdev, IORESOURCE_MEM);
275 err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
276 if (bars != ERDMA_BAR_MASK || err) {
277 err = err ? err : -EINVAL;
278 goto err_ib_device_release;
281 dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
282 dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
/* Map the function BAR; the following check shows the result lands in dev->func_bar. */
285 devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
286 if (!dev->func_bar) {
287 dev_err(&pdev->dev, "devm_ioremap failed.\n");
289 goto err_release_bars;
/* The condition tested on the version value is not visible in this listing. */
292 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
294 /* We know that it is a non-functional function. */
296 goto err_iounmap_func_bar;
299 err = erdma_device_init(dev, pdev);
301 goto err_iounmap_func_bar;
303 err = erdma_request_vectors(dev);
305 goto err_iounmap_func_bar;
307 err = erdma_comm_irq_init(dev);
309 goto err_free_vectors;
311 err = erdma_aeq_init(dev);
313 goto err_uninit_comm_irq;
315 err = erdma_cmdq_init(dev);
319 err = erdma_wait_hw_init_done(dev);
321 goto err_uninit_cmdq;
323 err = erdma_ceqs_init(dev);
327 erdma_finish_cmdq_init(dev);
/*
 * Error unwind: each step below undoes one acquisition above, newest first.
 * Only two of the labels are visible in this listing.
 */
335 erdma_cmdq_destroy(dev);
338 erdma_aeq_destroy(dev);
341 erdma_comm_irq_uninit(dev);
344 pci_free_irq_vectors(dev->pdev);
346 err_iounmap_func_bar:
347 devm_iounmap(&pdev->dev, dev->func_bar);
350 pci_release_selected_regions(pdev, bars);
352 err_ib_device_release:
353 ib_dealloc_device(&dev->ibdev);
356 pci_disable_device(pdev);
/*
 * erdma_remove_dev - PCI-level teardown, the inverse of erdma_probe_dev().
 *
 * Releases resources newest first: CEQs, command queue, AEQ, common IRQ,
 * MSI-X vectors, function BAR mapping, PCI regions, the ib_device
 * allocation and finally the PCI device itself.
 */
361 static void erdma_remove_dev(struct pci_dev *pdev)
363 struct erdma_dev *dev = pci_get_drvdata(pdev);
365 erdma_ceqs_uninit(dev);
367 erdma_cmdq_destroy(dev);
368 erdma_aeq_destroy(dev);
369 erdma_comm_irq_uninit(dev);
370 pci_free_irq_vectors(dev->pdev);
372 devm_iounmap(&pdev->dev, dev->func_bar);
/*
 * The probe path only succeeds when the requested BAR set equals
 * ERDMA_BAR_MASK, so releasing by that constant matches the request.
 */
373 pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
375 ib_dealloc_device(&dev->ibdev);
377 pci_disable_device(pdev);
/*
 * ERDMA_GET_CAP(name, cap) - extract the field selected by
 * ERDMA_CMD_DEV_CAP_<name>_MASK from the capability word @cap.
 */
380 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
/*
 * erdma_dev_attrs_init - fill dev->attrs from the device and from driver
 * constants.
 *
 * Issues a QUERY_DEVICE command and decodes the capability words cap0/cap1,
 * sets the remaining limits from ERDMA_MAX_* constants, seeds the max_cap of
 * the PD and STAG-index resource pools, then issues a QUERY_FW_INFO command
 * to obtain the firmware version.
 *
 * NOTE(review): the trailing arguments of both erdma_post_cmd_wait() calls,
 * the error checks after them and the return statement are not visible in
 * this listing (embedded numbers skip).
 */
382 static int erdma_dev_attrs_init(struct erdma_dev *dev)
385 u64 req_hdr, cap0, cap1;
387 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
388 CMDQ_OPCODE_QUERY_DEVICE);
390 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
/*
 * These four capabilities are decoded as power-of-two exponents.
 * NOTE(review): three of the shifts use a plain int `1`; presumably the
 * field widths keep the exponent below 31 - confirm against the masks.
 */
395 dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
396 dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
397 dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
398 dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
399 dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
400 dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
/* QP limit is the QBLOCK count times QPs per block; MR and CQ limits are twice that. */
401 dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
402 dev->attrs.max_mr = dev->attrs.max_qp << 1;
403 dev->attrs.max_cq = dev->attrs.max_qp << 1;
404 dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
/* Limits taken from compile-time driver constants rather than the device. */
406 dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
407 dev->attrs.max_ord = ERDMA_MAX_ORD;
408 dev->attrs.max_ird = ERDMA_MAX_IRD;
409 dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
410 dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
411 dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
412 dev->attrs.max_pd = ERDMA_MAX_PD;
/* Capacities read by erdma_res_cb_init() when it allocates the pool bitmaps. */
414 dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
415 dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
/* Second query: cap0 is reused as the response buffer for the firmware info. */
417 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
418 CMDQ_OPCODE_QUERY_FW_INFO);
420 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
423 dev->attrs.fw_version =
424 FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
/*
 * erdma_res_cb_init - initialise every resource control block in
 * dev->res_cb[].
 *
 * For each of the ERDMA_RES_CNT entries: start allocation at index 1,
 * initialise the spinlock and allocate a zeroed bitmap of max_cap bits.
 * The trailing loop frees the bitmaps of entries [0, i), i.e. those that
 * were allocated before a failing entry.
 *
 * NOTE(review): the goto/label linking the allocation failure to the
 * cleanup loop and both return statements are not visible in this listing.
 */
429 static int erdma_res_cb_init(struct erdma_dev *dev)
433 for (i = 0; i < ERDMA_RES_CNT; i++) {
434 dev->res_cb[i].next_alloc_idx = 1;
435 spin_lock_init(&dev->res_cb[i].lock);
436 dev->res_cb[i].bitmap =
437 bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
438 if (!dev->res_cb[i].bitmap)
445 for (j = 0; j < i; j++)
446 bitmap_free(dev->res_cb[j].bitmap);
/* erdma_res_cb_free - free the bitmap of every resource control block. */
451 static void erdma_res_cb_free(struct erdma_dev *dev)
455 for (i = 0; i < ERDMA_RES_CNT; i++)
456 bitmap_free(dev->res_cb[i].bitmap);
/*
 * Verbs operation table installed on the ib_device by
 * erdma_ib_device_add() through ib_set_device_ops().
 *
 * NOTE(review): a few entries and the closing brace are not visible in this
 * listing (embedded numbers skip 462 -> 464, 484 -> 486, 496 -> 498,
 * 501 -> 504).
 */
459 static const struct ib_device_ops erdma_device_ops = {
460 .owner = THIS_MODULE,
461 .driver_id = RDMA_DRIVER_ERDMA,
462 .uverbs_abi_ver = ERDMA_ABI_VERSION,
/* Verbs callbacks; the iw_* entries are the iWARP connection-management hooks. */
464 .alloc_mr = erdma_ib_alloc_mr,
465 .alloc_pd = erdma_alloc_pd,
466 .alloc_ucontext = erdma_alloc_ucontext,
467 .create_cq = erdma_create_cq,
468 .create_qp = erdma_create_qp,
469 .dealloc_pd = erdma_dealloc_pd,
470 .dealloc_ucontext = erdma_dealloc_ucontext,
471 .dereg_mr = erdma_dereg_mr,
472 .destroy_cq = erdma_destroy_cq,
473 .destroy_qp = erdma_destroy_qp,
474 .get_dma_mr = erdma_get_dma_mr,
475 .get_port_immutable = erdma_get_port_immutable,
476 .iw_accept = erdma_accept,
477 .iw_add_ref = erdma_qp_get_ref,
478 .iw_connect = erdma_connect,
479 .iw_create_listen = erdma_create_listen,
480 .iw_destroy_listen = erdma_destroy_listen,
481 .iw_get_qp = erdma_get_ibqp,
482 .iw_reject = erdma_reject,
483 .iw_rem_ref = erdma_qp_put_ref,
484 .map_mr_sg = erdma_map_mr_sg,
486 .mmap_free = erdma_mmap_free,
487 .modify_qp = erdma_modify_qp,
488 .post_recv = erdma_post_recv,
489 .post_send = erdma_post_send,
490 .poll_cq = erdma_poll_cq,
491 .query_device = erdma_query_device,
492 .query_gid = erdma_query_gid,
493 .query_port = erdma_query_port,
494 .query_qp = erdma_query_qp,
495 .req_notify_cq = erdma_req_notify_cq,
496 .reg_user_mr = erdma_reg_user_mr,
/* Map each core object type to the driver structure that embeds it. */
498 INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
499 INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
500 INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
501 INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
/*
 * erdma_ib_device_add - RDMA-level bring-up, run after erdma_probe_dev().
 *
 * Visible sequence: query device attributes, fill the static ib_device
 * fields and install erdma_device_ops, initialise lists, locks, XArrays and
 * doorbell bitmaps, allocate the resource bitmaps, read the peer MAC from
 * the device registers, create the reflush workqueue and finally call
 * erdma_device_register(). The tail undoes workqueue, XArrays and resource
 * bitmaps for the error paths.
 *
 * NOTE(review): the `if (ret)` checks, the error labels, the opening and
 * closing of the "Current model" comment and the return statements are not
 * visible in this listing (embedded numbers skip).
 */
504 static int erdma_ib_device_add(struct pci_dev *pdev)
506 struct erdma_dev *dev = pci_get_drvdata(pdev);
507 struct ib_device *ibdev = &dev->ibdev;
511 ret = erdma_dev_attrs_init(dev);
515 ibdev->node_type = RDMA_NODE_RNIC;
516 memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
519 * Current model (one-to-one device association):
520 * One ERDMA device per net_device or, equivalently,
523 ibdev->phys_port_cnt = 1;
/* One of the allocated vectors is not exposed as a completion vector. */
524 ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
526 ib_set_device_ops(ibdev, &erdma_device_ops);
528 INIT_LIST_HEAD(&dev->cep_list);
530 spin_lock_init(&dev->lock);
/* XA_FLAGS_ALLOC1: allocating XArrays whose IDs start at 1, matching next_alloc_* below. */
531 xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
532 xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
533 dev->next_alloc_cqn = 1;
534 dev->next_alloc_qpn = 1;
536 ret = erdma_res_cb_init(dev);
540 spin_lock_init(&dev->db_bitmap_lock);
541 bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
542 bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
544 atomic_set(&dev->num_ctx, 0);
/*
 * Assemble the 64-bit MAC value from the low and high registers and store
 * it as attrs.peer_addr, which erdma_enum_and_get_netdev() matches against.
 */
546 mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
547 mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
549 u64_to_ether_addr(mac, dev->attrs.peer_addr);
551 dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
552 WQ_UNBOUND_MAX_ACTIVE);
553 if (!dev->reflush_wq) {
555 goto err_alloc_workqueue;
558 ret = erdma_device_register(dev);
/* Error unwind, newest resource first; the labels are not visible here. */
565 destroy_workqueue(dev->reflush_wq);
567 xa_destroy(&dev->qp_xa);
568 xa_destroy(&dev->cq_xa);
570 erdma_res_cb_free(dev);
/*
 * erdma_ib_device_remove - RDMA-level teardown, the inverse of
 * erdma_ib_device_add(): drop the netdevice notifier, unregister the
 * ib_device, then release the workqueue, resource bitmaps and XArrays.
 */
575 static void erdma_ib_device_remove(struct pci_dev *pdev)
577 struct erdma_dev *dev = pci_get_drvdata(pdev);
579 unregister_netdevice_notifier(&dev->netdev_nb);
580 ib_unregister_device(&dev->ibdev);
582 destroy_workqueue(dev->reflush_wq);
583 erdma_res_cb_free(dev);
584 xa_destroy(&dev->qp_xa);
585 xa_destroy(&dev->cq_xa);
/*
 * erdma_probe - PCI probe entry point: PCI-level bring-up followed by
 * RDMA-level bring-up.
 *
 * erdma_remove_dev() is the visible rollback for the PCI-level step,
 * presumably taken when erdma_ib_device_add() fails - the `if (ret)` checks
 * and return statements are not visible in this listing.
 */
588 static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
592 ret = erdma_probe_dev(pdev);
596 ret = erdma_ib_device_add(pdev);
598 erdma_remove_dev(pdev);
/*
 * erdma_remove - PCI remove entry point: tear down in the reverse order of
 * erdma_probe(), RDMA level first, then PCI level.
 */
605 static void erdma_remove(struct pci_dev *pdev)
607 erdma_ib_device_remove(pdev);
608 erdma_remove_dev(pdev);
/*
 * PCI driver descriptor: ties the ID table to the probe/remove entry points.
 * The closing brace is not visible in this listing.
 */
611 static struct pci_driver erdma_pci_driver = {
612 .name = DRV_MODULE_NAME,
613 .id_table = erdma_pci_tbl,
614 .probe = erdma_probe,
615 .remove = erdma_remove
/* Export the PCI ID table in the module's device table. */
618 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
/*
 * erdma_init_module - module entry point: initialise the connection
 * manager with erdma_cm_init(), then register the PCI driver.
 *
 * NOTE(review): the error checks after both calls and the return statement
 * are not visible in this listing (embedded numbers skip).
 */
620 static __init int erdma_init_module(void)
624 ret = erdma_cm_init();
628 ret = pci_register_driver(&erdma_pci_driver);
/*
 * erdma_exit_module - module exit point: unregister the PCI driver.
 * NOTE(review): lines after the unregister call are not visible in this
 * listing; a counterpart to erdma_cm_init() is presumably among them.
 */
635 static void __exit erdma_exit_module(void)
637 pci_unregister_driver(&erdma_pci_driver);
/* Register the module load and unload entry points. */
642 module_init(erdma_init_module);
643 module_exit(erdma_exit_module);