RDMA/hns: Add the process of AEQ overflow for hip08
author Xiaofei Tan <tanxiaofei@huawei.com>
Sat, 19 Jan 2019 06:23:29 +0000 (14:23 +0800)
committer Jason Gunthorpe <jgg@mellanox.com>
Mon, 21 Jan 2019 23:47:54 +0000 (16:47 -0700)
The hardware reports an AEQ overflow when too many asynchronous events
occur and are not handled in time.  Normally, an AEQ overflow error
rarely occurs.  Once it happens, we have to do a physical function (PF)
reset to recover.  The PF reset is implemented in two steps.  First, set
the reset level with ae_dev->ops->set_default_reset_request.  Second, run
the reset with ae_dev->ops->reset_event.

Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hns/hns_roce_hw_v2.c

index d778457ec1e54da5b4fe46d367607d9f9157c229..fb990ff0c1276315a6e2168db4dff2e41a3e8caa 100644 (file)
@@ -4702,11 +4702,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
        int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
 
        if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+               struct pci_dev *pdev = hr_dev->pci_dev;
+               struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+               const struct hnae3_ae_ops *ops = ae_dev->ops;
+
                dev_err(dev, "AEQ overflow!\n");
 
                roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
                roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
 
+               /* Set reset level for reset_event() */
+               if (ops->set_default_reset_request)
+                       ops->set_default_reset_request(ae_dev,
+                                                      HNAE3_FUNC_RESET);
+               if (ops->reset_event)
+                       ops->reset_event(pdev, NULL);
+
                roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
                roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);