net/smc: handle device, port, and QP error events
author Ursula Braun <ubraun@linux.vnet.ibm.com>
Fri, 26 Jan 2018 08:28:46 +0000 (09:28 +0100)
committer David S. Miller <davem@davemloft.net>
Fri, 26 Jan 2018 15:41:56 +0000 (10:41 -0500)
RoCE device changes cause an IB event, processed in the global event
handler for the RoCE device. Problems with a particular Queue Pair cause a
QP event, processed in the QP event handler for this QP.
Among those events are port errors and other fatal device errors. All
link groups using such a port or device must be terminated in those cases.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/smc_ib.c

index 90f1a7f9085c7bfaa0e2b2722938494a6baf68c0..2a8957bd6d3890debe7e3c58aec0cede68d104ee 100644 (file)
@@ -141,6 +141,17 @@ out:
        return rc;
 }
 
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct smc_link_group *lgr, *l;
+
+       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+               if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+                   lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+                       smc_lgr_terminate(lgr);
+       }
+}
+
 /* process context wrapper for might_sleep smc_ib_remember_port_attr */
 static void smc_ib_port_event_work(struct work_struct *work)
 {
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
        for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
                smc_ib_remember_port_attr(smcibdev, port_idx + 1);
                clear_bit(port_idx, &smcibdev->port_event_mask);
+               if (!smc_ib_port_active(smcibdev, port_idx + 1))
+                       smc_ib_port_terminate(smcibdev, port_idx + 1);
        }
 }
 
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
 
        switch (ibevent->event) {
        case IB_EVENT_PORT_ERR:
-               port_idx = ibevent->element.port_num - 1;
-               set_bit(port_idx, &smcibdev->port_event_mask);
-               schedule_work(&smcibdev->port_event_work);
-               /* fall through */
        case IB_EVENT_DEVICE_FATAL:
-               /* tbd in follow-on patch:
-                * abnormal close of corresponding connections
-                */
-               break;
        case IB_EVENT_PORT_ACTIVE:
                port_idx = ibevent->element.port_num - 1;
                set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
 
 void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
 {
-       ib_dealloc_pd(lnk->roce_pd);
+       if (lnk->roce_pd)
+               ib_dealloc_pd(lnk->roce_pd);
        lnk->roce_pd = NULL;
 }
 
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 
 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 {
+       struct smc_ib_device *smcibdev =
+               (struct smc_ib_device *)ibevent->device;
+       u8 port_idx;
+
        switch (ibevent->event) {
        case IB_EVENT_DEVICE_FATAL:
        case IB_EVENT_GID_CHANGE:
        case IB_EVENT_PORT_ERR:
        case IB_EVENT_QP_ACCESS_ERR:
-               /* tbd in follow-on patch:
-                * abnormal close of corresponding connections
-                */
+               port_idx = ibevent->element.port_num - 1;
+               set_bit(port_idx, &smcibdev->port_event_mask);
+               schedule_work(&smcibdev->port_event_work);
                break;
        default:
                break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 
 void smc_ib_destroy_queue_pair(struct smc_link *lnk)
 {
-       ib_destroy_qp(lnk->roce_qp);
+       if (lnk->roce_qp)
+               ib_destroy_qp(lnk->roce_qp);
        lnk->roce_qp = NULL;
 }
 
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
 {
        if (!smcibdev->initialized)
                return;
+       smcibdev->initialized = 0;
        smc_wr_remove_dev(smcibdev);
        ib_unregister_event_handler(&smcibdev->event_handler);
        ib_destroy_cq(smcibdev->roce_cq_recv);