1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <net/tcp.h>
19 #include <net/sock.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
22
23 #include "smc.h"
24 #include "smc_clc.h"
25 #include "smc_core.h"
26 #include "smc_ib.h"
27 #include "smc_wr.h"
28 #include "smc_llc.h"
29 #include "smc_cdc.h"
30 #include "smc_close.h"
31 #include "smc_ism.h"
32
33 #define SMC_LGR_NUM_INCR                256
34 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
37
38 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
39         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
40         .list = LIST_HEAD_INIT(smc_lgr_list.list),
41         .num = 0,
42 };
43
44 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
46
47 struct smc_ib_up_work {
48         struct work_struct      work;
49         struct smc_link_group   *lgr;
50         struct smc_ib_device    *smcibdev;
51         u8                      ibport;
52 };
53
54 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
55                          struct smc_buf_desc *buf_desc);
56 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
57
58 static void smc_link_up_work(struct work_struct *work);
59 static void smc_link_down_work(struct work_struct *work);
60
61 /* return head of link group list and its lock for a given link group */
62 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
63                                                   spinlock_t **lgr_lock)
64 {
65         if (lgr->is_smcd) {
66                 *lgr_lock = &lgr->smcd->lgr_lock;
67                 return &lgr->smcd->lgr_list;
68         }
69
70         *lgr_lock = &smc_lgr_list.lock;
71         return &smc_lgr_list.list;
72 }
73
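/* Schedule the delayed freeing of a link group, unless freeing is already
 * in progress or a fast free has been requested.
 */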
74 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
75 {
        /* Client link group creation always follows the server link group
         * creation. For the client, use a somewhat higher removal delay
         * time; otherwise there is a risk of out-of-sync link groups.
         */
80         if (!lgr->freeing && !lgr->freefast) {
81                 mod_delayed_work(system_wq, &lgr->free_work,
82                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
83                                                 SMC_LGR_FREE_DELAY_CLNT :
84                                                 SMC_LGR_FREE_DELAY_SERV);
85         }
86 }
87
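/* Schedule freeing of the link group with a short delay and mark it as
 * fast-freeing, so smc_lgr_schedule_free_work() no longer re-arms the
 * delayed work.
 */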
88 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
89 {
90         if (!lgr->freeing && !lgr->freefast) {
91                 lgr->freefast = 1;
92                 mod_delayed_work(system_wq, &lgr->free_work,
93                                  SMC_LGR_FREE_DELAY_FAST);
94         }
95 }
96
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
103 static void smc_lgr_add_alert_token(struct smc_connection *conn)
104 {
105         struct rb_node **link, *parent = NULL;
106         u32 token = conn->alert_token_local;
107
108         link = &conn->lgr->conns_all.rb_node;
109         while (*link) {
110                 struct smc_connection *cur = rb_entry(*link,
111                                         struct smc_connection, alert_node);
112
113                 parent = *link;
114                 if (cur->alert_token_local > token)
115                         link = &parent->rb_left;
116                 else
117                         link = &parent->rb_right;
118         }
119         /* Put the new node there */
120         rb_link_node(&conn->alert_node, parent, link);
121         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
122 }
123
/* Register connection in link group by assigning an alert token;
 * the token is registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and is never assigned.
 */
129 static int smc_lgr_register_conn(struct smc_connection *conn)
130 {
131         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
132         static atomic_t nexttoken = ATOMIC_INIT(0);
133
134         /* find a new alert_token_local value not yet used by some connection
135          * in this link group
136          */
137         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
138         while (!conn->alert_token_local) {
139                 conn->alert_token_local = atomic_inc_return(&nexttoken);
140                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
141                         conn->alert_token_local = 0;
142         }
143         smc_lgr_add_alert_token(conn);
144
145         /* assign the new connection to a link */
146         if (!conn->lgr->is_smcd) {
147                 struct smc_link *lnk;
148                 int i;
149
150                 /* tbd - link balancing */
151                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
152                         lnk = &conn->lgr->lnk[i];
153                         if (lnk->state == SMC_LNK_ACTIVATING ||
154                             lnk->state == SMC_LNK_ACTIVE)
155                                 conn->lnk = lnk;
156                 }
157                 if (!conn->lnk)
158                         return SMC_CLC_DECL_NOACTLINK;
159         }
160         conn->lgr->conns_num++;
161         return 0;
162 }
163
/* Unregister connection and reset the alert token of the given connection
 */
166 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
167 {
168         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
169         struct smc_link_group *lgr = conn->lgr;
170
171         rb_erase(&conn->alert_node, &lgr->conns_all);
172         lgr->conns_num--;
173         conn->alert_token_local = 0;
174         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
175 }
176
177 /* Unregister connection from lgr
178  */
179 static void smc_lgr_unregister_conn(struct smc_connection *conn)
180 {
181         struct smc_link_group *lgr = conn->lgr;
182
183         if (!lgr)
184                 return;
185         write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local)
                __smc_lgr_unregister_conn(conn);
189         write_unlock_bh(&lgr->conns_lock);
190         conn->lgr = NULL;
191 }
192
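/* Early cleanup of a just created link group: free the connection, unlink
 * the link group so it is not used for new connections, and schedule its
 * fast freeing.
 */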
193 void smc_lgr_cleanup_early(struct smc_connection *conn)
194 {
195         struct smc_link_group *lgr = conn->lgr;
196         struct list_head *lgr_list;
197         spinlock_t *lgr_lock;
198
199         if (!lgr)
200                 return;
201
202         smc_conn_free(conn);
203         lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
204         spin_lock_bh(lgr_lock);
205         /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
208         spin_unlock_bh(lgr_lock);
209         smc_lgr_schedule_free_work_fast(lgr);
210 }
211
212 static void smc_lgr_free(struct smc_link_group *lgr);
213
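/* Delayed work to free an unused link group: gives up if freeing is
 * already in progress or if connections were registered in the meantime,
 * otherwise unlinks the lgr, signals the shutdown to the peer (SMC-D) or
 * deactivates all links (SMC-R), and frees it.
 */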
214 static void smc_lgr_free_work(struct work_struct *work)
215 {
216         struct smc_link_group *lgr = container_of(to_delayed_work(work),
217                                                   struct smc_link_group,
218                                                   free_work);
219         spinlock_t *lgr_lock;
220         bool conns;
221         int i;
222
223         smc_lgr_list_head(lgr, &lgr_lock);
224         spin_lock_bh(lgr_lock);
225         if (lgr->freeing) {
226                 spin_unlock_bh(lgr_lock);
227                 return;
228         }
229         read_lock_bh(&lgr->conns_lock);
230         conns = RB_EMPTY_ROOT(&lgr->conns_all);
231         read_unlock_bh(&lgr->conns_lock);
232         if (!conns) { /* number of lgr connections is no longer zero */
233                 spin_unlock_bh(lgr_lock);
234                 return;
235         }
236         list_del_init(&lgr->list); /* remove from smc_lgr_list */
237         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
238         spin_unlock_bh(lgr_lock);
239         cancel_delayed_work(&lgr->free_work);
240
241         if (lgr->is_smcd && !lgr->terminating)
242                 smc_ism_signal_shutdown(lgr);
243         if (!lgr->is_smcd) {
244                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
245                         struct smc_link *lnk = &lgr->lnk[i];
246
247                         if (smc_link_usable(lnk))
248                                 lnk->state = SMC_LNK_INACTIVE;
249                 }
250                 wake_up_interruptible_all(&lgr->llc_waiter);
251         }
252         smc_lgr_free(lgr);
253 }
254
255 static void smc_lgr_terminate_work(struct work_struct *work)
256 {
257         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
258                                                   terminate_work);
259
260         __smc_lgr_terminate(lgr, true);
261 }
262
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
        u8 link_id;
        int i;

        while (1) {
again:
                link_id = ++lgr->next_link_id;
                if (!link_id)   /* skip zero as link_id */
                        link_id = ++lgr->next_link_id;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (smc_link_usable(&lgr->lnk[i]) &&
                            lgr->lnk[i].link_id == link_id)
                                goto again;     /* link_id in use, try next */
                }
                break;
        }
        return link_id;
}
282
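/* Initialize a new SMC-R link: take a reference on the IB device, assign a
 * unique link_id, determine the GID and allocate the LLC, work request and
 * queue pair resources. On failure the link is reset to the unused state.
 */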
283 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
284                    u8 link_idx, struct smc_init_info *ini)
285 {
286         u8 rndvec[3];
287         int rc;
288
289         get_device(&ini->ib_dev->ibdev->dev);
290         atomic_inc(&ini->ib_dev->lnk_cnt);
291         lnk->state = SMC_LNK_ACTIVATING;
292         lnk->link_id = smcr_next_link_id(lgr);
293         lnk->lgr = lgr;
294         lnk->link_idx = link_idx;
295         lnk->smcibdev = ini->ib_dev;
296         lnk->ibport = ini->ib_port;
297         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
298         INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
299         if (!ini->ib_dev->initialized) {
                rc = smc_ib_setup_per_ibdev(ini->ib_dev);
301                 if (rc)
302                         goto out;
303         }
304         get_random_bytes(rndvec, sizeof(rndvec));
305         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
306                 (rndvec[2] << 16);
307         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
308                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
309         if (rc)
310                 goto out;
311         rc = smc_llc_link_init(lnk);
312         if (rc)
313                 goto out;
314         rc = smc_wr_alloc_link_mem(lnk);
315         if (rc)
316                 goto clear_llc_lnk;
317         rc = smc_ib_create_protection_domain(lnk);
318         if (rc)
319                 goto free_link_mem;
320         rc = smc_ib_create_queue_pair(lnk);
321         if (rc)
322                 goto dealloc_pd;
323         rc = smc_wr_create_link(lnk);
324         if (rc)
325                 goto destroy_qp;
326         return 0;
327
328 destroy_qp:
329         smc_ib_destroy_queue_pair(lnk);
330 dealloc_pd:
331         smc_ib_dealloc_protection_domain(lnk);
332 free_link_mem:
333         smc_wr_free_link_mem(lnk);
334 clear_llc_lnk:
335         smc_llc_link_clear(lnk);
336 out:
337         put_device(&ini->ib_dev->ibdev->dev);
338         memset(lnk, 0, sizeof(struct smc_link));
339         lnk->state = SMC_LNK_UNUSED;
340         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
341                 wake_up(&ini->ib_dev->lnks_deleted);
342         return rc;
343 }
344
345 /* create a new SMC link group */
346 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
347 {
348         struct smc_link_group *lgr;
349         struct list_head *lgr_list;
350         struct smc_link *lnk;
351         spinlock_t *lgr_lock;
352         u8 link_idx;
353         int rc = 0;
354         int i;
355
356         if (ini->is_smcd && ini->vlan_id) {
357                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
358                         rc = SMC_CLC_DECL_ISMVLANERR;
359                         goto out;
360                 }
361         }
362
363         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
364         if (!lgr) {
365                 rc = SMC_CLC_DECL_MEM;
366                 goto ism_put_vlan;
367         }
368         lgr->is_smcd = ini->is_smcd;
369         lgr->sync_err = 0;
370         lgr->terminating = 0;
371         lgr->freefast = 0;
372         lgr->freeing = 0;
373         lgr->vlan_id = ini->vlan_id;
374         mutex_init(&lgr->sndbufs_lock);
375         mutex_init(&lgr->rmbs_lock);
376         rwlock_init(&lgr->conns_lock);
377         for (i = 0; i < SMC_RMBE_SIZES; i++) {
378                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
379                 INIT_LIST_HEAD(&lgr->rmbs[i]);
380         }
381         lgr->next_link_id = 0;
382         smc_lgr_list.num += SMC_LGR_NUM_INCR;
383         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
384         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
385         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
386         lgr->conns_all = RB_ROOT;
387         if (ini->is_smcd) {
388                 /* SMC-D specific settings */
389                 get_device(&ini->ism_dev->dev);
390                 lgr->peer_gid = ini->ism_gid;
391                 lgr->smcd = ini->ism_dev;
392                 lgr_list = &ini->ism_dev->lgr_list;
393                 lgr_lock = &lgr->smcd->lgr_lock;
394                 lgr->peer_shutdown = 0;
395                 atomic_inc(&ini->ism_dev->lgr_cnt);
396         } else {
397                 /* SMC-R specific settings */
398                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
399                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
400                        SMC_SYSTEMID_LEN);
401                 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
402                        SMC_MAX_PNETID_LEN);
403                 smc_llc_lgr_init(lgr, smc);
404
405                 link_idx = SMC_SINGLE_LINK;
406                 lnk = &lgr->lnk[link_idx];
407                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
408                 if (rc)
409                         goto free_lgr;
410                 lgr_list = &smc_lgr_list.list;
411                 lgr_lock = &smc_lgr_list.lock;
412                 atomic_inc(&lgr_cnt);
413         }
414         smc->conn.lgr = lgr;
415         spin_lock_bh(lgr_lock);
416         list_add(&lgr->list, lgr_list);
417         spin_unlock_bh(lgr_lock);
418         return 0;
419
420 free_lgr:
421         kfree(lgr);
422 ism_put_vlan:
423         if (ini->is_smcd && ini->vlan_id)
424                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
425 out:
426         if (rc < 0) {
427                 if (rc == -ENOMEM)
428                         rc = SMC_CLC_DECL_MEM;
429                 else
430                         rc = SMC_CLC_DECL_INTERR;
431         }
432         return rc;
433 }
434
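/* Determine the free space in the peer's receive buffer, based on the
 * local producer and consumer cursors.
 */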
435 static int smc_write_space(struct smc_connection *conn)
436 {
437         int buffer_len = conn->peer_rmbe_size;
438         union smc_host_cursor prod;
439         union smc_host_cursor cons;
440         int space;
441
442         smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
443         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
444         /* determine rx_buf space */
445         space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
446         return space;
447 }
448
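/* After a connection was moved to another link, rewind the tx cursors to
 * the last state confirmed by the peer, so unconfirmed data is sent again
 * over the new link, and fix up the cursors if the consumer cursor
 * overtook the rewound producer cursor.
 */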
449 static int smc_switch_cursor(struct smc_sock *smc)
450 {
451         struct smc_connection *conn = &smc->conn;
452         union smc_host_cursor cons, fin;
453         int rc = 0;
454         int diff;
455
456         smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
457         smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
458         /* set prod cursor to old state, enforce tx_rdma_writes() */
459         smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
460         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
461
462         if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
                /* cons cursor advanced more than fin, and prod was set
                 * to fin above, so now prod is smaller than cons. Fix that.
                 */
466                 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
467                 smc_curs_add(conn->sndbuf_desc->len,
468                              &conn->tx_curs_sent, diff);
469                 smc_curs_add(conn->sndbuf_desc->len,
470                              &conn->tx_curs_fin, diff);
471
472                 smp_mb__before_atomic();
473                 atomic_add(diff, &conn->sndbuf_space);
474                 smp_mb__after_atomic();
475
476                 smc_curs_add(conn->peer_rmbe_size,
477                              &conn->local_tx_ctrl.prod, diff);
478                 smc_curs_add(conn->peer_rmbe_size,
479                              &conn->local_tx_ctrl_fin, diff);
480         }
481         /* recalculate, value is used by tx_rdma_writes() */
482         atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
483
484         if (smc->sk.sk_state != SMC_INIT &&
485             smc->sk.sk_state != SMC_CLOSED) {
486                 rc = smcr_cdc_msg_send_validation(conn);
487                 if (!rc) {
488                         schedule_delayed_work(&conn->tx_work, 0);
489                         smc->sk.sk_data_ready(&smc->sk);
490                 }
491         }
492         return rc;
493 }
494
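/* Switch all connections of a link group away from a no longer usable
 * link to another active link. Terminates the link group if no other
 * active link is available; returns the new link, or NULL on failure.
 */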
495 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
496                                   struct smc_link *from_lnk, bool is_dev_err)
497 {
498         struct smc_link *to_lnk = NULL;
499         struct smc_connection *conn;
500         struct smc_sock *smc;
501         struct rb_node *node;
502         int i, rc = 0;
503
504         /* link is inactive, wake up tx waiters */
505         smc_wr_wakeup_tx_wait(from_lnk);
506
507         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
508                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
509                     i == from_lnk->link_idx)
510                         continue;
511                 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
512                     from_lnk->ibport == lgr->lnk[i].ibport) {
513                         continue;
514                 }
515                 to_lnk = &lgr->lnk[i];
516                 break;
517         }
518         if (!to_lnk) {
519                 smc_lgr_terminate_sched(lgr);
520                 return NULL;
521         }
522 again:
523         read_lock_bh(&lgr->conns_lock);
524         for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
525                 conn = rb_entry(node, struct smc_connection, alert_node);
526                 if (conn->lnk != from_lnk)
527                         continue;
528                 smc = container_of(conn, struct smc_sock, conn);
529                 /* conn->lnk not yet set in SMC_INIT state */
530                 if (smc->sk.sk_state == SMC_INIT)
531                         continue;
532                 if (smc->sk.sk_state == SMC_CLOSED ||
533                     smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
534                     smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
535                     smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
536                     smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
537                     smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
538                     smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
539                     smc->sk.sk_state == SMC_PEERABORTWAIT ||
540                     smc->sk.sk_state == SMC_PROCESSABORT) {
541                         spin_lock_bh(&conn->send_lock);
542                         conn->lnk = to_lnk;
543                         spin_unlock_bh(&conn->send_lock);
544                         continue;
545                 }
546                 sock_hold(&smc->sk);
547                 read_unlock_bh(&lgr->conns_lock);
548                 /* avoid race with smcr_tx_sndbuf_nonempty() */
549                 spin_lock_bh(&conn->send_lock);
550                 conn->lnk = to_lnk;
551                 rc = smc_switch_cursor(smc);
552                 spin_unlock_bh(&conn->send_lock);
553                 sock_put(&smc->sk);
554                 if (rc) {
555                         smcr_link_down_cond_sched(to_lnk);
556                         return NULL;
557                 }
558                 goto again;
559         }
560         read_unlock_bh(&lgr->conns_lock);
561         return to_lnk;
562 }
563
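/* Give up use of an RMB: delete the rkey registered with the peer if
 * needed, and free the buffer if its registration had failed, since such
 * a buffer cannot be reused.
 */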
564 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
565                            struct smc_link_group *lgr)
566 {
567         int rc;
568
569         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
570                 /* unregister rmb with peer */
571                 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
572                 if (!rc) {
573                         /* protect against smc_llc_cli_rkey_exchange() */
574                         mutex_lock(&lgr->llc_conf_mutex);
575                         smc_llc_do_delete_rkey(lgr, rmb_desc);
576                         rmb_desc->is_conf_rkey = false;
577                         mutex_unlock(&lgr->llc_conf_mutex);
578                         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
579                 }
580         }
581
582         if (rmb_desc->is_reg_err) {
583                 /* buf registration failed, reuse not possible */
584                 mutex_lock(&lgr->rmbs_lock);
585                 list_del(&rmb_desc->list);
586                 mutex_unlock(&lgr->rmbs_lock);
587
588                 smc_buf_free(lgr, true, rmb_desc);
589         } else {
590                 rmb_desc->used = 0;
591         }
592 }
593
594 static void smc_buf_unuse(struct smc_connection *conn,
595                           struct smc_link_group *lgr)
596 {
597         if (conn->sndbuf_desc)
598                 conn->sndbuf_desc->used = 0;
599         if (conn->rmb_desc && lgr->is_smcd)
600                 conn->rmb_desc->used = 0;
601         else if (conn->rmb_desc)
602                 smcr_buf_unuse(conn->rmb_desc, lgr);
603 }
604
605 /* remove a finished connection from its link group */
606 void smc_conn_free(struct smc_connection *conn)
607 {
608         struct smc_link_group *lgr = conn->lgr;
609
610         if (!lgr)
611                 return;
612         if (lgr->is_smcd) {
613                 if (!list_empty(&lgr->list))
614                         smc_ism_unset_conn(conn);
615                 tasklet_kill(&conn->rx_tsklet);
616         } else {
617                 smc_cdc_tx_dismiss_slots(conn);
618         }
619         if (!list_empty(&lgr->list)) {
620                 smc_lgr_unregister_conn(conn);
621                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
622         }
623
624         if (!lgr->conns_num)
625                 smc_lgr_schedule_free_work(lgr);
626 }
627
628 /* unregister a link from a buf_desc */
629 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
630                                 struct smc_link *lnk)
631 {
632         if (is_rmb)
633                 buf_desc->is_reg_mr[lnk->link_idx] = false;
634         if (!buf_desc->is_map_ib[lnk->link_idx])
635                 return;
636         if (is_rmb) {
637                 if (buf_desc->mr_rx[lnk->link_idx]) {
638                         smc_ib_put_memory_region(
639                                         buf_desc->mr_rx[lnk->link_idx]);
640                         buf_desc->mr_rx[lnk->link_idx] = NULL;
641                 }
642                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
643         } else {
644                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
645         }
646         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
647         buf_desc->is_map_ib[lnk->link_idx] = false;
648 }
649
650 /* unmap all buffers of lgr for a deleted link */
651 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
652 {
653         struct smc_link_group *lgr = lnk->lgr;
654         struct smc_buf_desc *buf_desc, *bf;
655         int i;
656
657         for (i = 0; i < SMC_RMBE_SIZES; i++) {
658                 mutex_lock(&lgr->rmbs_lock);
659                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
660                         smcr_buf_unmap_link(buf_desc, true, lnk);
661                 mutex_unlock(&lgr->rmbs_lock);
662                 mutex_lock(&lgr->sndbufs_lock);
663                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
664                                          list)
665                         smcr_buf_unmap_link(buf_desc, false, lnk);
666                 mutex_unlock(&lgr->sndbufs_lock);
667         }
668 }
669
670 static void smcr_rtoken_clear_link(struct smc_link *lnk)
671 {
672         struct smc_link_group *lgr = lnk->lgr;
673         int i;
674
675         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
676                 lgr->rtokens[i][lnk->link_idx].rkey = 0;
677                 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
678         }
679 }
680
681 /* must be called under lgr->llc_conf_mutex lock */
682 void smcr_link_clear(struct smc_link *lnk)
683 {
684         struct smc_ib_device *smcibdev;
685
686         if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
687                 return;
688         lnk->peer_qpn = 0;
689         smc_llc_link_clear(lnk);
690         smcr_buf_unmap_lgr(lnk);
691         smcr_rtoken_clear_link(lnk);
692         smc_ib_modify_qp_reset(lnk);
693         smc_wr_free_link(lnk);
694         smc_ib_destroy_queue_pair(lnk);
695         smc_ib_dealloc_protection_domain(lnk);
696         smc_wr_free_link_mem(lnk);
697         put_device(&lnk->smcibdev->ibdev->dev);
698         smcibdev = lnk->smcibdev;
699         memset(lnk, 0, sizeof(struct smc_link));
700         lnk->state = SMC_LNK_UNUSED;
701         if (!atomic_dec_return(&smcibdev->lnk_cnt))
702                 wake_up(&smcibdev->lnks_deleted);
703 }
704
705 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
706                           struct smc_buf_desc *buf_desc)
707 {
708         int i;
709
710         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
711                 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
712
713         if (buf_desc->pages)
714                 __free_pages(buf_desc->pages, buf_desc->order);
715         kfree(buf_desc);
716 }
717
718 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
719                           struct smc_buf_desc *buf_desc)
720 {
721         if (is_dmb) {
722                 /* restore original buf len */
723                 buf_desc->len += sizeof(struct smcd_cdc_msg);
724                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
725         } else {
726                 kfree(buf_desc->cpu_addr);
727         }
728         kfree(buf_desc);
729 }
730
731 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
732                          struct smc_buf_desc *buf_desc)
733 {
734         if (lgr->is_smcd)
735                 smcd_buf_free(lgr, is_rmb, buf_desc);
736         else
737                 smcr_buf_free(lgr, is_rmb, buf_desc);
738 }
739
740 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
741 {
742         struct smc_buf_desc *buf_desc, *bf_desc;
743         struct list_head *buf_list;
744         int i;
745
746         for (i = 0; i < SMC_RMBE_SIZES; i++) {
747                 if (is_rmb)
748                         buf_list = &lgr->rmbs[i];
749                 else
750                         buf_list = &lgr->sndbufs[i];
751                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
752                                          list) {
753                         list_del(&buf_desc->list);
754                         smc_buf_free(lgr, is_rmb, buf_desc);
755                 }
756         }
757 }
758
759 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
760 {
761         /* free send buffers */
762         __smc_lgr_free_bufs(lgr, false);
763         /* free rmbs */
764         __smc_lgr_free_bufs(lgr, true);
765 }
766
767 /* remove a link group */
768 static void smc_lgr_free(struct smc_link_group *lgr)
769 {
770         int i;
771
772         smc_lgr_free_bufs(lgr);
773         if (lgr->is_smcd) {
774                 if (!lgr->terminating) {
775                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
776                         put_device(&lgr->smcd->dev);
777                 }
778                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
779                         wake_up(&lgr->smcd->lgrs_deleted);
780         } else {
781                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
782                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
783                                 smcr_link_clear(&lgr->lnk[i]);
784                 }
785                 smc_llc_lgr_clear(lgr);
786                 if (!atomic_dec_return(&lgr_cnt))
787                         wake_up(&lgrs_deleted);
788         }
789         kfree(lgr);
790 }
791
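/* Unregister all DMBs of a terminating SMC-D link group from the ISM
 * device, restoring the original buffer lengths.
 */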
792 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
793 {
794         int i;
795
796         for (i = 0; i < SMC_RMBE_SIZES; i++) {
797                 struct smc_buf_desc *buf_desc;
798
799                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
800                         buf_desc->len += sizeof(struct smcd_cdc_msg);
801                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
802                 }
803         }
804 }
805
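/* Wake up all contexts waiting on this socket */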
806 static void smc_sk_wake_ups(struct smc_sock *smc)
807 {
808         smc->sk.sk_write_space(&smc->sk);
809         smc->sk.sk_data_ready(&smc->sk);
810         smc->sk.sk_state_change(&smc->sk);
811 }
812
813 /* kill a connection */
814 static void smc_conn_kill(struct smc_connection *conn, bool soft)
815 {
816         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
817
818         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
819                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
820         else
821                 smc_close_abort(conn);
822         conn->killed = 1;
823         smc->sk.sk_err = ECONNABORTED;
824         smc_sk_wake_ups(smc);
825         if (conn->lgr->is_smcd) {
826                 smc_ism_unset_conn(conn);
827                 if (soft)
828                         tasklet_kill(&conn->rx_tsklet);
829                 else
830                         tasklet_unlock_wait(&conn->rx_tsklet);
831         } else {
832                 smc_cdc_tx_dismiss_slots(conn);
833         }
834         smc_lgr_unregister_conn(conn);
835         smc_close_active_abort(smc);
836 }
837
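/* Clean up a terminating link group: signal the shutdown to the peer and
 * release ISM device resources (SMC-D), or deactivate all links and wake
 * up the LLC waiters (SMC-R).
 */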
838 static void smc_lgr_cleanup(struct smc_link_group *lgr)
839 {
840         int i;
841
842         if (lgr->is_smcd) {
843                 smc_ism_signal_shutdown(lgr);
844                 smcd_unregister_all_dmbs(lgr);
845                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
846                 put_device(&lgr->smcd->dev);
847         } else {
848                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
849                         struct smc_link *lnk = &lgr->lnk[i];
850
851                         if (smc_link_usable(lnk))
852                                 lnk->state = SMC_LNK_INACTIVE;
853                 }
854                 wake_up_interruptible_all(&lgr->llc_waiter);
855         }
856 }
857
858 /* terminate link group
859  * @soft: true if link group shutdown can take its time
860  *        false if immediate link group shutdown is required
861  */
862 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
863 {
864         struct smc_connection *conn;
865         struct smc_sock *smc;
866         struct rb_node *node;
867
868         if (lgr->terminating)
869                 return; /* lgr already terminating */
870         if (!soft)
871                 cancel_delayed_work_sync(&lgr->free_work);
872         lgr->terminating = 1;
873
874         /* kill remaining link group connections */
875         read_lock_bh(&lgr->conns_lock);
876         node = rb_first(&lgr->conns_all);
877         while (node) {
878                 read_unlock_bh(&lgr->conns_lock);
879                 conn = rb_entry(node, struct smc_connection, alert_node);
880                 smc = container_of(conn, struct smc_sock, conn);
881                 sock_hold(&smc->sk); /* sock_put below */
882                 lock_sock(&smc->sk);
883                 smc_conn_kill(conn, soft);
884                 release_sock(&smc->sk);
885                 sock_put(&smc->sk); /* sock_hold above */
886                 read_lock_bh(&lgr->conns_lock);
887                 node = rb_first(&lgr->conns_all);
888         }
889         read_unlock_bh(&lgr->conns_lock);
890         smc_lgr_cleanup(lgr);
891         if (soft)
892                 smc_lgr_schedule_free_work_fast(lgr);
893         else
894                 smc_lgr_free(lgr);
895 }
896
897 /* unlink link group and schedule termination */
898 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
899 {
900         spinlock_t *lgr_lock;
901
902         smc_lgr_list_head(lgr, &lgr_lock);
903         spin_lock_bh(lgr_lock);
904         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
905                 spin_unlock_bh(lgr_lock);
906                 return; /* lgr already terminating */
907         }
908         list_del_init(&lgr->list);
909         spin_unlock_bh(lgr_lock);
910         schedule_work(&lgr->terminate_work);
911 }
912
/* Called when a peer lgr shutdown (regular or abnormal) is received */
914 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
915 {
916         struct smc_link_group *lgr, *l;
917         LIST_HEAD(lgr_free_list);
918
919         /* run common cleanup function and build free list */
920         spin_lock_bh(&dev->lgr_lock);
921         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
922                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
923                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
924                         if (peer_gid) /* peer triggered termination */
925                                 lgr->peer_shutdown = 1;
926                         list_move(&lgr->list, &lgr_free_list);
927                 }
928         }
929         spin_unlock_bh(&dev->lgr_lock);
930
        /* schedule termination for all affected link groups */
932         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
933                 list_del_init(&lgr->list);
934                 schedule_work(&lgr->terminate_work);
935         }
936 }
937
938 /* Called when an SMCD device is removed or the smc module is unloaded */
939 void smc_smcd_terminate_all(struct smcd_dev *smcd)
940 {
941         struct smc_link_group *lgr, *lg;
942         LIST_HEAD(lgr_free_list);
943
944         spin_lock_bh(&smcd->lgr_lock);
945         list_splice_init(&smcd->lgr_list, &lgr_free_list);
946         list_for_each_entry(lgr, &lgr_free_list, list)
947                 lgr->freeing = 1;
948         spin_unlock_bh(&smcd->lgr_lock);
949
950         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
951                 list_del_init(&lgr->list);
952                 __smc_lgr_terminate(lgr, false);
953         }
954
955         if (atomic_read(&smcd->lgr_cnt))
956                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
957 }
958
959 /* Called when an SMCR device is removed or the smc module is unloaded.
960  * If smcibdev is given, all SMCR link groups using this device are terminated.
961  * If smcibdev is NULL, all SMCR link groups are terminated.
962  */
963 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
964 {
965         struct smc_link_group *lgr, *lg;
966         LIST_HEAD(lgr_free_list);
967         int i;
968
969         spin_lock_bh(&smc_lgr_list.lock);
970         if (!smcibdev) {
971                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
972                 list_for_each_entry(lgr, &lgr_free_list, list)
973                         lgr->freeing = 1;
974         } else {
975                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
976                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
977                                 if (lgr->lnk[i].smcibdev == smcibdev)
978                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
979                         }
980                 }
981         }
982         spin_unlock_bh(&smc_lgr_list.lock);
983
984         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
985                 list_del_init(&lgr->list);
986                 __smc_lgr_terminate(lgr, false);
987         }
988
989         if (smcibdev) {
990                 if (atomic_read(&smcibdev->lnk_cnt))
991                         wait_event(smcibdev->lnks_deleted,
992                                    !atomic_read(&smcibdev->lnk_cnt));
993         } else {
994                 if (atomic_read(&lgr_cnt))
995                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
996         }
997 }
998
999 /* link is up - establish alternate link if applicable */
1000 static void smcr_link_up(struct smc_link_group *lgr,
1001                          struct smc_ib_device *smcibdev, u8 ibport)
1002 {
1003         struct smc_link *link = NULL;
1004
1005         if (list_empty(&lgr->list) ||
1006             lgr->type == SMC_LGR_SYMMETRIC ||
1007             lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1008                 return;
1009
1010         if (lgr->role == SMC_SERV) {
1011                 /* trigger local add link processing */
1012                 link = smc_llc_usable_link(lgr);
1013                 if (!link)
1014                         return;
1015                 smc_llc_srv_add_link_local(link);
1016         } else {
1017                 /* invite server to start add link processing */
1018                 u8 gid[SMC_GID_SIZE];
1019
1020                 if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
1021                                          NULL))
1022                         return;
1023                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1024                         /* some other llc task is ongoing */
1025                         wait_event_interruptible_timeout(lgr->llc_waiter,
1026                                 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1027                                 SMC_LLC_WAIT_TIME);
1028                 }
1029                 if (list_empty(&lgr->list) ||
1030                     !smc_ib_port_active(smcibdev, ibport))
1031                         return; /* lgr or device no longer active */
1032                 link = smc_llc_usable_link(lgr);
1033                 if (!link)
1034                         return;
1035                 smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
1036                                       NULL, SMC_LLC_REQ);
1037         }
1038 }
1039
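/* An IB port became active: start a link-up worker for each link group on
 * this port's pnetid that could still use an additional link.
 */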
1040 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1041 {
1042         struct smc_ib_up_work *ib_work;
1043         struct smc_link_group *lgr, *n;
1044
1045         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1046                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1047                             SMC_MAX_PNETID_LEN) ||
1048                     lgr->type == SMC_LGR_SYMMETRIC ||
1049                     lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1050                         continue;
1051                 ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
1052                 if (!ib_work)
1053                         continue;
1054                 INIT_WORK(&ib_work->work, smc_link_up_work);
1055                 ib_work->lgr = lgr;
1056                 ib_work->smcibdev = smcibdev;
1057                 ib_work->ibport = ibport;
1058                 schedule_work(&ib_work->work);
1059         }
1060 }
1061
1062 /* link is down - switch connections to alternate link,
1063  * must be called under lgr->llc_conf_mutex lock
1064  */
1065 static void smcr_link_down(struct smc_link *lnk)
1066 {
1067         struct smc_link_group *lgr = lnk->lgr;
1068         struct smc_link *to_lnk;
1069         int del_link_id;
1070
1071         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1072                 return;
1073
1074         smc_ib_modify_qp_reset(lnk);
1075         to_lnk = smc_switch_conns(lgr, lnk, true);
1076         if (!to_lnk) { /* no backup link available */
1077                 smcr_link_clear(lnk);
1078                 return;
1079         }
1080         lgr->type = SMC_LGR_SINGLE;
1081         del_link_id = lnk->link_id;
1082
1083         if (lgr->role == SMC_SERV) {
1084                 /* trigger local delete link processing */
1085                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1086         } else {
1087                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1088                         /* another llc task is ongoing */
1089                         mutex_unlock(&lgr->llc_conf_mutex);
1090                         wait_event_interruptible_timeout(lgr->llc_waiter,
1091                                 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1092                                 SMC_LLC_WAIT_TIME);
1093                         mutex_lock(&lgr->llc_conf_mutex);
1094                 }
1095                 smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
1096                                          SMC_LLC_DEL_LOST_PATH);
1097         }
1098 }
1099
1100 /* must be called under lgr->llc_conf_mutex lock */
1101 void smcr_link_down_cond(struct smc_link *lnk)
1102 {
1103         if (smc_link_downing(&lnk->state))
1104                 smcr_link_down(lnk);
1105 }
1106
1107 /* will get the lgr->llc_conf_mutex lock */
1108 void smcr_link_down_cond_sched(struct smc_link *lnk)
1109 {
1110         if (smc_link_downing(&lnk->state))
1111                 schedule_work(&lnk->link_down_wrk);
1112 }
1113
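/* An IB port failed: schedule link-down processing for every usable link
 * of each link group using this port.
 */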
1114 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1115 {
1116         struct smc_link_group *lgr, *n;
1117         int i;
1118
1119         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1120                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1121                             SMC_MAX_PNETID_LEN))
1122                         continue; /* lgr is not affected */
1123                 if (list_empty(&lgr->list))
1124                         continue;
1125                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1126                         struct smc_link *lnk = &lgr->lnk[i];
1127
1128                         if (smc_link_usable(lnk) &&
1129                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1130                                 smcr_link_down_cond_sched(lnk);
1131                 }
1132         }
1133 }
1134
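/* Worker to process a link-up event for a link group */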
1135 static void smc_link_up_work(struct work_struct *work)
1136 {
1137         struct smc_ib_up_work *ib_work = container_of(work,
1138                                                       struct smc_ib_up_work,
1139                                                       work);
1140         struct smc_link_group *lgr = ib_work->lgr;
1141
1142         if (list_empty(&lgr->list))
1143                 goto out;
1144         smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
1145 out:
1146         kfree(ib_work);
1147 }
1148
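/* Worker to take a link down, serialized by the llc_conf_mutex */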
1149 static void smc_link_down_work(struct work_struct *work)
1150 {
1151         struct smc_link *link = container_of(work, struct smc_link,
1152                                              link_down_wrk);
1153         struct smc_link_group *lgr = link->lgr;
1154
1155         if (list_empty(&lgr->list))
1156                 return;
1157         wake_up_interruptible_all(&lgr->llc_waiter);
1158         mutex_lock(&lgr->llc_conf_mutex);
1159         smcr_link_down(link);
1160         mutex_unlock(&lgr->llc_conf_mutex);
1161 }
1162
/* Determine the vlan id of the internal TCP socket; the result is
 * stored in ini->vlan_id.
 */
1166 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1167 {
1168         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1169         struct net_device *ndev;
1170         int i, nest_lvl, rc = 0;
1171
1172         ini->vlan_id = 0;
1173         if (!dst) {
1174                 rc = -ENOTCONN;
1175                 goto out;
1176         }
1177         if (!dst->dev) {
1178                 rc = -ENODEV;
1179                 goto out_rel;
1180         }
1181
1182         ndev = dst->dev;
1183         if (is_vlan_dev(ndev)) {
1184                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1185                 goto out_rel;
1186         }
1187
1188         rtnl_lock();
1189         nest_lvl = ndev->lower_level;
1190         for (i = 0; i < nest_lvl; i++) {
1191                 struct list_head *lower = &ndev->adj_list.lower;
1192
1193                 if (list_empty(lower))
1194                         break;
1195                 lower = lower->next;
1196                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
1197                 if (is_vlan_dev(ndev)) {
1198                         ini->vlan_id = vlan_dev_vlan_id(ndev);
1199                         break;
1200                 }
1201         }
1202         rtnl_unlock();
1203
1204 out_rel:
1205         dst_release(dst);
1206 out:
1207         return rc;
1208 }
1209
1210 static bool smcr_lgr_match(struct smc_link_group *lgr,
1211                            struct smc_clc_msg_local *lcl,
1212                            enum smc_lgr_role role, u32 clcqpn)
1213 {
1214         int i;
1215
1216         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
1217             lgr->role != role)
1218                 return false;
1219
1220         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1221                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
1222                         continue;
1223                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
1224                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
1225                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
1226                         return true;
1227         }
1228         return false;
1229 }
1230
1231 static bool smcd_lgr_match(struct smc_link_group *lgr,
1232                            struct smcd_dev *smcismdev, u64 peer_gid)
1233 {
1234         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1235 }
1236
1237 /* create a new SMC connection (and a new link group if necessary) */
1238 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1239 {
1240         struct smc_connection *conn = &smc->conn;
1241         struct list_head *lgr_list;
1242         struct smc_link_group *lgr;
1243         enum smc_lgr_role role;
1244         spinlock_t *lgr_lock;
1245         int rc = 0;
1246
1247         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
1248         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
1249         ini->cln_first_contact = SMC_FIRST_CONTACT;
1250         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1251         if (role == SMC_CLNT && ini->srv_first_contact)
1252                 /* create new link group as well */
1253                 goto create;
1254
1255         /* determine if an existing link group can be reused */
1256         spin_lock_bh(lgr_lock);
1257         list_for_each_entry(lgr, lgr_list, list) {
1258                 write_lock_bh(&lgr->conns_lock);
1259                 if ((ini->is_smcd ?
1260                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
1261                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
1262                     !lgr->sync_err &&
1263                     lgr->vlan_id == ini->vlan_id &&
1264                     (role == SMC_CLNT ||
1265                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
1266                         /* link group found */
1267                         ini->cln_first_contact = SMC_REUSE_CONTACT;
1268                         conn->lgr = lgr;
1269                         rc = smc_lgr_register_conn(conn); /* add conn to lgr */
1270                         write_unlock_bh(&lgr->conns_lock);
1271                         if (!rc && delayed_work_pending(&lgr->free_work))
1272                                 cancel_delayed_work(&lgr->free_work);
1273                         break;
1274                 }
1275                 write_unlock_bh(&lgr->conns_lock);
1276         }
1277         spin_unlock_bh(lgr_lock);
1278         if (rc)
1279                 return rc;
1280
1281         if (role == SMC_CLNT && !ini->srv_first_contact &&
1282             ini->cln_first_contact == SMC_FIRST_CONTACT) {
                /* Server reuses a link group, but client wants to start
                 * a new one; send out_of_sync decline, reason
                 * synchronization error.
                 */
1287                 return SMC_CLC_DECL_SYNCERR;
1288         }
1289
1290 create:
1291         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1292                 rc = smc_lgr_create(smc, ini);
1293                 if (rc)
1294                         goto out;
1295                 lgr = conn->lgr;
1296                 write_lock_bh(&lgr->conns_lock);
1297                 rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
1298                 write_unlock_bh(&lgr->conns_lock);
1299                 if (rc)
1300                         goto out;
1301         }
1302         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1303         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1304         conn->urg_state = SMC_URG_READ;
1305         if (ini->is_smcd) {
1306                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1307                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1308         }
1309 #ifndef KERNEL_HAS_ATOMIC64
1310         spin_lock_init(&conn->acurs_lock);
1311 #endif
1312
1313 out:
1314         return rc;
1315 }
1316
1317 /* convert the RMB size into the compressed notation - minimum 16K.
1318  * In contrast to plain ilog2, this rounds towards the next power of 2,
1319  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1320  */
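/* Example: a requested size of 65536 gives ((65536 - 1) >> 14) = 3 and
 * ilog2(3) + 1 = 2, i.e. the exact 64KB slot; 65537 compresses to 3,
 * i.e. the next larger 128KB slot.
 */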
1321 static u8 smc_compress_bufsize(int size)
1322 {
1323         u8 compressed;
1324
1325         if (size <= SMC_BUF_MIN_SIZE)
1326                 return 0;
1327
1328         size = (size - 1) >> 14;
1329         compressed = ilog2(size) + 1;
1330         if (compressed >= SMC_RMBE_SIZES)
1331                 compressed = SMC_RMBE_SIZES - 1;
1332         return compressed;
1333 }
1334
1335 /* convert the RMB size from compressed notation into integer */
1336 int smc_uncompress_bufsize(u8 compressed)
1337 {
1338         u32 size;
1339
1340         size = 0x00000001 << (((int)compressed) + 14);
1341         return (int)size;
1342 }
1343
1344 /* try to reuse a sndbuf or rmb description slot for a certain
1345  * buffer size; if not available, return NULL
1346  */
1347 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1348                                              struct mutex *lock,
1349                                              struct list_head *buf_list)
1350 {
1351         struct smc_buf_desc *buf_slot;
1352
1353         mutex_lock(lock);
1354         list_for_each_entry(buf_slot, buf_list, list) {
1355                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1356                         mutex_unlock(lock);
1357                         return buf_slot;
1358                 }
1359         }
1360         mutex_unlock(lock);
1361         return NULL;
1362 }
1363
1364 /* one of the conditions for announcing a receiver's current window size is
1365  * that it "results in a minimum increase in the window size of 10% of the
1366  * receive buffer space" [RFC7609]
1367  */
1368 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1369 {
1370         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1371 }
1372
/* map a send or receive buffer to a link */
1374 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1375                              struct smc_link *lnk)
1376 {
1377         int rc;
1378
1379         if (buf_desc->is_map_ib[lnk->link_idx])
1380                 return 0;
1381
1382         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1383         if (rc)
1384                 return rc;
1385         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1386                    buf_desc->cpu_addr, buf_desc->len);
1387
1388         /* map sg table to DMA address */
1389         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1390                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1391         /* SMC protocol depends on mapping to one DMA address only */
1392         if (rc != 1) {
1393                 rc = -EAGAIN;
1394                 goto free_table;
1395         }
1396
1397         /* create a new memory region for the RMB */
1398         if (is_rmb) {
1399                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1400                                               IB_ACCESS_REMOTE_WRITE |
1401                                               IB_ACCESS_LOCAL_WRITE,
1402                                               buf_desc, lnk->link_idx);
1403                 if (rc)
1404                         goto buf_unmap;
1405                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1406         }
1407         buf_desc->is_map_ib[lnk->link_idx] = true;
1408         return 0;
1409
1410 buf_unmap:
1411         smc_ib_buf_unmap_sg(lnk, buf_desc,
1412                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1413 free_table:
1414         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1415         return rc;
1416 }
1417
1418 /* register a new rmb on IB device,
1419  * must be called under lgr->llc_conf_mutex lock
1420  */
1421 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1422 {
1423         if (list_empty(&link->lgr->list))
1424                 return -ENOLINK;
1425         if (!rmb_desc->is_reg_mr[link->link_idx]) {
1426                 /* register memory region for new rmb */
1427                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1428                         rmb_desc->is_reg_err = true;
1429                         return -EFAULT;
1430                 }
1431                 rmb_desc->is_reg_mr[link->link_idx] = true;
1432         }
1433         return 0;
1434 }
1435
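/* Map all used buffers of one buffer list to a new link, under the
 * corresponding buffer list lock.
 */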
1436 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1437                              struct list_head *lst, bool is_rmb)
1438 {
1439         struct smc_buf_desc *buf_desc, *bf;
1440         int rc = 0;
1441
1442         mutex_lock(lock);
1443         list_for_each_entry_safe(buf_desc, bf, lst, list) {
1444                 if (!buf_desc->used)
1445                         continue;
1446                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1447                 if (rc)
1448                         goto out;
1449         }
1450 out:
1451         mutex_unlock(lock);
1452         return rc;
1453 }
1454
1455 /* map all used buffers of lgr for a new link */
1456 int smcr_buf_map_lgr(struct smc_link *lnk)
1457 {
1458         struct smc_link_group *lgr = lnk->lgr;
1459         int i, rc = 0;
1460
1461         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1462                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1463                                        &lgr->rmbs[i], true);
1464                 if (rc)
1465                         return rc;
1466                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1467                                        &lgr->sndbufs[i], false);
1468                 if (rc)
1469                         return rc;
1470         }
1471         return 0;
1472 }

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_buf_desc *buf_desc, *bf;
        int i, rc = 0;

        mutex_lock(&lgr->rmbs_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
                        if (!buf_desc->used)
                                continue;
                        rc = smcr_link_reg_rmb(lnk, buf_desc);
                        if (rc)
                                goto out;
                }
        }
out:
        mutex_unlock(&lgr->rmbs_lock);
        return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                                                bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        buf_desc->order = get_order(bufsize);
        buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
                                      __GFP_NOMEMALLOC | __GFP_COMP |
                                      __GFP_NORETRY | __GFP_ZERO,
                                      buf_desc->order);
        if (!buf_desc->pages) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
        buf_desc->len = bufsize;
        return buf_desc;
}
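
/* The GFP flags above are deliberate: __GFP_NOWARN and __GFP_NORETRY let
 * a high-order allocation fail fast and quietly so the caller can fall
 * back to the next smaller buffer size, __GFP_NOMEMALLOC keeps emergency
 * reserves untouched, and __GFP_COMP allocates the pages as one compound
 * page, matching the single-DMA-address requirement of the mapping code.
 */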

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
                                     struct smc_buf_desc *buf_desc, bool is_rmb)
{
        int i, rc = 0;

        /* protect against parallel link reconfiguration */
        mutex_lock(&lgr->llc_conf_mutex);
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];

                if (!smc_link_usable(lnk))
                        continue;
                if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
                        rc = -ENOMEM;
                        goto out;
                }
        }
out:
        mutex_unlock(&lgr->llc_conf_mutex);
        return rc;
}

#define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        int rc;

        if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
                return ERR_PTR(-EAGAIN);

        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);
        if (is_dmb) {
                rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
                if (rc) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
                /* CDC header stored in buf. So, pretend it was smaller */
                buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
        } else {
                buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
                                             __GFP_NOWARN | __GFP_NORETRY |
                                             __GFP_NOMEMALLOC);
                if (!buf_desc->cpu_addr) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->len = bufsize;
        }
        return buf_desc;
}
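
/* For SMC-D the CDC header lives inside the DMB itself, so the usable
 * length reported in buf_desc->len is reduced accordingly; SMC-R carries
 * CDC messages out of band and reports the full buffer size.
 */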

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        struct mutex *lock;     /* lock buffer list */
        int sk_buf_size;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;

        for (bufsize_short = smc_compress_bufsize(sk_buf_size);
             bufsize_short >= 0; bufsize_short--) {

                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
                if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
                        continue;

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
                }

                if (is_smcd)
                        buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
                else
                        buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
                        continue;

                buf_desc->used = 1;
                mutex_lock(lock);
                list_add(&buf_desc->list, buf_list);
                mutex_unlock(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return -ENOMEM;

        if (!is_smcd) {
                if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
                        smcr_buf_unuse(buf_desc, lgr);
                        return -ENOMEM;
                }
        }

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
                if (is_smcd)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}
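
/* Summary of the search loop above (descriptive): start from half the
 * socket buffer size, walk the compressed buffer sizes downwards, and
 * for each size first try to reuse a released descriptor of this link
 * group before allocating a new one. -ENOMEM aborts the search
 * immediately, while -EAGAIN (e.g. a failed high-order page allocation)
 * simply moves on to the next smaller size.
 */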

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
        if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
                return;
        smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
                return;
        smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        int i;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_usable(&conn->lgr->lnk[i]))
                        continue;
                smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
                                       DMA_FROM_DEVICE);
        }
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
        int i;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_usable(&conn->lgr->lnk[i]))
                        continue;
                smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
                                          DMA_FROM_DEVICE);
        }
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, is_smcd, false);
        if (rc)
                return rc;
        /* create rmb; on failure, release the send buffer again */
        rc = __smc_buf_create(smc, is_smcd, true);
        if (rc)
                smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
        return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}
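
/* test_and_set_bit() makes the reservation atomic: even though the
 * iteration itself is lock-free, two connections racing for the same
 * clear bit cannot both win the slot.
 */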

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
                                   u32 rkey)
{
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (test_bit(i, lgr->rtokens_used_mask) &&
                    lgr->rtokens[i][lnk_idx].rkey == rkey)
                        return i;
        }
        return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
                    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
        int rtok_idx;

        rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
        if (rtok_idx == -ENOENT)
                return;
        lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
        lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
                     __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        bool found = false;
        int link_idx;

        for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
                if (lgr->lnk[link_idx].link_id == link_id) {
                        found = true;
                        break;
                }
        }
        if (!found)
                return;
        lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
        lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
        struct smc_link_group *lgr = smc_get_lgr(lnk);
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
                    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][lnk->link_idx].rkey = rkey;
        lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
        return i;
}
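
/* The rtoken table is two-dimensional: rtokens[rtoken_idx][link_idx]
 * keeps the peer's rkey and RMB address per link, so a connection's
 * rtoken_idx remains valid regardless of which link of the group is
 * currently used for RDMA writes.
 */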

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
        struct smc_link_group *lgr = smc_get_lgr(lnk);
        u32 rkey = ntohl(nw_rkey);
        int i, j;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
                                lgr->rtokens[i][j].rkey = 0;
                                lgr->rtokens[i][j].dma_addr = 0;
                        }
                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_link *lnk,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}

static void smc_core_going_away(void)
{
        struct smc_ib_device *smcibdev;
        struct smcd_dev *smcd;

        spin_lock(&smc_ib_devices.lock);
        list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
                int i;

                for (i = 0; i < SMC_MAX_PORTS; i++)
                        set_bit(i, smcibdev->ports_going_away);
        }
        spin_unlock(&smc_ib_devices.lock);

        spin_lock(&smcd_dev_list.lock);
        list_for_each_entry(smcd, &smcd_dev_list.list, list) {
                smcd->going_away = 1;
        }
        spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
        struct smcd_dev *smcd;

        smc_core_going_away();

        smc_smcr_terminate_all(NULL);

        spin_lock(&smcd_dev_list.lock);
        list_for_each_entry(smcd, &smcd_dev_list.list, list)
                smc_smcd_terminate_all(smcd);
        spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        smc_lgrs_shutdown();
        smc_ib_unregister_client();
        return 0;
}

static struct notifier_block smc_reboot_notifier = {
        .notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
        return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
        unregister_reboot_notifier(&smc_reboot_notifier);
        smc_lgrs_shutdown();
}
1902 }