84f67f601838e019244e83268bfc3a2a92da618d
[sfrench/cifs-2.6.git] / net / smc / af_smc.c
1 /*
2  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
3  *
4  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
5  *  applies to SOCK_STREAM sockets only
6  *  offers an alternative communication option for TCP-protocol sockets
7  *  applicable with RoCE-cards only
8  *
9  *  Initial restrictions:
10  *    - support for alternate links postponed
11  *
12  *  Copyright IBM Corp. 2016, 2018
13  *
14  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
15  *              based on prototype from Frank Blaschka
16  */
17
18 #define KMSG_COMPONENT "smc"
19 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
20
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/workqueue.h>
24 #include <linux/in.h>
25 #include <linux/sched/signal.h>
26 #include <linux/if_vlan.h>
27
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/smc.h>
31 #include <asm/ioctls.h>
32
33 #include "smc.h"
34 #include "smc_clc.h"
35 #include "smc_llc.h"
36 #include "smc_cdc.h"
37 #include "smc_core.h"
38 #include "smc_ib.h"
39 #include "smc_ism.h"
40 #include "smc_pnet.h"
41 #include "smc_tx.h"
42 #include "smc_rx.h"
43 #include "smc_close.h"
44
45 static DEFINE_MUTEX(smc_create_lgr_pending);    /* serialize link group
46                                                  * creation
47                                                  */
48
49 static void smc_tcp_listen_work(struct work_struct *);
50 static void smc_connect_work(struct work_struct *);
51
/* proto .keepalive callback: forward the SO_KEEPALIVE setting to the
 * internal TCP (CLC) socket, which carries the actual transport state
 */
static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
58
/* socket hash tables for the IPv4 and IPv6 SMC protocol variants;
 * each is protected by its embedded rwlock
 */
static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
66
/* add an SMC socket to its protocol hash table and bump the per-net
 * "inuse" counter; always succeeds and returns 0
 */
int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);
82
/* remove an SMC socket from its protocol hash table; the "inuse" counter
 * is only decremented if the socket was actually hashed
 */
void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);
93
/* protocol definition for AF_SMC over IPv4; obj_size covers the whole
 * smc_sock and the hash callbacks use the v4 hash table above
 */
struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
105
/* protocol definition for AF_SMC over IPv6; identical to smc_proto
 * except for the name and the v6 hash table
 */
struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
117
/* close an SMC socket: abort a dangling non-blocking connect, run the
 * active close handshake (unless in fallback mode), release the internal
 * TCP socket and drop the references taken at creation/closing time.
 * Returns 0 or the result of smc_close_active().
 */
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_info && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);	/* wait for smc_connect_work() */
	kfree(smc->connect_info);
	smc->connect_info = NULL;

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}
	if (smc->use_fallback) {
		/* INIT and LISTEN sockets never took the passive-closing
		 * reference, so only put it for the other states
		 */
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
		smc_conn_free(&smc->conn);
	release_sock(sk);

	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
out:
	return rc;
}
172
173 static void smc_destruct(struct sock *sk)
174 {
175         if (sk->sk_state != SMC_CLOSED)
176                 return;
177         if (!sock_flag(sk, SOCK_DEAD))
178                 return;
179
180         sk_refcnt_debug_dec(sk);
181 }
182
/* allocate and initialize a new smc_sock for @sock; selects the IPv4 or
 * IPv6 proto according to @protocol, initializes work items, accept queue
 * and locks, hashes the socket and returns it (with sk_refcnt 1), or NULL
 * if sk_alloc() fails
 */
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}
211
/* bind the internal TCP (CLC) socket to @uaddr; validates the address the
 * same way inet_bind() does, then delegates via kernel_bind().
 * Returns 0 or a negative errno.
 */
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	/* propagate SO_REUSEADDR to the internal TCP socket */
	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}
252
253 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
254                                    unsigned long mask)
255 {
256         /* options we don't get control via setsockopt for */
257         nsk->sk_type = osk->sk_type;
258         nsk->sk_sndbuf = osk->sk_sndbuf;
259         nsk->sk_rcvbuf = osk->sk_rcvbuf;
260         nsk->sk_sndtimeo = osk->sk_sndtimeo;
261         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
262         nsk->sk_mark = osk->sk_mark;
263         nsk->sk_priority = osk->sk_priority;
264         nsk->sk_rcvlowat = osk->sk_rcvlowat;
265         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
266         nsk->sk_err = osk->sk_err;
267
268         nsk->sk_flags &= ~mask;
269         nsk->sk_flags |= osk->sk_flags & mask;
270 }
271
/* SOL_SOCKET flag bits that must be propagated from the smc socket to the
 * internal clc socket
 */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}
293
/* SOL_SOCKET flag bits that are relevant in the reverse direction, from
 * the clc socket back to the smc socket
 */
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
303
/* register a new rmb with the IB device and, if @conf_rkey is set, also
 * exchange a confirm_rkey msg to register it with the peer.
 * Returns 0 on success or -EFAULT, marking the rmb_desc with regerr so
 * the failed registration is remembered.
 */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	/* register memory region for new rmb */
	if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}
322
/* client side of the LLC handshake for the first link of a new link group:
 * wait for CONFIRM LINK from the server, bring the QP to RTS, register the
 * rmb, answer the CONFIRM LINK, then handle the ADD LINK exchange (which
 * is rejected, since only one link is supported so far).
 * Returns 0 or a non-zero SMC_CLC_DECL_* reason code.
 */
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		/* timeout or interrupt: expect a CLC decline from the peer */
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}
383
/* save SMC-R specific peer values from the CLC accept/confirm message */
static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	/* idx - 1 here, unlike the SMC-D variant which uses idx directly */
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
395
/* save SMC-D specific peer values from the CLC accept/confirm message */
static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}
408
409 static void smc_conn_save_peer_info(struct smc_sock *smc,
410                                     struct smc_clc_msg_accept_confirm *clc)
411 {
412         if (smc->conn.lgr->is_smcd)
413                 smcd_conn_save_peer_info(smc, clc);
414         else
415                 smcr_conn_save_peer_info(smc, clc);
416 }
417
/* save the peer's link attributes (QP number, GID, MAC, packet sequence
 * number and QP MTU) from the CLC accept/confirm message
 */
static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
427
/* fall back during connect: the connection continues over the internal
 * TCP socket instead of SMC; always returns 0
 */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;	/* remember why we fell back */
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}
438
/* decline and fall back during connect: for a positive reason_code a CLC
 * decline is sent to the peer before falling back to TCP; a negative
 * reason_code is a hard error and is returned without fallback (dropping
 * the passive-closing reference if still in SMC_INIT)
 */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		/* the peer did not decline itself, so tell it why we do */
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}
459
/* abort connecting: forget the link group if this socket created it,
 * release the smc_create_lgr_pending mutex taken by the caller, free the
 * connection and pass @reason_code through
 */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
	return reason_code;
}
470
471 /* check if there is a rdma device available for this connection. */
472 /* called for connect and listen */
473 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
474                           u8 *ibport, unsigned short vlan_id, u8 gid[])
475 {
476         int reason_code = 0;
477
478         /* PNET table look up: search active ib_device and port
479          * within same PNETID that also contains the ethernet device
480          * used for the internal TCP socket
481          */
482         smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
483                                     gid);
484         if (!(*ibdev))
485                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
486
487         return reason_code;
488 }
489
490 /* check if there is an ISM device available for this connection. */
491 /* called for connect and listen */
492 static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
493 {
494         /* Find ISM device with same PNETID as connecting interface  */
495         smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
496         if (!(*ismdev))
497                 return SMC_CLC_DECL_CNFERR; /* configuration error */
498         return 0;
499 }
500
501 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
502 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
503                                       struct smcd_dev *ismdev,
504                                       unsigned short vlan_id)
505 {
506         if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
507                 return SMC_CLC_DECL_CNFERR;
508         return 0;
509 }
510
511 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
512  * used, the VLAN ID will be registered again during the connection setup.
513  */
514 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
515                                         struct smcd_dev *ismdev,
516                                         unsigned short vlan_id)
517 {
518         if (!is_smcd)
519                 return 0;
520         if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
521                 return SMC_CLC_DECL_CNFERR;
522         return 0;
523 }
524
525 /* CLC handshake during connect */
526 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
527                            struct smc_clc_msg_accept_confirm *aclc,
528                            struct smc_ib_device *ibdev, u8 ibport,
529                            u8 gid[], struct smcd_dev *ismdev)
530 {
531         int rc = 0;
532
533         /* do inband token exchange */
534         rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
535         if (rc)
536                 return rc;
537         /* receive SMC Accept CLC message */
538         return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
539 }
540
/* setup for RDMA connection of client; serialized by the global
 * smc_create_lgr_pending mutex (released here on success, or inside
 * smc_connect_abort() on failure).
 * Returns 0 or a CLC decline reason code.
 */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_create_lgr_pending);
	/* find a matching link group or create a new one */
	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
					ibport, &aclc->lcl, NULL, 0);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		return smc_connect_abort(smc, reason_code, 0);
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* new link: bring the QP into a usable state */
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 local_contact);
	} else {
		/* existing link group: register a not-yet-reused rmb */
		if (!smc->conn.rmb_desc->reused &&
		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
613
/* setup for ISM connection of client; serialized by the global
 * smc_create_lgr_pending mutex (released here on success, or inside
 * smc_connect_abort() on failure).
 * Returns 0 or a CLC decline reason code.
 */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smcd_dev *ismdev)
{
	int local_contact = SMC_FIRST_CONTACT;
	int rc = 0;

	mutex_lock(&smc_create_lgr_pending);
	/* find a matching link group or create a new one */
	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
					NULL, ismdev, aclc->gid);
	if (local_contact < 0)
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, local_contact);
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
648
/* perform steps before actually connecting: determine the available SMC
 * modes (ISM and/or RDMA), run the CLC handshake and set up the mode the
 * peer accepted; falls back to TCP - possibly after sending a CLC decline
 * - whenever SMC cannot be used.
 * Returns 0 on success (including fallback) or a negative error code.
 */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	struct smcd_dev *ismdev;
	u8 gid[SMC_GID_SIZE];
	unsigned short vlan;
	int smc_type;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check for VLAN ID */
	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* check if there is an ism device available */
	if (!smc_check_ism(smc, &ismdev) &&
	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, ismdev);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;	/* peer chose unusable mode */
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* drop the temporary VLAN registration used for the handshake */
	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
	return 0;
}
723
/* worker performing a non-blocking connect: connects the internal TCP
 * socket with the parameters stashed in connect_info, then runs
 * __smc_connect(); the result is reported via sk_err plus a state change
 * (error) or a write-space wakeup (success)
 */
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	int rc;

	lock_sock(&smc->sk);
	rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
			    smc->connect_info->alen, smc->connect_info->flags);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
		goto out;
	}
	if (rc < 0) {
		smc->sk.sk_err = -rc;	/* store positive errno */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (smc->sk.sk_err)
		smc->sk.sk_state_change(&smc->sk);
	else
		smc->sk.sk_write_space(&smc->sk);
	kfree(smc->connect_info);
	smc->connect_info = NULL;
	release_sock(&smc->sk);
}
755
/* connect an SMC socket: validates the address, then either schedules
 * smc_connect_work() and returns -EINPROGRESS (O_NONBLOCK) or connects
 * the internal TCP socket and runs __smc_connect() synchronously
 */
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;	/* signal SMC in SYN */
	if (flags & O_NONBLOCK) {
		if (smc->connect_info) {
			/* a previous non-blocking connect is still pending */
			rc = -EALREADY;
			goto out;
		}
		/* alen bytes of address plus the two ints of connect_info;
		 * presumably sized for struct smc_connect_info - not visible
		 * here, TODO confirm
		 */
		smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
		if (!smc->connect_info) {
			rc = -ENOMEM;
			goto out;
		}
		smc->connect_info->alen = alen;
		/* clear O_NONBLOCK - the worker connects synchronously */
		smc->connect_info->flags = flags ^ O_NONBLOCK;
		memcpy(&smc->connect_info->addr, addr, alen);
		schedule_work(&smc->connect_work);
		rc = -EINPROGRESS;
	} else {
		rc = kernel_connect(smc->clcsock, addr, alen, flags);
		if (rc)
			goto out;

		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}
817
/* accept a connection on the internal TCP (CLC) listen socket and pair it
 * with a freshly allocated smc_sock in *new_smc; the listen sock lock is
 * dropped while allocating/accepting and re-taken before returning.
 * On failure *new_smc is NULL and lsk->sk_err carries the error.
 */
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(lsk);
	if  (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		/* accept failed or listen sock was closed meanwhile:
		 * dispose of the new sock again
		 */
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}
855
/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
        struct smc_sock *par = smc_sk(parent);

        /* the queue holds a reference on the child socket */
        sock_hold(sk); /* sock_put in smc_accept_unlink () */
        spin_lock(&par->accept_q_lock);
        list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
        spin_unlock(&par->accept_q_lock);
        sk_acceptq_added(parent);
}
869
870 /* remove a socket from the accept queue of its parental listening socket */
871 static void smc_accept_unlink(struct sock *sk)
872 {
873         struct smc_sock *par = smc_sk(sk)->listen_smc;
874
875         spin_lock(&par->accept_q_lock);
876         list_del_init(&smc_sk(sk)->accept_q);
877         spin_unlock(&par->accept_q_lock);
878         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
879         sock_put(sk); /* sock_hold in smc_accept_enqueue */
880 }
881
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space.
 * Skips (and finally releases) children that were already closed while
 * queued; returns the first usable child, or NULL if the queue is empty.
 */
struct sock *smc_accept_dequeue(struct sock *parent,
                                struct socket *new_sock)
{
        struct smc_sock *isk, *n;
        struct sock *new_sk;

        list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
                new_sk = (struct sock *)isk;

                smc_accept_unlink(new_sk);
                if (new_sk->sk_state == SMC_CLOSED) {
                        /* closed meanwhile: release its CLC socket and drop
                         * the final reference, then keep scanning the queue
                         */
                        if (isk->clcsock) {
                                sock_release(isk->clcsock);
                                isk->clcsock = NULL;
                        }
                        new_sk->sk_prot->unhash(new_sk);
                        sock_put(new_sk); /* final */
                        continue;
                }
                if (new_sock)
                        sock_graft(new_sk, new_sock);
                return new_sk;
        }
        return NULL;
}
910
/* clean up for a created but never accepted sock:
 * actively close it, detach and release its CLC socket, and drop the
 * remaining references so the sock can be freed
 */
void smc_close_non_accepted(struct sock *sk)
{
        struct smc_sock *smc = smc_sk(sk);

        lock_sock(sk);
        if (!sk->sk_lingertime)
                /* wait for peer closing */
                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
        if (!smc->use_fallback) {
                smc_close_active(smc);
                sock_set_flag(sk, SOCK_DEAD);
                sk->sk_shutdown |= SHUTDOWN_MASK;
        }
        if (smc->clcsock) {
                struct socket *tcp;

                /* detach clcsock from smc before releasing it */
                tcp = smc->clcsock;
                smc->clcsock = NULL;
                sock_release(tcp);
        }
        if (smc->use_fallback) {
                sock_put(sk); /* passive closing */
                sk->sk_state = SMC_CLOSED;
        } else {
                if (sk->sk_state == SMC_CLOSED)
                        smc_conn_free(&smc->conn);
        }
        release_sock(sk);
        sk->sk_prot->unhash(sk);
        sock_put(sk); /* final sock_put */
}
943
/* server side of the CONFIRM LINK / ADD LINK handshake for the first
 * contact over a new link group.
 * Returns 0 on success or an SMC_CLC_DECL_* reason code on failure.
 */
static int smc_serv_conf_first_link(struct smc_sock *smc)
{
        struct net *net = sock_net(smc->clcsock->sk);
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
        int rest;
        int rc;

        link = &lgr->lnk[SMC_SINGLE_LINK];

        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
                return SMC_CLC_DECL_ERR_REGRMB;

        /* send CONFIRM LINK request to client over the RoCE fabric */
        rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
        if (rc < 0)
                return SMC_CLC_DECL_TIMEOUT_CL;

        /* receive CONFIRM LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(
                &link->llc_confirm_resp,
                SMC_LLC_WAIT_FIRST_TIME);
        if (rest <= 0) {
                /* no response in time: the peer is expected to decline;
                 * pick up its CLC DECLINE message and return its code
                 */
                struct smc_clc_msg_decline dclc;

                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE);
                return rc;
        }

        if (link->llc_confirm_resp_rc)
                return SMC_CLC_DECL_RMBE_EC;

        /* send ADD LINK request to client over the RoCE fabric */
        rc = smc_llc_send_add_link(link,
                                   link->smcibdev->mac[link->ibport - 1],
                                   link->gid, SMC_LLC_REQ);
        if (rc < 0)
                return SMC_CLC_DECL_TIMEOUT_AL;

        /* receive ADD LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
                                                         SMC_LLC_WAIT_TIME);
        if (rest <= 0) {
                struct smc_clc_msg_decline dclc;

                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE);
                return rc;
        }

        /* handshake complete; start the link-level keepalive machinery */
        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

        return 0;
}
999
/* listen worker: finish - hand the new sock to the listen sock's accept
 * queue (or close it if listening stopped meanwhile) and wake up accept()
 */
static void smc_listen_out(struct smc_sock *new_smc)
{
        struct smc_sock *lsmc = new_smc->listen_smc;
        struct sock *newsmcsk = &new_smc->sk;

        /* nested lock class: worker may run while accept holds lsk */
        lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
        if (lsmc->sk.sk_state == SMC_LISTEN) {
                smc_accept_enqueue(&lsmc->sk, newsmcsk);
        } else { /* no longer listening */
                smc_close_non_accepted(newsmcsk);
        }
        release_sock(&lsmc->sk);

        /* Wake up accept */
        lsmc->sk.sk_data_ready(&lsmc->sk);
        sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}
1018
/* listen worker: finish in state connected - move the new sock to
 * SMC_ACTIVE and enqueue it for accept
 */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
        struct sock *newsmcsk = &new_smc->sk;

        sk_refcnt_debug_inc(newsmcsk);
        if (newsmcsk->sk_state == SMC_INIT)
                newsmcsk->sk_state = SMC_ACTIVE;

        smc_listen_out(new_smc);
}
1030
/* listen worker: finish in error state - free the connection and pass
 * the sock (now SMC_CLOSED) to smc_listen_out() for disposal
 */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
        struct sock *newsmcsk = &new_smc->sk;

        /* a sock still in SMC_INIT holds the passive-closing reference;
         * drop it before marking the sock closed
         */
        if (newsmcsk->sk_state == SMC_INIT)
                sock_put(&new_smc->sk); /* passive closing */
        newsmcsk->sk_state = SMC_CLOSED;
        smc_conn_free(&new_smc->conn);

        smc_listen_out(new_smc);
}
1043
/* listen worker: decline and fall back if possible.
 * A negative reason_code is an internal error with no fallback; a
 * positive one is a decline reason sent to the peer (unless the peer
 * declined first), after which the connection continues as plain TCP.
 */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
                               int local_contact)
{
        /* RDMA setup failed, switch back to TCP */
        if (local_contact == SMC_FIRST_CONTACT)
                smc_lgr_forget(new_smc->conn.lgr);
        if (reason_code < 0) { /* error, no fallback possible */
                smc_listen_out_err(new_smc);
                return;
        }
        smc_conn_free(&new_smc->conn);
        new_smc->use_fallback = true;
        new_smc->fallback_rsn = reason_code;
        /* no need to send a decline when the peer already declined */
        if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
                        smc_listen_out_err(new_smc);
                        return;
                }
        }
        smc_listen_out_connected(new_smc);
}
1066
/* listen worker: check prefixes - verify the subnet prefixes proposed by
 * the client match the local CLC socket; returns 0 or a decline reason
 */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
                                 struct smc_clc_msg_proposal *pclc)
{
        struct smc_clc_msg_proposal_prefix *pclc_prfx;
        struct socket *newclcsock = new_smc->clcsock;

        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
        if (smc_clc_prfx_match(newclcsock, pclc_prfx))
                return SMC_CLC_DECL_CNFERR;

        return 0;
}
1080
/* listen worker: initialize connection and buffers for SMC-R.
 * *local_contact is set to the smc_conn_create() result (first contact
 * vs. reuse of an existing link group); returns 0 or a decline reason.
 */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
                                struct smc_clc_msg_proposal *pclc,
                                struct smc_ib_device *ibdev, u8 ibport,
                                int *local_contact)
{
        /* allocate connection / link group */
        *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
                                         &pclc->lcl, NULL, 0);
        if (*local_contact < 0) {
                if (*local_contact == -ENOMEM)
                        return SMC_CLC_DECL_MEM;/* insufficient memory*/
                return SMC_CLC_DECL_INTERR; /* other error */
        }

        /* create send buffer and rmb */
        if (smc_buf_create(new_smc, false))
                return SMC_CLC_DECL_MEM;

        return 0;
}
1102
/* listen worker: initialize connection and buffers for SMC-D.
 * Creates the ISM connection / link group, verifies the peer is
 * reachable via the ISM device, and allocates the DMBs; on any failure
 * after connection creation the connection is freed again.
 */
static int smc_listen_ism_init(struct smc_sock *new_smc,
                               struct smc_clc_msg_proposal *pclc,
                               struct smcd_dev *ismdev,
                               int *local_contact)
{
        struct smc_clc_msg_smcd *pclc_smcd;

        pclc_smcd = smc_get_clc_msg_smcd(pclc);
        *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
                                         ismdev, pclc_smcd->gid);
        if (*local_contact < 0) {
                if (*local_contact == -ENOMEM)
                        return SMC_CLC_DECL_MEM;/* insufficient memory*/
                return SMC_CLC_DECL_INTERR; /* other error */
        }

        /* Check if peer can be reached via ISM device */
        if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
                            new_smc->conn.lgr->vlan_id,
                            new_smc->conn.lgr->smcd)) {
                /* a first-contact link group was created just for this
                 * connection; forget it again before freeing
                 */
                if (*local_contact == SMC_FIRST_CONTACT)
                        smc_lgr_forget(new_smc->conn.lgr);
                smc_conn_free(&new_smc->conn);
                return SMC_CLC_DECL_CNFERR;
        }

        /* Create send and receive buffers */
        if (smc_buf_create(new_smc, true)) {
                if (*local_contact == SMC_FIRST_CONTACT)
                        smc_lgr_forget(new_smc->conn.lgr);
                smc_conn_free(&new_smc->conn);
                return SMC_CLC_DECL_MEM;
        }

        return 0;
}
1140
/* listen worker: register buffers - (re)register the RMB with the IB
 * device when reusing an existing link group with a fresh RMB
 */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

        /* on first contact the RMB is registered later, in
         * smc_serv_conf_first_link()
         */
        if (local_contact != SMC_FIRST_CONTACT) {
                if (!new_smc->conn.rmb_desc->reused) {
                        if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
                                return SMC_CLC_DECL_ERR_REGRMB;
                }
        }
        smc_rmb_sync_sg_for_device(&new_smc->conn);

        return 0;
}
1156
/* listen worker: finish RDMA setup - process the client's CONFIRM
 * message, bring the QP to ready and, on first contact, run the
 * CONFIRM/ADD LINK handshake.
 * Returns 0 on success; on failure it declines (dropping the pending
 * link group mutex held by the caller) and returns the reason code.
 */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
                                  struct smc_clc_msg_accept_confirm *cclc,
                                  int local_contact)
{
        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
        int reason_code = 0;

        if (local_contact == SMC_FIRST_CONTACT)
                smc_link_save_peer_info(link, cclc);

        if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
                reason_code = SMC_CLC_DECL_ERR_RTOK;
                goto decline;
        }

        if (local_contact == SMC_FIRST_CONTACT) {
                if (smc_ib_ready_link(link)) {
                        reason_code = SMC_CLC_DECL_ERR_RDYLNK;
                        goto decline;
                }
                /* QP confirmation over RoCE fabric */
                reason_code = smc_serv_conf_first_link(new_smc);
                if (reason_code)
                        goto decline;
        }
        return 0;

decline:
        /* caller (smc_listen_work) holds smc_create_lgr_pending */
        mutex_unlock(&smc_create_lgr_pending);
        smc_listen_decline(new_smc, reason_code, local_contact);
        return reason_code;
}
1190
/* setup for RDMA connection of server - worker scheduled per accepted
 * CLC socket; runs the server side of the CLC handshake (PROPOSAL ->
 * ACCEPT -> CONFIRM), preferring SMC-D (ISM) over SMC-R (RDMA), and
 * falls back to TCP where SMC is not possible
 */
static void smc_listen_work(struct work_struct *work)
{
        struct smc_sock *new_smc = container_of(work, struct smc_sock,
                                                smc_listen_work);
        struct socket *newclcsock = new_smc->clcsock;
        struct smc_clc_msg_accept_confirm cclc;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *ibdev;
        bool ism_supported = false;
        struct smcd_dev *ismdev;
        u8 buf[SMC_CLC_MAX_LEN];
        int local_contact = 0;
        unsigned short vlan;
        int reason_code = 0;
        int rc = 0;
        u8 ibport;

        /* listen socket already fell back: connect the child as TCP too */
        if (new_smc->use_fallback) {
                smc_listen_out_connected(new_smc);
                return;
        }

        /* check if peer is smc capable */
        if (!tcp_sk(newclcsock->sk)->syn_smc) {
                new_smc->use_fallback = true;
                new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
                smc_listen_out_connected(new_smc);
                return;
        }

        /* do inband token exchange -
         * wait for and receive SMC Proposal CLC message
         */
        pclc = (struct smc_clc_msg_proposal *)&buf;
        reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
                                       SMC_CLC_PROPOSAL);
        if (reason_code) {
                smc_listen_decline(new_smc, reason_code, 0);
                return;
        }

        /* IPSec connections opt out of SMC-R optimizations */
        if (using_ipsec(new_smc)) {
                smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
                return;
        }

        /* serialize link group creation; held until the handshake is done */
        mutex_lock(&smc_create_lgr_pending);
        smc_close_init(new_smc);
        smc_rx_init(new_smc);
        smc_tx_init(new_smc);

        /* check if ISM is available */
        if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
            !smc_check_ism(new_smc, &ismdev) &&
            !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
                ism_supported = true;
        }

        /* check if RDMA is available */
        if (!ism_supported &&
            ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
             smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
             smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
             smc_listen_rdma_check(new_smc, pclc) ||
             smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
                                  &local_contact) ||
             smc_listen_rdma_reg(new_smc, local_contact))) {
                /* SMC not supported, decline */
                mutex_unlock(&smc_create_lgr_pending);
                smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
                                   local_contact);
                return;
        }

        /* send SMC Accept CLC message */
        rc = smc_clc_send_accept(new_smc, local_contact);
        if (rc) {
                mutex_unlock(&smc_create_lgr_pending);
                smc_listen_decline(new_smc, rc, local_contact);
                return;
        }

        /* receive SMC Confirm CLC message */
        reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
                                       SMC_CLC_CONFIRM);
        if (reason_code) {
                mutex_unlock(&smc_create_lgr_pending);
                smc_listen_decline(new_smc, reason_code, local_contact);
                return;
        }

        /* finish worker */
        if (!ism_supported) {
                /* unlocks smc_create_lgr_pending itself on failure */
                if (smc_listen_rdma_finish(new_smc, &cclc, local_contact))
                        return;
        }
        smc_conn_save_peer_info(new_smc, &cclc);
        mutex_unlock(&smc_create_lgr_pending);
        smc_listen_out_connected(new_smc);
}
1293
/* worker accepting connections on the internal CLC socket as long as the
 * SMC socket stays in SMC_LISTEN; schedules smc_listen_work for each
 * accepted child
 */
static void smc_tcp_listen_work(struct work_struct *work)
{
        struct smc_sock *lsmc = container_of(work, struct smc_sock,
                                             tcp_listen_work);
        struct sock *lsk = &lsmc->sk;
        struct smc_sock *new_smc;
        int rc = 0;

        lock_sock(lsk);
        while (lsk->sk_state == SMC_LISTEN) {
                rc = smc_clcsock_accept(lsmc, &new_smc);
                if (rc)
                        goto out;
                if (!new_smc)
                        continue;

                new_smc->listen_smc = lsmc;
                new_smc->use_fallback = lsmc->use_fallback;
                new_smc->fallback_rsn = lsmc->fallback_rsn;
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
                new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
                new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                /* schedule_work() returns false if already queued; then the
                 * extra reference taken for the worker must be dropped
                 */
                if (!schedule_work(&new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
        }

out:
        release_sock(lsk);
        sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
1327
/* listen() handler of the AF_SMC socket: put the internal CLC socket
 * into listen state and start the TCP-listen worker
 */
static int smc_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc;

        smc = smc_sk(sk);
        lock_sock(sk);

        rc = -EINVAL;
        if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
                goto out;

        rc = 0;
        if (sk->sk_state == SMC_LISTEN) {
                /* already listening: only adjust the backlog */
                sk->sk_max_ack_backlog = backlog;
                goto out;
        }
        /* some socket options are handled in core, so we could not apply
         * them to the clc socket -- copy smc socket options to clc socket
         */
        smc_copy_sock_settings_to_clc(smc);
        /* advertise SMC capability in the TCP SYN unless already
         * fallen back to plain TCP
         */
        if (!smc->use_fallback)
                tcp_sk(smc->clcsock->sk)->syn_smc = 1;

        rc = kernel_listen(smc->clcsock, backlog);
        if (rc)
                goto out;
        sk->sk_max_ack_backlog = backlog;
        sk->sk_ack_backlog = 0;
        sk->sk_state = SMC_LISTEN;
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        sock_hold(sk); /* sock_hold in tcp_listen_worker */
        if (!schedule_work(&smc->tcp_listen_work))
                sock_put(sk);

out:
        release_sock(sk);
        return rc;
}
1368
/* accept() handler of the AF_SMC socket: wait (interruptibly, honoring
 * the receive timeout and O_NONBLOCK) until the listen worker has queued
 * a new connection, then optionally honor TCP_DEFER_ACCEPT by waiting
 * for data on the new socket
 */
static int smc_accept(struct socket *sock, struct socket *new_sock,
                      int flags, bool kern)
{
        struct sock *sk = sock->sk, *nsk;
        DECLARE_WAITQUEUE(wait, current);
        struct smc_sock *lsmc;
        long timeo;
        int rc = 0;

        lsmc = smc_sk(sk);
        sock_hold(sk); /* sock_put below */
        lock_sock(sk);

        if (lsmc->sk.sk_state != SMC_LISTEN) {
                rc = -EINVAL;
                release_sock(sk);
                goto out;
        }

        /* Wait for an incoming connection */
        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
        while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (!timeo) {
                        rc = -EAGAIN;
                        break;
                }
                /* drop the sock lock while sleeping so the listen worker
                 * can enqueue new connections
                 */
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                /* wakeup by sk_data_ready in smc_listen_work() */
                sched_annotate_sleep();
                lock_sock(sk);
                if (signal_pending(current)) {
                        rc = sock_intr_errno(timeo);
                        break;
                }
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);

        if (!rc)
                rc = sock_error(nsk);
        release_sock(sk);
        if (rc)
                goto out;

        if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
                /* wait till data arrives on the socket */
                timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
                                                                MSEC_PER_SEC);
                if (smc_sk(nsk)->use_fallback) {
                        struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

                        lock_sock(clcsk);
                        if (skb_queue_empty(&clcsk->sk_receive_queue))
                                sk_wait_data(clcsk, &timeo, NULL);
                        release_sock(clcsk);
                } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
                        lock_sock(nsk);
                        smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
                        release_sock(nsk);
                }
        }

out:
        sock_put(sk); /* sock_hold above */
        return rc;
}
1438
1439 static int smc_getname(struct socket *sock, struct sockaddr *addr,
1440                        int peer)
1441 {
1442         struct smc_sock *smc;
1443
1444         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1445             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1446                 return -ENOTCONN;
1447
1448         smc = smc_sk(sock->sk);
1449
1450         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1451 }
1452
/* sendmsg() handler: send via the SMC tx path, or via the CLC socket
 * when fallen back to TCP; MSG_FASTOPEN forces a fallback since SMC
 * cannot support TCP fast open
 */
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc = -EPIPE;

        smc = smc_sk(sk);
        lock_sock(sk);
        if ((sk->sk_state != SMC_ACTIVE) &&
            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
            (sk->sk_state != SMC_INIT))
                goto out;

        if (msg->msg_flags & MSG_FASTOPEN) {
                /* fast open only possible before the connection started */
                if (sk->sk_state == SMC_INIT) {
                        smc->use_fallback = true;
                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        rc = -EINVAL;
                        goto out;
                }
        }

        if (smc->use_fallback)
                rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
        else
                rc = smc_tx_sendmsg(smc, msg, len);
out:
        release_sock(sk);
        return rc;
}
1484
/* recvmsg() handler: receive via the SMC rx path, or via the CLC socket
 * when fallen back to TCP
 */
static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                       int flags)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc = -ENOTCONN;

        smc = smc_sk(sk);
        lock_sock(sk);
        if ((sk->sk_state == SMC_INIT) ||
            (sk->sk_state == SMC_LISTEN) ||
            (sk->sk_state == SMC_CLOSED))
                goto out;

        /* peer finished closing: nothing left to receive, report EOF */
        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
                rc = 0;
                goto out;
        }

        if (smc->use_fallback) {
                rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
        } else {
                msg->msg_namelen = 0;
                rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
        }

out:
        release_sock(sk);
        return rc;
}
1515
1516 static __poll_t smc_accept_poll(struct sock *parent)
1517 {
1518         struct smc_sock *isk = smc_sk(parent);
1519         __poll_t mask = 0;
1520
1521         spin_lock(&isk->accept_q_lock);
1522         if (!list_empty(&isk->accept_q))
1523                 mask = EPOLLIN | EPOLLRDNORM;
1524         spin_unlock(&isk->accept_q_lock);
1525
1526         return mask;
1527 }
1528
/* poll() handler: delegate to the CLC socket when fallen back to TCP,
 * otherwise derive the event mask from the SMC connection state
 */
static __poll_t smc_poll(struct file *file, struct socket *sock,
                             poll_table *wait)
{
        struct sock *sk = sock->sk;
        __poll_t mask = 0;
        struct smc_sock *smc;

        if (!sk)
                return EPOLLNVAL;

        smc = smc_sk(sock->sk);
        if (smc->use_fallback) {
                /* delegate to CLC child sock */
                mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
                sk->sk_err = smc->clcsock->sk->sk_err;
                if (sk->sk_err)
                        mask |= EPOLLERR;
        } else {
                if (sk->sk_state != SMC_CLOSED)
                        sock_poll_wait(file, sock, wait);
                if (sk->sk_err)
                        mask |= EPOLLERR;
                if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
                    (sk->sk_state == SMC_CLOSED))
                        mask |= EPOLLHUP;
                if (sk->sk_state == SMC_LISTEN) {
                        /* woken up by sk_data_ready in smc_listen_work() */
                        mask = smc_accept_poll(sk);
                } else {
                        /* writable while send buffer space is available */
                        if (atomic_read(&smc->conn.sndbuf_space) ||
                            sk->sk_shutdown & SEND_SHUTDOWN) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {
                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                        }
                        if (atomic_read(&smc->conn.bytes_to_rcv))
                                mask |= EPOLLIN | EPOLLRDNORM;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
                        if (sk->sk_state == SMC_APPCLOSEWAIT1)
                                mask |= EPOLLIN;
                        /* urgent data pending */
                        if (smc->conn.urg_state == SMC_URG_VALID)
                                mask |= EPOLLPRI;
                }
        }

        return mask;
}
1578
/* shutdown() handler: run the SMC close protocol for the requested
 * direction(s) and shut down the CLC socket accordingly
 */
static int smc_shutdown(struct socket *sock, int how)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc = -EINVAL;
        int rc1 = 0;

        smc = smc_sk(sk);

        if ((how < SHUT_RD) || (how > SHUT_RDWR))
                return rc;

        lock_sock(sk);

        rc = -ENOTCONN;
        if ((sk->sk_state != SMC_ACTIVE) &&
            (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
            (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
            (sk->sk_state != SMC_APPCLOSEWAIT2) &&
            (sk->sk_state != SMC_APPFINCLOSEWAIT))
                goto out;
        if (smc->use_fallback) {
                /* plain TCP: mirror the CLC socket's shutdown state */
                rc = kernel_sock_shutdown(smc->clcsock, how);
                sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
                if (sk->sk_shutdown == SHUTDOWN_MASK)
                        sk->sk_state = SMC_CLOSED;
                goto out;
        }
        switch (how) {
        case SHUT_RDWR:         /* shutdown in both directions */
                rc = smc_close_active(smc);
                break;
        case SHUT_WR:
                rc = smc_close_shutdown_write(smc);
                break;
        case SHUT_RD:
                rc = 0;
                /* nothing more to do because peer is not involved */
                break;
        }
        if (smc->clcsock)
                rc1 = kernel_sock_shutdown(smc->clcsock, how);
        /* map sock_shutdown_cmd constants to sk_shutdown value range */
        sk->sk_shutdown |= how + 1;

out:
        release_sock(sk);
        return rc ? rc : rc1;
}
1629
/* setsockopt() handler: apply the option to the CLC socket first, then
 * mirror the SMC-relevant TCP options on the SMC socket itself
 */
static int smc_setsockopt(struct socket *sock, int level, int optname,
                          char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int val, rc;

        smc = smc_sk(sk);

        /* generic setsockopts reaching us here always apply to the
         * CLC socket
         */
        rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
                                           optval, optlen);
        if (smc->clcsock->sk->sk_err) {
                /* propagate an error raised on the CLC socket */
                sk->sk_err = smc->clcsock->sk->sk_err;
                sk->sk_error_report(sk);
        }
        if (rc)
                return rc;

        if (optlen < sizeof(int))
                return -EINVAL;
        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        lock_sock(sk);
        switch (optname) {
        case TCP_ULP:
        case TCP_FASTOPEN:
        case TCP_FASTOPEN_CONNECT:
        case TCP_FASTOPEN_KEY:
        case TCP_FASTOPEN_NO_COOKIE:
                /* option not supported by SMC */
                if (sk->sk_state == SMC_INIT) {
                        /* not yet connected: fall back to plain TCP */
                        smc->use_fallback = true;
                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        if (!smc->use_fallback)
                                rc = -EINVAL;
                }
                break;
        case TCP_NODELAY:
                if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
                        /* push queued data out immediately */
                        if (val && !smc->use_fallback)
                                mod_delayed_work(system_wq, &smc->conn.tx_work,
                                                 0);
                }
                break;
        case TCP_CORK:
                if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
                        /* uncorking: flush data held back while corked */
                        if (!val && !smc->use_fallback)
                                mod_delayed_work(system_wq, &smc->conn.tx_work,
                                                 0);
                }
                break;
        case TCP_DEFER_ACCEPT:
                /* evaluated in smc_accept() */
                smc->sockopt_defer_accept = val;
                break;
        default:
                break;
        }
        release_sock(sk);

        return rc;
}
1696
1697 static int smc_getsockopt(struct socket *sock, int level, int optname,
1698                           char __user *optval, int __user *optlen)
1699 {
1700         struct smc_sock *smc;
1701
1702         smc = smc_sk(sock->sk);
1703         /* socket options apply to the CLC socket */
1704         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1705                                              optval, optlen);
1706 }
1707
/* smc_ioctl() - ioctl handler for SMC sockets
 *
 * Supports the TCP-like queue queries SIOCINQ/SIOCOUTQ/SIOCOUTQNSD and
 * SIOCATMARK; everything else is rejected with -ENOIOCTLCMD.  In fallback
 * mode the ioctl is simply forwarded to the internal TCP socket.
 */
static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {	/* fallback socket already gone */
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			/* report "at mark" when the consumer cursor is
			 * exactly one byte behind the urgent cursor
			 */
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}
1788
1789 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1790                             int offset, size_t size, int flags)
1791 {
1792         struct sock *sk = sock->sk;
1793         struct smc_sock *smc;
1794         int rc = -EPIPE;
1795
1796         smc = smc_sk(sk);
1797         lock_sock(sk);
1798         if (sk->sk_state != SMC_ACTIVE) {
1799                 release_sock(sk);
1800                 goto out;
1801         }
1802         release_sock(sk);
1803         if (smc->use_fallback)
1804                 rc = kernel_sendpage(smc->clcsock, page, offset,
1805                                      size, flags);
1806         else
1807                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1808
1809 out:
1810         return rc;
1811 }
1812
1813 /* Map the affected portions of the rmbe into an spd, note the number of bytes
1814  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
1815  * updates till whenever a respective page has been fully processed.
1816  * Note that subsequent recv() calls have to wait till all splice() processing
1817  * completed.
1818  */
1819 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1820                                struct pipe_inode_info *pipe, size_t len,
1821                                unsigned int flags)
1822 {
1823         struct sock *sk = sock->sk;
1824         struct smc_sock *smc;
1825         int rc = -ENOTCONN;
1826
1827         smc = smc_sk(sk);
1828         lock_sock(sk);
1829
1830         if (sk->sk_state == SMC_INIT ||
1831             sk->sk_state == SMC_LISTEN ||
1832             sk->sk_state == SMC_CLOSED)
1833                 goto out;
1834
1835         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1836                 rc = 0;
1837                 goto out;
1838         }
1839
1840         if (smc->use_fallback) {
1841                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1842                                                     pipe, len, flags);
1843         } else {
1844                 if (*ppos) {
1845                         rc = -ESPIPE;
1846                         goto out;
1847                 }
1848                 if (flags & SPLICE_F_NONBLOCK)
1849                         flags = MSG_DONTWAIT;
1850                 else
1851                         flags = 0;
1852                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1853         }
1854 out:
1855         release_sock(sk);
1856
1857         return rc;
1858 }
1859
/* must look like tcp
 * proto_ops table for PF_SMC sockets; operations with no SMC support
 * use the generic sock_no_* stubs
 */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
1882
1883 static int smc_create(struct net *net, struct socket *sock, int protocol,
1884                       int kern)
1885 {
1886         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1887         struct smc_sock *smc;
1888         struct sock *sk;
1889         int rc;
1890
1891         rc = -ESOCKTNOSUPPORT;
1892         if (sock->type != SOCK_STREAM)
1893                 goto out;
1894
1895         rc = -EPROTONOSUPPORT;
1896         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1897                 goto out;
1898
1899         rc = -ENOBUFS;
1900         sock->ops = &smc_sock_ops;
1901         sk = smc_sock_alloc(net, sock, protocol);
1902         if (!sk)
1903                 goto out;
1904
1905         /* create internal TCP socket for CLC handshake and fallback */
1906         smc = smc_sk(sk);
1907         smc->use_fallback = false; /* assume rdma capability first */
1908         smc->fallback_rsn = 0;
1909         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1910                               &smc->clcsock);
1911         if (rc) {
1912                 sk_common_release(sk);
1913                 goto out;
1914         }
1915         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1916         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1917
1918 out:
1919         return rc;
1920 }
1921
/* PF_SMC family descriptor; smc_create() is invoked via sock_register() */
static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};
1927
/* smc_init() - module initialization
 *
 * Registers, in order: pnet tables, LLC and CDC handlers, the v4 and v6
 * protos, the PF_SMC socket family and the IB client.  Each error path
 * unwinds what was registered before it via the goto labels below.
 * NOTE(review): smc_llc_init()/smc_cdc_init() failures unwind only to
 * out_pnet - presumably they leave nothing registered on failure; verify.
 */
static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	/* enable the tcp_have_smc static key used by the TCP layer */
	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}
1987
/* smc_exit() - module cleanup; tears down everything smc_init() set up */
static void __exit smc_exit(void)
{
	smc_core_exit();
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}
1998
module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
/* allow on-demand module loading when a PF_SMC socket is created */
MODULE_ALIAS_NETPROTO(PF_SMC);