Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[sfrench/cifs-2.6.git] / drivers / staging / lustre / lnet / klnds / socklnd / socklnd.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * GPL HEADER START
4  *
5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 only,
9  * as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License version 2 for more details (a copy is included
15  * in the LICENSE file that accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License
18  * version 2 along with this program; If not, see
19  * http://www.gnu.org/licenses/gpl-2.0.html
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Use is subject to license terms.
26  *
27  * Copyright (c) 2011, 2015, Intel Corporation.
28  */
29 /*
30  * This file is part of Lustre, http://www.lustre.org/
31  * Lustre is a trademark of Sun Microsystems, Inc.
32  *
33  * lnet/klnds/socklnd/socklnd.c
34  *
35  * Author: Zach Brown <zab@zabbo.net>
36  * Author: Peter J. Braam <braam@clusterfs.com>
37  * Author: Phil Schwan <phil@clusterfs.com>
38  * Author: Eric Barton <eric@bartonsoftware.com>
39  */
40
41 #include "socklnd.h"
42
43 static struct lnet_lnd the_ksocklnd;
44 struct ksock_nal_data ksocknal_data;
45
46 static struct ksock_interface *
47 ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
48 {
49         struct ksock_net *net = ni->ni_data;
50         int i;
51         struct ksock_interface *iface;
52
53         for (i = 0; i < net->ksnn_ninterfaces; i++) {
54                 LASSERT(i < LNET_MAX_INTERFACES);
55                 iface = &net->ksnn_interfaces[i];
56
57                 if (iface->ksni_ipaddr == ip)
58                         return iface;
59         }
60
61         return NULL;
62 }
63
64 static struct ksock_route *
65 ksocknal_create_route(__u32 ipaddr, int port)
66 {
67         struct ksock_route *route;
68
69         LIBCFS_ALLOC(route, sizeof(*route));
70         if (!route)
71                 return NULL;
72
73         atomic_set(&route->ksnr_refcount, 1);
74         route->ksnr_peer = NULL;
75         route->ksnr_retry_interval = 0;  /* OK to connect at any time */
76         route->ksnr_ipaddr = ipaddr;
77         route->ksnr_port = port;
78         route->ksnr_scheduled = 0;
79         route->ksnr_connecting = 0;
80         route->ksnr_connected = 0;
81         route->ksnr_deleted = 0;
82         route->ksnr_conn_count = 0;
83         route->ksnr_share_count = 0;
84
85         return route;
86 }
87
88 void
89 ksocknal_destroy_route(struct ksock_route *route)
90 {
91         LASSERT(!atomic_read(&route->ksnr_refcount));
92
93         if (route->ksnr_peer)
94                 ksocknal_peer_decref(route->ksnr_peer);
95
96         LIBCFS_FREE(route, sizeof(*route));
97 }
98
/*
 * Allocate and initialise a peer for @id on interface @ni.
 *
 * On success, *peerp holds the new peer with one reference for the
 * caller and the owning net's peer count has been bumped.
 * Returns 0 on success, -ENOMEM on allocation failure, or -ESHUTDOWN
 * if the net is being torn down.
 */
static int
ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
		     struct lnet_process_id id)
{
	int cpt = lnet_cpt_of_nid(id.nid);
	struct ksock_net *net = ni->ni_data;
	struct ksock_peer *peer;

	LASSERT(id.nid != LNET_NID_ANY);
	LASSERT(id.pid != LNET_PID_ANY);
	LASSERT(!in_interrupt());

	/* Allocate from the CPT derived from the peer's NID for locality. */
	LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
	if (!peer)
		return -ENOMEM;

	peer->ksnp_ni = ni;
	peer->ksnp_id = id;
	atomic_set(&peer->ksnp_refcount, 1);   /* 1 ref for caller */
	peer->ksnp_closing = 0;
	peer->ksnp_accepting = 0;
	peer->ksnp_proto = NULL;
	peer->ksnp_last_alive = 0;
	/* Start ZC cookies just above the keepalive-ping sentinel value. */
	peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;

	INIT_LIST_HEAD(&peer->ksnp_conns);
	INIT_LIST_HEAD(&peer->ksnp_routes);
	INIT_LIST_HEAD(&peer->ksnp_tx_queue);
	INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
	spin_lock_init(&peer->ksnp_lock);

	spin_lock_bh(&net->ksnn_lock);

	/*
	 * Check shutdown and bump ksnn_npeers atomically under ksnn_lock
	 * so peer creation can never race past a network shutdown.
	 */
	if (net->ksnn_shutdown) {
		spin_unlock_bh(&net->ksnn_lock);

		LIBCFS_FREE(peer, sizeof(*peer));
		CERROR("Can't create peer: network shutdown\n");
		return -ESHUTDOWN;
	}

	net->ksnn_npeers++;

	spin_unlock_bh(&net->ksnn_lock);

	*peerp = peer;
	return 0;
}
147
/*
 * Free @peer once its last reference has been dropped, and decrement
 * the owning net's peer count.  All conns/routes/queued state must
 * already be gone.
 */
void
ksocknal_destroy_peer(struct ksock_peer *peer)
{
	/* NB: sample 'net' before the peer is freed below. */
	struct ksock_net *net = peer->ksnp_ni->ni_data;

	CDEBUG(D_NET, "peer %s %p deleted\n",
	       libcfs_id2str(peer->ksnp_id), peer);

	LASSERT(!atomic_read(&peer->ksnp_refcount));
	LASSERT(!peer->ksnp_accepting);
	LASSERT(list_empty(&peer->ksnp_conns));
	LASSERT(list_empty(&peer->ksnp_routes));
	LASSERT(list_empty(&peer->ksnp_tx_queue));
	LASSERT(list_empty(&peer->ksnp_zc_req_list));

	LIBCFS_FREE(peer, sizeof(*peer));

	/*
	 * NB a peer's connections and routes keep a reference on their peer
	 * until they are destroyed, so we can be assured that _all_ state to
	 * do with this peer has been cleaned up when its refcount drops to
	 * zero.
	 */
	spin_lock_bh(&net->ksnn_lock);
	net->ksnn_npeers--;
	spin_unlock_bh(&net->ksnn_lock);
}
175
176 struct ksock_peer *
177 ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
178 {
179         struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
180         struct ksock_peer *peer;
181
182         list_for_each_entry(peer, peer_list, ksnp_list) {
183                 LASSERT(!peer->ksnp_closing);
184
185                 if (peer->ksnp_ni != ni)
186                         continue;
187
188                 if (peer->ksnp_id.nid != id.nid ||
189                     peer->ksnp_id.pid != id.pid)
190                         continue;
191
192                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
193                        peer, libcfs_id2str(id),
194                        atomic_read(&peer->ksnp_refcount));
195                 return peer;
196         }
197         return NULL;
198 }
199
200 struct ksock_peer *
201 ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
202 {
203         struct ksock_peer *peer;
204
205         read_lock(&ksocknal_data.ksnd_global_lock);
206         peer = ksocknal_find_peer_locked(ni, id);
207         if (peer)                       /* +1 ref for caller? */
208                 ksocknal_peer_addref(peer);
209         read_unlock(&ksocknal_data.ksnd_global_lock);
210
211         return peer;
212 }
213
/*
 * Remove @peer from the peer hash table and drop the table's reference.
 * Caller holds ksnd_global_lock for writing; the peer must have no
 * remaining conns or routes.
 */
static void
ksocknal_unlink_peer_locked(struct ksock_peer *peer)
{
	int i;
	__u32 ip;
	struct ksock_interface *iface;

	/* Release the peer's hold on each interface it was assigned. */
	for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
		LASSERT(i < LNET_MAX_INTERFACES);
		ip = peer->ksnp_passive_ips[i];

		iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
		/*
		 * All IPs in peer->ksnp_passive_ips[] come from the
		 * interface list, therefore the call must succeed.
		 */
		LASSERT(iface);

		CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
		       peer, iface, iface->ksni_nroutes);
		iface->ksni_npeers--;
	}

	LASSERT(list_empty(&peer->ksnp_conns));
	LASSERT(list_empty(&peer->ksnp_routes));
	LASSERT(!peer->ksnp_closing);
	peer->ksnp_closing = 1;		/* mark dying before unhashing */
	list_del(&peer->ksnp_list);
	/* lose peerlist's ref */
	ksocknal_peer_decref(peer);
}
245
/*
 * Report the @index'th peer/interface/route tuple on @ni for ioctl
 * enumeration.  @index counts down across three classes of entries per
 * peer: bare peers (no passive IPs, no routes), one entry per passive
 * IP, and one entry per route.  Returns 0 with the out-params filled,
 * or -ENOENT when @index is past the last entry.
 */
static int
ksocknal_get_peer_info(struct lnet_ni *ni, int index,
		       struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
		       int *port, int *conn_count, int *share_count)
{
	struct ksock_peer *peer;
	struct list_head *ptmp;
	struct ksock_route *route;
	struct list_head *rtmp;
	int i;
	int j;
	int rc = -ENOENT;

	read_lock(&ksocknal_data.ksnd_global_lock);

	/* Walk every hash bucket, counting 'index' down as we go. */
	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);

			if (peer->ksnp_ni != ni)
				continue;

			/* A peer with neither passive IPs nor routes is
			 * reported as a single bare entry.
			 */
			if (!peer->ksnp_n_passive_ips &&
			    list_empty(&peer->ksnp_routes)) {
				if (index-- > 0)
					continue;

				*id = peer->ksnp_id;
				*myip = 0;
				*peer_ip = 0;
				*port = 0;
				*conn_count = 0;
				*share_count = 0;
				rc = 0;
				goto out;
			}

			/* One entry per passive (local) IP in use. */
			for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
				if (index-- > 0)
					continue;

				*id = peer->ksnp_id;
				*myip = peer->ksnp_passive_ips[j];
				*peer_ip = 0;
				*port = 0;
				*conn_count = 0;
				*share_count = 0;
				rc = 0;
				goto out;
			}

			/* One entry per route, with full addressing info. */
			list_for_each(rtmp, &peer->ksnp_routes) {
				if (index-- > 0)
					continue;

				route = list_entry(rtmp, struct ksock_route,
						   ksnr_list);

				*id = peer->ksnp_id;
				*myip = route->ksnr_myipaddr;
				*peer_ip = route->ksnr_ipaddr;
				*port = route->ksnr_port;
				*conn_count = route->ksnr_conn_count;
				*share_count = route->ksnr_share_count;
				rc = 0;
				goto out;
			}
		}
	}
 out:
	read_unlock(&ksocknal_data.ksnd_global_lock);
	return rc;
}
319
/*
 * Bind @conn to @route: take a route ref on behalf of the conn, keep
 * the per-interface route accounting in step with the route's bound
 * local IP, and record the successful connection on the route.
 * Caller holds ksnd_global_lock for writing.
 */
static void
ksocknal_associate_route_conn_locked(struct ksock_route *route,
				     struct ksock_conn *conn)
{
	struct ksock_peer *peer = route->ksnr_peer;
	int type = conn->ksnc_type;
	struct ksock_interface *iface;

	conn->ksnc_route = route;
	ksocknal_route_addref(route);

	if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
		if (!route->ksnr_myipaddr) {
			/* route wasn't bound locally yet (the initial route) */
			CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
			       libcfs_id2str(peer->ksnp_id),
			       &route->ksnr_ipaddr,
			       &conn->ksnc_myipaddr);
		} else {
			CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
			       libcfs_id2str(peer->ksnp_id),
			       &route->ksnr_ipaddr,
			       &route->ksnr_myipaddr,
			       &conn->ksnc_myipaddr);

			/* Drop the route count on the old local interface. */
			iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
						  route->ksnr_myipaddr);
			if (iface)
				iface->ksni_nroutes--;
		}
		/* Rebind the route to the conn's local IP and charge the
		 * new interface, if one is configured for that address.
		 */
		route->ksnr_myipaddr = conn->ksnc_myipaddr;
		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
					  route->ksnr_myipaddr);
		if (iface)
			iface->ksni_nroutes++;
	}

	/* Record which connection type succeeded on this route. */
	route->ksnr_connected |= (1 << type);
	route->ksnr_conn_count++;

	/*
	 * Successful connection => further attempts can
	 * proceed immediately
	 */
	route->ksnr_retry_interval = 0;
}
366
367 static void
368 ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
369 {
370         struct list_head *tmp;
371         struct ksock_conn *conn;
372         struct ksock_route *route2;
373
374         LASSERT(!peer->ksnp_closing);
375         LASSERT(!route->ksnr_peer);
376         LASSERT(!route->ksnr_scheduled);
377         LASSERT(!route->ksnr_connecting);
378         LASSERT(!route->ksnr_connected);
379
380         /* LASSERT(unique) */
381         list_for_each(tmp, &peer->ksnp_routes) {
382                 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
383
384                 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
385                         CERROR("Duplicate route %s %pI4h\n",
386                                libcfs_id2str(peer->ksnp_id),
387                                &route->ksnr_ipaddr);
388                         LBUG();
389                 }
390         }
391
392         route->ksnr_peer = peer;
393         ksocknal_peer_addref(peer);
394         /* peer's routelist takes over my ref on 'route' */
395         list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
396
397         list_for_each(tmp, &peer->ksnp_conns) {
398                 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
399
400                 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
401                         continue;
402
403                 ksocknal_associate_route_conn_locked(route, conn);
404                 /* keep going (typed routes) */
405         }
406 }
407
/*
 * Delete @route from its peer: close every conn bound to it, release
 * the interface accounting for its bound local IP, and drop the
 * peer's reference on it.  If this leaves the peer with no routes and
 * no conns, the peer is unlinked as well.  Caller holds
 * ksnd_global_lock for writing.
 */
static void
ksocknal_del_route_locked(struct ksock_route *route)
{
	struct ksock_peer *peer = route->ksnr_peer;
	struct ksock_interface *iface;
	struct ksock_conn *conn;
	struct list_head *ctmp;
	struct list_head *cnxt;

	LASSERT(!route->ksnr_deleted);

	/* Close associated conns */
	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
		conn = list_entry(ctmp, struct ksock_conn, ksnc_list);

		if (conn->ksnc_route != route)
			continue;

		ksocknal_close_conn_locked(conn, 0);
	}

	/* Undo the per-interface route count for the bound local IP. */
	if (route->ksnr_myipaddr) {
		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
					  route->ksnr_myipaddr);
		if (iface)
			iface->ksni_nroutes--;
	}

	route->ksnr_deleted = 1;
	list_del(&route->ksnr_list);
	ksocknal_route_decref(route);        /* drop peer's ref */

	if (list_empty(&peer->ksnp_routes) &&
	    list_empty(&peer->ksnp_conns)) {
		/*
		 * I've just removed the last route to a peer with no active
		 * connections
		 */
		ksocknal_unlink_peer_locked(peer);
	}
}
449
450 int
451 ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
452                   int port)
453 {
454         struct ksock_peer *peer;
455         struct ksock_peer *peer2;
456         struct ksock_route *route;
457         struct ksock_route *route2;
458         int rc;
459
460         if (id.nid == LNET_NID_ANY ||
461             id.pid == LNET_PID_ANY)
462                 return -EINVAL;
463
464         /* Have a brand new peer ready... */
465         rc = ksocknal_create_peer(&peer, ni, id);
466         if (rc)
467                 return rc;
468
469         route = ksocknal_create_route(ipaddr, port);
470         if (!route) {
471                 ksocknal_peer_decref(peer);
472                 return -ENOMEM;
473         }
474
475         write_lock_bh(&ksocknal_data.ksnd_global_lock);
476
477         /* always called with a ref on ni, so shutdown can't have started */
478         LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
479
480         peer2 = ksocknal_find_peer_locked(ni, id);
481         if (peer2) {
482                 ksocknal_peer_decref(peer);
483                 peer = peer2;
484         } else {
485                 /* peer table takes my ref on peer */
486                 list_add_tail(&peer->ksnp_list,
487                               ksocknal_nid2peerlist(id.nid));
488         }
489
490         route2 = NULL;
491         list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
492                 if (route2->ksnr_ipaddr == ipaddr)
493                         break;
494
495                 route2 = NULL;
496         }
497         if (!route2) {
498                 ksocknal_add_route_locked(peer, route);
499                 route->ksnr_share_count++;
500         } else {
501                 ksocknal_route_decref(route);
502                 route2->ksnr_share_count++;
503         }
504
505         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
506
507         return 0;
508 }
509
/*
 * Delete routes of @peer matching @ip (all routes when @ip == 0),
 * zeroing their share counts first.  If no explicitly-shared routes
 * remain afterwards, all auto-created routes and all conns are torn
 * down too.  Caller holds ksnd_global_lock for writing.  The peer
 * unlinks itself when its last conn/route goes.
 */
static void
ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
{
	struct ksock_conn *conn;
	struct ksock_route *route;
	struct list_head *tmp;
	struct list_head *nxt;
	int nshared;

	LASSERT(!peer->ksnp_closing);

	/* Extra ref prevents peer disappearing until I'm done with it */
	ksocknal_peer_addref(peer);

	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
		route = list_entry(tmp, struct ksock_route, ksnr_list);

		/* no match */
		if (!(!ip || route->ksnr_ipaddr == ip))
			continue;

		route->ksnr_share_count = 0;
		/* This deletes associated conns too */
		ksocknal_del_route_locked(route);
	}

	/* Count shares still held by routes that didn't match. */
	nshared = 0;
	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
		route = list_entry(tmp, struct ksock_route, ksnr_list);
		nshared += route->ksnr_share_count;
	}

	if (!nshared) {
		/*
		 * remove everything else if there are no explicit entries
		 * left
		 */
		list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
			route = list_entry(tmp, struct ksock_route, ksnr_list);

			/* we should only be removing auto-entries */
			LASSERT(!route->ksnr_share_count);
			ksocknal_del_route_locked(route);
		}

		list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
			conn = list_entry(tmp, struct ksock_conn, ksnc_list);

			ksocknal_close_conn_locked(conn, 0);
		}
	}

	ksocknal_peer_decref(peer);
	/* NB peer unlinks itself when last conn/route is removed */
}
565
/*
 * Delete routes/peers on @ni matching @id (wildcards allowed via
 * LNET_NID_ANY / LNET_PID_ANY) and @ip (0 == all IPs).  Any TX still
 * queued on a peer that ends up closing is completed with an error
 * after the lock is dropped.  Returns 0 if anything matched, else
 * -ENOENT.
 */
static int
ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
{
	LIST_HEAD(zombies);
	struct list_head *ptmp;
	struct list_head *pnxt;
	struct ksock_peer *peer;
	int lo;
	int hi;
	int i;
	int rc = -ENOENT;

	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	/* A specific NID confines the scan to its single hash bucket. */
	if (id.nid != LNET_NID_ANY) {
		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
	} else {
		lo = 0;
		hi = ksocknal_data.ksnd_peer_hash_size - 1;
	}

	for (i = lo; i <= hi; i++) {
		list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, struct ksock_peer, ksnp_list);

			if (peer->ksnp_ni != ni)
				continue;

			if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
			      (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
				continue;

			ksocknal_peer_addref(peer);     /* a ref for me... */

			ksocknal_del_peer_locked(peer, ip);

			/* Salvage any TXs still queued on a now-closing
			 * peer so they can be failed outside the lock.
			 */
			if (peer->ksnp_closing &&
			    !list_empty(&peer->ksnp_tx_queue)) {
				LASSERT(list_empty(&peer->ksnp_conns));
				LASSERT(list_empty(&peer->ksnp_routes));

				list_splice_init(&peer->ksnp_tx_queue,
						 &zombies);
			}

			ksocknal_peer_decref(peer);     /* ...till here */

			rc = 0;          /* matched! */
		}
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	/* Complete the salvaged TXs with an error. */
	ksocknal_txlist_done(ni, &zombies, 1);

	return rc;
}
624
625 static struct ksock_conn *
626 ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
627 {
628         struct ksock_peer *peer;
629         struct list_head *ptmp;
630         struct ksock_conn *conn;
631         struct list_head *ctmp;
632         int i;
633
634         read_lock(&ksocknal_data.ksnd_global_lock);
635
636         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
637                 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
638                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
639
640                         LASSERT(!peer->ksnp_closing);
641
642                         if (peer->ksnp_ni != ni)
643                                 continue;
644
645                         list_for_each(ctmp, &peer->ksnp_conns) {
646                                 if (index-- > 0)
647                                         continue;
648
649                                 conn = list_entry(ctmp, struct ksock_conn,
650                                                   ksnc_list);
651                                 ksocknal_conn_addref(conn);
652                                 read_unlock(&ksocknal_data.ksnd_global_lock);
653                                 return conn;
654                         }
655                 }
656         }
657
658         read_unlock(&ksocknal_data.ksnd_global_lock);
659         return NULL;
660 }
661
662 static struct ksock_sched *
663 ksocknal_choose_scheduler_locked(unsigned int cpt)
664 {
665         struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
666         struct ksock_sched *sched;
667         int i;
668
669         LASSERT(info->ksi_nthreads > 0);
670
671         sched = &info->ksi_scheds[0];
672         /*
673          * NB: it's safe so far, but info->ksi_nthreads could be changed
674          * at runtime when we have dynamic LNet configuration, then we
675          * need to take care of this.
676          */
677         for (i = 1; i < info->ksi_nthreads; i++) {
678                 if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
679                         sched = &info->ksi_scheds[i];
680         }
681
682         return sched;
683 }
684
685 static int
686 ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
687 {
688         struct ksock_net *net = ni->ni_data;
689         int i;
690         int nip;
691
692         read_lock(&ksocknal_data.ksnd_global_lock);
693
694         nip = net->ksnn_ninterfaces;
695         LASSERT(nip <= LNET_MAX_INTERFACES);
696
697         /*
698          * Only offer interfaces for additional connections if I have
699          * more than one.
700          */
701         if (nip < 2) {
702                 read_unlock(&ksocknal_data.ksnd_global_lock);
703                 return 0;
704         }
705
706         for (i = 0; i < nip; i++) {
707                 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
708                 LASSERT(ipaddrs[i]);
709         }
710
711         read_unlock(&ksocknal_data.ksnd_global_lock);
712         return nip;
713 }
714
715 static int
716 ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
717 {
718         int best_netmatch = 0;
719         int best_xor      = 0;
720         int best          = -1;
721         int this_xor;
722         int this_netmatch;
723         int i;
724
725         for (i = 0; i < nips; i++) {
726                 if (!ips[i])
727                         continue;
728
729                 this_xor = ips[i] ^ iface->ksni_ipaddr;
730                 this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
731
732                 if (!(best < 0 ||
733                       best_netmatch < this_netmatch ||
734                       (best_netmatch == this_netmatch &&
735                        best_xor > this_xor)))
736                         continue;
737
738                 best = i;
739                 best_netmatch = this_netmatch;
740                 best_xor = this_xor;
741         }
742
743         LASSERT(best >= 0);
744         return best;
745 }
746
/*
 * Select up to @n_peerips local IPs for @peer to use for additional
 * connections, pairing each chosen local interface with its
 * best-matching peer IP.  Already-selected passive IPs are kept; new
 * interfaces are chosen preferring subnet matches and the fewest
 * current peers.  @peerips is overwritten with the selection, whose
 * size is returned (0 when fewer than two local interfaces exist).
 */
static int
ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
{
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	struct ksock_net *net = peer->ksnp_ni->ni_data;
	struct ksock_interface *iface;
	struct ksock_interface *best_iface;
	int n_ips;
	int i;
	int j;
	int k;
	__u32 ip;
	__u32 xor;
	int this_netmatch;
	int best_netmatch;
	int best_npeers;

	/*
	 * CAVEAT EMPTOR: We do all our interface matching with an
	 * exclusive hold of global lock at IRQ priority.  We're only
	 * expecting to be dealing with small numbers of interfaces, so the
	 * O(n**3)-ness shouldn't matter
	 */
	/*
	 * Also note that I'm not going to return more than n_peerips
	 * interfaces, even if I have more myself
	 */
	write_lock_bh(global_lock);

	LASSERT(n_peerips <= LNET_MAX_INTERFACES);
	LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);

	/*
	 * Only match interfaces for additional connections
	 * if I have > 1 interface
	 */
	n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
		min(n_peerips, net->ksnn_ninterfaces);

	/* Loop until 'n_ips' passive IPs have been selected; 'i' indexes
	 * the selection slot being filled.
	 */
	for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
		/*            ^ yes really... */

		/*
		 * If we have any new interfaces, first tick off all the
		 * peer IPs that match old interfaces, then choose new
		 * interfaces to match the remaining peer IPS.
		 * We don't forget interfaces we've stopped using; we might
		 * start using them again...
		 */
		if (i < peer->ksnp_n_passive_ips) {
			/* Old interface. */
			ip = peer->ksnp_passive_ips[i];
			best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);

			/* peer passive ips are kept up to date */
			LASSERT(best_iface);
		} else {
			/* choose a new interface */
			LASSERT(i == peer->ksnp_n_passive_ips);

			best_iface = NULL;
			best_netmatch = 0;
			best_npeers = 0;

			for (j = 0; j < net->ksnn_ninterfaces; j++) {
				iface = &net->ksnn_interfaces[j];
				ip = iface->ksni_ipaddr;

				/* Skip interfaces already selected. */
				for (k = 0; k < peer->ksnp_n_passive_ips; k++)
					if (peer->ksnp_passive_ips[k] == ip)
						break;

				if (k < peer->ksnp_n_passive_ips) /* using it already */
					continue;

				/* Rank by subnet match with this iface's
				 * best peer IP, then by fewest peers.
				 */
				k = ksocknal_match_peerip(iface, peerips,
							  n_peerips);
				xor = ip ^ peerips[k];
				this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;

				if (!(!best_iface ||
				      best_netmatch < this_netmatch ||
				      (best_netmatch == this_netmatch &&
				       best_npeers > iface->ksni_npeers)))
					continue;

				best_iface = iface;
				best_netmatch = this_netmatch;
				best_npeers = iface->ksni_npeers;
			}

			LASSERT(best_iface);

			/* Commit the chosen interface to this slot. */
			best_iface->ksni_npeers++;
			ip = best_iface->ksni_ipaddr;
			peer->ksnp_passive_ips[i] = ip;
			peer->ksnp_n_passive_ips = i + 1;
		}

		/* mark the best matching peer IP used */
		j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
		peerips[j] = 0;
	}

	/* Overwrite input peer IP addresses */
	memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));

	write_unlock_bh(global_lock);

	return n_ips;
}
858
859 static void
860 ksocknal_create_routes(struct ksock_peer *peer, int port,
861                        __u32 *peer_ipaddrs, int npeer_ipaddrs)
862 {
863         struct ksock_route *newroute = NULL;
864         rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
865         struct lnet_ni *ni = peer->ksnp_ni;
866         struct ksock_net *net = ni->ni_data;
867         struct list_head *rtmp;
868         struct ksock_route *route;
869         struct ksock_interface *iface;
870         struct ksock_interface *best_iface;
871         int best_netmatch;
872         int this_netmatch;
873         int best_nroutes;
874         int i;
875         int j;
876
877         /*
878          * CAVEAT EMPTOR: We do all our interface matching with an
879          * exclusive hold of global lock at IRQ priority.  We're only
880          * expecting to be dealing with small numbers of interfaces, so the
881          * O(n**3)-ness here shouldn't matter
882          */
883         write_lock_bh(global_lock);
884
885         if (net->ksnn_ninterfaces < 2) {
886                 /*
887                  * Only create additional connections
888                  * if I have > 1 interface
889                  */
890                 write_unlock_bh(global_lock);
891                 return;
892         }
893
894         LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);
895
896         for (i = 0; i < npeer_ipaddrs; i++) {
897                 if (newroute) {
898                         newroute->ksnr_ipaddr = peer_ipaddrs[i];
899                 } else {
900                         write_unlock_bh(global_lock);
901
902                         newroute = ksocknal_create_route(peer_ipaddrs[i], port);
903                         if (!newroute)
904                                 return;
905
906                         write_lock_bh(global_lock);
907                 }
908
909                 if (peer->ksnp_closing) {
910                         /* peer got closed under me */
911                         break;
912                 }
913
914                 /* Already got a route? */
915                 route = NULL;
916                 list_for_each(rtmp, &peer->ksnp_routes) {
917                         route = list_entry(rtmp, struct ksock_route, ksnr_list);
918
919                         if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
920                                 break;
921
922                         route = NULL;
923                 }
924                 if (route)
925                         continue;
926
927                 best_iface = NULL;
928                 best_nroutes = 0;
929                 best_netmatch = 0;
930
931                 LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
932
933                 /* Select interface to connect from */
934                 for (j = 0; j < net->ksnn_ninterfaces; j++) {
935                         iface = &net->ksnn_interfaces[j];
936
937                         /* Using this interface already? */
938                         list_for_each(rtmp, &peer->ksnp_routes) {
939                                 route = list_entry(rtmp, struct ksock_route,
940                                                    ksnr_list);
941
942                                 if (route->ksnr_myipaddr == iface->ksni_ipaddr)
943                                         break;
944
945                                 route = NULL;
946                         }
947                         if (route)
948                                 continue;
949
950                         this_netmatch = (!((iface->ksni_ipaddr ^
951                                            newroute->ksnr_ipaddr) &
952                                            iface->ksni_netmask)) ? 1 : 0;
953
954                         if (!(!best_iface ||
955                               best_netmatch < this_netmatch ||
956                               (best_netmatch == this_netmatch &&
957                                best_nroutes > iface->ksni_nroutes)))
958                                 continue;
959
960                         best_iface = iface;
961                         best_netmatch = this_netmatch;
962                         best_nroutes = iface->ksni_nroutes;
963                 }
964
965                 if (!best_iface)
966                         continue;
967
968                 newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
969                 best_iface->ksni_nroutes++;
970
971                 ksocknal_add_route_locked(peer, newroute);
972                 newroute = NULL;
973         }
974
975         write_unlock_bh(global_lock);
976         if (newroute)
977                 ksocknal_route_decref(newroute);
978 }
979
980 int
981 ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
982 {
983         struct ksock_connreq *cr;
984         int rc;
985         __u32 peer_ip;
986         int peer_port;
987
988         rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
989         LASSERT(!rc);                 /* we succeeded before */
990
991         LIBCFS_ALLOC(cr, sizeof(*cr));
992         if (!cr) {
993                 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
994                                    &peer_ip);
995                 return -ENOMEM;
996         }
997
998         lnet_ni_addref(ni);
999         cr->ksncr_ni   = ni;
1000         cr->ksncr_sock = sock;
1001
1002         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
1003
1004         list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
1005         wake_up(&ksocknal_data.ksnd_connd_waitq);
1006
1007         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1008         return 0;
1009 }
1010
1011 static int
1012 ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
1013 {
1014         struct ksock_route *route;
1015
1016         list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
1017                 if (route->ksnr_ipaddr == ipaddr)
1018                         return route->ksnr_connecting;
1019         }
1020         return 0;
1021 }
1022
/*
 * Establish a new connection on @sock.
 *
 * @route is non-NULL for an active (outgoing) connection and NULL for a
 * passive one handed over by the acceptor; @type is SOCKLND_CONN_NONE
 * iff passive (asserted below).  HELLO messages are exchanged to
 * discover/confirm the peer's NID, protocol version and interface list;
 * duplicate connections and connection races are resolved; finally the
 * conn is attached to a scheduler and its socket callbacks are enabled.
 *
 * Returns 0 on success or a negative errno.  On failure @sock is
 * released (all error paths fall through to failed_0).
 */
int
ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
                     struct socket *sock, int type)
{
        rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
        LIST_HEAD(zombies);
        struct lnet_process_id peerid;
        struct list_head *tmp;
        __u64 incarnation;
        struct ksock_conn *conn;
        struct ksock_conn *conn2;
        struct ksock_peer *peer = NULL;
        struct ksock_peer *peer2;
        struct ksock_sched *sched;
        struct ksock_hello_msg *hello;
        int cpt;
        struct ksock_tx *tx;
        struct ksock_tx *txtmp;
        int rc;
        int active;
        char *warn = NULL;

        active = !!route;

        LASSERT(active == (type != SOCKLND_CONN_NONE));

        LIBCFS_ALLOC(conn, sizeof(*conn));
        if (!conn) {
                rc = -ENOMEM;
                goto failed_0;
        }

        conn->ksnc_peer = NULL;
        conn->ksnc_route = NULL;
        conn->ksnc_sock = sock;
        /*
         * 2 ref, 1 for conn, another extra ref prevents socket
         * being closed before establishment of connection
         */
        atomic_set(&conn->ksnc_sock_refcount, 2);
        conn->ksnc_type = type;
        ksocknal_lib_save_callback(sock, conn);
        atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */

        conn->ksnc_rx_ready = 0;
        conn->ksnc_rx_scheduled = 0;

        INIT_LIST_HEAD(&conn->ksnc_tx_queue);
        conn->ksnc_tx_ready = 0;
        conn->ksnc_tx_scheduled = 0;
        conn->ksnc_tx_carrier = NULL;
        atomic_set(&conn->ksnc_tx_nob, 0);

        /* scratch buffer for the HELLO exchange, freed before return */
        LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg,
                                     kshm_ips[LNET_MAX_INTERFACES]));
        if (!hello) {
                rc = -ENOMEM;
                goto failed_1;
        }

        /* stash conn's local and remote addrs */
        rc = ksocknal_lib_get_conn_addrs(conn);
        if (rc)
                goto failed_1;

        /*
         * Find out/confirm peer's NID and connection type and get the
         * vector of interfaces she's willing to let me connect to.
         * Passive connections use the listener timeout since the peer sends
         * eagerly
         */
        if (active) {
                peer = route->ksnr_peer;
                LASSERT(ni == peer->ksnp_ni);

                /* Active connection sends HELLO eagerly */
                hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
                peerid = peer->ksnp_id;

                write_lock_bh(global_lock);
                conn->ksnc_proto = peer->ksnp_proto;
                write_unlock_bh(global_lock);

                if (!conn->ksnc_proto) {
                        conn->ksnc_proto = &ksocknal_protocol_v3x;
#if SOCKNAL_VERSION_DEBUG
                        if (*ksocknal_tunables.ksnd_protocol == 2)
                                conn->ksnc_proto = &ksocknal_protocol_v2x;
                        else if (*ksocknal_tunables.ksnd_protocol == 1)
                                conn->ksnc_proto = &ksocknal_protocol_v1x;
#endif
                }

                rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
                if (rc)
                        goto failed_1;
        } else {
                peerid.nid = LNET_NID_ANY;
                peerid.pid = LNET_PID_ANY;

                /* Passive, get protocol from peer */
                conn->ksnc_proto = NULL;
        }

        /* NB rc > 0 is a non-fatal condition (EALREADY/EPROTO), dispatched
         * by the switch further down; only rc < 0 is a hard failure here
         */
        rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
        if (rc < 0)
                goto failed_1;

        LASSERT(!rc || active);
        LASSERT(conn->ksnc_proto);
        LASSERT(peerid.nid != LNET_NID_ANY);

        cpt = lnet_cpt_of_nid(peerid.nid);

        if (active) {
                ksocknal_peer_addref(peer);
                write_lock_bh(global_lock);
        } else {
                rc = ksocknal_create_peer(&peer, ni, peerid);
                if (rc)
                        goto failed_1;

                write_lock_bh(global_lock);

                /* called with a ref on ni, so shutdown can't have started */
                LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);

                peer2 = ksocknal_find_peer_locked(ni, peerid);
                if (!peer2) {
                        /*
                         * NB this puts an "empty" peer in the peer
                         * table (which takes my ref)
                         */
                        list_add_tail(&peer->ksnp_list,
                                      ksocknal_nid2peerlist(peerid.nid));
                } else {
                        ksocknal_peer_decref(peer);
                        peer = peer2;
                }

                /* +1 ref for me */
                ksocknal_peer_addref(peer);
                peer->ksnp_accepting++;

                /*
                 * Am I already connecting to this guy?  Resolve in
                 * favour of higher NID...
                 */
                if (peerid.nid < ni->ni_nid &&
                    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
                        /* NB positive errno: not an error to report upward */
                        rc = EALREADY;
                        warn = "connection race resolution";
                        goto failed_2;
                }
        }

        if (peer->ksnp_closing ||
            (active && route->ksnr_deleted)) {
                /* peer/route got closed under me */
                rc = -ESTALE;
                warn = "peer/route removed";
                goto failed_2;
        }

        if (!peer->ksnp_proto) {
                /*
                 * Never connected before.
                 * NB recv_hello may have returned EPROTO to signal my peer
                 * wants a different protocol than the one I asked for.
                 */
                LASSERT(list_empty(&peer->ksnp_conns));

                peer->ksnp_proto = conn->ksnc_proto;
                peer->ksnp_incarnation = incarnation;
        }

        if (peer->ksnp_proto != conn->ksnc_proto ||
            peer->ksnp_incarnation != incarnation) {
                /* Peer rebooted or I've got the wrong protocol version */
                ksocknal_close_peer_conns_locked(peer, 0, 0);

                peer->ksnp_proto = NULL;
                rc = ESTALE;
                warn = peer->ksnp_incarnation != incarnation ?
                       "peer rebooted" :
                       "wrong proto version";
                goto failed_2;
        }

        /* dispatch non-fatal conditions carried up from recv_hello */
        switch (rc) {
        default:
                LBUG();
        case 0:
                break;
        case EALREADY:
                warn = "lost conn race";
                goto failed_2;
        case EPROTO:
                warn = "retry with different protocol version";
                goto failed_2;
        }

        /*
         * Refuse to duplicate an existing connection, unless this is a
         * loopback connection
         */
        if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
                list_for_each(tmp, &peer->ksnp_conns) {
                        conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);

                        if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
                            conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
                            conn2->ksnc_type != conn->ksnc_type)
                                continue;

                        /*
                         * Reply on a passive connection attempt so the peer
                         * realises we're connected.
                         */
                        LASSERT(!rc);
                        if (!active)
                                rc = EALREADY;

                        warn = "duplicate";
                        goto failed_2;
                }
        }

        /*
         * If the connection created by this route didn't bind to the IP
         * address the route connected to, the connection/route matching
         * code below probably isn't going to work.
         */
        if (active &&
            route->ksnr_ipaddr != conn->ksnc_ipaddr) {
                CERROR("Route %s %pI4h connected to %pI4h\n",
                       libcfs_id2str(peer->ksnp_id),
                       &route->ksnr_ipaddr,
                       &conn->ksnc_ipaddr);
        }

        /*
         * Search for a route corresponding to the new connection and
         * create an association.  This allows incoming connections created
         * by routes in my peer to match my own route entries so I don't
         * continually create duplicate routes.
         */
        list_for_each(tmp, &peer->ksnp_routes) {
                route = list_entry(tmp, struct ksock_route, ksnr_list);

                if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
                        continue;

                ksocknal_associate_route_conn_locked(route, conn);
                break;
        }

        conn->ksnc_peer = peer;          /* conn takes my ref on peer */
        peer->ksnp_last_alive = cfs_time_current();
        peer->ksnp_send_keepalive = 0;
        peer->ksnp_error = 0;

        sched = ksocknal_choose_scheduler_locked(cpt);
        sched->kss_nconns++;
        conn->ksnc_scheduler = sched;

        conn->ksnc_tx_last_post = cfs_time_current();
        /* Set the deadline for the outgoing HELLO to drain */
        conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
        conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
        mb();   /* order with adding to peer's conn list */

        list_add(&conn->ksnc_list, &peer->ksnp_conns);
        ksocknal_conn_addref(conn);

        ksocknal_new_packet(conn, 0);

        conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);

        /* Take packets blocking for this connection. */
        list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
                int match = conn->ksnc_proto->pro_match_tx(conn, tx,
                                                           tx->tx_nonblk);

                if (match == SOCKNAL_MATCH_NO)
                        continue;

                list_del(&tx->tx_list);
                ksocknal_queue_tx_locked(tx, conn);
        }

        write_unlock_bh(global_lock);

        /*
         * We've now got a new connection.  Any errors from here on are just
         * like "normal" comms errors and we close the connection normally.
         * NB (a) we still have to send the reply HELLO for passive
         *      connections,
         *    (b) normal I/O on the conn is blocked until I setup and call the
         *      socket callbacks.
         */
        CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
               libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
               &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
               conn->ksnc_port, incarnation, cpt,
               (int)(sched - &sched->kss_info->ksi_scheds[0]));

        if (active) {
                /* additional routes after interface exchange? */
                ksocknal_create_routes(peer, conn->ksnc_port,
                                       hello->kshm_ips, hello->kshm_nips);
        } else {
                hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
                                                       hello->kshm_nips);
                rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
        }

        LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
                                    kshm_ips[LNET_MAX_INTERFACES]));

        /*
         * setup the socket AFTER I've received hello (it disables
         * SO_LINGER).  I might call back to the acceptor who may want
         * to send a protocol version response and then close the
         * socket; this ensures the socket only tears down after the
         * response has been sent.
         */
        if (!rc)
                rc = ksocknal_lib_setup_sock(sock);

        write_lock_bh(global_lock);

        /* NB my callbacks block while I hold ksnd_global_lock */
        ksocknal_lib_set_callback(sock, conn);

        if (!active)
                peer->ksnp_accepting--;

        write_unlock_bh(global_lock);

        if (rc) {
                write_lock_bh(global_lock);
                if (!conn->ksnc_closing) {
                        /* could be closed by another thread */
                        ksocknal_close_conn_locked(conn, rc);
                }
                write_unlock_bh(global_lock);
        } else if (!ksocknal_connsock_addref(conn)) {
                /* Allow I/O to proceed. */
                ksocknal_read_callback(conn);
                ksocknal_write_callback(conn);
                ksocknal_connsock_decref(conn);
        }

        /* drop the extra socket ref and my conn ref taken at creation */
        ksocknal_connsock_decref(conn);
        ksocknal_conn_decref(conn);
        return rc;

 failed_2:
        /* entered with global_lock write-held */
        if (!peer->ksnp_closing &&
            list_empty(&peer->ksnp_conns) &&
            list_empty(&peer->ksnp_routes)) {
                list_add(&zombies, &peer->ksnp_tx_queue);
                list_del_init(&peer->ksnp_tx_queue);
                ksocknal_unlink_peer_locked(peer);
        }

        write_unlock_bh(global_lock);

        if (warn) {
                if (rc < 0)
                        CERROR("Not creating conn %s type %d: %s\n",
                               libcfs_id2str(peerid), conn->ksnc_type, warn);
                else
                        CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
                               libcfs_id2str(peerid), conn->ksnc_type, warn);
        }

        if (!active) {
                if (rc > 0) {
                        /*
                         * Request retry by replying with CONN_NONE
                         * ksnc_proto has been set already
                         */
                        conn->ksnc_type = SOCKLND_CONN_NONE;
                        hello->kshm_nips = 0;
                        ksocknal_send_hello(ni, conn, peerid.nid, hello);
                }

                write_lock_bh(global_lock);
                peer->ksnp_accepting--;
                write_unlock_bh(global_lock);
        }

        ksocknal_txlist_done(ni, &zombies, 1);
        ksocknal_peer_decref(peer);

failed_1:
        if (hello)
                LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
                                            kshm_ips[LNET_MAX_INTERFACES]));

        LIBCFS_FREE(conn, sizeof(*conn));

failed_0:
        sock_release(sock);
        return rc;
}
1431
/*
 * Begin tearing down @conn with close reason @error: mark it closing,
 * dissociate it from its route, requeue/stash peer state as needed and
 * hand the conn to the reaper's deathrow list for termination.
 */
void
ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
{
        /*
         * This just does the immmediate housekeeping, and queues the
         * connection for the reaper to terminate.
         * Caller holds ksnd_global_lock exclusively in irq context
         */
        struct ksock_peer *peer = conn->ksnc_peer;
        struct ksock_route *route;
        struct ksock_conn *conn2;
        struct list_head *tmp;

        LASSERT(!peer->ksnp_error);
        LASSERT(!conn->ksnc_closing);
        conn->ksnc_closing = 1;

        /* ksnd_deathrow_conns takes over peer's ref */
        list_del(&conn->ksnc_list);

        route = conn->ksnc_route;
        if (route) {
                /* dissociate conn from route... */
                LASSERT(!route->ksnr_deleted);
                LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));

                /* only clear the route's connected bit when no other conn
                 * of the same type still uses this route
                 */
                conn2 = NULL;
                list_for_each(tmp, &peer->ksnp_conns) {
                        conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);

                        if (conn2->ksnc_route == route &&
                            conn2->ksnc_type == conn->ksnc_type)
                                break;

                        conn2 = NULL;
                }
                if (!conn2)
                        route->ksnr_connected &= ~(1 << conn->ksnc_type);

                conn->ksnc_route = NULL;

                ksocknal_route_decref(route);     /* drop conn's ref on route */
        }

        if (list_empty(&peer->ksnp_conns)) {
                /* No more connections to this peer */

                if (!list_empty(&peer->ksnp_tx_queue)) {
                        struct ksock_tx *tx;

                        LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);

                        /*
                         * throw them to the last connection...,
                         * these TXs will be send to /dev/null by scheduler
                         */
                        list_for_each_entry(tx, &peer->ksnp_tx_queue,
                                            tx_list)
                                ksocknal_tx_prep(conn, tx);

                        spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
                        list_splice_init(&peer->ksnp_tx_queue,
                                         &conn->ksnc_tx_queue);
                        spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
                }

                peer->ksnp_proto = NULL;  /* renegotiate protocol version */
                peer->ksnp_error = error; /* stash last conn close reason */

                if (list_empty(&peer->ksnp_routes)) {
                        /*
                         * I've just closed last conn belonging to a
                         * peer with no routes to it
                         */
                        ksocknal_unlink_peer_locked(peer);
                }
        }

        /* queue on deathrow and wake the reaper to terminate the conn */
        spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);

        list_add_tail(&conn->ksnc_list,
                      &ksocknal_data.ksnd_deathrow_conns);
        wake_up(&ksocknal_data.ksnd_reaper_waitq);

        spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
}
1518
1519 void
1520 ksocknal_peer_failed(struct ksock_peer *peer)
1521 {
1522         int notify = 0;
1523         unsigned long last_alive = 0;
1524
1525         /*
1526          * There has been a connection failure or comms error; but I'll only
1527          * tell LNET I think the peer is dead if it's to another kernel and
1528          * there are no connections or connection attempts in existence.
1529          */
1530         read_lock(&ksocknal_data.ksnd_global_lock);
1531
1532         if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
1533             list_empty(&peer->ksnp_conns) &&
1534             !peer->ksnp_accepting &&
1535             !ksocknal_find_connecting_route_locked(peer)) {
1536                 notify = 1;
1537                 last_alive = peer->ksnp_last_alive;
1538         }
1539
1540         read_unlock(&ksocknal_data.ksnd_global_lock);
1541
1542         if (notify)
1543                 lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
1544                             last_alive);
1545 }
1546
1547 void
1548 ksocknal_finalize_zcreq(struct ksock_conn *conn)
1549 {
1550         struct ksock_peer *peer = conn->ksnc_peer;
1551         struct ksock_tx *tx;
1552         struct ksock_tx *temp;
1553         struct ksock_tx *tmp;
1554         LIST_HEAD(zlist);
1555
1556         /*
1557          * NB safe to finalize TXs because closing of socket will
1558          * abort all buffered data
1559          */
1560         LASSERT(!conn->ksnc_sock);
1561
1562         spin_lock(&peer->ksnp_lock);
1563
1564         list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
1565                 if (tx->tx_conn != conn)
1566                         continue;
1567
1568                 LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
1569
1570                 tx->tx_msg.ksm_zc_cookies[0] = 0;
1571                 tx->tx_zc_aborted = 1; /* mark it as not-acked */
1572                 list_del(&tx->tx_zc_list);
1573                 list_add(&tx->tx_zc_list, &zlist);
1574         }
1575
1576         spin_unlock(&peer->ksnp_lock);
1577
1578         list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
1579                 list_del(&tx->tx_zc_list);
1580                 ksocknal_tx_decref(tx);
1581         }
1582 }
1583
/*
 * Disengage @conn from its socket callbacks and drop the final socket
 * ref (which closes the socket).  Runs in the reaper's thread context;
 * the conn must already be marked closing.  Also flushes any queued TXs
 * to the scheduler and reports peer failure when this was the peer's
 * last conn and it closed in error.
 */
void
ksocknal_terminate_conn(struct ksock_conn *conn)
{
        /*
         * This gets called by the reaper (guaranteed thread context) to
         * disengage the socket from its callbacks and close it.
         * ksnc_refcount will eventually hit zero, and then the reaper will
         * destroy it.
         */
        struct ksock_peer *peer = conn->ksnc_peer;
        struct ksock_sched *sched = conn->ksnc_scheduler;
        int failed = 0;

        LASSERT(conn->ksnc_closing);

        /* wake up the scheduler to "send" all remaining packets to /dev/null */
        spin_lock_bh(&sched->kss_lock);

        /* a closing conn is always ready to tx */
        conn->ksnc_tx_ready = 1;

        if (!conn->ksnc_tx_scheduled &&
            !list_empty(&conn->ksnc_tx_queue)) {
                list_add_tail(&conn->ksnc_tx_list,
                              &sched->kss_tx_conns);
                conn->ksnc_tx_scheduled = 1;
                /* extra ref for scheduler */
                ksocknal_conn_addref(conn);

                wake_up(&sched->kss_waitq);
        }

        spin_unlock_bh(&sched->kss_lock);

        /* serialise with callbacks */
        write_lock_bh(&ksocknal_data.ksnd_global_lock);

        ksocknal_lib_reset_callback(conn->ksnc_sock, conn);

        /*
         * OK, so this conn may not be completely disengaged from its
         * scheduler yet, but it _has_ committed to terminate...
         */
        conn->ksnc_scheduler->kss_nconns--;

        if (peer->ksnp_error) {
                /* peer's last conn closed in error */
                LASSERT(list_empty(&peer->ksnp_conns));
                failed = 1;
                peer->ksnp_error = 0;     /* avoid multiple notifications */
        }

        write_unlock_bh(&ksocknal_data.ksnd_global_lock);

        /* notify LNET outside the global lock */
        if (failed)
                ksocknal_peer_failed(peer);

        /*
         * The socket is closed on the final put; either here, or in
         * ksocknal_{send,recv}msg().  Since we set up the linger2 option
         * when the connection was established, this will close the socket
         * immediately, aborting anything buffered in it. Any hung
         * zero-copy transmits will therefore complete in finite time.
         */
        ksocknal_connsock_decref(conn);
}
1650
1651 void
1652 ksocknal_queue_zombie_conn(struct ksock_conn *conn)
1653 {
1654         /* Queue the conn for the reaper to destroy */
1655
1656         LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
1657         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1658
1659         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1660         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1661
1662         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1663 }
1664
1665 void
1666 ksocknal_destroy_conn(struct ksock_conn *conn)
1667 {
1668         unsigned long last_rcv;
1669
1670         /* Final coup-de-grace of the reaper */
1671         CDEBUG(D_NET, "connection %p\n", conn);
1672
1673         LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
1674         LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
1675         LASSERT(!conn->ksnc_sock);
1676         LASSERT(!conn->ksnc_route);
1677         LASSERT(!conn->ksnc_tx_scheduled);
1678         LASSERT(!conn->ksnc_rx_scheduled);
1679         LASSERT(list_empty(&conn->ksnc_tx_queue));
1680
1681         /* complete current receive if any */
1682         switch (conn->ksnc_rx_state) {
1683         case SOCKNAL_RX_LNET_PAYLOAD:
1684                 last_rcv = conn->ksnc_rx_deadline -
1685                            cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
1686                 CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
1687                        libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
1688                        &conn->ksnc_ipaddr, conn->ksnc_port,
1689                        iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
1690                        cfs_duration_sec(cfs_time_sub(cfs_time_current(),
1691                                                      last_rcv)));
1692                 lnet_finalize(conn->ksnc_peer->ksnp_ni,
1693                               conn->ksnc_cookie, -EIO);
1694                 break;
1695         case SOCKNAL_RX_LNET_HEADER:
1696                 if (conn->ksnc_rx_started)
1697                         CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
1698                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1699                                &conn->ksnc_ipaddr, conn->ksnc_port,
1700                                conn->ksnc_proto->pro_version);
1701                 break;
1702         case SOCKNAL_RX_KSM_HEADER:
1703                 if (conn->ksnc_rx_started)
1704                         CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
1705                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1706                                &conn->ksnc_ipaddr, conn->ksnc_port,
1707                                conn->ksnc_proto->pro_version);
1708                 break;
1709         case SOCKNAL_RX_SLOP:
1710                 if (conn->ksnc_rx_started)
1711                         CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
1712                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1713                                &conn->ksnc_ipaddr, conn->ksnc_port);
1714                break;
1715         default:
1716                 LBUG();
1717                 break;
1718         }
1719
1720         ksocknal_peer_decref(conn->ksnc_peer);
1721
1722         LIBCFS_FREE(conn, sizeof(*conn));
1723 }
1724
1725 int
1726 ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
1727 {
1728         struct ksock_conn *conn;
1729         struct list_head *ctmp;
1730         struct list_head *cnxt;
1731         int count = 0;
1732
1733         list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
1734                 conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
1735
1736                 if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
1737                         count++;
1738                         ksocknal_close_conn_locked(conn, why);
1739                 }
1740         }
1741
1742         return count;
1743 }
1744
1745 int
1746 ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
1747 {
1748         struct ksock_peer *peer = conn->ksnc_peer;
1749         __u32 ipaddr = conn->ksnc_ipaddr;
1750         int count;
1751
1752         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1753
1754         count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
1755
1756         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1757
1758         return count;
1759 }
1760
1761 int
1762 ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
1763 {
1764         struct ksock_peer *peer;
1765         struct list_head *ptmp;
1766         struct list_head *pnxt;
1767         int lo;
1768         int hi;
1769         int i;
1770         int count = 0;
1771
1772         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1773
1774         if (id.nid != LNET_NID_ANY) {
1775                 lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1776                 hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1777         } else {
1778                 lo = 0;
1779                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
1780         }
1781
1782         for (i = lo; i <= hi; i++) {
1783                 list_for_each_safe(ptmp, pnxt,
1784                                    &ksocknal_data.ksnd_peers[i]) {
1785                         peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
1786
1787                         if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
1788                               (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
1789                                 continue;
1790
1791                         count += ksocknal_close_peer_conns_locked(peer, ipaddr,
1792                                                                   0);
1793                 }
1794         }
1795
1796         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1797
1798         /* wildcards always succeed */
1799         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
1800                 return 0;
1801
1802         if (!count)
1803                 return -ENOENT;
1804         else
1805                 return 0;
1806 }
1807
1808 void
1809 ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
1810 {
1811         /*
1812          * The router is telling me she's been notified of a change in
1813          * gateway state....
1814          */
1815         struct lnet_process_id id = {0};
1816
1817         id.nid = gw_nid;
1818         id.pid = LNET_PID_ANY;
1819
1820         CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
1821                alive ? "up" : "down");
1822
1823         if (!alive) {
1824                 /* If the gateway crashed, close all open connections... */
1825                 ksocknal_close_matching_conns(id, 0);
1826                 return;
1827         }
1828
1829         /*
1830          * ...otherwise do nothing.  We can only establish new connections
1831          * if we have autroutes, and these connect on demand.
1832          */
1833 }
1834
/*
 * LNet "query" handler: report via *when the last time @nid was known
 * alive (*when is left untouched if we have no information), refresh
 * per-conn liveness from TCP send-buffer drain, and if the peer has a
 * connectable route, (re-)create the peer and launch connections.
 */
void
ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
{
	int connect = 1;
	unsigned long last_alive = 0;
	unsigned long now = cfs_time_current();
	struct ksock_peer *peer = NULL;
	rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
	struct lnet_process_id id = {
		.nid = nid,
		.pid = LNET_PID_LUSTRE,
	};

	read_lock(glock);

	peer = ksocknal_find_peer_locked(ni, id);
	if (peer) {
		struct ksock_conn *conn;
		int bufnob;

		list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
			bufnob = conn->ksnc_sock->sk->sk_wmem_queued;

			/* queued bytes dropped since we last looked */
			if (bufnob < conn->ksnc_tx_bufnob) {
				/* something got ACKed */
				conn->ksnc_tx_deadline =
					cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
				peer->ksnp_last_alive = now;
				conn->ksnc_tx_bufnob = bufnob;
			}
		}

		last_alive = peer->ksnp_last_alive;
		if (!ksocknal_find_connectable_route_locked(peer))
			connect = 0;
	}

	read_unlock(glock);

	if (last_alive)
		*when = last_alive;

	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
	       libcfs_nid2str(nid), peer,
	       last_alive ? cfs_duration_sec(now - last_alive) : -1,
	       connect);

	if (!connect)
		return;

	/* NOTE(review): the return value of ksocknal_add_peer() is ignored;
	 * on failure the write-locked lookup below simply finds no peer and
	 * nothing is launched — presumably intentional best-effort; confirm.
	 */
	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());

	write_lock_bh(glock);

	/* re-lookup: the peer may have been created (or raced) above */
	peer = ksocknal_find_peer_locked(ni, id);
	if (peer)
		ksocknal_launch_all_connections_locked(peer);

	write_unlock_bh(glock);
}
1895
1896 static void
1897 ksocknal_push_peer(struct ksock_peer *peer)
1898 {
1899         int index;
1900         int i;
1901         struct list_head *tmp;
1902         struct ksock_conn *conn;
1903
1904         for (index = 0; ; index++) {
1905                 read_lock(&ksocknal_data.ksnd_global_lock);
1906
1907                 i = 0;
1908                 conn = NULL;
1909
1910                 list_for_each(tmp, &peer->ksnp_conns) {
1911                         if (i++ == index) {
1912                                 conn = list_entry(tmp, struct ksock_conn,
1913                                                   ksnc_list);
1914                                 ksocknal_conn_addref(conn);
1915                                 break;
1916                         }
1917                 }
1918
1919                 read_unlock(&ksocknal_data.ksnd_global_lock);
1920
1921                 if (!conn)
1922                         break;
1923
1924                 ksocknal_lib_push_conn(conn);
1925                 ksocknal_conn_decref(conn);
1926         }
1927 }
1928
1929 static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
1930 {
1931         struct list_head *start;
1932         struct list_head *end;
1933         struct list_head *tmp;
1934         int rc = -ENOENT;
1935         unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
1936
1937         if (id.nid == LNET_NID_ANY) {
1938                 start = &ksocknal_data.ksnd_peers[0];
1939                 end = &ksocknal_data.ksnd_peers[hsize - 1];
1940         } else {
1941                 start = ksocknal_nid2peerlist(id.nid);
1942                 end = ksocknal_nid2peerlist(id.nid);
1943         }
1944
1945         for (tmp = start; tmp <= end; tmp++) {
1946                 int peer_off; /* searching offset in peer hash table */
1947
1948                 for (peer_off = 0; ; peer_off++) {
1949                         struct ksock_peer *peer;
1950                         int i = 0;
1951
1952                         read_lock(&ksocknal_data.ksnd_global_lock);
1953                         list_for_each_entry(peer, tmp, ksnp_list) {
1954                                 if (!((id.nid == LNET_NID_ANY ||
1955                                        id.nid == peer->ksnp_id.nid) &&
1956                                       (id.pid == LNET_PID_ANY ||
1957                                        id.pid == peer->ksnp_id.pid)))
1958                                         continue;
1959
1960                                 if (i++ == peer_off) {
1961                                         ksocknal_peer_addref(peer);
1962                                         break;
1963                                 }
1964                         }
1965                         read_unlock(&ksocknal_data.ksnd_global_lock);
1966
1967                         if (!i) /* no match */
1968                                 break;
1969
1970                         rc = 0;
1971                         ksocknal_push_peer(peer);
1972                         ksocknal_peer_decref(peer);
1973                 }
1974         }
1975         return rc;
1976 }
1977
1978 static int
1979 ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
1980 {
1981         struct ksock_net *net = ni->ni_data;
1982         struct ksock_interface *iface;
1983         int rc;
1984         int i;
1985         int j;
1986         struct list_head *ptmp;
1987         struct ksock_peer *peer;
1988         struct list_head *rtmp;
1989         struct ksock_route *route;
1990
1991         if (!ipaddress || !netmask)
1992                 return -EINVAL;
1993
1994         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1995
1996         iface = ksocknal_ip2iface(ni, ipaddress);
1997         if (iface) {
1998                 /* silently ignore dups */
1999                 rc = 0;
2000         } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
2001                 rc = -ENOSPC;
2002         } else {
2003                 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
2004
2005                 iface->ksni_ipaddr = ipaddress;
2006                 iface->ksni_netmask = netmask;
2007                 iface->ksni_nroutes = 0;
2008                 iface->ksni_npeers = 0;
2009
2010                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2011                         list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
2012                                 peer = list_entry(ptmp, struct ksock_peer,
2013                                                   ksnp_list);
2014
2015                                 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
2016                                         if (peer->ksnp_passive_ips[j] == ipaddress)
2017                                                 iface->ksni_npeers++;
2018
2019                                 list_for_each(rtmp, &peer->ksnp_routes) {
2020                                         route = list_entry(rtmp, struct ksock_route,
2021                                                            ksnr_list);
2022
2023                                         if (route->ksnr_myipaddr == ipaddress)
2024                                                 iface->ksni_nroutes++;
2025                                 }
2026                         }
2027                 }
2028
2029                 rc = 0;
2030                 /*
2031                  * NB only new connections will pay attention to the
2032                  * new interface!
2033                  */
2034         }
2035
2036         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2037
2038         return rc;
2039 }
2040
2041 static void
2042 ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
2043 {
2044         struct list_head *tmp;
2045         struct list_head *nxt;
2046         struct ksock_route *route;
2047         struct ksock_conn *conn;
2048         int i;
2049         int j;
2050
2051         for (i = 0; i < peer->ksnp_n_passive_ips; i++)
2052                 if (peer->ksnp_passive_ips[i] == ipaddr) {
2053                         for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
2054                                 peer->ksnp_passive_ips[j - 1] =
2055                                         peer->ksnp_passive_ips[j];
2056                         peer->ksnp_n_passive_ips--;
2057                         break;
2058                 }
2059
2060         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
2061                 route = list_entry(tmp, struct ksock_route, ksnr_list);
2062
2063                 if (route->ksnr_myipaddr != ipaddr)
2064                         continue;
2065
2066                 if (route->ksnr_share_count) {
2067                         /* Manually created; keep, but unbind */
2068                         route->ksnr_myipaddr = 0;
2069                 } else {
2070                         ksocknal_del_route_locked(route);
2071                 }
2072         }
2073
2074         list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
2075                 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
2076
2077                 if (conn->ksnc_myipaddr == ipaddr)
2078                         ksocknal_close_conn_locked(conn, 0);
2079         }
2080 }
2081
2082 static int
2083 ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
2084 {
2085         struct ksock_net *net = ni->ni_data;
2086         int rc = -ENOENT;
2087         struct list_head *tmp;
2088         struct list_head *nxt;
2089         struct ksock_peer *peer;
2090         __u32 this_ip;
2091         int i;
2092         int j;
2093
2094         write_lock_bh(&ksocknal_data.ksnd_global_lock);
2095
2096         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2097                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
2098
2099                 if (!(!ipaddress || ipaddress == this_ip))
2100                         continue;
2101
2102                 rc = 0;
2103
2104                 for (j = i + 1; j < net->ksnn_ninterfaces; j++)
2105                         net->ksnn_interfaces[j - 1] =
2106                                 net->ksnn_interfaces[j];
2107
2108                 net->ksnn_ninterfaces--;
2109
2110                 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
2111                         list_for_each_safe(tmp, nxt,
2112                                            &ksocknal_data.ksnd_peers[j]) {
2113                                 peer = list_entry(tmp, struct ksock_peer, ksnp_list);
2114
2115                                 if (peer->ksnp_ni != ni)
2116                                         continue;
2117
2118                                 ksocknal_peer_del_interface_locked(peer, this_ip);
2119                         }
2120                 }
2121         }
2122
2123         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2124
2125         return rc;
2126 }
2127
/*
 * ioctl dispatcher for the socklnd: translates IOC_LIBCFS_* commands
 * into the corresponding interface/peer/connection operations.
 * Returns 0 on success or a negative errno; unknown commands get
 * -EINVAL.
 */
int
ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
{
	struct lnet_process_id id = {0};
	struct libcfs_ioctl_data *data = arg;
	int rc;

	switch (cmd) {
	/* enumerate local interfaces; ioc_count is the iteration index */
	case IOC_LIBCFS_GET_INTERFACE: {
		struct ksock_net       *net = ni->ni_data;
		struct ksock_interface *iface;

		read_lock(&ksocknal_data.ksnd_global_lock);

		if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
			rc = -ENOENT;
		} else {
			rc = 0;
			iface = &net->ksnn_interfaces[data->ioc_count];

			data->ioc_u32[0] = iface->ksni_ipaddr;
			data->ioc_u32[1] = iface->ksni_netmask;
			data->ioc_u32[2] = iface->ksni_npeers;
			data->ioc_u32[3] = iface->ksni_nroutes;
		}

		read_unlock(&ksocknal_data.ksnd_global_lock);
		return rc;
	}

	case IOC_LIBCFS_ADD_INTERFACE:
		return ksocknal_add_interface(ni,
					      data->ioc_u32[0], /* IP address */
					      data->ioc_u32[1]); /* net mask */

	case IOC_LIBCFS_DEL_INTERFACE:
		return ksocknal_del_interface(ni,
					      data->ioc_u32[0]); /* IP address */

	/* enumerate peers; ioc_count is the iteration index on input and
	 * the share count on output
	 */
	case IOC_LIBCFS_GET_PEER: {
		__u32 myip = 0;
		__u32 ip = 0;
		int port = 0;
		int conn_count = 0;
		int share_count = 0;

		rc = ksocknal_get_peer_info(ni, data->ioc_count,
					    &id, &myip, &ip, &port,
					    &conn_count,  &share_count);
		if (rc)
			return rc;

		data->ioc_nid    = id.nid;
		data->ioc_count  = share_count;
		data->ioc_u32[0] = ip;
		data->ioc_u32[1] = port;
		data->ioc_u32[2] = myip;
		data->ioc_u32[3] = conn_count;
		data->ioc_u32[4] = id.pid;
		return 0;
	}

	case IOC_LIBCFS_ADD_PEER:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_LUSTRE;
		return ksocknal_add_peer(ni, id,
					  data->ioc_u32[0], /* IP */
					  data->ioc_u32[1]); /* port */

	case IOC_LIBCFS_DEL_PEER:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_del_peer(ni, id,
					  data->ioc_u32[0]); /* IP */

	/* report tunables and addressing of the ioc_count'th connection */
	case IOC_LIBCFS_GET_CONN: {
		int txmem;
		int rxmem;
		int nagle;
		struct ksock_conn *conn;

		conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
		if (!conn)
			return -ENOENT;

		ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);

		data->ioc_count  = txmem;
		data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
		data->ioc_flags  = nagle;
		data->ioc_u32[0] = conn->ksnc_ipaddr;
		data->ioc_u32[1] = conn->ksnc_port;
		data->ioc_u32[2] = conn->ksnc_myipaddr;
		data->ioc_u32[3] = conn->ksnc_type;
		data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
		data->ioc_u32[5] = rxmem;
		data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
		ksocknal_conn_decref(conn);
		return 0;
	}

	case IOC_LIBCFS_CLOSE_CONNECTION:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_close_matching_conns(id,
						      data->ioc_u32[0]);

	case IOC_LIBCFS_REGISTER_MYNID:
		/* Ignore if this is a noop */
		if (data->ioc_nid == ni->ni_nid)
			return 0;

		CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
		       libcfs_nid2str(data->ioc_nid),
		       libcfs_nid2str(ni->ni_nid));
		return -EINVAL;

	case IOC_LIBCFS_PUSH_CONNECTION:
		id.nid = data->ioc_nid;
		id.pid = LNET_PID_ANY;
		return ksocknal_push(ni, id);

	default:
		return -EINVAL;
	}
	/* not reached */
}
2255
2256 static void
2257 ksocknal_free_buffers(void)
2258 {
2259         LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
2260
2261         if (ksocknal_data.ksnd_sched_info) {
2262                 struct ksock_sched_info *info;
2263                 int i;
2264
2265                 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2266                         if (info->ksi_scheds) {
2267                                 LIBCFS_FREE(info->ksi_scheds,
2268                                             info->ksi_nthreads_max *
2269                                             sizeof(info->ksi_scheds[0]));
2270                         }
2271                 }
2272                 cfs_percpt_free(ksocknal_data.ksnd_sched_info);
2273         }
2274
2275         LIBCFS_FREE(ksocknal_data.ksnd_peers,
2276                     sizeof(struct list_head) *
2277                     ksocknal_data.ksnd_peer_hash_size);
2278
2279         spin_lock(&ksocknal_data.ksnd_tx_lock);
2280
2281         if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2282                 struct list_head zlist;
2283                 struct ksock_tx *tx;
2284                 struct ksock_tx *temp;
2285
2286                 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
2287                 list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
2288                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2289
2290                 list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
2291                         list_del(&tx->tx_list);
2292                         LIBCFS_FREE(tx, tx->tx_desc_size);
2293                 }
2294         } else {
2295                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2296         }
2297 }
2298
/*
 * Tear down global socklnd state, the inverse of ksocknal_base_startup().
 * All networks must already be gone (ksnd_nnets == 0).  Asserts that
 * every queue is idle, flags shutdown, wakes all service threads, polls
 * until they have exited, then frees the remaining buffers and drops the
 * module reference taken at startup.
 */
static void
ksocknal_base_shutdown(void)
{
	struct ksock_sched_info *info;
	struct ksock_sched *sched;
	int i;
	int j;

	LASSERT(!ksocknal_data.ksnd_nnets);

	switch (ksocknal_data.ksnd_init) {
	default:
		LASSERT(0);
		/* fall through */
	case SOCKNAL_INIT_ALL:
	case SOCKNAL_INIT_DATA:
		/* verify everything is idle before dismantling it */
		LASSERT(ksocknal_data.ksnd_peers);
		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
			LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));

		LASSERT(list_empty(&ksocknal_data.ksnd_nets));
		LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
		LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
		LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
		LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));

		if (ksocknal_data.ksnd_sched_info) {
			cfs_percpt_for_each(info, i,
					    ksocknal_data.ksnd_sched_info) {
				if (!info->ksi_scheds)
					continue;

				for (j = 0; j < info->ksi_nthreads_max; j++) {
					sched = &info->ksi_scheds[j];
					LASSERT(list_empty(
						&sched->kss_tx_conns));
					LASSERT(list_empty(
						&sched->kss_rx_conns));
					LASSERT(list_empty(
						&sched->kss_zombie_noop_txs));
					LASSERT(!sched->kss_nconns);
				}
			}
		}

		/* flag threads to terminate; wake and wait for them to die */
		ksocknal_data.ksnd_shuttingdown = 1;
		wake_up_all(&ksocknal_data.ksnd_connd_waitq);
		wake_up_all(&ksocknal_data.ksnd_reaper_waitq);

		if (ksocknal_data.ksnd_sched_info) {
			cfs_percpt_for_each(info, i,
					    ksocknal_data.ksnd_sched_info) {
				if (!info->ksi_scheds)
					continue;

				for (j = 0; j < info->ksi_nthreads_max; j++) {
					sched = &info->ksi_scheds[j];
					wake_up_all(&sched->kss_waitq);
				}
			}
		}

		/* poll once a second until the thread count hits zero,
		 * logging louder at power-of-2 iterations
		 */
		i = 4;
		read_lock(&ksocknal_data.ksnd_global_lock);
		while (ksocknal_data.ksnd_nthreads) {
			i++;
			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
			       "waiting for %d threads to terminate\n",
				ksocknal_data.ksnd_nthreads);
			read_unlock(&ksocknal_data.ksnd_global_lock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1));
			read_lock(&ksocknal_data.ksnd_global_lock);
		}
		read_unlock(&ksocknal_data.ksnd_global_lock);

		ksocknal_free_buffers();

		ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
		break;
	}

	module_put(THIS_MODULE);
}
2384
static __u64
ksocknal_new_incarnation(void)
{
	/* The incarnation number is the time this module loaded and it
	 * identifies this particular instance of the socknal (nanosecond
	 * resolution makes collisions across reloads unlikely).
	 */
	return ktime_get_ns();
}
2393
2394 static int
2395 ksocknal_base_startup(void)
2396 {
2397         struct ksock_sched_info *info;
2398         int rc;
2399         int i;
2400
2401         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2402         LASSERT(!ksocknal_data.ksnd_nnets);
2403
2404         memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
2405
2406         ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2407         LIBCFS_ALLOC(ksocknal_data.ksnd_peers,
2408                      sizeof(struct list_head) *
2409                      ksocknal_data.ksnd_peer_hash_size);
2410         if (!ksocknal_data.ksnd_peers)
2411                 return -ENOMEM;
2412
2413         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2414                 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2415
2416         rwlock_init(&ksocknal_data.ksnd_global_lock);
2417         INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
2418
2419         spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
2420         INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
2421         INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
2422         INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
2423         init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
2424
2425         spin_lock_init(&ksocknal_data.ksnd_connd_lock);
2426         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
2427         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
2428         init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
2429
2430         spin_lock_init(&ksocknal_data.ksnd_tx_lock);
2431         INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
2432
2433         /* NB memset above zeros whole of ksocknal_data */
2434
2435         /* flag lists/ptrs/locks initialised */
2436         ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2437         try_module_get(THIS_MODULE);
2438
2439         ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
2440                                                          sizeof(*info));
2441         if (!ksocknal_data.ksnd_sched_info)
2442                 goto failed;
2443
2444         cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2445                 struct ksock_sched *sched;
2446                 int nthrs;
2447
2448                 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
2449                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2450                         nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
2451                 } else {
2452                         /*
2453                          * max to half of CPUs, assume another half should be
2454                          * reserved for upper layer modules
2455                          */
2456                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2457                 }
2458
2459                 info->ksi_nthreads_max = nthrs;
2460                 info->ksi_cpt = i;
2461
2462                 LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
2463                                  info->ksi_nthreads_max * sizeof(*sched));
2464                 if (!info->ksi_scheds)
2465                         goto failed;
2466
2467                 for (; nthrs > 0; nthrs--) {
2468                         sched = &info->ksi_scheds[nthrs - 1];
2469
2470                         sched->kss_info = info;
2471                         spin_lock_init(&sched->kss_lock);
2472                         INIT_LIST_HEAD(&sched->kss_rx_conns);
2473                         INIT_LIST_HEAD(&sched->kss_tx_conns);
2474                         INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
2475                         init_waitqueue_head(&sched->kss_waitq);
2476                 }
2477         }
2478
2479         ksocknal_data.ksnd_connd_starting       = 0;
2480         ksocknal_data.ksnd_connd_failed_stamp   = 0;
2481         ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
2482         /*
2483          * must have at least 2 connds to remain responsive to accepts while
2484          * connecting
2485          */
2486         if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
2487                 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
2488
2489         if (*ksocknal_tunables.ksnd_nconnds_max <
2490             *ksocknal_tunables.ksnd_nconnds) {
2491                 ksocknal_tunables.ksnd_nconnds_max =
2492                         ksocknal_tunables.ksnd_nconnds;
2493         }
2494
2495         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2496                 char name[16];
2497
2498                 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2499                 ksocknal_data.ksnd_connd_starting++;
2500                 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2501
2502                 snprintf(name, sizeof(name), "socknal_cd%02d", i);
2503                 rc = ksocknal_thread_start(ksocknal_connd,
2504                                            (void *)((uintptr_t)i), name);
2505                 if (rc) {
2506                         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2507                         ksocknal_data.ksnd_connd_starting--;
2508                         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2509                         CERROR("Can't spawn socknal connd: %d\n", rc);
2510                         goto failed;
2511                 }
2512         }
2513
2514         rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
2515         if (rc) {
2516                 CERROR("Can't spawn socknal reaper: %d\n", rc);
2517                 goto failed;
2518         }
2519
2520         /* flag everything initialised */
2521         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2522
2523         return 0;
2524
2525  failed:
2526         ksocknal_base_shutdown();
2527         return -ENETDOWN;
2528 }
2529
2530 static void
2531 ksocknal_debug_peerhash(struct lnet_ni *ni)
2532 {
2533         struct ksock_peer *peer = NULL;
2534         struct list_head *tmp;
2535         int i;
2536
2537         read_lock(&ksocknal_data.ksnd_global_lock);
2538
2539         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2540                 list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
2541                         peer = list_entry(tmp, struct ksock_peer, ksnp_list);
2542
2543                         if (peer->ksnp_ni == ni)
2544                                 break;
2545
2546                         peer = NULL;
2547                 }
2548         }
2549
2550         if (peer) {
2551                 struct ksock_route *route;
2552                 struct ksock_conn  *conn;
2553
2554                 CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
2555                       libcfs_id2str(peer->ksnp_id),
2556                       atomic_read(&peer->ksnp_refcount),
2557                       peer->ksnp_sharecount, peer->ksnp_closing,
2558                       peer->ksnp_accepting, peer->ksnp_error,
2559                       peer->ksnp_zc_next_cookie,
2560                       !list_empty(&peer->ksnp_tx_queue),
2561                       !list_empty(&peer->ksnp_zc_req_list));
2562
2563                 list_for_each(tmp, &peer->ksnp_routes) {
2564                         route = list_entry(tmp, struct ksock_route, ksnr_list);
2565                         CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2566                               atomic_read(&route->ksnr_refcount),
2567                               route->ksnr_scheduled, route->ksnr_connecting,
2568                               route->ksnr_connected, route->ksnr_deleted);
2569                 }
2570
2571                 list_for_each(tmp, &peer->ksnp_conns) {
2572                         conn = list_entry(tmp, struct ksock_conn, ksnc_list);
2573                         CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
2574                               atomic_read(&conn->ksnc_conn_refcount),
2575                               atomic_read(&conn->ksnc_sock_refcount),
2576                               conn->ksnc_type, conn->ksnc_closing);
2577                 }
2578         }
2579
2580         read_unlock(&ksocknal_data.ksnd_global_lock);
2581 }
2582
/*
 * Shut down one socklnd network interface (struct lnet_ni).
 *
 * Deletes every peer attached to @ni, waits for the per-net peer count
 * to drain to zero, frees the ksock_net, and tears down the module-wide
 * state when the last net goes away.  Must only be called with the
 * module fully initialised (asserted below).
 */
void
ksocknal_shutdown(struct lnet_ni *ni)
{
	struct ksock_net *net = ni->ni_data;
	int i;
	struct lnet_process_id anyid = {0};

	/* wildcard id: matches every peer on this NI */
	anyid.nid = LNET_NID_ANY;
	anyid.pid = LNET_PID_ANY;

	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
	LASSERT(ksocknal_data.ksnd_nnets > 0);

	spin_lock_bh(&net->ksnn_lock);
	net->ksnn_shutdown = 1;          /* prevent new peers */
	spin_unlock_bh(&net->ksnn_lock);

	/* Delete all peers */
	ksocknal_del_peer(ni, anyid, 0);

	/* Wait for all peer state to clean up */
	i = 2;
	spin_lock_bh(&net->ksnn_lock);
	while (net->ksnn_npeers) {
		spin_unlock_bh(&net->ksnn_lock);

		/* log loudly only on power-of-2 iterations: exponential
		 * backoff on the warning while polling once per second
		 */
		i++;
		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
		       "waiting for %d peers to disconnect\n",
		       net->ksnn_npeers);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));

		/* dump whichever peer is still holding us up */
		ksocknal_debug_peerhash(ni);

		spin_lock_bh(&net->ksnn_lock);
	}
	spin_unlock_bh(&net->ksnn_lock);

	/* with no peers left, no interface may still be referenced */
	for (i = 0; i < net->ksnn_ninterfaces; i++) {
		LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
		LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
	}

	list_del(&net->ksnn_list);
	LIBCFS_FREE(net, sizeof(*net));

	/* last net gone: release the module-global state too */
	ksocknal_data.ksnd_nnets--;
	if (!ksocknal_data.ksnd_nnets)
		ksocknal_base_shutdown();
}
2634
2635 static int
2636 ksocknal_enumerate_interfaces(struct ksock_net *net)
2637 {
2638         char **names;
2639         int i;
2640         int j;
2641         int rc;
2642         int n;
2643
2644         n = lnet_ipif_enumerate(&names);
2645         if (n <= 0) {
2646                 CERROR("Can't enumerate interfaces: %d\n", n);
2647                 return n;
2648         }
2649
2650         for (i = j = 0; i < n; i++) {
2651                 int up;
2652                 __u32 ip;
2653                 __u32 mask;
2654
2655                 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2656                         continue;
2657
2658                 rc = lnet_ipif_query(names[i], &up, &ip, &mask);
2659                 if (rc) {
2660                         CWARN("Can't get interface %s info: %d\n",
2661                               names[i], rc);
2662                         continue;
2663                 }
2664
2665                 if (!up) {
2666                         CWARN("Ignoring interface %s (down)\n",
2667                               names[i]);
2668                         continue;
2669                 }
2670
2671                 if (j == LNET_MAX_INTERFACES) {
2672                         CWARN("Ignoring interface %s (too many interfaces)\n",
2673                               names[i]);
2674                         continue;
2675                 }
2676
2677                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2678                 net->ksnn_interfaces[j].ksni_netmask = mask;
2679                 strlcpy(net->ksnn_interfaces[j].ksni_name,
2680                         names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
2681                 j++;
2682         }
2683
2684         lnet_ipif_free_enumeration(names, n);
2685
2686         if (!j)
2687                 CERROR("Can't find any usable interfaces\n");
2688
2689         return j;
2690 }
2691
2692 static int
2693 ksocknal_search_new_ipif(struct ksock_net *net)
2694 {
2695         int new_ipif = 0;
2696         int i;
2697
2698         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2699                 char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
2700                 char *colon = strchr(ifnam, ':');
2701                 int found  = 0;
2702                 struct ksock_net *tmp;
2703                 int j;
2704
2705                 if (colon) /* ignore alias device */
2706                         *colon = 0;
2707
2708                 list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
2709                         for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
2710                                 char *ifnam2 =
2711                                         &tmp->ksnn_interfaces[j].ksni_name[0];
2712                                 char *colon2 = strchr(ifnam2, ':');
2713
2714                                 if (colon2)
2715                                         *colon2 = 0;
2716
2717                                 found = !strcmp(ifnam, ifnam2);
2718                                 if (colon2)
2719                                         *colon2 = ':';
2720                         }
2721                         if (found)
2722                                 break;
2723                 }
2724
2725                 new_ipif += !found;
2726                 if (colon)
2727                         *colon = ':';
2728         }
2729
2730         return new_ipif;
2731 }
2732
2733 static int
2734 ksocknal_start_schedulers(struct ksock_sched_info *info)
2735 {
2736         int nthrs;
2737         int rc = 0;
2738         int i;
2739
2740         if (!info->ksi_nthreads) {
2741                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2742                         nthrs = info->ksi_nthreads_max;
2743                 } else {
2744                         nthrs = cfs_cpt_weight(lnet_cpt_table(),
2745                                                info->ksi_cpt);
2746                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2747                         nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
2748                 }
2749                 nthrs = min(nthrs, info->ksi_nthreads_max);
2750         } else {
2751                 LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
2752                 /* increase two threads if there is new interface */
2753                 nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
2754         }
2755
2756         for (i = 0; i < nthrs; i++) {
2757                 long id;
2758                 char name[20];
2759                 struct ksock_sched *sched;
2760
2761                 id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
2762                 sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
2763                 snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
2764                          info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
2765
2766                 rc = ksocknal_thread_start(ksocknal_scheduler,
2767                                            (void *)id, name);
2768                 if (!rc)
2769                         continue;
2770
2771                 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2772                        info->ksi_cpt, info->ksi_nthreads + i, rc);
2773                 break;
2774         }
2775
2776         info->ksi_nthreads += i;
2777         return rc;
2778 }
2779
2780 static int
2781 ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
2782 {
2783         int newif = ksocknal_search_new_ipif(net);
2784         int rc;
2785         int i;
2786
2787         LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
2788
2789         for (i = 0; i < ncpts; i++) {
2790                 struct ksock_sched_info *info;
2791                 int cpt = !cpts ? i : cpts[i];
2792
2793                 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2794                 info = ksocknal_data.ksnd_sched_info[cpt];
2795
2796                 if (!newif && info->ksi_nthreads > 0)
2797                         continue;
2798
2799                 rc = ksocknal_start_schedulers(info);
2800                 if (rc)
2801                         return rc;
2802         }
2803         return 0;
2804 }
2805
/*
 * Bring up one socklnd network interface (struct lnet_ni).
 *
 * Lazily initialises the module-global state on first use, allocates the
 * per-net ksock_net, records the interface list (autodetected or taken
 * from ni_interfaces), starts scheduler threads, and derives the NID
 * from the first interface's IP address.
 *
 * Returns 0 on success.  Any failure after base startup returns
 * -ENETDOWN (the underlying errno is logged, not propagated) and undoes
 * the base startup if this was the first net.
 */
int
ksocknal_startup(struct lnet_ni *ni)
{
	struct ksock_net *net;
	int rc;
	int i;

	LASSERT(ni->ni_lnd == &the_ksocklnd);

	/* first net: bring up the module-global state */
	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
		rc = ksocknal_base_startup();
		if (rc)
			return rc;
	}

	LIBCFS_ALLOC(net, sizeof(*net));
	if (!net)
		goto fail_0;

	spin_lock_init(&net->ksnn_lock);
	net->ksnn_incarnation = ksocknal_new_incarnation();
	ni->ni_data = net;
	/* publish the tunable credit/timeout settings to LNet */
	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;

	if (!ni->ni_interfaces[0]) {
		/* nothing configured: autodetect, but deliberately use
		 * only the first usable interface even if more were found
		 */
		rc = ksocknal_enumerate_interfaces(net);
		if (rc <= 0)
			goto fail_1;

		net->ksnn_ninterfaces = 1;
	} else {
		/* explicit interface list: each entry must exist and be up */
		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
			int up;

			if (!ni->ni_interfaces[i])
				break;

			rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
					     &net->ksnn_interfaces[i].ksni_ipaddr,
					     &net->ksnn_interfaces[i].ksni_netmask);

			if (rc) {
				CERROR("Can't get interface %s info: %d\n",
				       ni->ni_interfaces[i], rc);
				goto fail_1;
			}

			if (!up) {
				CERROR("Interface %s is down\n",
				       ni->ni_interfaces[i]);
				goto fail_1;
			}

			strlcpy(net->ksnn_interfaces[i].ksni_name,
				ni->ni_interfaces[i],
				sizeof(net->ksnn_interfaces[i].ksni_name));
		}
		net->ksnn_ninterfaces = i;
	}

	/* call it before add it to ksocknal_data.ksnd_nets */
	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
	if (rc)
		goto fail_1;

	/* the NID embeds the first interface's IP address */
	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
				net->ksnn_interfaces[0].ksni_ipaddr);
	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);

	ksocknal_data.ksnd_nnets++;

	return 0;

 fail_1:
	LIBCFS_FREE(net, sizeof(*net));
 fail_0:
	/* if no net made it up, roll back the base startup as well */
	if (!ksocknal_data.ksnd_nnets)
		ksocknal_base_shutdown();

	return -ENETDOWN;
}
2890
/* Module unload: unregister the socklnd LND type from LNet */
static void __exit ksocklnd_exit(void)
{
	lnet_unregister_lnd(&the_ksocklnd);
}
2895
2896 static int __init ksocklnd_init(void)
2897 {
2898         int rc;
2899
2900         /* check ksnr_connected/connecting field large enough */
2901         BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
2902         BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
2903
2904         /* initialize the_ksocklnd */
2905         the_ksocklnd.lnd_type     = SOCKLND;
2906         the_ksocklnd.lnd_startup  = ksocknal_startup;
2907         the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
2908         the_ksocklnd.lnd_ctl      = ksocknal_ctl;
2909         the_ksocklnd.lnd_send     = ksocknal_send;