2 Unix SMB/CIFS implementation.
3 Infrastructure for SMB-Direct RDMA as transport
4 Copyright (C) Stefan Metzmacher 2012,2016
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
23 #include "lib/util/tevent_ntstatus.h"
24 #include "lib/tsocket/tsocket.h"
25 #include "lib/util/util_net.h"
26 #include "libcli/smb/smb_common.h"
27 #include "libcli/smb/smb_direct.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/iov_buf.h"
30 #include "librpc/ndr/libndr.h"
32 #ifdef SMB_TRANSPORT_ENABLE_RDMA
33 #include <rdma/rdma_cma_abi.h>
34 #include <rdma/rdma_cma.h>
35 #include <infiniband/verbs.h>
/*
 * Compile-time tunables for the SMB-Direct transport: listener backlog,
 * message/fragment size limits, send/receive credit limits and the
 * keepalive interval.
 *
 * NOTE: the four *_SIZE limits are #undef'ed and redefined with larger
 * values further down in this file. Because macros expand at their point
 * of use, SMB_DIRECT_IO_MAX_DATA picks up whichever definitions are active
 * where it is used (any preprocessor guard around the redefinition is not
 * visible in this excerpt).
 */
37 #define SMB_DIRECT_LISTENER_BACKLOG 5
39 #define SMB_DIRECT_MAX_READ_WRITE_SIZE 1048576
40 #define SMB_DIRECT_MAX_FRAGMENT_SIZE 1048576
41 #define SMB_DIRECT_MAX_SEND_SIZE 1364
42 #define SMB_DIRECT_MAX_RECEIVE_SIZE 8192
43 #define SMB_DIRECT_MAX_SEND_CREDITS 255
44 #define SMB_DIRECT_MAX_RECEIVE_CREDITS 255
45 #define SMB_DIRECT_KEEPALIVE_INTERVAL 5
/* Size of the per-io data buffer: the larger of the send/receive limits, passed through NDR_ROUND(..., 16). */
47 #define SMB_DIRECT_IO_MAX_DATA NDR_ROUND(MAX(SMB_DIRECT_MAX_SEND_SIZE,SMB_DIRECT_MAX_RECEIVE_SIZE),16)
/* Bytes of header written for a message without payload (see smb_direct_connection_post_send). */
48 #define SMB_DIRECT_DATA_MIN_HDR_SIZE 0x14
/* Header buffer size / payload offset: the minimal header passed through NDR_ROUND(..., 8). */
49 #define SMB_DIRECT_DATA_OFFSET NDR_ROUND(SMB_DIRECT_DATA_MIN_HDR_SIZE, 8)
51 #undef SMB_DIRECT_MAX_SEND_SIZE
52 #undef SMB_DIRECT_MAX_RECEIVE_SIZE
53 #undef SMB_DIRECT_MAX_FRAGMENT_SIZE
54 #undef SMB_DIRECT_MAX_READ_WRITE_SIZE
/*
 * Enlarged limits replacing the defaults #undef'ed just above.
 * Each replacement list is parenthesized so the product always expands as
 * a single operand; without the parentheses an expression such as
 * `x / SMB_DIRECT_MAX_FRAGMENT_SIZE` would expand to `x / 1048576*8`.
 */
55 #define SMB_DIRECT_MAX_SEND_SIZE (8192*16*4)
56 #define SMB_DIRECT_MAX_RECEIVE_SIZE (8192*16*4)
57 #define SMB_DIRECT_MAX_FRAGMENT_SIZE (1048576*8)
58 #define SMB_DIRECT_MAX_READ_WRITE_SIZE (1048576*8)
60 #define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
62 struct smb_direct_listener;
63 struct smb_direct_connection;
/*
 * Listener-side state: the RDMA CM id and event channel, the tevent fd
 * watching that channel, the CM event type expected next, the currently
 * fetched (not yet acked) CM event, and the connection lists
 * (`pending`, `ready`) hanging off this listener.
 */
66 struct smb_direct_listener {
68 struct smb_direct_rdma_context context;
69 struct rdma_cm_id *cm_id;
70 struct rdma_event_channel *cm_channel;
71 struct tevent_fd *fde_channel;
72 enum rdma_cm_event_type expected_event;
74 * We fetch events from the ready queue and store it
75 * here, it's acked in the listener destructor.
77 struct rdma_cm_event *cm_event;
79 struct smb_direct_connection *pending;
80 struct smb_direct_connection *ready;
/*
 * Per-connection state of the SMB-Direct transport.
 *
 * The visible members fall into these groups (the sub-struct headers are
 * not visible in this excerpt; the names below are taken from how the
 * members are accessed elsewhere in the file):
 *  - c->state: negotiated size limits and send/receive credit accounting
 *  - c->sock:  the socketpair end handed to the caller (tmp_fd) and the
 *              tevent fd for the local end
 *  - c->rdma:  RDMA CM channel/id, its tevent fd, the expected CM event and
 *              the currently held CM event
 *  - c->ibv:   completion channel, its tevent fd, send/recv CQs and the QP
 *              init attributes
 *  - c->r2s:   io buffers for data coming in from RDMA to the socketpair
 *  - c->s2r:   io buffers for data going from the socketpair to RDMA
 *
 * NOTE(review): the cm_event comment below mentions the "listener
 * destructor"; for a connection the stored event is acked in
 * smb_direct_connection_destructor() - the wording looks copied from
 * struct smb_direct_listener.
 */
83 struct smb_direct_connection {
84 struct smb_direct_connection *next, *prev; /* used in the listener ready list */
85 struct smb_direct_listener *l; /* only valid before fully accepted */
89 uint32_t max_send_size;
90 uint32_t max_receive_size;
91 uint32_t max_fragmented_size;
92 uint32_t max_read_write_size;
93 uint16_t send_credit_target;
94 uint16_t send_credits;
95 uint16_t receive_credit_max;
96 uint16_t receive_posted;
97 uint16_t receive_credit_target;
98 uint16_t receive_credits;
99 uint32_t keep_alive_internal;
103 int tmp_fd; /* given to the caller end */
105 struct tevent_fd *fde;
108 struct smb_direct_rdma_context context;
109 struct rdma_event_channel *cm_channel;
110 struct rdma_cm_id *cm_id;
111 struct tevent_fd *fde_channel;
112 enum rdma_cm_event_type expected_event;
114 * We fetch events from the ready queue and store it
115 * here, it's acked in the listener destructor.
117 struct rdma_cm_event *cm_event;
118 struct rdma_conn_param conn_param;
119 uint8_t ird_ord_hdr[8];
123 struct ibv_comp_channel *comp_channel;
124 struct tevent_fd *fde_channel;
125 struct ibv_cq *send_cq;
126 struct ibv_cq *recv_cq;
128 struct ibv_qp_init_attr init_attr;
131 TALLOC_CTX *io_mem_ctx;
134 * here we have io coming into
135 * the rdma layer, which needs to
136 * be flushed to the socketpair
138 struct smb_direct_io *idle;
139 struct smb_direct_io *posted;
140 struct smb_direct_io *ready;
141 struct smb_direct_io *out;
142 uint32_t remaining_length;
146 * here we have io coming from the socketpair
147 * which needs to be flushed into the rdma layer.
149 struct smb_direct_io *idle;
150 struct smb_direct_io *posted;
151 struct smb_direct_io *ready;
152 struct smb_direct_io *in;
153 uint32_t remaining_length;
157 struct smb_direct_connection *smb_direct_conn;
/*
 * One io unit: a header buffer and a data buffer, each with its own
 * registered memory region, the two scatter/gather entries pointing at
 * them, and pre-built send and receive work requests.
 * prev/next link the io into one of the connection's idle/posted/ready
 * lists.
 */
160 struct smb_direct_io {
161 struct smb_direct_io *prev, *next;
163 struct ibv_mr *hdr_mr;
164 struct ibv_mr *data_mr;
165 struct ibv_sge sge[2];
167 struct ibv_recv_wr recv_wr;
168 struct ibv_send_wr send_wr;
170 struct iovec _iov_array[2];
174 uint32_t data_length;
175 uint32_t remaining_length;
177 uint8_t nbt_hdr[NBT_HDR_SIZE];
178 uint8_t smbd_hdr[SMB_DIRECT_DATA_OFFSET];
179 uint8_t data[SMB_DIRECT_IO_MAX_DATA];
182 static int smb_direct_io_destructor(struct smb_direct_io *io);
/*
 * Allocate and prepare one smb_direct_io for connection `c`.
 *
 * The io is zero-allocated as a talloc child of c->io_mem_ctx and gets
 * smb_direct_io_destructor() attached. Its header and data buffers are
 * registered as memory regions with IBV_ACCESS_LOCAL_WRITE, sge[0]/sge[1]
 * are pointed at them, and both the send and the receive work request are
 * pre-filled to use the two sge entries.
 *
 * wr_id of both work requests carries the io pointer itself, so that a
 * completion can be mapped back to its io.
 *
 * The failure paths (NULL io_mem_ctx, allocation or registration failure)
 * are not visible in this excerpt.
 */
184 static struct smb_direct_io *smb_direct_io_create(struct smb_direct_connection *c)
186 struct smb_direct_io *io;
188 if (c->io_mem_ctx == NULL) {
192 io = talloc_zero(c->io_mem_ctx, struct smb_direct_io);
196 talloc_set_destructor(io, smb_direct_io_destructor);
198 io->hdr_mr = ibv_reg_mr(c->ibv.pd,
200 sizeof(io->smbd_hdr),
201 IBV_ACCESS_LOCAL_WRITE);
202 if (io->hdr_mr == NULL) {
207 io->data_mr = ibv_reg_mr(c->ibv.pd,
210 IBV_ACCESS_LOCAL_WRITE);
211 if (io->data_mr == NULL) {
216 io->sge[0].addr = (uint64_t) (uintptr_t) io->smbd_hdr;
217 io->sge[0].length = sizeof(io->smbd_hdr);
218 io->sge[0].lkey = io->hdr_mr->lkey;
219 io->sge[1].addr = (uint64_t) (uintptr_t) io->data;
220 io->sge[1].length = sizeof(io->data);
221 io->sge[1].lkey = io->data_mr->lkey;
223 io->send_wr.wr_id = (uint64_t) (uintptr_t) io;
224 io->send_wr.opcode = IBV_WR_SEND;
225 io->send_wr.send_flags = IBV_SEND_SIGNALED;
226 io->send_wr.sg_list = io->sge;
227 io->send_wr.num_sge = ARRAY_SIZE(io->sge);
229 io->recv_wr.wr_id = (uint64_t) (uintptr_t) io;
230 io->recv_wr.sg_list = io->sge;
231 io->recv_wr.num_sge = ARRAY_SIZE(io->sge);
/*
 * talloc destructor for an smb_direct_io: deregisters the header and data
 * memory regions if they were registered.
 */
236 static int smb_direct_io_destructor(struct smb_direct_io *io)
238 if (io->hdr_mr != NULL) {
239 ibv_dereg_mr(io->hdr_mr);
243 if (io->data_mr != NULL) {
244 ibv_dereg_mr(io->data_mr);
251 static int smb_direct_connection_destructor(struct smb_direct_connection *c);
/*
 * File-local definition of DEBUG(): emits `body` via dbghdrclass()/dbgtext()
 * when `level` is <= MAX_DEBUG_LEVEL and the debug level configured for
 * DBGC_CLASS is >= `level`.
 *
 * NOTE(review): the macro parameter is spelled `_level` while the expansion
 * tests `level`; the line that relates the two is not visible in this
 * excerpt - confirm how `level` is bound.
 */
255 #define DEBUG( _level, body ) do {\
257 (void)( ((level) <= MAX_DEBUG_LEVEL) && \
258 unlikely(DEBUGLEVEL_CLASS[ DBGC_CLASS ] >= (level)) \
259 && (dbghdrclass( level, DBGC_CLASS, __location__, __FUNCTION__ )) \
260 && (dbgtext body) ); \
/*
 * Allocate a new smb_direct_connection as a talloc child of `mem_ctx`.
 *
 * Visible steps:
 *  - zero-allocate the connection and attach
 *    smb_direct_connection_destructor()
 *  - seed c->state with the SMB_DIRECT_* compile-time limits
 *  - create an AF_UNIX/SOCK_STREAM socketpair; both ends are made
 *    close-on-exec and non-blocking
 *  - create the RDMA CM event channel (close-on-exec)
 *  - pre-fill the QP init attributes: RC queue pair, work-request limits
 *    taken from the credit limits, 2 sge per work request, every send
 *    signaled
 *
 * Error handling and the return statements are not visible in this excerpt.
 */
263 struct smb_direct_connection *smb_direct_connection_create(TALLOC_CTX *mem_ctx)
265 struct smb_direct_connection *c;
270 c = talloc_zero(mem_ctx, struct smb_direct_connection);
276 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
277 talloc_set_destructor(c, smb_direct_connection_destructor);
279 c->state.max_send_size = SMB_DIRECT_MAX_SEND_SIZE;
280 c->state.max_receive_size = SMB_DIRECT_MAX_RECEIVE_SIZE;
281 c->state.max_fragmented_size = SMB_DIRECT_MAX_FRAGMENT_SIZE;
282 c->state.max_read_write_size = SMB_DIRECT_MAX_READ_WRITE_SIZE;
283 c->state.receive_credit_max = SMB_DIRECT_MAX_RECEIVE_CREDITS;
284 c->state.send_credit_target = SMB_DIRECT_MAX_SEND_CREDITS;
285 c->state.keep_alive_internal = SMB_DIRECT_KEEPALIVE_INTERVAL;
287 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sfd);
289 int saved_errno = errno;
291 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
295 c->sock.tmp_fd = sfd[0];
298 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
299 __location__, __func__, c->sock.fd, c->sock.tmp_fd));
301 smb_set_close_on_exec(c->sock.tmp_fd);
302 smb_set_close_on_exec(c->sock.fd);
303 set_blocking(c->sock.fd, false);
304 set_blocking(c->sock.tmp_fd, false);
306 c->rdma.cm_channel = rdma_create_event_channel();
307 if (c->rdma.cm_channel == NULL) {
309 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
312 smb_set_close_on_exec(c->rdma.cm_channel->fd);
313 //set_blocking(c->rdma.cm_channel->fd, false);
315 c->rdma.context.c = c;
317 c->ibv.init_attr.cap.max_send_wr = c->state.send_credit_target; // more for RDMA READ/WRITE??
318 c->ibv.init_attr.cap.max_recv_wr = c->state.receive_credit_max; // more for RDMA READ/WRITE??
319 c->ibv.init_attr.cap.max_recv_sge = 2;
320 c->ibv.init_attr.cap.max_send_sge = 2;
321 c->ibv.init_attr.qp_type = IBV_QPT_RC;
322 c->ibv.init_attr.sq_sig_all = 1;
324 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Finish setting up the verbs resources of `c` once c->rdma.cm_id has a
 * device context (cm_id->verbs).
 *
 * Visible steps, in order: completion channel, protection domain, send CQ
 * and receive CQ (sized from the QP init attributes, both bound to the one
 * completion channel with `c` as CQ context), request notification on both
 * CQs, create the QP on the cm_id, create the talloc context for io
 * buffers, then pre-allocate receive_credit_max ios into c->r2s.idle and
 * send_credit_target ios into c->s2r.idle.
 *
 * Every visible failure path returns NT_STATUS_NO_MEMORY; nothing is freed
 * here on failure (the connection destructor releases these resources).
 */
328 static NTSTATUS smb_direct_connection_complete_alloc(struct smb_direct_connection *c)
333 c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
334 if (c->ibv.comp_channel == NULL) {
335 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
336 return NT_STATUS_NO_MEMORY;
338 smb_set_close_on_exec(c->ibv.comp_channel->fd);
339 //set_blocking(c->ibv.comp_channel->fd, false);
341 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
342 c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
343 if (c->ibv.pd == NULL) {
344 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
345 return NT_STATUS_NO_MEMORY;
348 c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
349 c->ibv.init_attr.cap.max_send_wr,
350 c, c->ibv.comp_channel, 0);
351 if (c->ibv.send_cq == NULL) {
352 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
353 return NT_STATUS_NO_MEMORY;
355 c->ibv.init_attr.send_cq = c->ibv.send_cq;
357 c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
358 c->ibv.init_attr.cap.max_recv_wr,
359 c, c->ibv.comp_channel, 0);
360 if (c->ibv.recv_cq == NULL) {
361 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
362 return NT_STATUS_NO_MEMORY;
364 c->ibv.init_attr.recv_cq = c->ibv.recv_cq;
/*
 * NOTE(review): no return statement is visible after the failure log of the
 * send_cq notification request below, whereas the recv_cq case returns
 * NT_STATUS_NO_MEMORY - confirm a send_cq failure is not silently ignored.
 */
366 ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
368 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
371 ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
373 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
374 return NT_STATUS_NO_MEMORY;
377 ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd, &c->ibv.init_attr);
379 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
380 return NT_STATUS_NO_MEMORY;
382 c->ibv.qp = c->rdma.cm_id->qp;
384 c->io_mem_ctx = talloc_named_const(c, 0, "io_mem_ctx");
385 if (c->io_mem_ctx == NULL) {
386 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
387 return NT_STATUS_NO_MEMORY;
390 for (i = 0; i < c->state.receive_credit_max; i++) {
391 struct smb_direct_io *io;
393 io = smb_direct_io_create(c);
395 DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
396 return NT_STATUS_NO_MEMORY;
398 DLIST_ADD_END(c->r2s.idle, io);
399 //DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
402 for (i = 0; i < c->state.send_credit_target; i++) {
403 struct smb_direct_io *io;
405 io = smb_direct_io_create(c);
407 DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
408 return NT_STATUS_NO_MEMORY;
410 DLIST_ADD_END(c->s2r.idle, io);
411 //DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
414 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Debug helper: log (at level 0) the credit and size counters held in
 * c->state together with the io pointer, the caller-supplied location and
 * the caller-supplied reason string.
 * `func` is accepted but not printed (see the commented-out argument).
 */
418 static void smb_direct_connection_debug_credits(struct smb_direct_connection *c,
420 const struct smb_direct_io *io,
421 const char *location, const char *func)
423 DEBUG(0,("%s:%s: IO[%p] CREDITS: RMAX[%u] RPOSTED[%u] RTARGET[%u] R[%u] RSIZE[%u] "
424 "STARGET[%u] S[%u] SSIZE[%u] MF[%u] MRW[%u]\n",
425 location, reason, io, //"",//func,
426 c->state.receive_credit_max,
427 c->state.receive_posted,
428 c->state.receive_credit_target,
429 c->state.receive_credits,
430 c->state.max_receive_size,
431 c->state.send_credit_target,
432 c->state.send_credits,
433 c->state.max_send_size,
434 c->state.max_fragmented_size,
435 c->state.max_read_write_size));
/*
 * talloc destructor for a connection. Visible teardown order:
 *  1. unlink from the listener's ready list
 *  2. free the socket tevent fd and close the socketpair fds
 *  3. free the completion-channel and CM-channel tevent fds
 *  4. free all io buffers (io_mem_ctx), which deregisters their memory
 *     regions through the io destructor
 *  5. ack a still-held CM event
 *  6. destroy QP, send CQ, receive CQ, completion channel and PD
 *  7. destroy the CM id and finally the CM event channel
 *
 * NOTE(review): whether the c->l dereference in step 1 is guarded against
 * a NULL listener is not visible in this excerpt - confirm.
 */
438 static int smb_direct_connection_destructor(struct smb_direct_connection *c)
441 DLIST_REMOVE(c->l->ready, c);
445 TALLOC_FREE(c->sock.fde);
447 if (c->sock.fd != -1) {
452 if (c->sock.tmp_fd != -1) {
453 close(c->sock.tmp_fd);
457 TALLOC_FREE(c->ibv.fde_channel);
458 TALLOC_FREE(c->rdma.fde_channel);
460 TALLOC_FREE(c->io_mem_ctx);
464 if (c->rdma.cm_event != NULL) {
465 rdma_ack_cm_event(c->rdma.cm_event);
466 c->rdma.cm_event = NULL;
469 if (c->ibv.qp != NULL) {
470 ibv_destroy_qp(c->ibv.qp);
474 if (c->ibv.send_cq != NULL) {
475 ibv_destroy_cq(c->ibv.send_cq);
476 c->ibv.send_cq = NULL;
479 if (c->ibv.recv_cq != NULL) {
480 ibv_destroy_cq(c->ibv.recv_cq);
481 c->ibv.recv_cq = NULL;
484 if (c->ibv.comp_channel != NULL) {
485 ibv_destroy_comp_channel(c->ibv.comp_channel);
486 c->ibv.comp_channel = NULL;
489 if (c->ibv.pd != NULL) {
490 ibv_dealloc_pd(c->ibv.pd);
494 if (c->rdma.cm_id != NULL) {
495 rdma_destroy_id(c->rdma.cm_id);
496 c->rdma.cm_id = NULL;
499 if (c->rdma.cm_channel != NULL) {
500 rdma_destroy_event_channel(c->rdma.cm_channel);
501 c->rdma.cm_channel = NULL;
/*
 * Post every idle receive io of `c` to the QP.
 *
 * All ios on c->r2s.idle are chained through recv_wr.next (the last one is
 * terminated with NULL), c->state.receive_posted is incremented once per
 * io, and the whole chain is handed to ibv_post_recv() in a single call
 * starting at the list head. The idle list is then appended to
 * c->r2s.posted.
 *
 * The early-exit for an empty idle list and the error return after a
 * failed ibv_post_recv() are only partially visible in this excerpt.
 */
507 static int smb_direct_connection_post_recv(struct smb_direct_connection *c)
509 struct smb_direct_io *io = NULL;
510 struct ibv_recv_wr *bad_recv_wr = NULL;
513 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
514 smb_direct_connection_debug_credits(c, "post_recv", NULL, __location__, __func__);
515 if (c->r2s.idle == NULL) {
516 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
520 for (io = c->r2s.idle; io != NULL; io = io->next) {
521 //DEBUG(0,("%s:%s: POST RECV[%p]\n", __location__, __func__, io));
522 c->state.receive_posted += 1;
523 smb_direct_connection_debug_credits(c, "POST_RECV", io, __location__, __func__);
524 if (io->next == NULL) {
525 io->recv_wr.next = NULL;
529 io->recv_wr.next = &io->next->recv_wr;
533 ret = ibv_post_recv(c->ibv.qp, &c->r2s.idle->recv_wr, &bad_recv_wr);
536 status = map_nt_error_from_unix_common(errno);
537 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
538 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
542 DLIST_CONCATENATE(c->r2s.posted, c->r2s.idle);
545 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Send the ios queued on c->s2r.ready, as far as send credits allow.
 *
 * For each ready io (visible logic):
 *  - stop when c->state.send_credits is 0, otherwise consume one credit;
 *    when that was the last credit, SMB_DIRECT_RESPONSE_REQUESTED is set
 *    in the message flags
 *  - compute the receive credits to grant to the peer:
 *    (receive_posted - receive_credits), capped at receive_credit_target,
 *    and add it to c->state.receive_credits
 *  - use two sge entries (header of SMB_DIRECT_DATA_OFFSET bytes + payload)
 *    when the io carries data, otherwise a single header-only entry of
 *    SMB_DIRECT_DATA_MIN_HDR_SIZE bytes
 *  - fill the header, move the io from the ready list to a local `posted`
 *    list and post it with ibv_post_send()
 * Finally the local `posted` list is appended to c->s2r.posted.
 *
 * NOTE(review): `granted` is uint16_t, so the subtraction wraps if
 * receive_credits ever exceeds receive_posted - confirm that invariant.
 * NOTE(review): several guards and return statements of this function are
 * not visible in this excerpt.
 */
549 static int smb_direct_connection_post_send(struct smb_direct_connection *c)
551 struct smb_direct_io *io = NULL;
552 struct smb_direct_io *next = NULL;
553 struct smb_direct_io *posted = NULL;
554 struct smb_direct_io *last = NULL;
555 struct ibv_send_wr *bad_send_wr = NULL;
558 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
559 smb_direct_connection_debug_credits(c, "post_send", NULL, __location__, __func__);
560 if (c->s2r.ready == NULL) {
561 DEBUG(0,("%s:%s: none READY\n", __location__, __func__));
565 if (c->state.send_credits == 0) {
566 DEBUG(0,("%s:%s: no credits\n", __location__, __func__));
572 DEBUG(0,("%s:%s: TODO: REMOVE me ...here...\n", __location__, __func__));
577 for (io = c->s2r.ready; io != NULL; io = next) {
578 uint16_t granted = 0;
580 uint32_t data_offset = 0;
584 if (c->state.send_credits == 0) {
585 smb_direct_connection_debug_credits(c, "WAIT_FOR_POST_SEND", io, __location__, __func__);
589 c->state.send_credits -= 1;
591 if (c->state.send_credits == 0) {
592 flags |= SMB_DIRECT_RESPONSE_REQUESTED;
595 granted = c->state.receive_posted;
596 granted -= c->state.receive_credits;
597 granted = MIN(granted, c->state.receive_credit_target);
598 c->state.receive_credits += granted;
599 smb_direct_connection_debug_credits(c, "POST_SEND", io, __location__, __func__);
601 // if (c->state.receive_credits == 0) {
603 // c->state.receive_credits += granted;
606 io->send_wr.sg_list = io->sge;
607 if (io->data_length > 0) {
608 data_offset = SMB_DIRECT_DATA_OFFSET;
609 io->sge[0].length = data_offset;
610 io->sge[1].length = io->data_length;
611 io->send_wr.num_sge = 2;
613 io->sge[0].length = SMB_DIRECT_DATA_MIN_HDR_SIZE;
614 io->send_wr.num_sge = 1;
/*
 * Header layout as written here:
 *   0x00 send_credit_target, 0x02 credits granted, 0x04 flags,
 *   0x06 zero, 0x08 remaining length, 0x0C data offset,
 *   0x10 data length, 0x14 zero (fills the header buffer up to
 *   SMB_DIRECT_DATA_OFFSET).
 */
617 SSVAL(io->smbd_hdr, 0x00, c->state.send_credit_target);
618 SSVAL(io->smbd_hdr, 0x02, granted);
619 SSVAL(io->smbd_hdr, 0x04, flags);
620 SSVAL(io->smbd_hdr, 0x06, 0x0000);
621 SIVAL(io->smbd_hdr, 0x08, io->remaining_length);
622 SIVAL(io->smbd_hdr, 0x0C, data_offset);
623 SIVAL(io->smbd_hdr, 0x10, io->data_length);
624 SIVAL(io->smbd_hdr, 0x14, 0x00000000);
627 io->send_wr.next = &next->send_wr;
629 io->send_wr.next = NULL;
631 io->send_wr.next = NULL;
632 DLIST_REMOVE(c->s2r.ready, io);
633 DLIST_ADD_END(posted, io);
634 DEBUG(0,("%s:%s: POST SEND[%p] data_length[%u] remaining_length[%u]\n",
635 __location__, __FUNCTION__, io,
636 (unsigned)io->data_length, (unsigned)io->remaining_length));
638 ret = ibv_post_send(c->ibv.qp, &io->send_wr, &bad_send_wr);
641 // DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
642 status = map_nt_error_from_unix_common(errno);
643 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
644 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
649 last = DLIST_TAIL(posted);
650 last->send_wr.next = NULL;
653 //ret = ibv_post_send(c->ibv.qp, &posted->send_wr, &bad_send_wr);
656 //// DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
657 // status = map_nt_error_from_unix_common(errno);
658 // DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
659 // __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
663 DLIST_CONCATENATE(c->s2r.posted, posted);
/*
 * Simple variant of post_io: post all idle receives, then post all ready
 * sends, logging a failure of either step.
 *
 * NOTE(review): a second definition with the same name follows directly;
 * presumably only one of the two is compiled (the preprocessor guards are
 * not visible in this excerpt) - confirm.
 */
668 static int smb_direct_connection_post_io(struct smb_direct_connection *c)
673 ret = smb_direct_connection_post_recv(c);
676 status = map_nt_error_from_unix_common(errno);
677 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
678 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
683 ret = smb_direct_connection_post_send(c);
686 status = map_nt_error_from_unix_common(errno);
687 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
688 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
/*
 * post_io variant with keepalive handling: post idle receives, decide
 * whether an empty (header-only) message has to be queued, then post the
 * ready sends.
 *
 * An empty message is wanted when c->state.send_immediate is set or when
 * c->state.receive_credits is 0. The wish is dropped again when
 *  - it came from send_immediate and sends are already posted, or
 *  - no receive is posted, or
 *  - there already is something on the s2r ready list, or
 *  - no idle s2r io is available.
 * If it survives, an io is taken from c->s2r.idle, its remaining_length is
 * set to 0 and it is appended to c->s2r.ready before post_send runs.
 *
 * After sending, send_immediate is cleared once something is on the
 * s2r posted list.
 */
695 static int smb_direct_connection_post_io(struct smb_direct_connection *c)
697 struct smb_direct_io *io = NULL;
699 bool need_keepalive = false;
701 DEBUG(0,("%s:%s: IO "
702 "s2r posted[%p] ready[%p] idle[%p] in[%p] remaining[%u] "
703 "r2s posted[%p] ready[%p] idle[%p] out[%p] remaining[%u]\n",
704 __location__, __func__,
705 c->s2r.posted, c->s2r.ready, c->s2r.idle, c->s2r.in, c->s2r.remaining_length,
706 c->r2s.posted, c->r2s.ready, c->r2s.idle, c->r2s.out, c->r2s.remaining_length));
709 ret = smb_direct_connection_post_recv(c);
712 status = map_nt_error_from_unix_common(errno);
713 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
714 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
718 if (c->state.send_immediate) {
719 need_keepalive = true;
722 if (need_keepalive && c->s2r.posted != NULL) {
723 DEBUG(0,("%s:%s: KEEP skip...(posted)\n", __location__, __func__));
724 need_keepalive = false;
727 if (c->state.receive_credits == 0) {
728 need_keepalive = true;
731 if (need_keepalive && c->state.receive_posted == 0) {
732 DEBUG(0,("%s:%s: KEEP skip...(no RECV posted)\n", __location__, __func__));
733 need_keepalive = false;
736 if (need_keepalive && c->s2r.ready != NULL) {
737 DEBUG(0,("%s:%s: KEEP skip...(ready)\n", __location__, __func__));
738 need_keepalive = false;
741 if (need_keepalive && c->s2r.idle == NULL) {
742 DEBUG(0,("%s:%s: KEEP skip...(no idle)\n", __location__, __func__));
743 need_keepalive = false;
746 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
747 if (need_keepalive) {
749 DLIST_REMOVE(c->s2r.idle, io);
752 io->remaining_length = 0;
754 DLIST_ADD_END(c->s2r.ready, io);
757 //smb_direct_connection_debug_credits(c, "POST_KEEP", io, __location__, __func__);
758 ret = smb_direct_connection_post_send(c);
761 status = map_nt_error_from_unix_common(errno);
762 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
763 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
766 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
768 if (c->state.send_immediate) {
769 if (c->s2r.posted != NULL) {
770 c->state.send_immediate = false;
/*
 * Enable read-readiness events on the connection's socket tevent fd.
 */
776 static int smb_direct_connection_setup_readv(struct smb_direct_connection *c)
778 DEBUG(0,("%s:%s: TEVENT_FD_READABLE on\n", __location__, __func__));
779 TEVENT_FD_READABLE(c->sock.fde);
780 // TODO: immediate_event?? may skip a syscall.
/*
 * Enable write-readiness events on the connection's socket tevent fd.
 */
784 static int smb_direct_connection_setup_writev(struct smb_direct_connection *c)
786 DEBUG(0,("%s:%s: TEVENT_FD_WRITEABLE on\n", __location__, __func__));
787 TEVENT_FD_WRITEABLE(c->sock.fde);
788 // TODO: immediate_event?? may skip a syscall.
/* Request state of the asynchronous RDMA connect: the connection being connected. */
792 struct smb_direct_connection_rdma_connect_state {
793 struct smb_direct_connection *c;
/*
 * Destructor of the connect request state: removes the CM-channel fd
 * handler from the connection, so no handler outlives the request.
 */
796 static int smb_direct_connection_rdma_connect_state_destructor(
797 struct smb_direct_connection_rdma_connect_state *state)
799 struct smb_direct_connection *c = state->c;
801 TALLOC_FREE(c->rdma.fde_channel);
806 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
807 struct tevent_fd *fde,
/*
 * Start an asynchronous RDMA connect of `c` to `dst`.
 *
 * The destination is copied and its port is forced to 5445. A CM id is
 * created on c->rdma.cm_channel, an fd handler
 * (smb_direct_connection_rdma_connect_handler) is installed for the CM
 * channel fd, and address resolution is kicked off; the first CM event
 * expected is RDMA_CM_EVENT_ADDR_RESOLVED.
 *
 * Failures of rdma_create_id()/rdma_resolve_addr() and of adding the fd
 * handler complete the request immediately via tevent_req_post().
 *
 * NOTE(review): the source address passed on is NULL (the line using `src`
 * is commented out), and local_addr/remote_addr are not used in the lines
 * visible here.
 */
811 static struct tevent_req *smb_direct_connection_rdma_connect_send(TALLOC_CTX *mem_ctx,
812 struct tevent_context *ev,
813 struct smb_direct_connection *c,
814 const struct sockaddr_storage *src,
815 const struct sockaddr_storage *dst,
816 struct tsocket_address *local_addr,
817 struct tsocket_address *remote_addr)
819 struct tevent_req *req;
820 struct smb_direct_connection_rdma_connect_state *state;
822 //struct sockaddr *src_addr = (const struct sockaddr *)src;
823 struct sockaddr *src_addr = NULL;
824 struct sockaddr_storage _dst_addr = *dst;
825 struct sockaddr *dst_addr = (struct sockaddr *)&_dst_addr;
827 set_sockaddr_port(dst_addr, 5445);
829 req = tevent_req_create(mem_ctx, &state,
830 struct smb_direct_connection_rdma_connect_state);
836 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
837 talloc_set_destructor(state, smb_direct_connection_rdma_connect_state_destructor);
839 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
840 ret = rdma_create_id(c->rdma.cm_channel,
846 ret = rdma_create_id(c->rdma.cm_channel,
851 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
853 return tevent_req_post(req, ev);
856 c->rdma.fde_channel = tevent_add_fd(ev, state,
857 c->rdma.cm_channel->fd,
859 smb_direct_connection_rdma_connect_handler,
861 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
862 return tevent_req_post(req, ev);
866 ret = rdma_resolve_addr(c->rdma.cm_id,
870 NTSTATUS status = map_nt_error_from_unix_common(errno);
871 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
872 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
873 tevent_req_nterror(req, status);
874 return tevent_req_post(req, ev);
876 c->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
/*
 * fd handler driving the RDMA connect state machine.
 *
 * Each invocation fetches one CM event into c->rdma.cm_event, fails the
 * request if the event carries a non-zero status, compares the event
 * against c->rdma.expected_event and then advances:
 *   ADDR_RESOLVED  -> rdma_resolve_route() (timeout argument 5000),
 *                     expect ROUTE_RESOLVED
 *   ROUTE_RESOLVED -> smb_direct_connection_complete_alloc(), then
 *                     rdma_connect() with an 8-byte private-data header,
 *                     expect ESTABLISHED
 *   ESTABLISHED    -> expect DISCONNECTED from now on, drop the fd
 *                     handler, ack the event and complete the request
 *   anything else  -> fail with NT_STATUS_INVALID_NETWORK_RESPONSE
 * The event is acked and c->rdma.cm_event reset at the end of the handler.
 */
881 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
882 struct tevent_fd *fde,
886 struct tevent_req *req =
887 talloc_get_type_abort(private_data,
889 struct smb_direct_connection_rdma_connect_state *state =
891 struct smb_direct_connection_rdma_connect_state);
892 struct smb_direct_connection *c = state->c;
893 struct rdma_conn_param conn_param;
894 uint8_t ird_ord_hdr[8];
895 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
899 ret = rdma_get_cm_event(c->rdma.cm_channel,
902 status = map_nt_error_from_unix_common(errno);
903 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
904 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
905 tevent_req_nterror(req, status);
910 if (c->rdma.cm_event->status != 0) {
911 errno = c->rdma.cm_event->status;
912 status = map_nt_error_from_unix_common(errno);
913 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
914 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
915 tevent_req_nterror(req, status);
919 if (c->rdma.cm_event->event != c->rdma.expected_event) {
920 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
921 __location__, __FUNCTION__, ret, errno));
925 switch (c->rdma.cm_event->event) {
926 case RDMA_CM_EVENT_ADDR_RESOLVED:
928 ret = rdma_resolve_route(c->rdma.cm_id, 5000);
930 status = map_nt_error_from_unix_common(errno);
931 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
932 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
933 tevent_req_nterror(req, status);
936 c->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
938 case RDMA_CM_EVENT_ROUTE_RESOLVED:
941 status = smb_direct_connection_complete_alloc(c);
942 if (!NT_STATUS_IS_OK(status)) {
943 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
944 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
945 tevent_req_nterror(req, status);
/*
 * 8 bytes of connect private data: the values 16 and 0 stored via
 * RSIVAL(). Going by the buffer name these are presumably IRD/ORD
 * values - TODO confirm.
 */
949 RSIVAL(ird_ord_hdr, 0, 16);
950 RSIVAL(ird_ord_hdr, 4, 0);
952 ZERO_STRUCT(conn_param);
953 conn_param.private_data = ird_ord_hdr;
954 conn_param.private_data_len = sizeof(ird_ord_hdr);
955 conn_param.responder_resources = 1;
956 conn_param.initiator_depth = 1;
957 conn_param.retry_count = 10;
960 ret = rdma_connect(c->rdma.cm_id, &conn_param);
962 status = map_nt_error_from_unix_common(errno);
963 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
964 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
965 tevent_req_nterror(req, status);
968 c->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
971 case RDMA_CM_EVENT_ESTABLISHED:
974 //DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
975 // __location__, __FUNCTION__, ret, errno));
977 c->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
978 TALLOC_FREE(c->rdma.fde_channel);
979 rdma_ack_cm_event(c->rdma.cm_event);
980 c->rdma.cm_event = NULL;
981 tevent_req_done(req);
984 case RDMA_CM_EVENT_ADDR_ERROR:
985 case RDMA_CM_EVENT_ROUTE_ERROR:
986 case RDMA_CM_EVENT_CONNECT_REQUEST:
987 case RDMA_CM_EVENT_CONNECT_RESPONSE:
988 case RDMA_CM_EVENT_CONNECT_ERROR:
989 case RDMA_CM_EVENT_UNREACHABLE:
990 case RDMA_CM_EVENT_REJECTED:
991 case RDMA_CM_EVENT_DISCONNECTED:
992 case RDMA_CM_EVENT_DEVICE_REMOVAL:
993 case RDMA_CM_EVENT_MULTICAST_JOIN:
994 case RDMA_CM_EVENT_MULTICAST_ERROR:
995 case RDMA_CM_EVENT_ADDR_CHANGE:
996 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
997 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
998 DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
999 __location__, __FUNCTION__,
1000 c->rdma.cm_event->event, ret, errno, nt_errstr(status)));
1001 tevent_req_nterror(req, status);
1005 rdma_ack_cm_event(c->rdma.cm_event);
1006 c->rdma.cm_event = NULL;
/*
 * Collect the result of an RDMA connect request: returns the NTSTATUS the
 * request finished with.
 */
1009 static NTSTATUS smb_direct_connection_rdma_connect_recv(struct tevent_req *req)
1011 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1012 return tevent_req_simple_recv_ntstatus(req);
/*
 * Request state of the client-side negotiate step.
 *
 * Besides the connection it holds three groups of work-request resources,
 * each with one sge entry: two with a send work request and one with a
 * receive work request, plus fixed-size message buffers of 0x14 and 512
 * bytes. The names of the enclosing sub-structs are not visible in this
 * excerpt.
 */
1015 struct smb_direct_connection_negotiate_connect_state {
1016 struct smb_direct_connection *c;
1019 struct ibv_sge sge[1];
1020 struct ibv_send_wr wr;
1023 uint8_t buffer[0x14];
1025 struct ibv_sge sge[1];
1026 struct ibv_send_wr wr;
1029 uint8_t buffer[512];//0x20];
1031 struct ibv_sge sge[1];
1032 struct ibv_recv_wr wr;
/*
 * Destructor of the negotiate request state: removes both fd handlers
 * (completion channel and CM channel) installed on the connection.
 */
1037 static int smb_direct_connection_negotiate_connect_destructor(
1038 struct smb_direct_connection_negotiate_connect_state *state)
1040 struct smb_direct_connection *c = state->c;
1042 TALLOC_FREE(c->ibv.fde_channel);
1043 TALLOC_FREE(c->rdma.fde_channel);
1048 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
1049 struct tevent_fd *fde,
1051 void *private_data);
1052 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1053 struct tevent_fd *fde,
1055 void *private_data);
/*
 * Start the asynchronous client-side negotiate on an established
 * connection.
 *
 * Installs fd handlers for the CM event channel and for the completion
 * channel (both owned by the request state), then allocates one io and
 * posts it as a zero-length IBV_WR_RDMA_READ with placeholder address and
 * key values of 1.
 *
 * NOTE(review): the purpose of the zero-length RDMA READ is not evident
 * from the visible code - TODO document why it is posted first.
 * Allocation or post failures complete the request immediately via
 * tevent_req_post().
 */
1057 static struct tevent_req *smb_direct_connection_negotiate_connect_send(TALLOC_CTX *mem_ctx,
1058 struct tevent_context *ev,
1059 struct smb_direct_connection *c)
1061 struct tevent_req *req;
1062 struct smb_direct_connection_negotiate_connect_state *state;
1063 struct smb_direct_io *rdma_read = NULL;
1064 struct ibv_send_wr *bad_send_wr = NULL;
1068 req = tevent_req_create(mem_ctx, &state,
1069 struct smb_direct_connection_negotiate_connect_state);
1075 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1077 talloc_set_destructor(state, smb_direct_connection_negotiate_connect_destructor);
1079 c->rdma.fde_channel = tevent_add_fd(ev, state,
1080 c->rdma.cm_channel->fd,
1082 smb_direct_connection_negotiate_connect_rdma_handler,
1084 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
1085 return tevent_req_post(req, ev);
1087 c->ibv.fde_channel = tevent_add_fd(ev, state,
1088 c->ibv.comp_channel->fd,
1090 smb_direct_connection_negotiate_connect_ibv_handler,
1092 if (tevent_req_nomem(c->ibv.fde_channel, req)) {
1093 return tevent_req_post(req, ev);
1096 rdma_read = smb_direct_io_create(c);
1097 if (tevent_req_nomem(rdma_read, req)) {
1098 return tevent_req_post(req, ev);
1100 rdma_read->sge[0].addr = 1;
1101 rdma_read->sge[0].length = 0;
1102 rdma_read->sge[0].lkey = 1;
1103 rdma_read->send_wr.opcode = IBV_WR_RDMA_READ;
1104 rdma_read->send_wr.send_flags = IBV_SEND_SIGNALED;
1105 rdma_read->send_wr.sg_list = rdma_read->sge;
1106 rdma_read->send_wr.num_sge = 1;
1107 rdma_read->send_wr.wr.rdma.rkey = 1;
1108 rdma_read->send_wr.wr.rdma.remote_addr = 1;
1111 ret = ibv_post_send(c->ibv.qp, &rdma_read->send_wr, &bad_send_wr);
1113 status = map_nt_error_from_unix_common(errno);
1114 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1115 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1116 tevent_req_nterror(req, status);
1117 return tevent_req_post(req, ev);
1120 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1121 __location__, __FUNCTION__, ret, errno));
/*
 * CM-channel fd handler used while the negotiate request is in flight.
 *
 * It fetches one CM event and fails the request in every visible path:
 *  - rdma_get_cm_event() failure or a non-zero event status: the mapped
 *    errno
 *  - RDMA_CM_EVENT_DISCONNECTED: NT_STATUS_CONNECTION_DISCONNECTED
 *  - any other listed event: NT_STATUS_INVALID_NETWORK_RESPONSE
 *  - the trailing fallback: NT_STATUS_INTERNAL_ERROR
 * In other words, no CM event is expected during negotiation.
 */
1125 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
1126 struct tevent_fd *fde,
1130 struct tevent_req *req =
1131 talloc_get_type_abort(private_data,
1133 struct smb_direct_connection_negotiate_connect_state *state =
1134 tevent_req_data(req,
1135 struct smb_direct_connection_negotiate_connect_state);
1136 struct smb_direct_connection *c = state->c;
1137 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1141 ret = rdma_get_cm_event(c->rdma.cm_channel,
1144 status = map_nt_error_from_unix_common(errno);
1145 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1146 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1147 tevent_req_nterror(req, status);
1151 if (c->rdma.cm_event->status != 0) {
1152 errno = c->rdma.cm_event->status;
1153 status = map_nt_error_from_unix_common(errno);
1154 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1155 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1156 tevent_req_nterror(req, status);
1160 switch (c->rdma.cm_event->event) {
1161 case RDMA_CM_EVENT_DISCONNECTED:
1162 status = NT_STATUS_CONNECTION_DISCONNECTED;
1163 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1164 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1165 tevent_req_nterror(req, status);
1167 case RDMA_CM_EVENT_ADDR_RESOLVED:
1168 case RDMA_CM_EVENT_ADDR_ERROR:
1169 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1170 case RDMA_CM_EVENT_ESTABLISHED:
1171 case RDMA_CM_EVENT_ROUTE_ERROR:
1172 case RDMA_CM_EVENT_CONNECT_REQUEST:
1173 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1174 case RDMA_CM_EVENT_CONNECT_ERROR:
1175 case RDMA_CM_EVENT_UNREACHABLE:
1176 case RDMA_CM_EVENT_REJECTED:
1177 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1178 case RDMA_CM_EVENT_MULTICAST_JOIN:
1179 case RDMA_CM_EVENT_MULTICAST_ERROR:
1180 case RDMA_CM_EVENT_ADDR_CHANGE:
1181 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1182 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1183 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1184 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1185 tevent_req_nterror(req, status);
1189 status = NT_STATUS_INTERNAL_ERROR;
1190 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1191 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1192 tevent_req_nterror(req, status);
/*
 * tevent fd handler for the verbs completion channel while the client side
 * of the SMB-Direct negotiate is outstanding.
 *
 * private_data is the negotiate-connect tevent_req. Each invocation takes
 * one completion-queue event, acks it, re-arms the notification, polls a
 * single work completion and dispatches on wc.opcode. A valid negotiate
 * response (the RECV path) updates c->state with the agreed limits, removes
 * both negotiate-time fd events and completes the request. Every failure
 * path visible here reports through tevent_req_nterror().
 */
1195 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1196 struct tevent_fd *fde,
1200 struct tevent_req *req =
1201 talloc_get_type_abort(private_data,
1203 struct smb_direct_connection_negotiate_connect_state *state =
1204 tevent_req_data(req,
1205 struct smb_direct_connection_negotiate_connect_state);
1206 struct smb_direct_connection *c = state->c;
1207 struct ibv_cq *cq = NULL;
1208 void *cq_context = NULL;
1209 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* Values parsed from the peer's negotiate response. */
1212 uint16_t credits_requested;
1213 uint16_t credits_granted;
1214 uint32_t max_read_write_size;
1215 uint32_t preferred_send_size;
1216 uint32_t max_receive_size;
1217 uint32_t max_fragmented_size;
1219 struct smb_direct_io *io = NULL;
1220 struct smb_direct_io *neg_send = NULL;
1221 struct smb_direct_io *neg_recv = NULL;
1222 struct ibv_recv_wr *bad_recv_wr = NULL;
1223 struct ibv_send_wr *bad_send_wr = NULL;
/* Consume the completion-queue event that made the fd readable. */
1226 ret = ibv_get_cq_event(c->ibv.comp_channel,
1229 status = map_nt_error_from_unix_common(errno);
1230 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1231 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1232 tevent_req_nterror(req, status);
/* Acknowledge the one event fetched above. */
1236 ibv_ack_cq_events(cq, 1);
/* The CQ context is expected to be our own connection object. */
1238 if (cq_context != c) {
1239 status = NT_STATUS_INTERNAL_ERROR;
1240 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1241 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1242 tevent_req_nterror(req, status);
/*
 * Re-arm the CQ; the second argument 0 asks for notification on any
 * completion, not only solicited ones.
 * NOTE(review): the libibverbs man pages describe the return value of
 * ibv_req_notify_cq()/ibv_post_recv()/ibv_post_send() as the errno code
 * itself; errno may not be set. Consider mapping ret instead - confirm.
 */
1247 ret = ibv_req_notify_cq(cq, 0);
1249 status = map_nt_error_from_unix_common(errno);
1250 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1251 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1252 tevent_req_nterror(req, status);
/* Fetch at most one work completion. */
1258 ret = ibv_poll_cq(cq, 1, &wc);
1260 status = map_nt_error_from_unix_common(errno);
1261 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1262 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1263 tevent_req_nterror(req, status);
/*
 * Flushed work request: drop both fd events and hand over to the CM event
 * handler with the same arguments.
 * NOTE(review): wc.status is an ibv_wc_status value, not an errno, yet it
 * is passed to the errno mapper; the resulting status is only logged here.
 * NOTE(review): fde is presumably c->ibv.fde_channel, which has just been
 * freed - confirm the callee never touches it.
 */
1268 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1269 //errno = wc.status;
1270 status = map_nt_error_from_unix_common(wc.status);//errno);
1271 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1272 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1273 TALLOC_FREE(c->ibv.fde_channel);
1274 TALLOC_FREE(c->rdma.fde_channel);
1275 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
/* Any other unsuccessful completion fails the request. */
1278 if (wc.status != IBV_WC_SUCCESS) {
1280 status = map_nt_error_from_unix_common(errno);
1281 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1282 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1283 ibv_wc_status_str(wc.status)));
1284 tevent_req_nterror(req, status);
/* wr_id carries the smb_direct_io pointer of the completed work request. */
1288 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1289 struct smb_direct_io);
/* Dispatch on the kind of work request that completed. */
1291 switch (wc.opcode) {
1292 case IBV_WC_RDMA_READ:
1293 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] next[%p] ret[%d] errno[%d]\n",
1294 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
/*
 * Receive io for the peer's answer. The sge length assignment is
 * commented out; presumably smb_direct_io_create() pre-populates sge[1]
 * - confirm (the accept side sets the length explicitly).
 */
1297 neg_recv = smb_direct_io_create(c);
1298 if (tevent_req_nomem(neg_recv, req)) {
1301 //neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
1302 //neg_recv->sge[0].length = sizeof(neg_recv->data);
1303 //neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
1304 neg_recv->recv_wr.sg_list = &neg_recv->sge[1];
1305 neg_recv->recv_wr.num_sge = 1;
1307 neg_send = smb_direct_io_create(c);
1308 if (tevent_req_nomem(neg_send, req)) {
/*
 * Negotiate request, 0x14 bytes: 0x0100 at 0x00 and 0x02 (presumably
 * MinVersion/MaxVersion per MS-SMBD - confirm), zero at 0x04, our
 * send_credit_target at 0x06, then max_send_size, max_receive_size and
 * max_fragmented_size as 32-bit values.
 */
1311 SSVAL(neg_send->data, 0x00, 0x0100);
1312 SSVAL(neg_send->data, 0x02, 0x0100);
1313 SSVAL(neg_send->data, 0x04, 0x0000);
1314 SSVAL(neg_send->data, 0x06, c->state.send_credit_target);
1315 SIVAL(neg_send->data, 0x08, c->state.max_send_size);
1316 SIVAL(neg_send->data, 0x0C, c->state.max_receive_size);
1317 SIVAL(neg_send->data, 0x10, c->state.max_fragmented_size);
1318 //neg_send->sge[0].addr = (uint64_t) (uintptr_t) neg_send->data;
1319 neg_send->sge[1].length = 0x14;
1320 //neg_send->sge[0].lkey = neg_send->data_mr->lkey;
1321 //neg_send->send_wr.opcode = IBV_WR_SEND;
1322 //neg_send->send_wr.send_flags = IBV_SEND_SIGNALED;
1323 neg_send->send_wr.sg_list = &neg_send->sge[1];
1324 neg_send->send_wr.num_sge = 1;
/* The receive is posted before the send. */
1327 ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
1329 status = map_nt_error_from_unix_common(errno);
1330 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1331 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1332 tevent_req_nterror(req, status);
1337 ret = ibv_post_send(c->ibv.qp, &neg_send->send_wr, &bad_send_wr);
1339 status = map_nt_error_from_unix_common(errno);
1340 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1341 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1342 tevent_req_nterror(req, status);
1346 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1347 __location__, __FUNCTION__, ret, errno));
1351 DEBUG(0,("%s:%s: GOT SEND[%p] next[%p] ret[%d] errno[%d]\n",
1352 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1356 DEBUG(0,("%s:%s: GOT RECV[%p] next[%p] ret[%d] errno[%d]\n",
1357 __location__, __FUNCTION__, io, io->recv_wr.next, ret, errno));
1358 //dump_data(0, io->data, wc.byte_len);
/* A negotiate response must be at least 0x20 bytes long. */
1359 if (wc.byte_len < 0x20) {
1360 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1361 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1362 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1363 tevent_req_nterror(req, status);
/*
 * The 16-bit values at 0x00, 0x02 and 0x04 must each be 0x0100;
 * presumably MinVersion, MaxVersion and NegotiatedVersion per MS-SMBD
 * - confirm against the specification.
 */
1366 if (SVAL(io->data, 0x00) != 0x0100) {
1367 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1368 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1369 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1370 tevent_req_nterror(req, status);
1373 if (SVAL(io->data, 0x02) != 0x0100) {
1374 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1375 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1376 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1377 tevent_req_nterror(req, status);
1380 if (SVAL(io->data, 0x04) != 0x0100) {
1381 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1382 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1383 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1384 tevent_req_nterror(req, status);
/* Both credit fields must be non-zero. */
1387 credits_requested = SVAL(io->data, 0x08);
1388 if (credits_requested == 0) {
1389 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1390 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1391 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1392 tevent_req_nterror(req, status);
1395 credits_granted = SVAL(io->data, 0x0A);
1396 if (credits_granted == 0) {
1397 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1398 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1399 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1400 tevent_req_nterror(req, status);
/* A non-OK NTSTATUS from the peer becomes the request's error as-is. */
1403 status = NT_STATUS(IVAL(io->data, 0x0C));
1404 if (!NT_STATUS_IS_OK(status)) {
1405 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1406 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1407 tevent_req_nterror(req, status);
/*
 * Size limits: the peer's preferred send size may not exceed what we are
 * willing to receive; its receive size must be at least 0x80 and its
 * fragmented size at least 0x20000.
 */
1410 max_read_write_size = IVAL(io->data, 0x10);
1411 preferred_send_size = IVAL(io->data, 0x14);
1412 if (preferred_send_size > c->state.max_receive_size) {
1413 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1414 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1415 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1416 tevent_req_nterror(req, status);
1419 max_receive_size = IVAL(io->data, 0x18);
1420 if (max_receive_size < 0x80) {
1421 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1422 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1423 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1424 tevent_req_nterror(req, status);
1427 max_fragmented_size = IVAL(io->data, 0x1C);
1428 if (max_fragmented_size < 0x20000) {
1429 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1430 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1431 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1432 tevent_req_nterror(req, status);
/*
 * Adopt the negotiated values:
 *  - receive size: our limit capped by the peer's preferred send size,
 *    but never below 128
 *  - send size: our limit capped by the peer's receive size
 *  - read/write size: the peer's value capped at 1048576
 *  - fragmented size: the smaller of both sides
 */
1436 c->state.receive_credit_target = credits_requested;
1438 tmp = c->state.max_receive_size;
1439 tmp = MIN(tmp, preferred_send_size);
1440 tmp = MAX(tmp, 128);
1441 c->state.max_receive_size = tmp;
1443 tmp = c->state.max_send_size;
1444 tmp = MIN(tmp, max_receive_size);
1445 c->state.max_send_size = tmp;
1447 tmp = MIN(1048576, max_read_write_size);
1448 c->state.max_read_write_size = tmp;
1450 tmp = c->state.max_fragmented_size;
1451 tmp = MIN(tmp, max_fragmented_size);
1452 c->state.max_fragmented_size = tmp;
1454 c->state.send_credits = credits_granted;
/* Negotiation is over: remove both negotiate-time fd events. */
1456 TALLOC_FREE(c->ibv.fde_channel);
1457 TALLOC_FREE(c->rdma.fde_channel);
1459 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1460 __location__, __FUNCTION__, ret, errno));
/*
 * smb_direct_connection_post_io() is defined elsewhere in this file;
 * presumably it posts the regular receive buffers - confirm.
 */
1465 ret = smb_direct_connection_post_io(c);
1467 status = map_nt_error_from_unix_common(errno);
1468 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1469 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1470 tevent_req_nterror(req, status);
1474 tevent_req_done(req);
/* An RDMA_WRITE completion is not expected here and fails the request. */
1477 case IBV_WC_RDMA_WRITE:
1479 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1480 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1481 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1482 tevent_req_nterror(req, status);
/*
 * Collect the result of a negotiate-connect request: logs a marker and
 * returns the NTSTATUS reported by tevent_req_simple_recv_ntstatus().
 */
1487 static NTSTATUS smb_direct_connection_negotiate_connect_recv(struct tevent_req *req)
1489 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1490 return tevent_req_simple_recv_ntstatus(req);
/*
 * Per-request state of smb_direct_connection_connect_send(): the event
 * context and the connection used by both steps (RDMA connect, then
 * SMB-Direct negotiate).
 */
1493 struct smb_direct_connection_connect_state {
1494 struct tevent_context *ev;
1495 struct smb_direct_connection *c;
1498 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq);
1499 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq);
/*
 * Start an asynchronous SMB-Direct connect on connection c.
 *
 * The first step is the RDMA-level connect from src to dst; when that
 * subrequest finishes, smb_direct_connection_connect_done_rdma() continues
 * with the SMB-Direct negotiate. If the subrequest cannot be allocated the
 * request is marked as failed and returned via tevent_req_post().
 */
1501 struct tevent_req *smb_direct_connection_connect_send(TALLOC_CTX *mem_ctx,
1502 struct tevent_context *ev,
1503 struct smb_direct_connection *c,
1504 const struct sockaddr_storage *src,
1505 const struct sockaddr_storage *dst)
1507 struct tevent_req *req = NULL;
1508 struct smb_direct_connection_connect_state *state = NULL;
1509 struct tevent_req *subreq = NULL;
1511 req = tevent_req_create(mem_ctx, &state,
1512 struct smb_direct_connection_connect_state);
1519 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* Step 1: RDMA connect; the two trailing arguments are passed as NULL. */
1520 subreq = smb_direct_connection_rdma_connect_send(state, ev, c, src, dst, NULL, NULL);
1521 if (tevent_req_nomem(subreq, req)) {
1522 return tevent_req_post(req, ev);
1524 tevent_req_set_callback(subreq,
1525 smb_direct_connection_connect_done_rdma,
1528 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Completion callback of the RDMA connect step.
 *
 * Receives and frees the subrequest. An error fails the outer request;
 * otherwise the SMB-Direct negotiate is started with
 * smb_direct_connection_connect_done_negotiate() as its callback.
 */
1532 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq)
1534 struct tevent_req *req =
1535 tevent_req_callback_data(subreq,
1537 struct smb_direct_connection_connect_state *state =
1538 tevent_req_data(req,
1539 struct smb_direct_connection_connect_state);
1542 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1543 status = smb_direct_connection_rdma_connect_recv(subreq);
1544 TALLOC_FREE(subreq);
/* tevent_req_nterror() returns true when status is an error. */
1545 if (tevent_req_nterror(req, status)) {
1549 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* Step 2: SMB-Direct negotiate on the now connected channel. */
1550 subreq = smb_direct_connection_negotiate_connect_send(state, state->ev, state->c);
1551 if (tevent_req_nomem(subreq, req)) {
1554 tevent_req_set_callback(subreq,
1555 smb_direct_connection_connect_done_negotiate,
1557 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Completion callback of the negotiate step: receives and frees the
 * subrequest, then either fails the outer connect request with the
 * negotiate error or marks it done.
 */
1560 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq)
1562 struct tevent_req *req =
1563 tevent_req_callback_data(subreq,
1567 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1568 status = smb_direct_connection_negotiate_connect_recv(subreq);
1569 TALLOC_FREE(subreq);
1570 if (tevent_req_nterror(req, status)) {
1574 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1575 tevent_req_done(req);
/*
 * Collect the result of smb_direct_connection_connect_send().
 *
 * On success the connection's sock.tmp_fd is stored in *fd and the
 * connection's own copy is reset to -1; NT_STATUS_OK is returned. In both
 * the error and the success path tevent_req_received() is called on req.
 */
1578 NTSTATUS smb_direct_connection_connect_recv(struct tevent_req *req, int *fd)
1580 struct smb_direct_connection_connect_state *state =
1581 tevent_req_data(req,
1582 struct smb_direct_connection_connect_state);
1583 struct smb_direct_connection *c = state->c;
1588 if (tevent_req_is_nterror(req, &status)) {
1589 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1590 tevent_req_received(req);
1594 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
1595 __location__, __func__, c->sock.fd, c->sock.tmp_fd));
/* Hand the fd out and forget it locally. */
1596 *fd = c->sock.tmp_fd;
1597 c->sock.tmp_fd = -1;
1598 tevent_req_received(req);
1599 return NT_STATUS_OK;
/*
 * Per-request state of smb_direct_connection_negotiate_accept_send(); it
 * holds the connection while the server-side negotiate is in flight.
 */
1602 struct smb_direct_connection_negotiate_accept_state {
1603 struct smb_direct_connection *c;
/*
 * talloc destructor for the negotiate-accept state: removes the two fd
 * events (verbs completion channel and CM event channel) that were
 * registered on the connection for the duration of the negotiate.
 */
1606 static int smb_direct_connection_negotiate_accept_destructor(
1607 struct smb_direct_connection_negotiate_accept_state *state)
1609 struct smb_direct_connection *c = state->c;
1611 TALLOC_FREE(c->ibv.fde_channel);
1612 TALLOC_FREE(c->rdma.fde_channel);
1617 static void smb_direct_connection_negotiate_accept_rdma_handler(struct tevent_context *ev,
1618 struct tevent_fd *fde,
1620 void *private_data);
1621 static void smb_direct_connection_negotiate_accept_ibv_handler(struct tevent_context *ev,
1622 struct tevent_fd *fde,
1624 void *private_data);
/*
 * Start the server-side SMB-Direct negotiate on an incoming connection.
 *
 * The connection in *_c is re-parented onto the request state with
 * talloc_move(). fd events for the CM event channel and the verbs
 * completion channel are registered, one receive buffer for the peer's
 * negotiate request is posted, and the RDMA connection is accepted. Every
 * failure visible here marks the request as failed and returns it via
 * tevent_req_post().
 */
1626 static struct tevent_req *smb_direct_connection_negotiate_accept_send(
1627 TALLOC_CTX *mem_ctx,
1628 struct tevent_context *ev,
1629 struct smb_direct_connection **_c)
1631 struct tevent_req *req = NULL;
1632 struct smb_direct_connection_negotiate_accept_state *state = NULL;
1633 struct smb_direct_connection *c = NULL;
1634 struct smb_direct_io *neg_recv = NULL;
1635 struct ibv_recv_wr *bad_recv_wr = NULL;
1639 req = tevent_req_create(mem_ctx, &state,
1640 struct smb_direct_connection_negotiate_accept_state);
/* From here on the connection hangs off the request state. */
1644 c = talloc_move(state, _c);
/* The destructor removes the fd events registered below. */
1646 talloc_set_destructor(state, smb_direct_connection_negotiate_accept_destructor);
1648 c->rdma.fde_channel = tevent_add_fd(ev, state,
1649 c->rdma.cm_channel->fd,
1651 smb_direct_connection_negotiate_accept_rdma_handler,
1653 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
1654 return tevent_req_post(req, ev);
1656 c->ibv.fde_channel = tevent_add_fd(ev, state,
1657 c->ibv.comp_channel->fd,
1659 smb_direct_connection_negotiate_accept_ibv_handler,
1661 if (tevent_req_nomem(c->ibv.fde_channel, req)) {
1662 return tevent_req_post(req, ev);
/* Receive buffer for the peer's negotiate request, using sge[1]. */
1665 neg_recv = smb_direct_io_create(c);
1666 if (tevent_req_nomem(neg_recv, req)) {
1667 return tevent_req_post(req, ev);
1669 //neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
1670 neg_recv->sge[1].length = sizeof(neg_recv->data);
1671 //neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
1672 neg_recv->recv_wr.sg_list = &neg_recv->sge[1];
1673 neg_recv->recv_wr.num_sge = 1;
/*
 * The receive is posted before rdma_accept() is called.
 * NOTE(review): the ibv_post_recv(3) man page describes the return value
 * as the errno code itself; errno may not be set. Consider mapping ret
 * instead - confirm.
 */
1675 ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
1677 status = map_nt_error_from_unix_common(errno);
1678 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1679 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1680 tevent_req_nterror(req, status);
1681 return tevent_req_post(req, ev);
/* Accept with the connection parameters stored on the connection. */
1684 ret = rdma_accept(c->rdma.cm_id, &c->rdma.conn_param);
1686 DBG_ERR("rdma_accept failed [%s] result [%d]\n", strerror(errno), ret);
1687 status = map_nt_error_from_unix_common(errno);
1688 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1689 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1690 tevent_req_nterror(req, status);
1691 return tevent_req_post(req, ev);
/*
 * tevent fd handler for the RDMA CM event channel while the server-side
 * negotiate is outstanding.
 *
 * private_data is the negotiate-accept tevent_req. One CM event is fetched
 * into c->rdma.cm_event. ESTABLISHED is logged and acked; DISCONNECTED and
 * the other listed event types fail the request.
 *
 * NOTE(review): the event is only acked on the ESTABLISHED path here;
 * presumably a pending c->rdma.cm_event is released elsewhere (e.g. by the
 * connection destructor) - confirm.
 */
1697 static void smb_direct_connection_negotiate_accept_rdma_handler(
1698 struct tevent_context *ev,
1699 struct tevent_fd *fde,
1703 struct tevent_req *req =
1704 talloc_get_type_abort(private_data,
1706 struct smb_direct_connection_negotiate_accept_state *state =
1707 tevent_req_data(req,
1708 struct smb_direct_connection_negotiate_accept_state);
1709 struct smb_direct_connection *c = state->c;
1710 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1713 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* Fetch the pending CM event. */
1716 ret = rdma_get_cm_event(c->rdma.cm_channel,
1719 status = map_nt_error_from_unix_common(errno);
1720 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1721 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1722 tevent_req_nterror(req, status);
/* A non-zero event status is treated as an errno value and mapped. */
1726 if (c->rdma.cm_event->status != 0) {
1727 errno = c->rdma.cm_event->status;
1728 status = map_nt_error_from_unix_common(errno);
1729 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1730 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1731 tevent_req_nterror(req, status);
1735 switch (c->rdma.cm_event->event) {
/* Expected event after rdma_accept(): ack it and clear the pointer. */
1736 case RDMA_CM_EVENT_ESTABLISHED:
1737 DEBUG(0,("RDMA conn established [%p]\n", c));
1738 rdma_ack_cm_event(c->rdma.cm_event);
1739 c->rdma.cm_event = NULL;
1741 case RDMA_CM_EVENT_DISCONNECTED:
1742 status = NT_STATUS_CONNECTION_DISCONNECTED;
1743 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1744 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1745 tevent_req_nterror(req, status);
/* Every other known event type is unexpected in this phase. */
1747 case RDMA_CM_EVENT_ADDR_RESOLVED:
1748 case RDMA_CM_EVENT_ADDR_ERROR:
1749 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1750 case RDMA_CM_EVENT_ROUTE_ERROR:
1751 case RDMA_CM_EVENT_CONNECT_REQUEST:
1752 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1753 case RDMA_CM_EVENT_CONNECT_ERROR:
1754 case RDMA_CM_EVENT_UNREACHABLE:
1755 case RDMA_CM_EVENT_REJECTED:
1756 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1757 case RDMA_CM_EVENT_MULTICAST_JOIN:
1758 case RDMA_CM_EVENT_MULTICAST_ERROR:
1759 case RDMA_CM_EVENT_ADDR_CHANGE:
1760 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1761 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1762 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] event [%d] "
1763 "RDMA_CM_EVENT_REJECTED [%d]\n",
1764 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1765 c->rdma.cm_event->event, RDMA_CM_EVENT_REJECTED));
1766 tevent_req_nterror(req, status);
1770 status = NT_STATUS_INTERNAL_ERROR;
1771 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1772 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1773 tevent_req_nterror(req, status);
/*
 * tevent fd handler for the verbs completion channel while the server side
 * of the SMB-Direct negotiate is outstanding.
 *
 * private_data is the negotiate-accept tevent_req. One completion-queue
 * event is consumed and acked, the CQ is re-armed and a single work
 * completion is polled. On the RECV path the peer's negotiate request is
 * validated, the agreed sizes are stored in c->state, both negotiate-time
 * fd events are removed, a negotiate response is posted and the request is
 * completed. Every failure path visible here reports through
 * tevent_req_nterror().
 */
1776 static void smb_direct_connection_negotiate_accept_ibv_handler(
1777 struct tevent_context *ev,
1778 struct tevent_fd *fde,
1782 struct tevent_req *req =
1783 talloc_get_type_abort(private_data,
1785 struct smb_direct_connection_negotiate_accept_state *state =
1786 tevent_req_data(req,
1787 struct smb_direct_connection_negotiate_accept_state);
1788 struct smb_direct_connection *c = state->c;
1789 struct ibv_cq *cq = NULL;
1790 void *cq_context = NULL;
1791 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* credits_requested and the three sizes come from the peer's request. */
1794 uint16_t credits_requested;
1795 uint16_t credits_granted;
1796 uint32_t preferred_send_size;
1797 uint32_t max_receive_size;
1798 uint32_t max_fragmented_size;
1800 struct smb_direct_io *io = NULL;
1801 struct smb_direct_io *neg_send = NULL;
1802 struct ibv_send_wr *bad_send_wr = NULL;
1804 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* Consume the completion-queue event that made the fd readable. */
1807 ret = ibv_get_cq_event(c->ibv.comp_channel,
1810 status = map_nt_error_from_unix_common(errno);
1811 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1812 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1813 tevent_req_nterror(req, status);
/* Acknowledge the one event fetched above. */
1817 ibv_ack_cq_events(cq, 1);
/* The CQ context is expected to be our own connection object. */
1819 if (cq_context != c) {
1820 status = NT_STATUS_INTERNAL_ERROR;
1821 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1822 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1823 tevent_req_nterror(req, status);
/*
 * Re-arm the CQ; the second argument 0 asks for notification on any
 * completion, not only solicited ones.
 * NOTE(review): the libibverbs man pages describe the return value of
 * ibv_req_notify_cq()/ibv_post_send() as the errno code itself; errno may
 * not be set. Consider mapping ret instead - confirm.
 */
1828 ret = ibv_req_notify_cq(cq, 0);
1830 status = map_nt_error_from_unix_common(errno);
1831 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1832 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1833 tevent_req_nterror(req, status);
/* Fetch at most one work completion. */
1839 ret = ibv_poll_cq(cq, 1, &wc);
1841 status = map_nt_error_from_unix_common(errno);
1842 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1843 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1844 tevent_req_nterror(req, status);
/*
 * Flushed work request: drop both fd events and hand over to a CM event
 * handler with the same arguments.
 * NOTE(review): this calls the *connect* CM handler although private_data
 * is a negotiate-accept request; the accept variant,
 * smb_direct_connection_negotiate_accept_rdma_handler(), looks like the
 * intended target (likely copy/paste) - confirm and fix.
 * NOTE(review): wc.status is an ibv_wc_status value, not an errno, yet it
 * is passed to the errno mapper; the resulting status is only logged here.
 * NOTE(review): fde is presumably c->ibv.fde_channel, which has just been
 * freed - confirm the callee never touches it.
 */
1849 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1850 //errno = wc.status;
1851 status = map_nt_error_from_unix_common(wc.status);//errno);
1852 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1853 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1854 TALLOC_FREE(c->ibv.fde_channel);
1855 TALLOC_FREE(c->rdma.fde_channel);
1856 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
/* Any other unsuccessful completion fails the request. */
1859 if (wc.status != IBV_WC_SUCCESS) {
1861 status = map_nt_error_from_unix_common(errno);
1862 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1863 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1864 ibv_wc_status_str(wc.status)));
1865 tevent_req_nterror(req, status);
/* wr_id carries the smb_direct_io pointer of the completed work request. */
1869 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1870 struct smb_direct_io);
/* Dispatch on the kind of work request that completed. */
1872 switch (wc.opcode) {
1874 DEBUG(0,("%s:%s: GOT SEND[%p] next[%p] ret[%d] errno[%d]\n",
1875 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1878 case IBV_WC_RDMA_READ:
1879 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] next[%p] ret[%d] errno[%d]\n",
1880 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1884 DEBUG(0,("%s:%s: GOT RECV[%p] next[%p] ret[%d] errno[%d]\n",
1885 __location__, __FUNCTION__, io, io->recv_wr.next, ret, errno));
1886 //dump_data(0, io->data, wc.byte_len);
/* A negotiate request must be at least 0x14 bytes long. */
1887 if (wc.byte_len < 0x14) {
1888 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1889 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1890 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1891 tevent_req_nterror(req, status);
/*
 * The 16-bit values at 0x00 and 0x02 must both be 0x0100; presumably
 * MinVersion and MaxVersion per MS-SMBD - confirm against the spec.
 */
1894 if (SVAL(io->data, 0x00) != 0x0100) {
1895 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1896 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1897 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1898 tevent_req_nterror(req, status);
1901 if (SVAL(io->data, 0x02) != 0x0100) {
1902 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1903 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1904 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1905 tevent_req_nterror(req, status);
/* The peer has to request at least one credit. */
1909 credits_requested = SVAL(io->data, 0x06);
1910 if (credits_requested == 0) {
1911 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1912 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1913 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1914 tevent_req_nterror(req, status);
/*
 * Size limits: the peer's preferred send size may not exceed what we are
 * willing to receive; its receive size must be at least 0x80 and its
 * fragmented size at least 0x20000.
 */
1918 preferred_send_size = IVAL(io->data, 0x08);
1919 if (preferred_send_size > c->state.max_receive_size) {
1920 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1921 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1922 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1923 tevent_req_nterror(req, status);
1927 max_receive_size = IVAL(io->data, 0x0c);
1928 if (max_receive_size < 0x80) {
1929 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1930 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1931 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1932 tevent_req_nterror(req, status);
1936 max_fragmented_size = IVAL(io->data, 0x10);
1937 if (max_fragmented_size < 0x20000) {
1938 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1939 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1940 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1941 tevent_req_nterror(req, status);
/*
 * Adopt the negotiated values:
 *  - receive size: our limit capped by the peer's preferred send size,
 *    but never below 128
 *  - send size: our limit capped by the peer's receive size
 * NOTE(review): max_fragmented_size is validated above but, unlike in the
 * connect-side handler, never folded into c->state.max_fragmented_size;
 * the response below advertises our unmodified value - confirm intent.
 */
1945 c->state.receive_credit_target = credits_requested;
1947 tmp = c->state.max_receive_size;
1948 tmp = MIN(tmp, preferred_send_size);
1949 tmp = MAX(tmp, 128);
1950 c->state.max_receive_size = tmp;
1952 tmp = c->state.max_send_size;
1953 tmp = MIN(tmp, max_receive_size);
1954 c->state.max_send_size = tmp;
/* Negotiate request handled: remove both negotiate-time fd events. */
1956 TALLOC_FREE(c->ibv.fde_channel);
1957 TALLOC_FREE(c->rdma.fde_channel);
1959 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1960 __location__, __FUNCTION__, ret, errno));
/*
 * smb_direct_connection_post_io() is defined elsewhere in this file;
 * presumably it posts the regular receive buffers and fills the idle
 * lists used below - confirm.
 */
1965 ret = smb_direct_connection_post_io(c);
1967 status = map_nt_error_from_unix_common(errno);
1968 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1969 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1970 tevent_req_nterror(req, status);
/*
 * Take the head of the idle send list for the response.
 * NOTE(review): there is no NULL check before neg_send is dereferenced;
 * an empty c->s2r.idle list would crash here - confirm it cannot be empty.
 */
1974 neg_send = c->s2r.idle;
1975 DLIST_REMOVE(c->s2r.idle, neg_send);
/*
 * Credits to grant: receive_posted minus receive_credits, capped at the
 * target the peer just requested; the grant is then added to
 * receive_credits.
 */
1977 credits_granted = c->state.receive_posted;
1978 credits_granted -= c->state.receive_credits;
1979 credits_granted = MIN(credits_granted, c->state.receive_credit_target);
1980 c->state.receive_credits += credits_granted;
/*
 * Negotiate response, 0x20 bytes: 0x0100 at 0x00, 0x02 and 0x04
 * (presumably MinVersion/MaxVersion/NegotiatedVersion per MS-SMBD -
 * confirm), zero at 0x06, our send_credit_target, the credits granted,
 * NT_STATUS_OK, then max_read_write_size, max_send_size,
 * max_receive_size and max_fragmented_size as 32-bit values.
 */
1982 SSVAL(neg_send->data, 0x00, 0x0100);
1983 SSVAL(neg_send->data, 0x02, 0x0100);
1984 SSVAL(neg_send->data, 0x04, 0x0100);
1985 SSVAL(neg_send->data, 0x06, 0x0000);
1986 SSVAL(neg_send->data, 0x08, c->state.send_credit_target);
1987 SSVAL(neg_send->data, 0x0a, credits_granted);
1988 SIVAL(neg_send->data, 0x0c, NT_STATUS_V(NT_STATUS_OK));
1989 SIVAL(neg_send->data, 0x10, c->state.max_read_write_size);
1990 SIVAL(neg_send->data, 0x14, c->state.max_send_size);
1991 SIVAL(neg_send->data, 0x18, c->state.max_receive_size);
1992 SIVAL(neg_send->data, 0x1c, c->state.max_fragmented_size);
1994 neg_send->sge[1].length = 0x20;
1995 neg_send->send_wr.sg_list = &neg_send->sge[1];
1996 neg_send->send_wr.num_sge = 1;
1998 smb_direct_connection_debug_credits(c, "NEGOTIATE_ACCEPT", NULL, __location__, __FUNCTION__);
2000 ret = ibv_post_send(c->ibv.qp, &neg_send->send_wr, &bad_send_wr);
2002 status = map_nt_error_from_unix_common(errno);
2003 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2004 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2005 tevent_req_nterror(req, status);
/*
 * The request completes once the response has been posted; its send
 * completion is not awaited by this handler.
 */
2008 tevent_req_done(req);
/* An RDMA_WRITE completion is not expected here and fails the request. */
2011 case IBV_WC_RDMA_WRITE:
2013 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2014 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2015 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2016 tevent_req_nterror(req, status);
/*
 * Collect the result of smb_direct_connection_negotiate_accept_send().
 *
 * On success both negotiate-time fd events are removed, the state
 * destructor is cleared and the connection is moved to mem_ctx and
 * returned through *_c; NT_STATUS_OK is returned. In both the error and
 * the success path tevent_req_received() is called on req.
 */
2021 static NTSTATUS smb_direct_connection_negotiate_accept_recv(
2022 struct tevent_req *req,
2023 TALLOC_CTX *mem_ctx,
2024 struct smb_direct_connection **_c)
2026 struct smb_direct_connection_negotiate_accept_state *state =
2027 tevent_req_data(req,
2028 struct smb_direct_connection_negotiate_accept_state);
2031 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2035 if (tevent_req_is_nterror(req, &status)) {
2036 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2037 tevent_req_received(req);
/*
 * Do by hand what the destructor would do, then disarm it before the
 * connection is moved away from the state.
 */
2041 TALLOC_FREE(state->c->ibv.fde_channel);
2042 TALLOC_FREE(state->c->rdma.fde_channel);
2043 talloc_set_destructor(state, NULL);
/* talloc_move() re-parents the connection and clears state->c. */
2045 *_c = talloc_move(mem_ctx, &state->c);
2047 tevent_req_received(req);
2048 return NT_STATUS_OK;
/*
 * Tear down connection c after a failure by running
 * smb_direct_connection_destructor() on it. An OK status is replaced by
 * NT_STATUS_UNEXPECTED_NETWORK_ERROR first.
 *
 * NOTE(review): the normalised status does not appear to be consumed
 * before the destructor call - confirm whether it should be stored on the
 * connection or otherwise propagated.
 */
2051 static void smb_direct_connection_disconnect(struct smb_direct_connection *c,
2054 if (NT_STATUS_IS_OK(status)) {
2055 status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
2058 smb_direct_connection_destructor(c);
/*
 * tevent fd handler for the RDMA CM event channel of a connection;
 * presumably the one used once the negotiate has finished - confirm where
 * it is registered.
 *
 * private_data is the smb_direct_connection itself. One CM event is
 * fetched into c->rdma.cm_event; every path visible here ends in
 * smb_direct_connection_disconnect() with a status describing the cause.
 * Note that RDMA_CM_EVENT_ESTABLISHED is also in the list of unexpected
 * events here.
 */
2061 static void smb_direct_connection_rdma_handler(struct tevent_context *ev,
2062 struct tevent_fd *fde,
2066 struct smb_direct_connection *c =
2067 talloc_get_type_abort(private_data,
2068 struct smb_direct_connection);
2069 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* Fetch the pending CM event. */
2074 ret = rdma_get_cm_event(c->rdma.cm_channel,
2077 status = map_nt_error_from_unix_common(errno);
2078 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2079 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2080 smb_direct_connection_disconnect(c, status);
/* A non-zero event status is treated as an errno value and mapped. */
2084 if (c->rdma.cm_event->status != 0) {
2085 errno = c->rdma.cm_event->status;
2086 status = map_nt_error_from_unix_common(errno);
2087 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2088 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2089 smb_direct_connection_disconnect(c, status);
2093 switch (c->rdma.cm_event->event) {
2094 case RDMA_CM_EVENT_DISCONNECTED:
2095 status = NT_STATUS_CONNECTION_DISCONNECTED;
2096 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2097 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2098 smb_direct_connection_disconnect(c, status);
/* Every other known event type is unexpected on this connection. */
2100 case RDMA_CM_EVENT_ADDR_RESOLVED:
2101 case RDMA_CM_EVENT_ADDR_ERROR:
2102 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2103 case RDMA_CM_EVENT_ESTABLISHED:
2104 case RDMA_CM_EVENT_ROUTE_ERROR:
2105 case RDMA_CM_EVENT_CONNECT_REQUEST:
2106 case RDMA_CM_EVENT_CONNECT_RESPONSE:
2107 case RDMA_CM_EVENT_CONNECT_ERROR:
2108 case RDMA_CM_EVENT_UNREACHABLE:
2109 case RDMA_CM_EVENT_REJECTED:
2110 case RDMA_CM_EVENT_DEVICE_REMOVAL:
2111 case RDMA_CM_EVENT_MULTICAST_JOIN:
2112 case RDMA_CM_EVENT_MULTICAST_ERROR:
2113 case RDMA_CM_EVENT_ADDR_CHANGE:
2114 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2115 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2116 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2117 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2118 smb_direct_connection_disconnect(c, status);
2122 status = NT_STATUS_INTERNAL_ERROR;
2123 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2124 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2125 smb_direct_connection_disconnect(c, status);
2128 static void smb_direct_connection_ibv_handler(struct tevent_context *ev,
2129 struct tevent_fd *fde,
2133 struct smb_direct_connection *c =
2134 talloc_get_type_abort(private_data,
2135 struct smb_direct_connection);
2136 struct ibv_cq *cq = NULL;
2137 void *cq_context = NULL;
2138 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
2141 uint16_t credits_requested;
2142 uint16_t credits_granted;
2143 uint32_t send_credits;
2145 uint32_t data_offset;
2146 struct smb_direct_io *io = NULL;
2149 ret = ibv_get_cq_event(c->ibv.comp_channel,
2152 status = map_nt_error_from_unix_common(errno);
2153 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2154 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2155 smb_direct_connection_disconnect(c, status);
2159 ibv_ack_cq_events(cq, 1);
2161 if (cq_context != c) {
2162 status = NT_STATUS_INTERNAL_ERROR;
2163 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2164 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2165 smb_direct_connection_disconnect(c, status);
2170 ret = ibv_req_notify_cq(cq, 0);
2172 status = map_nt_error_from_unix_common(errno);
2173 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2174 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2175 smb_direct_connection_disconnect(c, status);
2181 DEBUG(0,("%s:%s: try[%d] ret[%d] errno[%d] status[%s]\n",
2182 __location__, __FUNCTION__, try, ret, errno, nt_errstr(status)));
2185 ret = ibv_poll_cq(cq, 1, &wc);
2186 if (ret == 0 && try > 1) {
2188 * The queue is empty...
2193 status = map_nt_error_from_unix_common(errno);
2194 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2195 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2196 smb_direct_connection_disconnect(c, status);
2201 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
2203 status = map_nt_error_from_unix_common(errno);
2204 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
2205 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
2206 ibv_wc_status_str(wc.status)));
2207 TALLOC_FREE(c->ibv.fde_channel);
2208 TALLOC_FREE(c->rdma.fde_channel);
2209 smb_direct_connection_rdma_handler(ev, fde, 0 /* flags */, private_data);
2212 if (wc.status != IBV_WC_SUCCESS) {
2214 status = map_nt_error_from_unix_common(errno);
2215 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
2216 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
2217 ibv_wc_status_str(wc.status)));
2218 smb_direct_connection_disconnect(c, status);
2222 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
2223 struct smb_direct_io);
2225 switch (wc.opcode) {
2227 DEBUG(0,("%s:%s: GOT SEND[%p] data_length[%u] remaining_length[%u] ret[%d] errno[%d]\n",
2228 __location__, __FUNCTION__, io,
2229 (unsigned)io->data_length, (unsigned)io->remaining_length, ret, errno));
2230 DLIST_REMOVE(c->s2r.posted, io);
2231 DLIST_ADD_END(c->s2r.idle, io);
2234 ret = smb_direct_connection_setup_readv(c);
2236 status = map_nt_error_from_unix_common(errno);
2237 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2238 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2239 smb_direct_connection_disconnect(c, status);
2246 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
2247 __location__, __FUNCTION__, io, ret, errno));
2248 if (wc.byte_len > c->state.max_receive_size) {
2249 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2250 DEBUG(0,("%s:%s: wc.byte_len[%u/0x%x] max_receive_size[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
2251 __location__, __FUNCTION__,
2252 (unsigned)wc.byte_len,
2253 (unsigned)wc.byte_len,
2254 (unsigned)c->state.max_receive_size,
2255 (unsigned)c->state.max_receive_size,
2256 ret, errno, nt_errstr(status)));
2257 smb_direct_connection_disconnect(c, status);
2260 if (wc.byte_len < SMB_DIRECT_DATA_MIN_HDR_SIZE) {
2261 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2262 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2263 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2264 smb_direct_connection_disconnect(c, status);
2267 DLIST_REMOVE(c->r2s.posted, io);
2268 //dump_data(0, io->smbd_hdr, MIN(wc.byte_len, sizeof(io->smbd_hdr)));
2269 credits_requested = SVAL(io->smbd_hdr, 0x00);
2270 if (credits_requested == 0) {
2271 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2272 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2273 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2274 smb_direct_connection_disconnect(c, status);
2277 credits_granted = SVAL(io->smbd_hdr, 0x02);
2278 flags = SVAL(io->smbd_hdr, 0x04);
2279 io->remaining_length = IVAL(io->smbd_hdr, 0x08);
2280 data_offset = IVAL(io->smbd_hdr, 0x0C);
2281 io->data_length = IVAL(io->smbd_hdr, 0x10);
2283 c->state.receive_posted -= 1;
2284 c->state.receive_credits -= 1;
2285 c->state.receive_credit_target = credits_requested;
2287 send_credits = c->state.send_credits + credits_granted;
2288 if (send_credits > c->state.send_credit_target) {
2289 smb_direct_connection_debug_credits(c, "wrong RECV",io, __location__, __func__);
2290 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2291 DEBUG(0,("%s:%s: credits_granted[%u] send_credits[%u] ret[%d] errno[%d] status[%s]\n",
2292 __location__, __FUNCTION__,
2293 (unsigned)credits_granted, (unsigned)send_credits,
2294 ret, errno, nt_errstr(status)));
2295 smb_direct_connection_disconnect(c, status);
2298 c->state.send_credits = send_credits;
2300 smb_direct_connection_debug_credits(c, "GOT_RECV", io, __location__, __FUNCTION__);
2302 if (data_offset == 0) {
2303 if (wc.byte_len != SMB_DIRECT_DATA_MIN_HDR_SIZE) {
2304 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2305 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2306 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2307 smb_direct_connection_disconnect(c, status);
2310 DLIST_ADD_END(c->r2s.idle, io);
2312 ret = smb_direct_connection_post_io(c);
2314 status = map_nt_error_from_unix_common(errno);
2315 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2316 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2317 smb_direct_connection_disconnect(c, status);
2324 if (data_offset != SMB_DIRECT_DATA_OFFSET) {
2325 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2326 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2327 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2328 smb_direct_connection_disconnect(c, status);
2332 if (io->data_length > (c->state.max_receive_size - data_offset)) {
2333 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2334 DEBUG(0,("%s:%s: io->data_length[%u/0x%x] max_receive_size-data_offset[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
2335 __location__, __FUNCTION__,
2336 (unsigned)io->data_length,
2337 (unsigned)io->data_length,
2338 (unsigned)c->state.max_receive_size - data_offset,
2339 (unsigned)c->state.max_receive_size - data_offset,
2340 ret, errno, nt_errstr(status)));
2341 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2342 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2343 smb_direct_connection_disconnect(c, status);
2347 if (c->r2s.remaining_length > 0) {
2348 if (io->data_length > c->r2s.remaining_length) {
2349 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2350 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2351 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2352 smb_direct_connection_disconnect(c, status);
2356 c->r2s.remaining_length -= io->data_length;
2359 if (io->remaining_length > c->r2s.remaining_length) {
2360 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2361 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2362 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2363 smb_direct_connection_disconnect(c, status);
2367 io->iov = io->_iov_array;
2368 io->iov[0].iov_base = io->data;
2369 io->iov[0].iov_len = io->data_length;
2371 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
2372 __location__, __func__, io,
2373 (unsigned)io->data_length,
2374 (unsigned)io->remaining_length));
2376 uint64_t total_length = io->data_length + io->remaining_length;
2378 if (total_length >= c->state.max_fragmented_size) { //correct direction
2379 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2380 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2381 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2382 smb_direct_connection_disconnect(c, status);
2386 _smb_setlen_tcp(io->nbt_hdr, total_length);
2387 io->iov = io->_iov_array;
2388 io->iov[0].iov_base = io->nbt_hdr;
2389 io->iov[0].iov_len = sizeof(io->nbt_hdr);
2390 io->iov[1].iov_base = io->data;
2391 io->iov[1].iov_len = io->data_length;
2393 DEBUG(0,("%s:%s: START[%p] total_length[%u] io->data_length[%u] io->remaining_length[%u]\n",
2394 __location__, __func__, io, (unsigned)total_length,
2395 (unsigned)io->data_length,
2396 (unsigned)io->remaining_length));
2398 c->r2s.remaining_length = io->remaining_length;;
2401 if (flags & ~SMB_DIRECT_RESPONSE_REQUESTED) {
2402 status = map_nt_error_from_unix_common(errno);
2403 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2404 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2405 smb_direct_connection_disconnect(c, status);
2409 if (flags & SMB_DIRECT_RESPONSE_REQUESTED) {
2410 c->state.send_immediate = true;
2411 ret = smb_direct_connection_post_io(c);
2413 status = map_nt_error_from_unix_common(errno);
2414 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2415 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2416 smb_direct_connection_disconnect(c, status);
2421 if (c->state.receive_credits == 0) {
2422 ret = smb_direct_connection_post_io(c);
2424 status = map_nt_error_from_unix_common(errno);
2425 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2426 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2427 smb_direct_connection_disconnect(c, status);
2432 DLIST_ADD_END(c->r2s.ready, io);
2434 ret = smb_direct_connection_setup_writev(c);
2436 status = map_nt_error_from_unix_common(errno);
2437 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2438 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2439 smb_direct_connection_disconnect(c, status);
2445 case IBV_WC_RDMA_READ:
2446 case IBV_WC_RDMA_WRITE:
2448 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2449 DEBUG(0,("%s:%s: GOT OPCODE[%u] IO[%p] ret[%d] errno[%d] status[%s]\n",
2450 __location__, __FUNCTION__, wc.opcode, io, ret, errno, nt_errstr(status)));
2451 smb_direct_connection_disconnect(c, status);
/*
 * Socket-writable half of the socket event handling: push the iovec of an
 * smb_direct_io from the r2s queues into c->sock.fd using writev().
 *
 * Visible steps:
 *  - when c->r2s.out is set, a "CONTINUE" trace is logged for io;
 *  - io is removed from c->r2s.ready;
 *  - TEVENT_FD_NOT_WRITEABLE() is applied to c->sock.fde on one path
 *    (the condition guarding it is not visible here);
 *  - writev() is called with io->iov / io->iov_count; in the EAGAIN
 *    branch TEVENT_FD_WRITEABLE() is re-armed, other failures are
 *    logged with DBG_ERR;
 *  - iov_advance() consumes sret bytes; once io->iov_count reaches 0
 *    the io is appended to c->r2s.idle, otherwise a "remain" trace is
 *    logged.
 *
 * Returns bool. NOTE(review): the return statements are not visible
 * here; smb_direct_connection_sock_handler() maps errno to an NTSTATUS
 * after calling this function, so failure paths presumably leave errno
 * set - confirm.
 */
2456 static bool smb_direct_connection_sock_handler_writable(
2457 struct smb_direct_connection *c)
2459 struct smb_direct_io *io = NULL;
2464 if (c->r2s.out != NULL) {
2466 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2467 __location__, __func__, io,
2468 (unsigned)io->data_length,
2469 (unsigned)io->remaining_length,
2470 (unsigned)io->iov_count,
2471 (unsigned)io->iov[0].iov_len));
2475 DLIST_REMOVE(c->r2s.ready, io);
/*
 * NOTE(review): the debug text says NOT_READABLE while the macro on the
 * following line clears the WRITE side - confirm the intended wording.
 */
2481 DBG_DEBUG("TEVENT_FD_NOT_READABLE\n");
2482 TEVENT_FD_NOT_WRITEABLE(c->sock.fde);
2486 DEBUG(0,("%s:%s: WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2487 __location__, __func__, io,
2488 (unsigned)io->data_length,
2489 (unsigned)io->remaining_length,
2490 (unsigned)io->iov_count,
2491 (unsigned)io->iov[0].iov_len));
2492 sret = writev(c->sock.fd, io->iov, io->iov_count);
2494 if (errno == EAGAIN) {
2495 DEBUG(0,("%s:%s: EAGAIN WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2496 __location__, __func__, io,
2497 (unsigned)io->data_length,
2498 (unsigned)io->remaining_length,
2499 (unsigned)io->iov_count,
2500 (unsigned)io->iov[0].iov_len));
/*
 * NOTE(review): the message mentions readv, but this is the EAGAIN
 * branch of the writev() call above - confirm the intended wording.
 */
2501 DBG_DEBUG("readv returned EAGAIN\n");
2502 TEVENT_FD_WRITEABLE(c->sock.fde);
2506 DBG_ERR("writev failed ret [%zd] [%s]\n",
2507 sret, strerror(errno));
/* Skip the bytes writev() accepted; iov/iov_count then describe the rest. */
2511 ok = iov_advance(&io->iov, &io->iov_count, sret);
2513 DBG_ERR("iov_advance failed [%s]\n", strerror(errno));
2517 if (io->iov_count == 0) {
2518 DEBUG(0,("%s:%s: done WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2519 __location__, __func__, io,
2520 (unsigned)io->data_length,
2521 (unsigned)io->remaining_length,
2522 (unsigned)io->iov_count,
2523 (unsigned)io->iov[0].iov_len));
2525 DLIST_ADD_END(c->r2s.idle, io);
2529 DEBUG(0,("%s:%s: remain WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2530 __location__, __func__, io,
2531 (unsigned)io->data_length,
2532 (unsigned)io->remaining_length,
2533 (unsigned)io->iov_count,
2534 (unsigned)io->iov[0].iov_len));
/*
 * Socket-readable half of the socket event handling: read from
 * c->sock.fd with readv() into an smb_direct_io of the s2r queues and
 * cut the stream into fragments.
 *
 * Visible steps:
 *  - c->s2r.in is checked and an io is removed from c->s2r.idle
 *    (NOTE(review): the assignments of io are not visible here);
 *  - if c->s2r.remaining_length > 0 the io is set up to continue the
 *    current packet: data_length is the smaller of
 *    (max_send_size - SMB_DIRECT_DATA_OFFSET) and the remaining length,
 *    and iov[0] points at io->data;
 *  - otherwise the io is set up to read only the NBT length header
 *    (iov[0] = io->nbt_hdr, data_length = remaining_length = 0);
 *  - TEVENT_FD_NOT_READABLE() is applied on one path (condition not
 *    visible here);
 *  - readv() is issued; in the EAGAIN branch TEVENT_FD_READABLE() is
 *    re-armed;
 *  - iov_advance() consumes sret bytes; while io->iov_count != 0 the
 *    handler waits for more data;
 *  - a drained iov with data_length != 0 is a finished fragment and the
 *    io is appended to c->s2r.ready;
 *  - a drained iov with data_length == 0 means the NBT header is
 *    complete: its length is taken with smb_len_tcp(), compared against
 *    c->state.max_fragmented_size, and the first fragment is laid out.
 *
 * Returns bool; the return statements are not visible here.
 */
2538 static bool smb_direct_connection_sock_handler_readable(
2539 struct smb_direct_connection *c)
2541 struct smb_direct_io *io = NULL;
2546 if (c->s2r.in != NULL) {
2551 DLIST_REMOVE(c->s2r.idle, io);
2554 if (c->s2r.remaining_length > 0) {
2556 * We need to continue to get
2557 * the incomplete packet.
2559 io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
2560 c->s2r.remaining_length);
2561 io->remaining_length = c->s2r.remaining_length;
2562 io->remaining_length -= io->data_length;
2563 c->s2r.remaining_length = io->remaining_length;
2565 io->iov = io->_iov_array;
2566 io->iov[0].iov_base = io->data;
2567 io->iov[0].iov_len = io->data_length;
2569 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
2570 __location__, __func__, io,
2571 (unsigned)io->data_length,
2572 (unsigned)io->remaining_length));
2574 DEBUG(0,("%s:%s: WAIT[%p] for NBT\n", __location__, __func__, io));
2576 * For a new packet we need to get the length
2579 io->data_length = 0;
2580 io->remaining_length = 0;
2582 io->iov = io->_iov_array;
2583 io->iov[0].iov_base = io->nbt_hdr;
2584 io->iov[0].iov_len = sizeof(io->nbt_hdr);
2591 DBG_DEBUG("TEVENT_FD_NOT_READABLE\n");
2592 TEVENT_FD_NOT_READABLE(c->sock.fde);
2597 sret = readv(c->sock.fd, io->iov, io->iov_count);
2599 if (errno == EAGAIN) {
2600 DEBUG(0,("%s:%s: EAGAIN READV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2601 __location__, __func__, io,
2602 (unsigned)io->data_length,
2603 (unsigned)io->remaining_length,
2604 (unsigned)io->iov_count,
2605 (unsigned)io->iov[0].iov_len));
2606 DBG_DEBUG("readv returned EAGAIN\n");
2607 TEVENT_FD_READABLE(c->sock.fde);
2618 ok = iov_advance(&io->iov, &io->iov_count, sret);
2624 if (io->iov_count != 0) {
2625 /* Wait for more data */
2626 DEBUG(0,("%s:%s: more READV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2627 __location__, __func__, io,
2628 (unsigned)io->data_length,
2629 (unsigned)io->remaining_length,
2630 (unsigned)io->iov_count,
2631 (unsigned)io->iov[0].iov_len));
2632 TEVENT_FD_READABLE(c->sock.fde);
2636 if (io->data_length != 0) {
2637 DEBUG(0,("%s:%s: FINISH[%p] io->data_length[%u] io->remaining_length[%u]\n",
2638 __location__, __func__, io, (unsigned)io->data_length, (unsigned)io->remaining_length));
2640 * We managed to read the whole fragment which
2641 * is ready to be posted into the send queue.
2644 DLIST_ADD_END(c->s2r.ready, io);
/*
 * data_length was 0, so only the NBT header has been read: its length
 * field becomes the total number of bytes still to be forwarded.
 * NOTE(review): what happens when that length exceeds
 * max_fragmented_size is not visible here.
 */
2648 c->s2r.remaining_length = smb_len_tcp(io->nbt_hdr);
2649 if (c->s2r.remaining_length > c->state.max_fragmented_size) {
2654 io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
2655 c->s2r.remaining_length);
2656 io->remaining_length = c->s2r.remaining_length;
2657 io->remaining_length -= io->data_length;
2659 io->iov = io->_iov_array;
2660 io->iov[0].iov_base = io->data;
2661 io->iov[0].iov_len = io->data_length;
2664 DEBUG(0,("%s:%s: AFTER[%p] total[%u] io->data_length[%u] io->remaining_length[%u]\n",
2665 __location__, __func__, io, (unsigned)c->s2r.remaining_length,
2666 (unsigned)io->data_length,
2667 (unsigned)io->remaining_length));
2668 c->s2r.remaining_length = io->remaining_length;
2670 * try to read the rest immediately.
/*
 * tevent fd handler for the connection socket (registered on c->sock.fd
 * by smb_direct_connection_setup_events()).
 *
 * private_data must be a struct smb_direct_connection; this is enforced
 * with talloc_get_type_abort().
 *
 * For TEVENT_FD_WRITE the writable helper is run, for TEVENT_FD_READ the
 * readable helper; each is followed by smb_direct_connection_post_io().
 * On the visible error paths the status is derived from errno with
 * map_nt_error_from_unix_common() and the connection is torn down via
 * smb_direct_connection_disconnect().
 *
 * NOTE(review): the conditions guarding the error branches and the
 * remaining parameters of the signature are not visible here.
 */
2675 static void smb_direct_connection_sock_handler(struct tevent_context *ev,
2676 struct tevent_fd *fde,
2680 struct smb_direct_connection *c =
2681 talloc_get_type_abort(private_data,
2682 struct smb_direct_connection);
2687 if (fde_flags & TEVENT_FD_WRITE) {
2688 ok = smb_direct_connection_sock_handler_writable(c);
2690 status = map_nt_error_from_unix_common(errno);
2691 DBG_ERR("smb-d sock_handler_writable failed [%s]\n",
2693 smb_direct_connection_disconnect(c, status);
2698 ret = smb_direct_connection_post_io(c);
2700 status = map_nt_error_from_unix_common(errno);
2701 DBG_ERR("smb_direct_connection_post_io failed [%s]\n",
2703 smb_direct_connection_disconnect(c, status);
2707 if (fde_flags & TEVENT_FD_READ) {
2708 ok = smb_direct_connection_sock_handler_readable(c);
2710 status = map_nt_error_from_unix_common(errno);
2711 DBG_ERR("smb-d sock_handler_readable failed [%s]\n",
2713 smb_direct_connection_disconnect(c, status);
2718 ret = smb_direct_connection_post_io(c);
2720 status = map_nt_error_from_unix_common(errno);
2721 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2722 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2723 smb_direct_connection_disconnect(c, status);
/*
 * Attach the three file descriptors of a connection to a tevent context:
 *  - c->rdma.cm_channel->fd  -> smb_direct_connection_rdma_handler
 *  - c->ibv.comp_channel->fd -> smb_direct_connection_ibv_handler
 *  - c->sock.fd              -> smb_direct_connection_sock_handler
 *
 * The socket flags start as TEVENT_FD_READ and additionally get
 * TEVENT_FD_WRITE when c->r2s.out is set.
 *
 * Visible return values:
 *  - NT_STATUS_CONNECTION_DISCONNECTED when c->rdma.cm_channel is NULL;
 *  - NT_STATUS_OK when ev equals c->last_ev (nothing to do);
 *  - NT_STATUS_INVALID_PARAMETER_MIX when c->last_ev is already set to
 *    a different context;
 *  - NT_STATUS_NO_MEMORY when a tevent_add_fd() call fails (fd events
 *    added earlier in this call are freed again);
 *  - NT_STATUS_OK on success.
 *
 * Before that, all three fd events are freed when the ibv or rdma fd
 * event reports no flags, and on one more path that returns
 * NT_STATUS_OK. NOTE(review): the condition of that last path and any
 * update of c->last_ev are not visible here.
 */
2728 NTSTATUS smb_direct_connection_setup_events(struct smb_direct_connection *c,
2729 struct tevent_context *ev)
2731 uint16_t sock_fde_flags = TEVENT_FD_READ;
2733 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2734 if (c->r2s.out != NULL) {
2735 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2736 sock_fde_flags |= TEVENT_FD_WRITE;
2739 if (c->rdma.cm_channel == NULL) {
2740 return NT_STATUS_CONNECTION_DISCONNECTED;
2743 if (tevent_fd_get_flags(c->ibv.fde_channel) == 0) {
2745 TALLOC_FREE(c->sock.fde);
2746 TALLOC_FREE(c->ibv.fde_channel);
2747 TALLOC_FREE(c->rdma.fde_channel);
2748 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2751 if (tevent_fd_get_flags(c->rdma.fde_channel) == 0) {
2753 TALLOC_FREE(c->sock.fde);
2754 TALLOC_FREE(c->ibv.fde_channel);
2755 TALLOC_FREE(c->rdma.fde_channel);
2756 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2760 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2762 TALLOC_FREE(c->sock.fde);
2763 TALLOC_FREE(c->ibv.fde_channel);
2764 TALLOC_FREE(c->rdma.fde_channel);
2765 return NT_STATUS_OK;
2768 if (ev == c->last_ev) {
2769 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2770 return NT_STATUS_OK;
2773 if (c->last_ev != NULL) {
2774 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2775 return NT_STATUS_INVALID_PARAMETER_MIX;
2778 c->rdma.fde_channel = tevent_add_fd(ev, c,
2779 c->rdma.cm_channel->fd,
2781 smb_direct_connection_rdma_handler,
2783 if (c->rdma.fde_channel == NULL) {
2784 return NT_STATUS_NO_MEMORY;
2786 c->ibv.fde_channel = tevent_add_fd(ev, c,
2787 c->ibv.comp_channel->fd,
2789 smb_direct_connection_ibv_handler,
2791 if (c->ibv.fde_channel == NULL) {
2792 TALLOC_FREE(c->rdma.fde_channel);
2793 return NT_STATUS_NO_MEMORY;
2795 c->sock.fde = tevent_add_fd(ev, c, c->sock.fd,
2797 smb_direct_connection_sock_handler,
2799 if (c->sock.fde == NULL) {
2800 TALLOC_FREE(c->rdma.fde_channel);
2801 TALLOC_FREE(c->ibv.fde_channel);
2802 return NT_STATUS_NO_MEMORY;
2805 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2807 return NT_STATUS_OK;
/*
 * Accessor for c->state.max_fragmented_size, the limit the socket and
 * completion handlers compare reassembled packet lengths against.
 */
2810 uint32_t smb_direct_connection_max_fragmented_size(struct smb_direct_connection *c)
2812 return c->state.max_fragmented_size;
/* Accessor for c->state.max_read_write_size. */
2815 uint32_t smb_direct_connection_max_read_write_size(struct smb_direct_connection *c)
2817 return c->state.max_read_write_size;
/*
 * Forward declaration: smbd_direct_listener_setup() installs this as the
 * talloc destructor before the definition appears further down.
 */
2820 static int smb_direct_listener_destructor(struct smb_direct_listener *l);
/*
 * Create an SMB-Direct listener bound to a copy of *addr.
 *
 * Visible steps:
 *  - the address is copied and its port is overwritten with 5445;
 *  - a zeroed struct smb_direct_listener is allocated on mem_ctx and
 *    gets smb_direct_listener_destructor as talloc destructor;
 *  - an RDMA CM event channel is created, marked close-on-exec and
 *    switched to non-blocking;
 *  - a cm id is created (two rdma_create_id() call variants selected by
 *    RDMA_USER_CM_MAX_ABI_VERSION), bound with rdma_bind_addr() and put
 *    into listening state with SMB_DIRECT_LISTENER_BACKLOG;
 *  - l->rdma.expected_event is set to RDMA_CM_EVENT_CONNECT_REQUEST.
 *
 * Returns NT_STATUS_NO_MEMORY when the allocation or the event channel
 * creation fails, NT_STATUS_OK on success. For rdma_create_id(),
 * rdma_bind_addr() and rdma_listen() a status is derived from errno.
 *
 * NOTE(review): the store into *_l, the error returns after the
 * errno mapping and any cleanup of l on failure are not visible here.
 * NOTE(review): a failing rdma_create_event_channel() is reported as
 * NT_STATUS_NO_MEMORY regardless of errno - confirm this is intended.
 * NOTE(review): the "started listening" message is emitted with
 * DBG_ERR although it reports a normal condition.
 */
2822 NTSTATUS smbd_direct_listener_setup(TALLOC_CTX *mem_ctx,
2823 const struct sockaddr_storage *addr,
2824 struct smb_direct_listener **_l)
2826 struct smb_direct_listener *l;
2828 struct sockaddr_storage _addr = *addr;
2829 struct sockaddr *bind_addr = (struct sockaddr *)&_addr;
2830 char addr_buf[INET6_ADDRSTRLEN] = { 0, };
2832 set_sockaddr_port(bind_addr, 5445);
2834 l = talloc_zero(mem_ctx, struct smb_direct_listener);
2836 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2837 return NT_STATUS_NO_MEMORY;
2839 talloc_set_destructor(l, smb_direct_listener_destructor);
2841 l->rdma.cm_channel = rdma_create_event_channel();
2842 if (l->rdma.cm_channel == NULL) {
2844 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2845 return NT_STATUS_NO_MEMORY;
2847 smb_set_close_on_exec(l->rdma.cm_channel->fd);
2848 set_blocking(l->rdma.cm_channel->fd, false);
2850 l->rdma.context.l = l;
2852 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
2853 ret = rdma_create_id(l->rdma.cm_channel,
2859 ret = rdma_create_id(l->rdma.cm_channel,
2865 status = map_nt_error_from_unix_common(errno);
2867 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2871 l->rdma.cm_channel = l->rdma.cm_id->channel;
2874 ret = rdma_bind_addr(l->rdma.cm_id, bind_addr);
2877 status = map_nt_error_from_unix_common(errno);
2879 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2883 ret = rdma_listen(l->rdma.cm_id, SMB_DIRECT_LISTENER_BACKLOG);
2886 status = map_nt_error_from_unix_common(errno);
2888 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2891 l->rdma.expected_event = RDMA_CM_EVENT_CONNECT_REQUEST;
2893 DBG_ERR("SMB-D daemon started listening SMB-D connections on listener[%p]: %s\n",
2894 l, print_sockaddr(addr_buf, sizeof(addr_buf), &_addr));
2897 return NT_STATUS_OK;
/*
 * talloc destructor of a listener: release the RDMA CM resources it may
 * hold. Each resource is only released when its pointer is set and the
 * pointer is cleared afterwards, so a repeated call finds nothing left
 * to release.
 *
 * Order: fd event, pending cm event (acknowledged), cm id, event
 * channel - i.e. the id is destroyed before the channel it was created
 * on in smbd_direct_listener_setup().
 *
 * NOTE(review): the return statement is not visible here.
 */
2900 static int smb_direct_listener_destructor(struct smb_direct_listener *l)
2902 TALLOC_FREE(l->rdma.fde_channel);
2904 if (l->rdma.cm_event != NULL) {
2905 rdma_ack_cm_event(l->rdma.cm_event);
2906 l->rdma.cm_event = NULL;
2909 if (l->rdma.cm_id != NULL) {
2910 rdma_destroy_id(l->rdma.cm_id);
2911 l->rdma.cm_id = NULL;
2914 if (l->rdma.cm_channel != NULL) {
2915 rdma_destroy_event_channel(l->rdma.cm_channel);
2916 l->rdma.cm_channel = NULL;
/*
 * Per-request state of smb_direct_listener_accept_send().
 *  ev: event context used when deferring the completion callback.
 *  l:  listener whose pending/ready connection lists are served.
 */
2922 struct smb_direct_listener_accept_state {
2923 struct tevent_context *ev;
2924 struct smb_direct_listener *l;
/*
 * talloc destructor of the accept request state: drops the fd event that
 * smb_direct_listener_accept_send() stored in l->rdma.fde_channel, so the
 * listener does not keep a pointer to an event owned by this state.
 *
 * NOTE(review): the return statement is not visible here.
 */
2927 static int smb_direct_listener_accept_state_destructor(
2928 struct smb_direct_listener_accept_state *state)
2930 TALLOC_FREE(state->l->rdma.fde_channel);
/*
 * Forward declaration: smb_direct_listener_accept_send() registers this
 * handler before its definition.
 */
2935 static void smb_direct_listener_accept_rdma_handler(struct tevent_context *ev,
2936 struct tevent_fd *fde,
2938 void *private_data);
/*
 * Start accepting SMB-Direct connections on listener l.
 *
 * Creates a tevent request with a smb_direct_listener_accept_state,
 * installs the state destructor and registers
 * smb_direct_listener_accept_rdma_handler on the listener's CM event
 * channel fd. The fd event is allocated with the state as talloc parent
 * and remembered in l->rdma.fde_channel.
 *
 * If the fd event cannot be allocated the request is failed through
 * tevent_req_nomem() and posted to ev.
 *
 * NOTE(review): the NULL check after tevent_req_create(), the
 * initialisation of state->ev / state->l and the final return are not
 * visible here.
 */
2940 struct tevent_req *smb_direct_listener_accept_send(TALLOC_CTX *mem_ctx,
2941 struct tevent_context *ev,
2942 struct smb_direct_listener *l)
2944 struct tevent_req *req = NULL;
2945 struct smb_direct_listener_accept_state *state = NULL;
2947 req = tevent_req_create(mem_ctx, &state,
2948 struct smb_direct_listener_accept_state);
2954 talloc_set_destructor(state, smb_direct_listener_accept_state_destructor);
2956 // HACK: if smb_direct_listener_accept_rdma_handler is not triggered by fde
2957 // smb_direct_listener_accept_rdma_handler(ev, NULL, 0, req);
2959 l->rdma.fde_channel = tevent_add_fd(ev, state,
2960 l->rdma.cm_channel->fd,
2962 smb_direct_listener_accept_rdma_handler,
2964 if (tevent_req_nomem(l->rdma.fde_channel, req)) {
2965 return tevent_req_post(req, ev);
2968 DBG_ERR("SMB-D daemon started accepting SMB-D connections on listener[%p]\n", l);
/*
 * Forward declaration: set as the subrequest callback in
 * smb_direct_listener_accept_rdma_handler() before its definition.
 */
2973 static void smb_direct_listener_accept_done(struct tevent_req *subreq);
/*
 * tevent fd handler for the listener's RDMA CM event channel.
 * private_data is the accept tevent_req; the listener comes from its
 * state.
 *
 * Visible flow:
 *  - fetch one event with rdma_get_cm_event(); EAGAIN is only logged,
 *    any other failure is mapped from errno, the listener resources are
 *    released and the request is failed with that status;
 *  - an event carrying a non-zero status is handled the same way, with
 *    errno taken from the event status;
 *  - an event different from l->rdma.expected_event is logged;
 *  - RDMA_CM_EVENT_CONNECT_REQUEST: create a connection object, adopt
 *    the cm id of the event, migrate it to the connection's own event
 *    channel, finish the allocation, queue the connection on l->pending
 *    and start the negotiate-accept subrequest whose completion runs
 *    smb_direct_listener_accept_done(). The visible failure paths undo
 *    the cm id adoption and call rdma_reject() on the event's id;
 *  - RDMA_CM_EVENT_DISCONNECTED, RDMA_CM_EVENT_DEVICE_REMOVAL and any
 *    other event are only logged;
 *  - finally the event is acknowledged and l->rdma.cm_event cleared.
 *
 * NOTE(review): the error paths call smb_direct_listener_destructor(l)
 * directly (already marked TODO in the code); the listener object itself
 * stays allocated - confirm that later users cope with the cleared
 * pointers.
 * NOTE(review): the conditions guarding the error branches and the
 * control flow between the cases are not visible here.
 */
2975 static void smb_direct_listener_accept_rdma_handler(struct tevent_context *ev,
2976 struct tevent_fd *fde,
2980 struct tevent_req *req =
2981 talloc_get_type_abort(private_data,
2983 struct smb_direct_listener_accept_state *state =
2984 tevent_req_data(req,
2985 struct smb_direct_listener_accept_state);
2986 struct smb_direct_listener *l = state->l;
2987 struct smb_direct_connection *c = NULL;
2988 struct tevent_req *subreq = NULL;
2992 DBG_ERR("SMB-D got connection event listener[%p]\n", l);
2994 ret = rdma_get_cm_event(l->rdma.cm_channel,
2997 if (errno == EAGAIN) {
2998 DBG_ERR("SMB-D got EAGAIN error event on listener [%p]\n", l);
3001 status = map_nt_error_from_unix_common(errno);
3002 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
3003 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
3004 smb_direct_listener_destructor(l); //TODO cleanup???
3005 tevent_req_nterror(req, status);
3010 if (l->rdma.cm_event->status != 0) {
3011 errno = l->rdma.cm_event->status;
3012 status = map_nt_error_from_unix_common(errno);
3013 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
3014 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
3015 smb_direct_listener_destructor(l); //TODO cleanup???
3016 tevent_req_nterror(req, status);
3020 if (l->rdma.cm_event->event != l->rdma.expected_event) {
3021 DEBUG(0,("%s:%s: l->rdma.cm_event->event[%u] != l->rdma.expected_event[%u]\n",
3022 __location__, __FUNCTION__,
3023 l->rdma.cm_event->event, l->rdma.expected_event));
3026 switch (l->rdma.cm_event->event) {
3027 case RDMA_CM_EVENT_CONNECT_REQUEST:
3029 DBG_ERR("RDMA_CM_EVENT_CONNECT_REQUEST\n");
3031 c = smb_direct_connection_create(l);
3033 DBG_ERR("smb_direct_connection_create failed - ignoring\n");
3035 rdma_reject(l->rdma.cm_event->id, NULL, 0);
/*
 * Fill the private data sent with the connection parameters: value 0 at
 * offset 0 and value 16 at offset 4 of ird_ord_hdr.
 * NOTE(review): presumably the IRD/ORD pair - confirm meaning and byte
 * order of RSIVAL.
 */
3040 RSIVAL(c->rdma.ird_ord_hdr, 0, 0);
3041 RSIVAL(c->rdma.ird_ord_hdr, 4, 16);
3043 c->rdma.conn_param = l->rdma.cm_event->param.conn;
3044 c->rdma.conn_param.private_data = c->rdma.ird_ord_hdr;
3045 c->rdma.conn_param.private_data_len = sizeof(c->rdma.ird_ord_hdr);
3047 c->rdma.cm_id = l->rdma.cm_event->id;
3048 c->rdma.cm_id->context = &c->rdma.context;
3050 ret = rdma_migrate_id(c->rdma.cm_id, c->rdma.cm_channel);
3052 DBG_ERR("rdma_migrate_id failed [%s] result [%d]\n", strerror(errno), ret);
3053 c->rdma.cm_id->context = NULL;
3054 c->rdma.cm_id = NULL;
3057 rdma_reject(l->rdma.cm_event->id, NULL, 0);
3062 status = smb_direct_connection_complete_alloc(c);
3063 if (!NT_STATUS_IS_OK(status)) {
3064 DBG_ERR("smb_direct_connection_complete_alloc failed - ignoring\n");
3065 c->rdma.cm_id->context = NULL;
3066 c->rdma.cm_id->channel = NULL;
3067 c->rdma.cm_id = NULL;
3070 rdma_reject(l->rdma.cm_event->id, NULL, 0);
3076 DLIST_ADD_END(l->pending, c);
3078 subreq = smb_direct_connection_negotiate_accept_send(state,
3081 if (subreq == NULL) {
3082 DBG_ERR("smb_direct_connection_accept_send ENOMEM\n");
3087 tevent_req_set_callback(subreq, smb_direct_listener_accept_done, req);
3090 case RDMA_CM_EVENT_DISCONNECTED:
3091 DBG_DEBUG("RDMA_CM_EVENT_DISCONNECTED\n");
3094 case RDMA_CM_EVENT_DEVICE_REMOVAL:
3095 DBG_ERR("RDMA device removal\n");
3099 DBG_ERR("event %d\n", l->rdma.cm_event->event);
3103 rdma_ack_cm_event(l->rdma.cm_event);
3104 l->rdma.cm_event = NULL;
/*
 * Completion callback of the negotiate-accept subrequest started in
 * smb_direct_listener_accept_rdma_handler().
 *
 * Collects the negotiated connection, frees the subrequest, moves the
 * connection from l->pending to l->ready and reparents it from the
 * request state to the listener. The accept request itself is not
 * completed: its callback is only notified (deferred through state->ev)
 * so the caller can pick the connection up with
 * smb_direct_listener_accept_recv().
 *
 * NOTE(review): the handling of a failed negotiation is not visible
 * here.
 */
3109 static void smb_direct_listener_accept_done(struct tevent_req *subreq)
3111 struct tevent_req *req = tevent_req_callback_data(
3112 subreq, struct tevent_req);
3113 struct smb_direct_listener_accept_state *state =
3114 tevent_req_data(req,
3115 struct smb_direct_listener_accept_state);
3116 struct smb_direct_listener *l = state->l;
3117 struct smb_direct_connection *c = NULL;
3120 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3122 status = smb_direct_connection_negotiate_accept_recv(subreq, state, &c);
3123 TALLOC_FREE(subreq);
3124 if (!NT_STATUS_IS_OK(status)) {
3134 DLIST_REMOVE(l->pending, c);
3135 DLIST_ADD_END(l->ready, c);
3137 talloc_reparent(state, l, c);
3139 tevent_req_defer_callback(req, state->ev);
3140 tevent_req_notify_callback(req);
/*
 * Fetch one accepted connection from an accept request.
 *
 * laddr and raddr, when non-NULL, are zeroed on entry.
 *
 * While the request is still in progress:
 *  - NT_STATUS_NO_MORE_ENTRIES is returned if l->ready is empty;
 *  - otherwise a connection is taken off l->ready, and if more are
 *    ready the request callback is notified again (deferred through
 *    state->ev);
 *  - c->sock.tmp_fd is handed out through *fd and reset to -1 in the
 *    connection;
 *  - the connection is moved to mem_ctx and returned through *_c with
 *    NT_STATUS_OK.
 * The request is deliberately left alive on this path so that it can be
 * used for further connections.
 *
 * When the request has failed, the error is logged and
 * tevent_req_received() is called. If neither case applies
 * NT_STATUS_INTERNAL_ERROR is returned.
 *
 * NOTE(review): laddr/raddr are zeroed a second time on the success
 * path and never filled in by the visible code - confirm whether the
 * addresses are meant to be reported.
 * NOTE(review): the fd parameter declaration, the selection of c from
 * l->ready and the return on the error path are not visible here.
 */
3144 NTSTATUS smb_direct_listener_accept_recv(struct tevent_req *req,
3145 TALLOC_CTX *mem_ctx,
3146 struct smb_direct_connection **_c,
3148 struct sockaddr_storage *laddr,
3149 struct sockaddr_storage *raddr)
3151 struct smb_direct_listener_accept_state *state =
3152 tevent_req_data(req,
3153 struct smb_direct_listener_accept_state);
3154 struct smb_direct_listener *l = state->l;
3155 struct smb_direct_connection *c = NULL;
3158 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3161 if (laddr != NULL) {
3162 ZERO_STRUCTP(laddr);
3164 if (raddr != NULL) {
3165 ZERO_STRUCTP(raddr);
3168 if (tevent_req_is_in_progress(req)) {
3170 * We don't call tevent_req_received()
3171 * because the caller can leave this alive
3172 * in order to consume more connections
3174 if (l->ready == NULL) {
3175 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3176 return NT_STATUS_NO_MORE_ENTRIES;
3180 DLIST_REMOVE(l->ready, c);
3183 if (l->ready != NULL) {
3184 tevent_req_defer_callback(req, state->ev);
3185 tevent_req_notify_callback(req);
3188 *fd = c->sock.tmp_fd;
3189 c->sock.tmp_fd = -1;
3190 if (laddr != NULL) {
3191 ZERO_STRUCTP(laddr);
3193 if (raddr != NULL) {
3194 ZERO_STRUCTP(raddr);
3196 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3197 *_c = talloc_move(mem_ctx, &c);
3198 return NT_STATUS_OK;
3201 if (tevent_req_is_nterror(req, &status)) {
3202 DEBUG(0,("%s:%s: here...[%s] %s\n", __location__, __func__, nt_errstr(status),
3203 tevent_req_print(req, req)));
3204 tevent_req_received(req);
3208 /* should never be reached */
3209 DEBUG(0,("%s:%s: here...[%s] %s\n", __location__, __func__, nt_errstr(status),
3210 tevent_req_print(req, req)));
3211 tevent_req_received(req);
3212 return NT_STATUS_INTERNAL_ERROR;
3215 #endif /* SMB_TRANSPORT_ENABLE_RDMA */