smb2_server: move struct msghdr to smbd_smb2_send_queue
[samba.git] / source3 / smbd / smb2_server.c
index ccbea87bfd0dc74d0a599e8eaa6141b37ec875f0..55b383072e6a3cf1bca466127fed71c978578e3b 100644 (file)
 */
 
 #include "includes.h"
+#include "system/network.h"
 #include "smbd/smbd.h"
 #include "smbd/globals.h"
+#include "smbd/smbXsrv_open.h"
+#include "lib/param/param.h"
 #include "../libcli/smb/smb_common.h"
 #include "../lib/tsocket/tsocket.h"
 #include "../lib/util/tevent_ntstatus.h"
 #include "../librpc/gen_ndr/krb5pac.h"
 #include "lib/util/iov_buf.h"
 #include "auth.h"
-#include "lib/crypto/sha512.h"
+#include "libcli/smb/smbXcli_base.h"
+#include "source3/lib/substitute.h"
+
+#if defined(LINUX)
+/* SIOCOUTQ and TIOCOUTQ are the same */
+#define __IOCTL_SEND_QUEUE_SIZE_OPCODE TIOCOUTQ
+#define __HAVE_TCP_INFO_RTO 1
+#define __ALLOW_MULTI_CHANNEL_SUPPORT 1
+#elif defined(FREEBSD)
+#define __IOCTL_SEND_QUEUE_SIZE_OPCODE FIONWRITE
+#define __HAVE_TCP_INFO_RTO 1
+#define __ALLOW_MULTI_CHANNEL_SUPPORT 1
+#endif
+
+#include "lib/crypto/gnutls_helpers.h"
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_SMB2
@@ -43,27 +62,24 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn);
 
 static const struct smbd_smb2_dispatch_table {
        uint16_t opcode;
-       const char *name;
-       bool need_session;
-       bool need_tcon;
-       bool as_root;
        uint16_t fileid_ofs;
-       bool allow_invalid_fileid;
-       bool modify;
+       bool need_session : 1;
+       bool need_tcon : 1;
+       bool as_root : 1;
+       bool modify : 1;
 } smbd_smb2_table[] = {
-#define _OP(o) .opcode = o, .name = #o
        {
-               _OP(SMB2_OP_NEGPROT),
+               .opcode = SMB2_OP_NEGPROT,
                .as_root = true,
        },{
-               _OP(SMB2_OP_SESSSETUP),
+               .opcode = SMB2_OP_SESSSETUP,
                .as_root = true,
        },{
-               _OP(SMB2_OP_LOGOFF),
+               .opcode = SMB2_OP_LOGOFF,
                .need_session = true,
                .as_root = true,
        },{
-               _OP(SMB2_OP_TCON),
+               .opcode = SMB2_OP_TCON,
                .need_session = true,
                /*
                 * This call needs to be run as root.
@@ -74,76 +90,74 @@ static const struct smbd_smb2_dispatch_table {
                 */
                .as_root = true,
        },{
-               _OP(SMB2_OP_TDIS),
+               .opcode = SMB2_OP_TDIS,
                .need_session = true,
                .need_tcon = true,
                .as_root = true,
        },{
-               _OP(SMB2_OP_CREATE),
+               .opcode = SMB2_OP_CREATE,
                .need_session = true,
                .need_tcon = true,
        },{
-               _OP(SMB2_OP_CLOSE),
+               .opcode = SMB2_OP_CLOSE,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
        },{
-               _OP(SMB2_OP_FLUSH),
+               .opcode = SMB2_OP_FLUSH,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
        },{
-               _OP(SMB2_OP_READ),
+               .opcode = SMB2_OP_READ,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x10,
        },{
-               _OP(SMB2_OP_WRITE),
+               .opcode = SMB2_OP_WRITE,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x10,
                .modify = true,
        },{
-               _OP(SMB2_OP_LOCK),
+               .opcode = SMB2_OP_LOCK,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
        },{
-               _OP(SMB2_OP_IOCTL),
+               .opcode = SMB2_OP_IOCTL,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
-               .allow_invalid_fileid = true,
                .modify = true,
        },{
-               _OP(SMB2_OP_CANCEL),
+               .opcode = SMB2_OP_CANCEL,
                .as_root = true,
        },{
-               _OP(SMB2_OP_KEEPALIVE),
-               .as_root = true,
+               .opcode = SMB2_OP_KEEPALIVE,
        },{
-               _OP(SMB2_OP_QUERY_DIRECTORY),
+               .opcode = SMB2_OP_QUERY_DIRECTORY,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
        },{
-               _OP(SMB2_OP_NOTIFY),
+               .opcode = SMB2_OP_NOTIFY,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x08,
        },{
-               _OP(SMB2_OP_GETINFO),
+               .opcode = SMB2_OP_GETINFO,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x18,
        },{
-               _OP(SMB2_OP_SETINFO),
+               .opcode = SMB2_OP_SETINFO,
                .need_session = true,
                .need_tcon = true,
                .fileid_ofs = 0x10,
                .modify = true,
        },{
-               _OP(SMB2_OP_BREAK),
+               .opcode = SMB2_OP_BREAK,
                .need_session = true,
                .need_tcon = true,
                /*
@@ -157,10 +171,70 @@ static const struct smbd_smb2_dispatch_table {
 
 const char *smb2_opcode_name(uint16_t opcode)
 {
-       if (opcode >= ARRAY_SIZE(smbd_smb2_table)) {
-               return "Bad SMB2 opcode";
+       const char *result = "Bad SMB2 opcode";
+
+       switch (opcode) {
+       case SMB2_OP_NEGPROT:
+               result = "SMB2_OP_NEGPROT";
+               break;
+       case SMB2_OP_SESSSETUP:
+               result = "SMB2_OP_SESSSETUP";
+               break;
+       case SMB2_OP_LOGOFF:
+               result = "SMB2_OP_LOGOFF";
+               break;
+       case SMB2_OP_TCON:
+               result = "SMB2_OP_TCON";
+               break;
+       case SMB2_OP_TDIS:
+               result = "SMB2_OP_TDIS";
+               break;
+       case SMB2_OP_CREATE:
+               result = "SMB2_OP_CREATE";
+               break;
+       case SMB2_OP_CLOSE:
+               result = "SMB2_OP_CLOSE";
+               break;
+       case SMB2_OP_FLUSH:
+               result = "SMB2_OP_FLUSH";
+               break;
+       case SMB2_OP_READ:
+               result = "SMB2_OP_READ";
+               break;
+       case SMB2_OP_WRITE:
+               result = "SMB2_OP_WRITE";
+               break;
+       case SMB2_OP_LOCK:
+               result = "SMB2_OP_LOCK";
+               break;
+       case SMB2_OP_IOCTL:
+               result = "SMB2_OP_IOCTL";
+               break;
+       case SMB2_OP_CANCEL:
+               result = "SMB2_OP_CANCEL";
+               break;
+       case SMB2_OP_KEEPALIVE:
+               result = "SMB2_OP_KEEPALIVE";
+               break;
+       case SMB2_OP_QUERY_DIRECTORY:
+               result = "SMB2_OP_QUERY_DIRECTORY";
+               break;
+       case SMB2_OP_NOTIFY:
+               result = "SMB2_OP_NOTIFY";
+               break;
+       case SMB2_OP_GETINFO:
+               result = "SMB2_OP_GETINFO";
+               break;
+       case SMB2_OP_SETINFO:
+               result = "SMB2_OP_SETINFO";
+               break;
+       case SMB2_OP_BREAK:
+               result = "SMB2_OP_BREAK";
+               break;
+       default:
+               break;
        }
-       return smbd_smb2_table[opcode].name;
+       return result;
 }
 
 static const struct smbd_smb2_dispatch_table *smbd_smb2_call(uint16_t opcode)
@@ -212,10 +286,16 @@ bool smbd_smb2_is_compound(const struct smbd_smb2_request *req)
        return req->in.vector_count >= (2*SMBD_SMB2_NUM_IOV_PER_REQ);
 }
 
+/*
+ * Return true when the group of SMBD_SMB2_NUM_IOV_PER_REQ iovecs that
+ * starts at req->current_idx is the final group of req->in.vector,
+ * i.e. no further request follows the current one in the inbound chain.
+ */
+bool smbd_smb2_is_last_in_compound(const struct smbd_smb2_request *req)
+{
+       if (req->in.vector_count !=
+           req->current_idx + SMBD_SMB2_NUM_IOV_PER_REQ) {
+               return false;
+       }
+
+       return true;
+}
+
 static NTSTATUS smbd_initialize_smb2(struct smbXsrv_connection *xconn,
                                     uint64_t expected_seq_low)
 {
-       TALLOC_FREE(xconn->transport.fde);
+       int rc;
 
        xconn->smb2.credits.seq_low = expected_seq_low;
        xconn->smb2.credits.seq_range = 1;
@@ -227,6 +307,9 @@ static NTSTATUS smbd_initialize_smb2(struct smbXsrv_connection *xconn,
                return NT_STATUS_NO_MEMORY;
        }
 
+       tevent_fd_set_close_fn(xconn->transport.fde, NULL);
+       TALLOC_FREE(xconn->transport.fde);
+
        xconn->transport.fde = tevent_add_fd(
                                        xconn->client->raw_ev_ctx,
                                        xconn,
@@ -235,11 +318,30 @@ static NTSTATUS smbd_initialize_smb2(struct smbXsrv_connection *xconn,
                                        smbd_smb2_connection_handler,
                                        xconn);
        if (xconn->transport.fde == NULL) {
+               close(xconn->transport.sock);
+               xconn->transport.sock = -1;
                return NT_STATUS_NO_MEMORY;
        }
+       tevent_fd_set_auto_close(xconn->transport.fde);
+
+       /*
+        * Ensure child is set to non-blocking mode,
+        * unless the system supports MSG_DONTWAIT.
+        * If MSG_DONTWAIT is available we should force
+        * blocking mode.
+        */
+#ifdef MSG_DONTWAIT
+       rc = set_blocking(xconn->transport.sock, true);
+       if (rc < 0) {
+               return NT_STATUS_INTERNAL_ERROR;
+       }
+#else
+       rc = set_blocking(xconn->transport.sock, false);
+       if (rc < 0) {
+               return NT_STATUS_INTERNAL_ERROR;
+       }
+#endif
 
-       /* Ensure child is set to non-blocking mode */
-       set_blocking(xconn->transport.sock, false);
        return NT_STATUS_OK;
 }
 
@@ -272,12 +374,8 @@ static bool smb2_setup_nbt_length(struct iovec *vector, int count)
 
 static int smbd_smb2_request_destructor(struct smbd_smb2_request *req)
 {
-       if (req->first_key.length > 0) {
-               data_blob_clear_free(&req->first_key);
-       }
-       if (req->last_key.length > 0) {
-               data_blob_clear_free(&req->last_key);
-       }
+       TALLOC_FREE(req->first_enc_key);
+       TALLOC_FREE(req->last_sign_key);
        return 0;
 }
 
@@ -287,7 +385,7 @@ void smb2_request_set_async_internal(struct smbd_smb2_request *req,
        req->async_internal = async_internal;
 }
 
-static struct smbd_smb2_request *smbd_smb2_request_allocate(TALLOC_CTX *mem_ctx)
+static struct smbd_smb2_request *smbd_smb2_request_allocate(struct smbXsrv_connection *xconn)
 {
        TALLOC_CTX *mem_pool;
        struct smbd_smb2_request *req;
@@ -302,18 +400,21 @@ static struct smbd_smb2_request *smbd_smb2_request_allocate(TALLOC_CTX *mem_ctx)
                return NULL;
        }
 
-       req = talloc_zero(mem_pool, struct smbd_smb2_request);
+       req = talloc(mem_pool, struct smbd_smb2_request);
        if (req == NULL) {
                talloc_free(mem_pool);
                return NULL;
        }
-       talloc_reparent(mem_pool, mem_ctx, req);
+       talloc_reparent(mem_pool, xconn, req);
 #if 0
        TALLOC_FREE(mem_pool);
 #endif
-
-       req->last_session_id = UINT64_MAX;
-       req->last_tid = UINT32_MAX;
+       *req = (struct smbd_smb2_request) {
+               .sconn = xconn->client->sconn,
+               .xconn = xconn,
+               .last_session_id = UINT64_MAX,
+               .last_tid = UINT32_MAX,
+       };
 
        talloc_set_destructor(req, smbd_smb2_request_destructor);
 
@@ -377,7 +478,7 @@ static NTSTATUS smbd_smb2_inbuf_parse_compound(struct smbXsrv_connection *xconn,
                        NTSTATUS status;
                        size_t enc_len;
 
-                       if (xconn->protocol < PROTOCOL_SMB2_24) {
+                       if (xconn->protocol < PROTOCOL_SMB3_00) {
                                DEBUG(10, ("Got SMB2_TRANSFORM header, "
                                           "but dialect[0x%04X] is used\n",
                                           xconn->smb2.server.dialect));
@@ -415,6 +516,10 @@ static NTSTATUS smbd_smb2_inbuf_parse_compound(struct smbXsrv_connection *xconn,
 
                        status = smb2srv_session_lookup_conn(xconn, uid, now,
                                                             &s);
+                       if (s == NULL) {
+                               status = smb2srv_session_lookup_global(xconn->client,
+                                                                      uid, req, &s);
+                       }
                        if (s == NULL) {
                                DEBUG(1, ("invalid session[%llu] in "
                                          "SMB2_TRANSFORM header\n",
@@ -429,7 +534,6 @@ static NTSTATUS smbd_smb2_inbuf_parse_compound(struct smbXsrv_connection *xconn,
                        tf_iov[1].iov_len = enc_len;
 
                        status = smb2_signing_decrypt_pdu(s->global->decryption_key,
-                                                         xconn->smb2.server.cipher,
                                                          tf_iov, 2);
                        if (!NT_STATUS_IS_OK(status)) {
                                TALLOC_FREE(iov_alloc);
@@ -445,6 +549,17 @@ static NTSTATUS smbd_smb2_inbuf_parse_compound(struct smbXsrv_connection *xconn,
                 */
 
                if (len < SMB2_HDR_BODY + 2) {
+
+                       if ((len == 5) &&
+                           (IVAL(hdr, 0) == SMB_SUICIDE_PACKET) &&
+                           lp_parm_bool(-1, "smbd", "suicide mode", false)) {
+                               uint8_t exitcode = CVAL(hdr, 4);
+                               DBG_WARNING("SUICIDE: Exiting immediately "
+                                           "with code %"PRIu8"\n",
+                                           exitcode);
+                               exit(exitcode);
+                       }
+
                        DEBUG(10, ("%d bytes left, expected at least %d\n",
                                   (int)len, SMB2_HDR_BODY));
                        goto inval;
@@ -538,7 +653,6 @@ static NTSTATUS smbd_smb2_request_create(struct smbXsrv_connection *xconn,
                                         const uint8_t *_inpdu, size_t size,
                                         struct smbd_smb2_request **_req)
 {
-       struct smbd_server_connection *sconn = xconn->client->sconn;
        struct smbd_smb2_request *req;
        uint32_t protocol_version;
        uint8_t *inpdu = NULL;
@@ -580,8 +694,6 @@ static NTSTATUS smbd_smb2_request_create(struct smbXsrv_connection *xconn,
        if (req == NULL) {
                return NT_STATUS_NO_MEMORY;
        }
-       req->sconn = sconn;
-       req->xconn = xconn;
 
        inpdu = talloc_memdup(req, _inpdu, size);
        if (inpdu == NULL) {
@@ -755,7 +867,7 @@ static bool smb2_validate_message_id(struct smbXsrv_connection *xconn,
                }
        }
 
-       /* substract used credits */
+       /* subtract used credits */
        xconn->smb2.credits.granted -= credit_charge;
 
        return true;
@@ -829,8 +941,11 @@ static void smb2_set_operation_credit(struct smbXsrv_connection *xconn,
         *       of requests and the used sequence number.
         *       Which means we would grant more credits
         *       for client which use multi credit requests.
+        *
+        * The above is what Windows Server < 2016 is doing,
+        * but new servers use all credits (8192 by default).
         */
-       current_max_credits = xconn->smb2.credits.max / 16;
+       current_max_credits = xconn->smb2.credits.max;
        current_max_credits = MAX(current_max_credits, 1);
 
        if (xconn->smb2.credits.multicredit) {
@@ -875,15 +990,19 @@ static void smb2_set_operation_credit(struct smbXsrv_connection *xconn,
                         * with a successful session setup
                         */
                        if (NT_STATUS_IS_OK(out_status)) {
-                               additional_max = 32;
+                               additional_max = xconn->smb2.credits.max;
                        }
                        break;
                default:
                        /*
-                        * We match windows and only grant additional credits
-                        * in chunks of 32.
+                        * Windows Server < 2016 and older Samba versions
+                        * used to only grant additional credits in
+                        * chunks of 32 credits.
+                        *
+                        * But we match Windows Server 2016 and grant
+                        * all credits as requested.
                         */
-                       additional_max = 32;
+                       additional_max = xconn->smb2.credits.max;
                        break;
                }
 
@@ -1084,203 +1203,788 @@ static NTSTATUS smbd_smb2_request_setup_out(struct smbd_smb2_request *req)
        return NT_STATUS_OK;
 }
 
-void smbd_server_connection_terminate_ex(struct smbXsrv_connection *xconn,
-                                        const char *reason,
-                                        const char *location)
+bool smbXsrv_server_multi_channel_enabled(void)
 {
-       struct smbXsrv_client *client = xconn->client;
-
-       DEBUG(10,("smbd_server_connection_terminate_ex: conn[%s] reason[%s] at %s\n",
-                 smbXsrv_connection_dbg(xconn), reason, location));
-
-       if (client->connections->next != NULL) {
-               /* TODO: cancel pending requests */
-               DLIST_REMOVE(client->connections, xconn);
-               TALLOC_FREE(xconn);
-               return;
+       bool enabled = lp_server_multi_channel_support();
+#ifndef __ALLOW_MULTI_CHANNEL_SUPPORT
+       bool forced = false;
+       struct loadparm_context *lp_ctx = loadparm_init_s3(NULL, loadparm_s3_helpers());
+       bool unspecified = lpcfg_parm_is_unspecified(lp_ctx, "server multi channel support");
+       if (unspecified) {
+               enabled = false;
        }
-
        /*
-        * The last connection was disconnected
+        * If we don't have support from the kernel
+        * to ask for the un-acked number of bytes
+        * in the socket send queue, we had better
+        * not support multi-channel.
         */
-       exit_server_cleanly(reason);
+       forced = lp_parm_bool(-1, "force", "server multi channel support", false);
+       if (enabled && !forced) {
+               D_NOTICE("'server multi channel support' enabled "
+                        "but not supported on %s (%s)\n",
+                        SYSTEM_UNAME_SYSNAME, SYSTEM_UNAME_RELEASE);
+               DEBUGADD(DBGLVL_NOTICE, ("Please report this on "
+                       "https://bugzilla.samba.org/show_bug.cgi?id=11897\n"));
+               enabled = false;
+       }
+       TALLOC_FREE(lp_ctx);
+#endif /* ! __ALLOW_MULTI_CHANNEL_SUPPORT */
+       return enabled;
 }
 
-static bool dup_smb2_vec4(TALLOC_CTX *ctx,
-                       struct iovec *outvec,
-                       const struct iovec *srcvec)
+static NTSTATUS smbXsrv_connection_get_rto_usecs(struct smbXsrv_connection *xconn,
+                                                uint32_t *_rto_usecs)
 {
-       const uint8_t *srctf;
-       size_t srctf_len;
-       const uint8_t *srchdr;
-       size_t srchdr_len;
-       const uint8_t *srcbody;
-       size_t srcbody_len;
-       const uint8_t *expected_srcbody;
-       const uint8_t *srcdyn;
-       size_t srcdyn_len;
-       const uint8_t *expected_srcdyn;
-       uint8_t *dsttf;
-       uint8_t *dsthdr;
-       uint8_t *dstbody;
-       uint8_t *dstdyn;
+       /*
+        * Define a Retransmission Timeout
+        * of 1 second, if there's no way for the
+        * kernel to tell us the current value.
+        */
+       uint32_t rto_usecs = 1000000;
 
-       srctf  = (const uint8_t *)srcvec[SMBD_SMB2_TF_IOV_OFS].iov_base;
-       srctf_len = srcvec[SMBD_SMB2_TF_IOV_OFS].iov_len;
-       srchdr  = (const uint8_t *)srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_base;
-       srchdr_len = srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_len;
-       srcbody = (const uint8_t *)srcvec[SMBD_SMB2_BODY_IOV_OFS].iov_base;
-       srcbody_len = srcvec[SMBD_SMB2_BODY_IOV_OFS].iov_len;
-       expected_srcbody = srchdr + SMB2_HDR_BODY;
-       srcdyn  = (const uint8_t *)srcvec[SMBD_SMB2_DYN_IOV_OFS].iov_base;
-       srcdyn_len = srcvec[SMBD_SMB2_DYN_IOV_OFS].iov_len;
-       expected_srcdyn = srcbody + 8;
+#ifdef __HAVE_TCP_INFO_RTO
+       {
+               struct tcp_info info;
+               socklen_t ilen = sizeof(info);
+               int ret;
+
+               ZERO_STRUCT(info);
+               ret = getsockopt(xconn->transport.sock,
+                                IPPROTO_TCP, TCP_INFO,
+                                (void *)&info, &ilen);
+               if (ret != 0) {
+                       int saved_errno = errno;
+                       NTSTATUS status = map_nt_error_from_unix(errno);
+                       DBG_ERR("getsockopt(TCP_INFO) errno[%d/%s] -s %s\n",
+                               saved_errno, strerror(saved_errno),
+                               nt_errstr(status));
+                       return status;
+               }
 
-       if ((srctf_len != SMB2_TF_HDR_SIZE) && (srctf_len != 0)) {
-               return false;
+               DBG_DEBUG("tcpi_rto[%u] tcpi_rtt[%u] tcpi_rttvar[%u]\n",
+                         (unsigned)info.tcpi_rto,
+                         (unsigned)info.tcpi_rtt,
+                         (unsigned)info.tcpi_rttvar);
+               rto_usecs = info.tcpi_rto;
        }
+#endif /* __HAVE_TCP_INFO_RTO */
 
-       if (srchdr_len != SMB2_HDR_BODY) {
-               return false;
-       }
+       rto_usecs = MAX(rto_usecs,  200000); /* at least 0.2s */
+       rto_usecs = MIN(rto_usecs, 1000000); /* at max   1.0s */
+       *_rto_usecs = rto_usecs;
+       return NT_STATUS_OK;
+}
 
-       if (srctf_len == SMB2_TF_HDR_SIZE) {
-               dsttf = talloc_memdup(ctx, srctf, SMB2_TF_HDR_SIZE);
-               if (dsttf == NULL) {
-                       return false;
-               }
-       } else {
-               dsttf = NULL;
-       }
-       outvec[SMBD_SMB2_TF_IOV_OFS].iov_base = (void *)dsttf;
-       outvec[SMBD_SMB2_TF_IOV_OFS].iov_len = srctf_len;
+static NTSTATUS smbXsrv_connection_get_acked_bytes(struct smbXsrv_connection *xconn,
+                                                  uint64_t *_acked_bytes)
+{
+       /*
+        * Unless the kernel has an interface
+        * to reveal the number of un-acked bytes
+        * in the socket send queue, we'll assume
+        * everything is already acked.
+        *
+        * But that would mean that we had better not
+        * pretend to support multi-channel.
+        */
+       uint64_t unacked_bytes = 0;
 
-       /* vec[SMBD_SMB2_HDR_IOV_OFS] is always boilerplate and must
-        * be allocated with size OUTVEC_ALLOC_SIZE. */
+       *_acked_bytes = 0;
 
-       dsthdr = talloc_memdup(ctx, srchdr, OUTVEC_ALLOC_SIZE);
-       if (dsthdr == NULL) {
-               return false;
+       if (xconn->ack.force_unacked_timeout) {
+               /*
+                * Smbtorture tries to test channel failures...
+                * Just pretend nothing was acked...
+                */
+               DBG_INFO("Simulating channel failure: "
+                        "xconn->ack.unacked_bytes[%llu]\n",
+                        (unsigned long long)xconn->ack.unacked_bytes);
+               return NT_STATUS_OK;
        }
-       outvec[SMBD_SMB2_HDR_IOV_OFS].iov_base = (void *)dsthdr;
-       outvec[SMBD_SMB2_HDR_IOV_OFS].iov_len = SMB2_HDR_BODY;
 
-       /*
-        * If this is a "standard" vec[SMBD_SMB2_BOFY_IOV_OFS] of length 8,
-        * pointing to srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_base + SMB2_HDR_BODY,
-        * then duplicate this. Else use talloc_memdup().
-        */
+#ifdef __IOCTL_SEND_QUEUE_SIZE_OPCODE
+       {
+               int value = 0;
+               int ret;
 
-       if ((srcbody == expected_srcbody) && (srcbody_len == 8)) {
-               dstbody = dsthdr + SMB2_HDR_BODY;
-       } else {
-               dstbody = talloc_memdup(ctx, srcbody, srcbody_len);
-               if (dstbody == NULL) {
-                       return false;
+               /*
+                * If we have kernel support to get
+                * the number of bytes waiting in
+                * the socket's send queue, we
+                * use that in order to find out
+                * the number of unacked bytes.
+                */
+               ret = ioctl(xconn->transport.sock,
+                           __IOCTL_SEND_QUEUE_SIZE_OPCODE,
+                           &value);
+               if (ret != 0) {
+                       int saved_errno = errno;
+                       NTSTATUS status = map_nt_error_from_unix(saved_errno);
+                       DBG_ERR("Failed to get the SEND_QUEUE_SIZE - "
+                               "errno %d (%s) - %s\n",
+                               saved_errno, strerror(saved_errno),
+                               nt_errstr(status));
+                       return status;
                }
-       }
-       outvec[SMBD_SMB2_BODY_IOV_OFS].iov_base = (void *)dstbody;
-       outvec[SMBD_SMB2_BODY_IOV_OFS].iov_len = srcbody_len;
-
-       /*
-        * If this is a "standard" vec[SMBD_SMB2_DYN_IOV_OFS] of length 1,
-        * pointing to
-        * srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_base + 8
-        * then duplicate this. Else use talloc_memdup().
-        */
 
-       if ((srcdyn == expected_srcdyn) && (srcdyn_len == 1)) {
-               dstdyn = dsthdr + SMB2_HDR_BODY + 8;
-       } else if (srcdyn == NULL) {
-               dstdyn = NULL;
-       } else {
-               dstdyn = talloc_memdup(ctx, srcdyn, srcdyn_len);
-               if (dstdyn == NULL) {
-                       return false;
+               if (value < 0) {
+                       DBG_ERR("xconn->ack.unacked_bytes[%llu] value[%d]\n",
+                               (unsigned long long)xconn->ack.unacked_bytes,
+                               value);
+                       return NT_STATUS_INTERNAL_ERROR;
                }
+               unacked_bytes = value;
+       }
+#endif
+       if (xconn->ack.unacked_bytes == 0) {
+               xconn->ack.unacked_bytes = unacked_bytes;
+               return NT_STATUS_OK;
        }
-       outvec[SMBD_SMB2_DYN_IOV_OFS].iov_base = (void *)dstdyn;
-       outvec[SMBD_SMB2_DYN_IOV_OFS].iov_len = srcdyn_len;
 
-       return true;
+       if (xconn->ack.unacked_bytes < unacked_bytes) {
+               DBG_ERR("xconn->ack.unacked_bytes[%llu] unacked_bytes[%llu]\n",
+                       (unsigned long long)xconn->ack.unacked_bytes,
+                       (unsigned long long)unacked_bytes);
+               return NT_STATUS_INTERNAL_ERROR;
+       }
+
+       *_acked_bytes = xconn->ack.unacked_bytes - unacked_bytes;
+       xconn->ack.unacked_bytes = unacked_bytes;
+       return NT_STATUS_OK;
 }
 
-static struct smbd_smb2_request *dup_smb2_req(const struct smbd_smb2_request *req)
+static void smbd_smb2_send_queue_ack_fail(struct smbd_smb2_send_queue **queue,
+                                         NTSTATUS status)
 {
-       struct smbd_smb2_request *newreq = NULL;
-       struct iovec *outvec = NULL;
-       int count = req->out.vector_count;
-       int i;
-       bool ok;
+       struct smbd_smb2_send_queue *e = NULL;
+       struct smbd_smb2_send_queue *n = NULL;
 
-       newreq = smbd_smb2_request_allocate(req->xconn);
-       if (!newreq) {
-               return NULL;
+       for (e = *queue; e != NULL; e = n) {
+               n = e->next;
+
+               DLIST_REMOVE(*queue, e);
+               if (e->ack.req != NULL) {
+                       tevent_req_nterror(e->ack.req, status);
+               }
        }
+}
 
-       newreq->sconn = req->sconn;
-       newreq->xconn = req->xconn;
-       newreq->session = req->session;
-       newreq->do_encryption = req->do_encryption;
-       newreq->do_signing = req->do_signing;
-       newreq->current_idx = req->current_idx;
+static NTSTATUS smbd_smb2_send_queue_ack_bytes(struct smbd_smb2_send_queue **queue,
+                                              uint64_t acked_bytes)
+{
+       struct smbd_smb2_send_queue *e = NULL;
+       struct smbd_smb2_send_queue *n = NULL;
 
-       outvec = talloc_zero_array(newreq, struct iovec, count);
-       if (!outvec) {
-               TALLOC_FREE(newreq);
-               return NULL;
-       }
-       newreq->out.vector = outvec;
-       newreq->out.vector_count = count;
+       for (e = *queue; e != NULL; e = n) {
+               bool expired;
 
-       /* Setup the outvec's identically to req. */
-       outvec[0].iov_base = newreq->out.nbt_hdr;
-       outvec[0].iov_len = 4;
-       memcpy(newreq->out.nbt_hdr, req->out.nbt_hdr, 4);
+               n = e->next;
 
-       /* Setup the vectors identically to the ones in req. */
-       for (i = 1; i < count; i += SMBD_SMB2_NUM_IOV_PER_REQ) {
-               if (!dup_smb2_vec4(outvec, &outvec[i], &req->out.vector[i])) {
-                       break;
+               if (e->ack.req == NULL) {
+                       continue;
                }
-       }
 
-       if (i < count) {
-               /* Alloc failed. */
-               TALLOC_FREE(newreq);
-               return NULL;
-       }
+               if (e->ack.required_acked_bytes <= acked_bytes) {
+                       e->ack.required_acked_bytes = 0;
+                       DLIST_REMOVE(*queue, e);
+                       tevent_req_done(e->ack.req);
+                       continue;
+               }
+               e->ack.required_acked_bytes -= acked_bytes;
 
-       ok = smb2_setup_nbt_length(newreq->out.vector,
-                                  newreq->out.vector_count);
-       if (!ok) {
-               TALLOC_FREE(newreq);
-               return NULL;
+               expired = timeval_expired(&e->ack.timeout);
+               if (expired) {
+                       return NT_STATUS_IO_TIMEOUT;
+               }
        }
 
-       return newreq;
+       return NT_STATUS_OK;
 }
 
-static NTSTATUS smb2_send_async_interim_response(const struct smbd_smb2_request *req)
+static NTSTATUS smbd_smb2_check_ack_queue(struct smbXsrv_connection *xconn)
 {
-       struct smbXsrv_connection *xconn = req->xconn;
-       int first_idx = 1;
-       struct iovec *firsttf = NULL;
-       struct iovec *outhdr_v = NULL;
-       uint8_t *outhdr = NULL;
-       struct smbd_smb2_request *nreq = NULL;
+       uint64_t acked_bytes = 0;
        NTSTATUS status;
-       bool ok;
 
-       /* Create a new smb2 request we'll use
-          for the interim return. */
-       nreq = dup_smb2_req(req);
-       if (!nreq) {
-               return NT_STATUS_NO_MEMORY;
+       status = smbXsrv_connection_get_acked_bytes(xconn, &acked_bytes);
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
        }
 
-       /* Lose the last X out vectors. They're the
-          ones we'll be using for the async reply. */
+       status = smbd_smb2_send_queue_ack_bytes(&xconn->ack.queue, acked_bytes);
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+
+       status = smbd_smb2_send_queue_ack_bytes(&xconn->smb2.send_queue, 0);
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+
+       return NT_STATUS_OK;
+}
+
+/*
+ * Wakeup callback for xconn->ack.checker_subreq.
+ *
+ * Runs smbd_smb2_check_ack_queue() for the connection and then
+ * schedules itself again xconn->ack.rto_usecs microseconds from now.
+ *
+ * If the wakeup failed, the ack queue check returned an error or the
+ * next wakeup cannot be created, the connection is terminated.
+ */
+static void smbXsrv_connection_ack_checker(struct tevent_req *subreq)
+{
+       struct smbXsrv_connection *xconn = tevent_req_callback_data(
+               subreq, struct smbXsrv_connection);
+       struct smbXsrv_client *client = xconn->client;
+       struct tevent_req *rearmed = NULL;
+       struct timeval when;
+       NTSTATUS status;
+       bool woken;
+
+       /* The request that fired is consumed below, so drop our reference. */
+       xconn->ack.checker_subreq = NULL;
+
+       woken = tevent_wakeup_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (!woken) {
+               smbd_server_connection_terminate(xconn,
+                                                "tevent_wakeup_recv() failed");
+               return;
+       }
+
+       status = smbd_smb2_check_ack_queue(xconn);
+       if (!NT_STATUS_IS_OK(status)) {
+               smbd_server_connection_terminate(xconn, nt_errstr(status));
+               return;
+       }
+
+       /* Re-arm the checker for the next interval. */
+       when = timeval_current_ofs_usec(xconn->ack.rto_usecs);
+       rearmed = tevent_wakeup_send(xconn, client->raw_ev_ctx, when);
+       xconn->ack.checker_subreq = rearmed;
+       if (rearmed == NULL) {
+               smbd_server_connection_terminate(xconn,
+                                                "tevent_wakeup_send() failed");
+               return;
+       }
+       tevent_req_set_callback(rearmed,
+                               smbXsrv_connection_ack_checker,
+                               xconn);
+}
+
+/*
+ * Start or stop the per-connection ack checker timers after the list
+ * of pending breaks of a client has changed.
+ *
+ * A 'pending break cycle' begins with the first pending break and ends
+ * once no pending break is left. That is typically a very short time,
+ * about one retransmission timeout.
+ *
+ * Returns NT_STATUS_OK, the error of a failed helper call, or
+ * NT_STATUS_NO_MEMORY if a timer could not be created.
+ */
+static NTSTATUS smbXsrv_client_pending_breaks_updated(struct smbXsrv_client *client)
+{
+       struct smbXsrv_connection *xconn = NULL;
+
+       for (xconn = client->connections; xconn != NULL; xconn = xconn->next) {
+               struct timeval first_check;
+               uint64_t acked = 0;
+               NTSTATUS status;
+
+               if (client->pending_breaks == NULL) {
+                       /*
+                        * The cycle is over: drop a checker timer
+                        * that may still be armed.
+                        */
+                       TALLOC_FREE(xconn->ack.checker_subreq);
+                       continue;
+               }
+
+               if (xconn->ack.checker_subreq != NULL) {
+                       /*
+                        * A checker is already running for this
+                        * cycle, nothing to do.
+                        */
+                       continue;
+               }
+
+               /*
+                * Fetch the current retransmission timeout.
+                *
+                * The value may change over time, but reading it
+                * once per 'pending break' cycle should be enough.
+                */
+               status = smbXsrv_connection_get_rto_usecs(xconn,
+                                                         &xconn->ack.rto_usecs);
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
+               }
+
+               /*
+                * A new cycle starts with a fresh unacked_bytes
+                * counter: it is set to 0 here and then, within
+                * smbXsrv_connection_get_acked_bytes(), to the
+                * current value in the kernel send queue.
+                */
+               xconn->ack.unacked_bytes = 0;
+               status = smbXsrv_connection_get_acked_bytes(xconn, &acked);
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
+               }
+
+               /*
+                * Arm a timer that checks for acked bytes after
+                * one retransmission timeout.
+                *
+                * The code that sets up the send_queue.ack.timeout
+                * uses a multiple of the retransmission timeout.
+                */
+               first_check = timeval_current_ofs_usec(xconn->ack.rto_usecs);
+               xconn->ack.checker_subreq = tevent_wakeup_send(xconn,
+                                                       client->raw_ev_ctx,
+                                                       first_check);
+               if (xconn->ack.checker_subreq == NULL) {
+                       return NT_STATUS_NO_MEMORY;
+               }
+               tevent_req_set_callback(xconn->ack.checker_subreq,
+                                       smbXsrv_connection_ack_checker,
+                                       xconn);
+       }
+
+       return NT_STATUS_OK;
+}
+
+/*
+ * Mark the transport of a connection as disconnected.
+ *
+ * The given status is stored as the transport status, the fd event is
+ * released, the socket number is forgotten and both send queues are
+ * failed with the same status. Only the first call has an effect; once
+ * the transport status is no longer OK the function returns early.
+ */
+void smbXsrv_connection_disconnect_transport(struct smbXsrv_connection *xconn,
+                                            NTSTATUS status)
+{
+       bool already_disconnected = !NT_STATUS_IS_OK(xconn->transport.status);
+
+       if (already_disconnected) {
+               return;
+       }
+
+       xconn->transport.status = status;
+       TALLOC_FREE(xconn->transport.fde);
+       /*
+        * Forget the socket number (storing -1 over -1 is harmless).
+        * NOTE(review): the descriptor is not closed explicitly here;
+        * presumably the fde owns it - confirm.
+        */
+       xconn->transport.sock = -1;
+
+       /* Nothing queued on this transport can be acked any more. */
+       smbd_smb2_send_queue_ack_fail(&xconn->ack.queue, status);
+       smbd_smb2_send_queue_ack_fail(&xconn->smb2.send_queue, status);
+       xconn->smb2.send_queue_len = 0;
+       DO_PROFILE_INC(disconnect);
+}
+
+/*
+ * Count the connections of a client whose transport status is still OK.
+ */
+size_t smbXsrv_client_valid_connections(struct smbXsrv_client *client)
+{
+       struct smbXsrv_connection *cur = client->connections;
+       size_t valid = 0;
+
+       while (cur != NULL) {
+               if (NT_STATUS_IS_OK(cur->transport.status)) {
+                       valid += 1;
+               }
+               cur = cur->next;
+       }
+
+       return valid;
+}
+
+struct smbXsrv_connection_shutdown_state { /* state of one smbXsrv_connection_shutdown_send() request */
+       struct smbXsrv_connection *xconn; /* the connection being shut down */
+};
+
+static void smbXsrv_connection_shutdown_wait_done(struct tevent_req *subreq);
+
+/*
+ * Start the asynchronous shutdown of a connection.
+ *
+ * Every request still pending on the connection is attached to a
+ * newly created xconn->transport.shutdown_wait_queue, the sessions are
+ * disconnected from the connection and, if anything is left in the
+ * queue, a final waiter is appended that completes the returned
+ * request once everything before it has gone away.
+ *
+ * The caller must already have disconnected the transport and set
+ * xconn->transport.terminating.
+ *
+ * Returns NULL if the request itself could not be allocated.
+ */
+static struct tevent_req *smbXsrv_connection_shutdown_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       struct smbXsrv_connection *xconn)
+{
+       struct smbXsrv_connection_shutdown_state *state = NULL;
+       struct smbd_smb2_request *pending = NULL;
+       struct tevent_req *req = NULL;
+       struct tevent_req *waiter = NULL;
+       size_t num_waiters = 0;
+       NTSTATUS status;
+
+       /*
+        * smbXsrv_connection_disconnect_transport() is expected
+        * to have been called by the caller already.
+        */
+       SMB_ASSERT(!NT_STATUS_IS_OK(xconn->transport.status));
+       SMB_ASSERT(xconn->transport.terminating);
+       SMB_ASSERT(xconn->transport.shutdown_wait_queue == NULL);
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct smbXsrv_connection_shutdown_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       state->xconn = xconn;
+       tevent_req_defer_callback(req, ev);
+
+       xconn->transport.shutdown_wait_queue =
+               tevent_queue_create(state, "smbXsrv_connection_shutdown_queue");
+       if (tevent_req_nomem(xconn->transport.shutdown_wait_queue, req)) {
+               return tevent_req_post(req, ev);
+       }
+
+       for (pending = xconn->smb2.requests;
+            pending != NULL;
+            pending = pending->next)
+       {
+               /*
+                * Block the wait queue until this request is finished.
+                *
+                * No callback is set: the entry only has to occupy
+                * the queue, and the talloc_free() of the request
+                * removes it from the queue again.
+                *
+                * The requests are deliberately not cancelled here,
+                * so that the replay detection logic stays correct.
+                *
+                * However, when the last channel of a connection is
+                * torn down, the chain
+                * smbXsrv_session_disconnect_xconn()
+                * -> smbXsrv_session_disconnect_xconn_callback()
+                *   -> smbXsrv_session_remove_channel()
+                *     -> smb2srv_session_shutdown_send()
+                * will indeed cancel the request.
+                */
+               waiter = tevent_queue_wait_send(pending, ev,
+                                       xconn->transport.shutdown_wait_queue);
+               if (tevent_req_nomem(waiter, req)) {
+                       return tevent_req_post(req, ev);
+               }
+       }
+
+       /*
+        * Sessions with num_channels == 0 may get attached to
+        * xconn->transport.shutdown_wait_queue by this call.
+        */
+       status = smbXsrv_session_disconnect_xconn(xconn);
+       if (tevent_req_nterror(req, status)) {
+               return tevent_req_post(req, ev);
+       }
+
+       num_waiters = tevent_queue_length(xconn->transport.shutdown_wait_queue);
+       if (num_waiters == 0) {
+               /* Nothing to wait for, finish right away. */
+               tevent_req_done(req);
+               return tevent_req_post(req, ev);
+       }
+
+       /*
+        * Append our own waiter at the end of the queue. It fires once
+        * all pending requests are finished and sent to the socket.
+        */
+       waiter = tevent_queue_wait_send(state, ev,
+                                       xconn->transport.shutdown_wait_queue);
+       if (tevent_req_nomem(waiter, req)) {
+               return tevent_req_post(req, ev);
+       }
+       tevent_req_set_callback(waiter,
+                               smbXsrv_connection_shutdown_wait_done,
+                               req);
+
+       return req;
+}
+
+/*
+ * Called when the final waiter on the shutdown wait queue fires:
+ * completes the shutdown request.
+ */
+static void smbXsrv_connection_shutdown_wait_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct smbXsrv_connection_shutdown_state *state = tevent_req_data(
+               req, struct smbXsrv_connection_shutdown_state);
+       struct smbXsrv_connection *xconn = state->xconn;
+
+       tevent_queue_wait_recv(subreq);
+       TALLOC_FREE(subreq);
+
+       tevent_req_done(req);
+       /*
+        * The xconn pointer has to be valid at this point; it should
+        * be, because tevent_req_defer_callback() was used.
+        */
+       SMB_ASSERT(xconn->transport.terminating);
+}
+
+/*
+ * Collect the result of smbXsrv_connection_shutdown_send().
+ */
+static NTSTATUS smbXsrv_connection_shutdown_recv(struct tevent_req *req)
+{
+       struct smbXsrv_connection_shutdown_state *state = tevent_req_data(
+               req, struct smbXsrv_connection_shutdown_state);
+       struct smbXsrv_connection *xconn = state->xconn;
+
+       /*
+        * The xconn pointer has to be valid at this point; it should
+        * be, because tevent_req_defer_callback() was used.
+        */
+       SMB_ASSERT(xconn->transport.terminating);
+
+       return tevent_req_simple_recv_ntstatus(req);
+}
+
+/*
+ * Completion callback of the connection shutdown request: unlinks the
+ * connection from its client and frees it. A failed shutdown exits
+ * the server.
+ */
+static void smbd_server_connection_terminate_done(struct tevent_req *subreq)
+{
+       struct smbXsrv_connection *xconn = tevent_req_callback_data(
+               subreq, struct smbXsrv_connection);
+       struct smbXsrv_client *client = xconn->client;
+       NTSTATUS shutdown_status;
+
+       shutdown_status = smbXsrv_connection_shutdown_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (!NT_STATUS_IS_OK(shutdown_status)) {
+               exit_server("smbXsrv_connection_shutdown_recv failed");
+       }
+
+       /* The connection is fully drained, drop it from the client. */
+       DLIST_REMOVE(client->connections, xconn);
+       TALLOC_FREE(xconn);
+}
+
+/*
+ * Terminate a single connection.
+ *
+ * The transport is disconnected first. A connection that is already
+ * terminating is left alone. A connection on a movable cluster public
+ * address takes the whole client down. Otherwise the connection is
+ * shut down asynchronously while other valid connections remain, or
+ * the server exits cleanly if this was the last one.
+ *
+ * reason and location are only used for logging and as exit reason.
+ */
+void smbd_server_connection_terminate_ex(struct smbXsrv_connection *xconn,
+                                        const char *reason,
+                                        const char *location)
+{
+       struct smbXsrv_client *client = xconn->client;
+       struct tevent_req *subreq = NULL;
+       size_t num_ok;
+
+       /*
+        * From now on no new request may use this session.
+        *
+        * If smbXsrv_connection_disconnect_transport() was called
+        * before, calling it again is a no-op.
+        */
+       smbXsrv_connection_disconnect_transport(xconn,
+                                       NT_STATUS_CONNECTION_DISCONNECTED);
+
+       num_ok = smbXsrv_client_valid_connections(client);
+
+       if (xconn->transport.terminating) {
+               DBG_DEBUG("skip recursion conn[%s] num_ok[%zu] reason[%s] at %s\n",
+                         smbXsrv_connection_dbg(xconn), num_ok,
+                         reason, location);
+               return;
+       }
+       xconn->transport.terminating = true;
+
+       DBG_DEBUG("conn[%s] num_ok[%zu] reason[%s] at %s\n",
+                 smbXsrv_connection_dbg(xconn), num_ok,
+                 reason, location);
+
+       if (xconn->has_cluster_movable_ip) {
+               /*
+                * The connection uses a movable cluster public
+                * address, which might be moved to a different node,
+                * so all client connections are disconnected.
+                *
+                * In future we may recheck which node currently
+                * holds this address, but for now we keep it simple.
+                */
+               smbd_server_disconnect_client_ex(client, reason, location);
+               return;
+       }
+
+       if (num_ok == 0) {
+               /*
+                * The last connection was disconnected
+                */
+               exit_server_cleanly(reason);
+               return;
+       }
+
+       /* Other connections are still valid: shut down only this one. */
+       subreq = smbXsrv_connection_shutdown_send(client,
+                                                 client->raw_ev_ctx,
+                                                 xconn);
+       if (subreq == NULL) {
+               exit_server("smbXsrv_connection_shutdown_send failed");
+       }
+       tevent_req_set_callback(subreq,
+                               smbd_server_connection_terminate_done,
+                               xconn);
+}
+
+/*
+ * Disconnect a whole client: log the number of still valid
+ * connections together with reason and location, then exit the
+ * server cleanly.
+ */
+void smbd_server_disconnect_client_ex(struct smbXsrv_client *client,
+                                     const char *reason,
+                                     const char *location)
+{
+       size_t num_ok = smbXsrv_client_valid_connections(client);
+
+       DBG_WARNING("client[%s] num_ok[%zu] reason[%s] at %s\n",
+                   client->global->remote_address, num_ok,
+                   reason, location);
+
+       /*
+        * Something went wrong, so every connection of the client
+        * has to go.
+        */
+       exit_server_cleanly(reason);
+}
+
+/*
+ * Duplicate the TF, HDR, BODY and DYN vectors of one SMB2 response
+ * from srcvec into outvec, allocating the copies on ctx.
+ *
+ * Returns false if the source TF or HDR vector has an unexpected
+ * length or if an allocation fails; outvec may then be partly filled.
+ */
+static bool dup_smb2_vec4(TALLOC_CTX *ctx,
+                       struct iovec *outvec,
+                       const struct iovec *srcvec)
+{
+       /* Snapshot all source vectors before touching outvec. */
+       const uint8_t *tf_base =
+               (const uint8_t *)srcvec[SMBD_SMB2_TF_IOV_OFS].iov_base;
+       const size_t tf_len = srcvec[SMBD_SMB2_TF_IOV_OFS].iov_len;
+       const uint8_t *hdr_base =
+               (const uint8_t *)srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_base;
+       const size_t hdr_len = srcvec[SMBD_SMB2_HDR_IOV_OFS].iov_len;
+       const uint8_t *body_base =
+               (const uint8_t *)srcvec[SMBD_SMB2_BODY_IOV_OFS].iov_base;
+       const size_t body_len = srcvec[SMBD_SMB2_BODY_IOV_OFS].iov_len;
+       const uint8_t *dyn_base =
+               (const uint8_t *)srcvec[SMBD_SMB2_DYN_IOV_OFS].iov_base;
+       const size_t dyn_len = srcvec[SMBD_SMB2_DYN_IOV_OFS].iov_len;
+       /* Where body and dyn live when they sit inside the hdr buffer. */
+       const uint8_t *inline_body = hdr_base + SMB2_HDR_BODY;
+       const uint8_t *inline_dyn = body_base + 8;
+       uint8_t *new_tf = NULL;
+       uint8_t *new_hdr = NULL;
+       uint8_t *new_body = NULL;
+       uint8_t *new_dyn = NULL;
+
+       if ((tf_len != 0) && (tf_len != SMB2_TF_HDR_SIZE)) {
+               return false;
+       }
+
+       if (hdr_len != SMB2_HDR_BODY) {
+               return false;
+       }
+
+       /* The transform header is only copied when it is present. */
+       if (tf_len == SMB2_TF_HDR_SIZE) {
+               new_tf = talloc_memdup(ctx, tf_base, SMB2_TF_HDR_SIZE);
+               if (new_tf == NULL) {
+                       return false;
+               }
+       }
+       outvec[SMBD_SMB2_TF_IOV_OFS].iov_base = (void *)new_tf;
+       outvec[SMBD_SMB2_TF_IOV_OFS].iov_len = tf_len;
+
+       /*
+        * vec[SMBD_SMB2_HDR_IOV_OFS] is always boilerplate and must
+        * be allocated with size OUTVEC_ALLOC_SIZE.
+        */
+       new_hdr = talloc_memdup(ctx, hdr_base, OUTVEC_ALLOC_SIZE);
+       if (new_hdr == NULL) {
+               return false;
+       }
+       outvec[SMBD_SMB2_HDR_IOV_OFS].iov_base = (void *)new_hdr;
+       outvec[SMBD_SMB2_HDR_IOV_OFS].iov_len = SMB2_HDR_BODY;
+
+       /*
+        * A "standard" vec[SMBD_SMB2_BODY_IOV_OFS] has length 8 and
+        * points SMB2_HDR_BODY bytes into the source header buffer;
+        * the copy then points at the same place in the new header
+        * buffer. Anything else gets its own talloc_memdup().
+        */
+       if ((body_base != inline_body) || (body_len != 8)) {
+               new_body = talloc_memdup(ctx, body_base, body_len);
+               if (new_body == NULL) {
+                       return false;
+               }
+       } else {
+               new_body = new_hdr + SMB2_HDR_BODY;
+       }
+       outvec[SMBD_SMB2_BODY_IOV_OFS].iov_base = (void *)new_body;
+       outvec[SMBD_SMB2_BODY_IOV_OFS].iov_len = body_len;
+
+       /*
+        * A "standard" vec[SMBD_SMB2_DYN_IOV_OFS] has length 1 and
+        * points 8 bytes past the start of the source body; the copy
+        * then points at the matching place in the new header buffer.
+        * A NULL source stays NULL, anything else gets its own
+        * talloc_memdup().
+        */
+       if ((dyn_base == inline_dyn) && (dyn_len == 1)) {
+               new_dyn = new_hdr + SMB2_HDR_BODY + 8;
+       } else if (dyn_base != NULL) {
+               new_dyn = talloc_memdup(ctx, dyn_base, dyn_len);
+               if (new_dyn == NULL) {
+                       return false;
+               }
+       }
+       outvec[SMBD_SMB2_DYN_IOV_OFS].iov_base = (void *)new_dyn;
+       outvec[SMBD_SMB2_DYN_IOV_OFS].iov_len = dyn_len;
+
+       return true;
+}
+
+/*
+ * Create a new request on the same connection that carries a copy of
+ * the out vectors of req, together with its session, encryption and
+ * signing flags and current_idx.
+ *
+ * Returns NULL if any allocation or vector duplication fails or if
+ * the NBT length cannot be set up.
+ */
+static struct smbd_smb2_request *dup_smb2_req(const struct smbd_smb2_request *req)
+{
+       const int num_vecs = req->out.vector_count;
+       struct smbd_smb2_request *copy = NULL;
+       struct iovec *vecs = NULL;
+       int idx;
+
+       copy = smbd_smb2_request_allocate(req->xconn);
+       if (copy == NULL) {
+               return NULL;
+       }
+
+       copy->session = req->session;
+       copy->do_encryption = req->do_encryption;
+       copy->do_signing = req->do_signing;
+       copy->current_idx = req->current_idx;
+
+       vecs = talloc_zero_array(copy, struct iovec, num_vecs);
+       if (vecs == NULL) {
+               TALLOC_FREE(copy);
+               return NULL;
+       }
+       copy->out.vector = vecs;
+       copy->out.vector_count = num_vecs;
+
+       /* Vector 0 is the NBT header, stored inside the request itself. */
+       memcpy(copy->out.nbt_hdr, req->out.nbt_hdr, 4);
+       vecs[0].iov_base = copy->out.nbt_hdr;
+       vecs[0].iov_len = 4;
+
+       /* The remaining vectors come in groups, one group per response. */
+       for (idx = 1; idx < num_vecs; idx += SMBD_SMB2_NUM_IOV_PER_REQ) {
+               bool duplicated = dup_smb2_vec4(vecs,
+                                               &vecs[idx],
+                                               &req->out.vector[idx]);
+               if (!duplicated) {
+                       /* Duplication failed. */
+                       TALLOC_FREE(copy);
+                       return NULL;
+               }
+       }
+
+       if (!smb2_setup_nbt_length(copy->out.vector,
+                                  copy->out.vector_count)) {
+               TALLOC_FREE(copy);
+               return NULL;
+       }
+
+       return copy;
+}
+
+static NTSTATUS smb2_send_async_interim_response(const struct smbd_smb2_request *req)
+{
+       struct smbXsrv_connection *xconn = req->xconn;
+       int first_idx = 1;
+       struct iovec *firsttf = NULL;
+       struct iovec *outhdr_v = NULL;
+       uint8_t *outhdr = NULL;
+       struct smbd_smb2_request *nreq = NULL;
+       NTSTATUS status;
+       bool ok;
+
+       /* Create a new smb2 request we'll use
+          for the interim return. */
+       nreq = dup_smb2_req(req);
+       if (!nreq) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       /* Lose the last X out vectors. They're the
+          ones we'll be using for the async reply. */
        nreq->out.vector_count -= SMBD_SMB2_NUM_IOV_PER_REQ;
 
        ok = smb2_setup_nbt_length(nreq->out.vector,
@@ -1313,16 +2017,14 @@ static NTSTATUS smb2_send_async_interim_response(const struct smbd_smb2_request
         * we need to sign/encrypt here with the last/first key we remembered
         */
        if (firsttf->iov_len == SMB2_TF_HDR_SIZE) {
-               status = smb2_signing_encrypt_pdu(req->first_key,
-                                       xconn->smb2.server.cipher,
+               status = smb2_signing_encrypt_pdu(req->first_enc_key,
                                        firsttf,
                                        nreq->out.vector_count - first_idx);
                if (!NT_STATUS_IS_OK(status)) {
                        return status;
                }
-       } else if (req->last_key.length > 0) {
-               status = smb2_signing_sign_pdu(req->last_key,
-                                              xconn->protocol,
+       } else if (smb2_signing_key_valid(req->last_sign_key)) {
+               status = smb2_signing_sign_pdu(req->last_sign_key,
                                               outhdr_v,
                                               SMBD_SMB2_NUM_IOV_PER_REQ - 1);
                if (!NT_STATUS_IS_OK(status)) {
@@ -1390,7 +2092,7 @@ NTSTATUS smbd_smb2_request_pending_queue(struct smbd_smb2_request *req,
                return NT_STATUS_OK;
        }
 
-       if (req->async_internal) {
+       if (req->async_internal || defer_time == 0) {
                /*
                 * An SMB2 request implementation wants to handle the request
                 * asynchronously "internally" while keeping synchronous
@@ -1445,9 +2147,7 @@ NTSTATUS smbd_smb2_request_pending_queue(struct smbd_smb2_request *req,
                if (!NT_STATUS_IS_OK(status)) {
                        return status;
                }
-               if (req->first_key.length > 0) {
-                       data_blob_clear_free(&req->first_key);
-               }
+               TALLOC_FREE(req->first_enc_key);
 
                req->current_idx = 1;
 
@@ -1478,9 +2178,7 @@ NTSTATUS smbd_smb2_request_pending_queue(struct smbd_smb2_request *req,
                        SIVAL(outhdr, SMB2_HDR_FLAGS, flags);
                }
        }
-       if (req->last_key.length > 0) {
-               data_blob_clear_free(&req->last_key);
-       }
+       TALLOC_FREE(req->last_sign_key);
 
        /*
         * smbd_smb2_request_pending_timer() just send a packet
@@ -1500,20 +2198,29 @@ NTSTATUS smbd_smb2_request_pending_queue(struct smbd_smb2_request *req,
        return NT_STATUS_OK;
 }
 
-static DATA_BLOB smbd_smb2_signing_key(struct smbXsrv_session *session,
-                                      struct smbXsrv_connection *xconn)
+static
+struct smb2_signing_key *smbd_smb2_signing_key(struct smbXsrv_session *session,
+                                              struct smbXsrv_connection *xconn,
+                                              bool *_has_channel)
 {
        struct smbXsrv_channel_global0 *c = NULL;
        NTSTATUS status;
-       DATA_BLOB key = data_blob_null;
+       struct smb2_signing_key *key = NULL;
+       bool has_channel = false;
 
        status = smbXsrv_session_find_channel(session, xconn, &c);
        if (NT_STATUS_IS_OK(status)) {
                key = c->signing_key;
+               has_channel = true;
        }
 
-       if (key.length == 0) {
+       if (!smb2_signing_key_valid(key)) {
                key = session->global->signing_key;
+               has_channel = false;
+       }
+
+       if (_has_channel != NULL) {
+               *_has_channel = has_channel;
        }
 
        return key;
@@ -1537,7 +2244,7 @@ static NTSTATUS smb2_get_new_nonce(struct smbXsrv_session *session,
         * nonce wrap, or the security of the whole
         * communication and the keys is destroyed.
         * We must drop the connection once we have
-        * transfered too much data.
+        * transferred too much data.
         *
         * NOTE: We assume nonces greater than 8 bytes.
         */
@@ -1567,15 +2274,11 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
        uint8_t *outhdr = NULL;
        const uint8_t *inhdr = NULL;
        uint8_t *tf = NULL;
-       size_t tf_len = 0;
        uint8_t *hdr = NULL;
        uint8_t *body = NULL;
        uint8_t *dyn = NULL;
        uint32_t flags = 0;
-       uint64_t session_id = 0;
        uint64_t message_id = 0;
-       uint64_t nonce_high = 0;
-       uint64_t nonce_low = 0;
        uint64_t async_id = 0;
        NTSTATUS status;
        bool ok;
@@ -1587,7 +2290,6 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
        outhdr = SMBD_SMB2_OUT_HDR_PTR(req);
        flags = IVAL(outhdr, SMB2_HDR_FLAGS);
        message_id = BVAL(outhdr, SMB2_HDR_MESSAGE_ID);
-       session_id = BVAL(outhdr, SMB2_HDR_SESSION_ID);
 
        async_id = message_id; /* keep it simple for now... */
 
@@ -1613,13 +2315,16 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
        }
 
        tf = state->buf + NBT_HDR_SIZE;
-       tf_len = SMB2_TF_HDR_SIZE;
 
        hdr = tf + SMB2_TF_HDR_SIZE;
        body = hdr + SMB2_HDR_BODY;
        dyn = body + 8;
 
        if (req->do_encryption) {
+               uint64_t nonce_high = 0;
+               uint64_t nonce_low = 0;
+               uint64_t session_id = req->session->global->session_wire_id;
+
                status = smb2_get_new_nonce(req->session,
                                            &nonce_high,
                                            &nonce_low);
@@ -1628,19 +2333,24 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
                                                         nt_errstr(status));
                        return;
                }
-       }
 
-       SIVAL(tf, SMB2_TF_PROTOCOL_ID, SMB2_TF_MAGIC);
-       SBVAL(tf, SMB2_TF_NONCE+0, nonce_low);
-       SBVAL(tf, SMB2_TF_NONCE+8, nonce_high);
-       SBVAL(tf, SMB2_TF_SESSION_ID, session_id);
+               SIVAL(tf, SMB2_TF_PROTOCOL_ID, SMB2_TF_MAGIC);
+               SBVAL(tf, SMB2_TF_NONCE+0, nonce_low);
+               SBVAL(tf, SMB2_TF_NONCE+8, nonce_high);
+               SBVAL(tf, SMB2_TF_SESSION_ID, session_id);
+       }
 
        SIVAL(hdr, SMB2_HDR_PROTOCOL_ID, SMB2_MAGIC);
        SSVAL(hdr, SMB2_HDR_LENGTH, SMB2_HDR_BODY);
        SSVAL(hdr, SMB2_HDR_EPOCH, 0);
-       SIVAL(hdr, SMB2_HDR_STATUS, NT_STATUS_V(STATUS_PENDING));
+       SIVAL(hdr, SMB2_HDR_STATUS, NT_STATUS_V(NT_STATUS_PENDING));
        SSVAL(hdr, SMB2_HDR_OPCODE, SVAL(outhdr, SMB2_HDR_OPCODE));
 
+       /*
+        * The STATUS_PENDING response has SMB2_HDR_FLAG_SIGNED
+        * cleared, but echoes the signature field.
+        */
+       flags &= ~SMB2_HDR_FLAG_SIGNED;
        SIVAL(hdr, SMB2_HDR_FLAGS, flags);
        SIVAL(hdr, SMB2_HDR_NEXT_COMMAND, 0);
        SBVAL(hdr, SMB2_HDR_MESSAGE_ID, message_id);
@@ -1663,7 +2373,8 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
 
        if (req->do_encryption) {
                state->vector[1+SMBD_SMB2_TF_IOV_OFS].iov_base   = tf;
-               state->vector[1+SMBD_SMB2_TF_IOV_OFS].iov_len    = tf_len;
+               state->vector[1+SMBD_SMB2_TF_IOV_OFS].iov_len    =
+                                                       SMB2_TF_HDR_SIZE;
        } else {
                state->vector[1+SMBD_SMB2_TF_IOV_OFS].iov_base   = NULL;
                state->vector[1+SMBD_SMB2_TF_IOV_OFS].iov_len    = 0;
@@ -1693,6 +2404,10 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
                        SMBD_SMB2_IN_HDR_IOV(req),
                        &state->vector[1+SMBD_SMB2_HDR_IOV_OFS]);
 
+       /*
+        * We add SMB2_HDR_FLAG_ASYNC after smb2_set_operation_credit()
+        * as it reacts on it
+        */
        SIVAL(hdr, SMB2_HDR_FLAGS, flags | SMB2_HDR_FLAG_ASYNC);
 
        if (DEBUGLVL(10)) {
@@ -1708,10 +2423,9 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
 
        if (req->do_encryption) {
                struct smbXsrv_session *x = req->session;
-               DATA_BLOB encryption_key = x->global->encryption_key;
+               struct smb2_signing_key *encryption_key = x->global->encryption_key;
 
                status = smb2_signing_encrypt_pdu(encryption_key,
-                                       xconn->smb2.server.cipher,
                                        &state->vector[1+SMBD_SMB2_TF_IOV_OFS],
                                        SMBD_SMB2_NUM_IOV_PER_REQ);
                if (!NT_STATUS_IS_OK(status)) {
@@ -1719,19 +2433,6 @@ static void smbd_smb2_request_pending_timer(struct tevent_context *ev,
                                                nt_errstr(status));
                        return;
                }
-       } else if (req->do_signing) {
-               struct smbXsrv_session *x = req->session;
-               DATA_BLOB signing_key = smbd_smb2_signing_key(x, xconn);
-
-               status = smb2_signing_sign_pdu(signing_key,
-                                       xconn->protocol,
-                                       &state->vector[1+SMBD_SMB2_HDR_IOV_OFS],
-                                       SMBD_SMB2_NUM_IOV_PER_REQ - 1);
-               if (!NT_STATUS_IS_OK(status)) {
-                       smbd_server_connection_terminate(xconn,
-                                               nt_errstr(status));
-                       return;
-               }
        }
 
        state->queue_entry.mem_ctx = state;
@@ -1777,6 +2478,10 @@ static NTSTATUS smbd_smb2_request_process_cancel(struct smbd_smb2_request *req)
                uint64_t message_id;
                uint64_t async_id;
 
+               if (cur->session != req->session) {
+                       continue;
+               }
+
                if (cur->compound_related) {
                        /*
                         * Never cancel anything in a compound request.
@@ -1849,7 +2554,10 @@ static NTSTATUS smbd_smb2_request_check_tcon(struct smbd_smb2_request *req)
                return status;
        }
 
-       if (!change_to_user(tcon->compat, req->session->compat->vuid)) {
+       if (!change_to_user_and_service(
+                   tcon->compat,
+                   req->session->global->session_wire_id))
+       {
                return NT_STATUS_ACCESS_DENIED;
        }
 
@@ -1913,6 +2621,28 @@ static NTSTATUS smbd_smb2_request_check_session(struct smbd_smb2_request *req)
                req->session = session;
                req->last_session_id = in_session_id;
        }
+       if (NT_STATUS_EQUAL(status, NT_STATUS_USER_SESSION_DELETED)) {
+               switch (in_opcode) {
+               case SMB2_OP_SESSSETUP:
+                       status = smb2srv_session_lookup_global(req->xconn->client,
+                                                              in_session_id,
+                                                              req,
+                                                              &session);
+                       if (NT_STATUS_IS_OK(status)) {
+                               /*
+                                * We fallback to a session of
+                                * another process in order to
+                                * get the signing correct.
+                                *
+                                * We don't set req->last_session_id here.
+                                */
+                               req->session = session;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
        if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_SESSION_EXPIRED)) {
                switch (in_opcode) {
                case SMB2_OP_SESSSETUP:
@@ -2037,16 +2767,8 @@ NTSTATUS smbd_smb2_request_verify_sizes(struct smbd_smb2_request *req,
        switch (opcode) {
        case SMB2_OP_IOCTL:
        case SMB2_OP_GETINFO:
-               min_dyn_size = 0;
-               break;
        case SMB2_OP_WRITE:
-               if (req->smb1req != NULL && req->smb1req->unread_bytes > 0) {
-                       if (req->smb1req->unread_bytes < min_dyn_size) {
-                               return NT_STATUS_INVALID_PARAMETER;
-                       }
-
-                       min_dyn_size = 0;
-               }
+               min_dyn_size = 0;
                break;
        }
 
@@ -2115,6 +2837,18 @@ static void smb2srv_update_crypto_flags(struct smbd_smb2_request *req,
        bool update_session = false;
        bool update_tcon = false;
 
+       if (session->table == NULL) {
+               /*
+                * sessions from smb2srv_session_lookup_global()
+                * have NT_STATUS_BAD_LOGON_SESSION_STATE
+                * and session->table == NULL.
+                *
+                * They are only used to give the correct error
+                * status; we should not update any state.
+                */
+               goto out;
+       }
+
        if (req->was_encrypted && req->do_encryption) {
                encrypt_flag = SMBXSRV_PROCESSED_ENCRYPTED_PACKET;
                sign_flag = SMBXSRV_PROCESSED_SIGNED_PACKET;
@@ -2122,9 +2856,6 @@ static void smb2srv_update_crypto_flags(struct smbd_smb2_request *req,
                /* Unencrypted packet, can be signed */
                if (req->do_signing) {
                        sign_flag = SMBXSRV_PROCESSED_SIGNED_PACKET;
-               } else if (opcode == SMB2_OP_CANCEL) {
-                       /* Cancel requests are allowed to skip signing */
-                       sign_flag &= ~SMBXSRV_PROCESSED_UNSIGNED_PACKET;
                }
        }
 
@@ -2140,6 +2871,7 @@ static void smb2srv_update_crypto_flags(struct smbd_smb2_request *req,
                        &tcon->global->signing_flags, sign_flag);
        }
 
+out:
        *update_session_globalp = update_session;
        *update_tcon_globalp = update_tcon;
        return;
@@ -2178,7 +2910,7 @@ static NTSTATUS smbd_smb2_request_dispatch_update_counts(
 
        SMB_ASSERT(!req->request_counters_updated);
 
-       if (xconn->protocol < PROTOCOL_SMB2_22) {
+       if (xconn->protocol < PROTOCOL_SMB3_00) {
                return NT_STATUS_OK;
        }
 
@@ -2318,9 +3050,9 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
        flags = IVAL(inhdr, SMB2_HDR_FLAGS);
        opcode = SVAL(inhdr, SMB2_HDR_OPCODE);
        mid = BVAL(inhdr, SMB2_HDR_MESSAGE_ID);
-       DEBUG(10,("smbd_smb2_request_dispatch: opcode[%s] mid = %llu\n",
-               smb2_opcode_name(opcode),
-               (unsigned long long)mid));
+       DBG_DEBUG("opcode[%s] mid = %"PRIu64"\n",
+                 smb2_opcode_name(opcode),
+                 mid);
 
        if (xconn->protocol >= PROTOCOL_SMB2_02) {
                /*
@@ -2358,7 +3090,11 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
 
        req->async_internal = false;
        req->do_signing = false;
-       req->do_encryption = false;
+       if (opcode != SMB2_OP_SESSSETUP) {
+               req->do_encryption = encryption_desired;
+       } else {
+               req->do_encryption = false;
+       }
        req->was_encrypted = false;
        if (intf_v->iov_len == SMB2_TF_HDR_SIZE) {
                const uint8_t *intf = SMBD_SMB2_IN_TF_PTR(req);
@@ -2382,9 +3118,11 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                }
 
                req->was_encrypted = true;
+               req->do_encryption = true;
        }
 
        if (encryption_required && !req->was_encrypted) {
+               req->do_encryption = true;
                return smbd_smb2_request_error(req,
                                NT_STATUS_ACCESS_DENIED);
        }
@@ -2408,7 +3146,7 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
        if (opcode == SMB2_OP_CANCEL) {
                allowed_flags |= SMB2_HDR_FLAG_ASYNC;
        }
-       if (xconn->protocol >= PROTOCOL_SMB2_22) {
+       if (xconn->protocol >= PROTOCOL_SMB3_00) {
                allowed_flags |= SMB2_HDR_FLAG_REPLAY_OPERATION;
        }
        if ((flags & ~allowed_flags) != 0) {
@@ -2430,7 +3168,8 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
        if (req->was_encrypted) {
                signing_required = false;
        } else if (signing_required || (flags & SMB2_HDR_FLAG_SIGNED)) {
-               DATA_BLOB signing_key = data_blob_null;
+               struct smb2_signing_key *signing_key = NULL;
+               bool has_channel = false;
 
                if (x == NULL) {
                        /*
@@ -2452,20 +3191,46 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                        return smbd_smb2_request_error(req, status);
                }
 
-               signing_key = smbd_smb2_signing_key(x, xconn);
+               signing_key = smbd_smb2_signing_key(x, xconn, &has_channel);
 
                /*
                 * If we have a signing key, we should
                 * sign the response
                 */
-               if (signing_key.length > 0) {
+               if (smb2_signing_key_valid(signing_key) && opcode != SMB2_OP_CANCEL) {
                        req->do_signing = true;
                }
 
                status = smb2_signing_check_pdu(signing_key,
-                                               xconn->protocol,
                                                SMBD_SMB2_IN_HDR_IOV(req),
                                                SMBD_SMB2_NUM_IOV_PER_REQ - 1);
+               if (NT_STATUS_EQUAL(status, NT_STATUS_ACCESS_DENIED) &&
+                   opcode == SMB2_OP_SESSSETUP && !has_channel &&
+                   NT_STATUS_IS_OK(session_status))
+               {
+                       if (!NT_STATUS_EQUAL(x->status, NT_STATUS_BAD_LOGON_SESSION_STATE)) {
+                               struct smbXsrv_session *session = NULL;
+                               NTSTATUS error;
+
+                               error = smb2srv_session_lookup_global(req->xconn->client,
+                                                                     x->global->session_wire_id,
+                                                                     req,
+                                                                     &session);
+                               if (!NT_STATUS_IS_OK(error)) {
+                                       return smbd_smb2_request_error(req, error);
+                               }
+
+                               /*
+                                * We fall back to a session of
+                                * another process in order to
+                                * get the signing correct.
+                                *
+                                * We don't set req->last_session_id here.
+                                */
+                               req->session = x = session;
+                       }
+                       goto skipped_signing;
+               }
                if (!NT_STATUS_IS_OK(status)) {
                        return smbd_smb2_request_error(req, status);
                }
@@ -2474,22 +3239,61 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                 * Now that we know the request was correctly signed
                 * we have to sign the response too.
                 */
-               req->do_signing = true;
+               if (opcode != SMB2_OP_CANCEL) {
+                       req->do_signing = true;
+               }
 
                if (!NT_STATUS_IS_OK(session_status)) {
                        return smbd_smb2_request_error(req, session_status);
                }
-       } else if (opcode == SMB2_OP_CANCEL) {
-               /* Cancel requests are allowed to skip the signing */
-       } else if (signing_required) {
+       }
+
+       if (opcode == SMB2_OP_IOCTL) {
+               /*
+                * Some special IOCTL calls don't require
+                * file, tcon nor session.
+                *
+                * They typically don't do any real action
+                * on behalf of the client.
+                *
+                * They are mainly used to alter the behavior
+                * of the connection for testing. So we can
+                * run as root and skip all file, tcon and session
+                * checks below.
+                */
+               static const struct smbd_smb2_dispatch_table _root_ioctl_call = {
+                       .opcode = SMB2_OP_IOCTL,
+                       .as_root = true,
+               };
+               const uint8_t *body = SMBD_SMB2_IN_BODY_PTR(req);
+               size_t body_size = SMBD_SMB2_IN_BODY_LEN(req);
+               uint32_t in_ctl_code;
+               size_t needed = 8;
+
+               if (needed > body_size) {
+                       return smbd_smb2_request_error(req,
+                                       NT_STATUS_INVALID_PARAMETER);
+               }
+
+               in_ctl_code = IVAL(body, 0x04);
                /*
-                * If signing is required we try to sign
-                * a possible error response
+                * Only add trusted IOCTL codes here!
                 */
-               req->do_signing = true;
-               return smbd_smb2_request_error(req, NT_STATUS_ACCESS_DENIED);
+               switch (in_ctl_code) {
+               case FSCTL_SMBTORTURE_FORCE_UNACKED_TIMEOUT:
+                       call = &_root_ioctl_call;
+                       break;
+               case FSCTL_VALIDATE_NEGOTIATE_INFO:
+                       call = &_root_ioctl_call;
+                       break;
+               case FSCTL_QUERY_NETWORK_INTERFACE_INFO:
+                       call = &_root_ioctl_call;
+                       break;
+               }
        }
 
+skipped_signing:
+
        if (flags & SMB2_HDR_FLAG_CHAINED) {
                req->compound_related = true;
        }
@@ -2522,8 +3326,11 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                        encryption_required = true;
                }
                if (encryption_required && !req->was_encrypted) {
+                       req->do_encryption = true;
                        return smbd_smb2_request_error(req,
                                NT_STATUS_ACCESS_DENIED);
+               } else if (encryption_desired) {
+                       req->do_encryption = true;
                }
        } else if (call->need_session) {
                struct auth_session_info *session_info = NULL;
@@ -2543,10 +3350,6 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                                      session_info->info->domain_name);
        }
 
-       if (req->was_encrypted || encryption_desired) {
-               req->do_encryption = true;
-       }
-
        if (req->session) {
                bool update_session_global = false;
                bool update_tcon_global = false;
@@ -2589,18 +3392,25 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
 
                fsp = file_fsp_smb2(req, file_id_persistent, file_id_volatile);
                if (fsp == NULL) {
-                       if (!call->allow_invalid_fileid) {
+                       if (req->compound_related &&
+                           !NT_STATUS_IS_OK(req->compound_create_err))
+                       {
                                return smbd_smb2_request_error(req,
-                                               NT_STATUS_FILE_CLOSED);
+                                               req->compound_create_err);
                        }
-
-                       if (file_id_persistent != UINT64_MAX) {
+                       /*
+                        * smbd_smb2_request_process_ioctl()
+                        * has more checks in order to return more
+                        * detailed error codes...
+                        */
+                       if (opcode != SMB2_OP_IOCTL) {
                                return smbd_smb2_request_error(req,
                                                NT_STATUS_FILE_CLOSED);
                        }
-                       if (file_id_volatile != UINT64_MAX) {
+               } else {
+                       if (fsp->fsp_flags.encryption_required && !req->was_encrypted) {
                                return smbd_smb2_request_error(req,
-                                               NT_STATUS_FILE_CLOSED);
+                                               NT_STATUS_ACCESS_DENIED);
                        }
                }
        }
@@ -2614,7 +3424,7 @@ NTSTATUS smbd_smb2_request_dispatch(struct smbd_smb2_request *req)
                SMB_ASSERT(call->fileid_ofs == 0);
                /* This call needs to be run as root */
                change_to_root_user();
-       } else {
+       } else if (opcode != SMB2_OP_KEEPALIVE) {
                SMB_ASSERT(call->need_tcon);
        }
 
@@ -2771,7 +3581,7 @@ static void smbd_smb2_request_reply_update_counts(struct smbd_smb2_request *req)
 
        req->request_counters_updated = false;
 
-       if (xconn->protocol < PROTOCOL_SMB2_22) {
+       if (xconn->protocol < PROTOCOL_SMB3_00) {
                return;
        }
 
@@ -2815,11 +3625,12 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
 
        if (req->do_encryption &&
            (firsttf->iov_len == 0) &&
-           (req->first_key.length == 0) &&
+           (!smb2_signing_key_valid(req->first_enc_key)) &&
            (req->session != NULL) &&
-           (req->session->global->encryption_key.length != 0))
+           smb2_signing_key_valid(req->session->global->encryption_key))
        {
-               DATA_BLOB encryption_key = req->session->global->encryption_key;
+               struct smb2_signing_key *encryption_key =
+                       req->session->global->encryption_key;
                uint8_t *tf;
                uint64_t session_id = req->session->global->session_wire_id;
                uint64_t nonce_high;
@@ -2843,9 +3654,11 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
                 * we are sure that we do not change
                 * the header again.
                 */
-               req->first_key = data_blob_dup_talloc(req, encryption_key);
-               if (req->first_key.data == NULL) {
-                       return NT_STATUS_NO_MEMORY;
+               status = smb2_signing_key_copy(req,
+                                              encryption_key,
+                                              &req->first_enc_key);
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
                }
 
                tf = talloc_zero_array(req, uint8_t,
@@ -2864,7 +3677,7 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
        }
 
        if ((req->current_idx > SMBD_SMB2_NUM_IOV_PER_REQ) &&
-           (req->last_key.length > 0) &&
+           (smb2_signing_key_valid(req->last_sign_key)) &&
            (firsttf->iov_len == 0))
        {
                int last_idx = req->current_idx - SMBD_SMB2_NUM_IOV_PER_REQ;
@@ -2875,17 +3688,14 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
                 * compound chain will not change, we can to sign here
                 * with the last signing key we remembered.
                 */
-               status = smb2_signing_sign_pdu(req->last_key,
-                                              xconn->protocol,
+               status = smb2_signing_sign_pdu(req->last_sign_key,
                                               lasthdr,
                                               SMBD_SMB2_NUM_IOV_PER_REQ - 1);
                if (!NT_STATUS_IS_OK(status)) {
                        return status;
                }
        }
-       if (req->last_key.length > 0) {
-               data_blob_clear_free(&req->last_key);
-       }
+       TALLOC_FREE(req->last_sign_key);
 
        SMBPROFILE_IOBYTES_ASYNC_END(req->profile,
                iov_buflen(outhdr, SMBD_SMB2_NUM_IOV_PER_REQ-1));
@@ -2908,7 +3718,8 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
 
                if (req->do_signing && firsttf->iov_len == 0) {
                        struct smbXsrv_session *x = req->session;
-                       DATA_BLOB signing_key = smbd_smb2_signing_key(x, xconn);
+                       struct smb2_signing_key *signing_key =
+                               smbd_smb2_signing_key(x, xconn, NULL);
 
                        /*
                         * we need to remember the signing key
@@ -2916,14 +3727,21 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
                         * we are sure that we do not change
                         * the header again.
                         */
-                       req->last_key = data_blob_dup_talloc(req, signing_key);
-                       if (req->last_key.data == NULL) {
-                               return NT_STATUS_NO_MEMORY;
+                       status = smb2_signing_key_copy(req,
+                                                      signing_key,
+                                                      &req->last_sign_key);
+                       if (!NT_STATUS_IS_OK(status)) {
+                               return status;
                        }
                }
 
+               /*
+                * smbd_smb2_request_dispatch() will redo the impersonation.
+                * So we use req->xconn->client->raw_ev_ctx instead
+                * of req->ev_ctx here.
+                */
                tevent_schedule_immediate(im,
-                                       req->sconn->ev_ctx,
+                                       req->xconn->client->raw_ev_ctx,
                                        smbd_smb2_request_dispatch_immediate,
                                        req);
                return NT_STATUS_OK;
@@ -2946,8 +3764,7 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
         * now check if we need to sign the current response
         */
        if (firsttf->iov_len == SMB2_TF_HDR_SIZE) {
-               status = smb2_signing_encrypt_pdu(req->first_key,
-                                       xconn->smb2.server.cipher,
+               status = smb2_signing_encrypt_pdu(req->first_enc_key,
                                        firsttf,
                                        req->out.vector_count - first_idx);
                if (!NT_STATUS_IS_OK(status)) {
@@ -2955,43 +3772,67 @@ static NTSTATUS smbd_smb2_request_reply(struct smbd_smb2_request *req)
                }
        } else if (req->do_signing) {
                struct smbXsrv_session *x = req->session;
-               DATA_BLOB signing_key = smbd_smb2_signing_key(x, xconn);
+               struct smb2_signing_key *signing_key =
+                       smbd_smb2_signing_key(x, xconn, NULL);
 
                status = smb2_signing_sign_pdu(signing_key,
-                                              xconn->protocol,
                                               outhdr,
                                               SMBD_SMB2_NUM_IOV_PER_REQ - 1);
                if (!NT_STATUS_IS_OK(status)) {
                        return status;
                }
        }
-       if (req->first_key.length > 0) {
-               data_blob_clear_free(&req->first_key);
-       }
+       TALLOC_FREE(req->first_enc_key);
 
        if (req->preauth != NULL) {
-               struct hc_sha512state sctx;
-               int i;
+               gnutls_hash_hd_t hash_hnd = NULL;
+               size_t i;
+               int rc;
 
-               samba_SHA512_Init(&sctx);
-               samba_SHA512_Update(&sctx, req->preauth->sha512_value,
-                                   sizeof(req->preauth->sha512_value));
+               rc = gnutls_hash_init(&hash_hnd, GNUTLS_DIG_SHA512);
+               if (rc < 0) {
+                       return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
+               }
+               rc = gnutls_hash(hash_hnd,
+                           req->preauth->sha512_value,
+                           sizeof(req->preauth->sha512_value));
+               if (rc < 0) {
+                       gnutls_hash_deinit(hash_hnd, NULL);
+                       return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
+               }
                for (i = 1; i < req->in.vector_count; i++) {
-                       samba_SHA512_Update(&sctx,
-                                           req->in.vector[i].iov_base,
-                                           req->in.vector[i].iov_len);
+                       rc = gnutls_hash(hash_hnd,
+                                        req->in.vector[i].iov_base,
+                                        req->in.vector[i].iov_len);
+                       if (rc < 0) {
+                               gnutls_hash_deinit(hash_hnd, NULL);
+                               return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
+                       }
+               }
+               if (rc < 0) {
+                       gnutls_hash_deinit(hash_hnd, NULL);
+                       return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
+               }
+               gnutls_hash_output(hash_hnd, req->preauth->sha512_value);
+
+               rc = gnutls_hash(hash_hnd,
+                                req->preauth->sha512_value,
+                                sizeof(req->preauth->sha512_value));
+               if (rc < 0) {
+                       gnutls_hash_deinit(hash_hnd, NULL);
+                       return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
                }
-               samba_SHA512_Final(req->preauth->sha512_value, &sctx);
-
-               samba_SHA512_Init(&sctx);
-               samba_SHA512_Update(&sctx, req->preauth->sha512_value,
-                                   sizeof(req->preauth->sha512_value));
                for (i = 1; i < req->out.vector_count; i++) {
-                       samba_SHA512_Update(&sctx,
-                                           req->out.vector[i].iov_base,
-                                           req->out.vector[i].iov_len);
+                       rc = gnutls_hash(hash_hnd,
+                                        req->out.vector[i].iov_base,
+                                        req->out.vector[i].iov_len);
+                       if (rc < 0) {
+                               gnutls_hash_deinit(hash_hnd, NULL);
+                               return gnutls_error_to_ntstatus(rc, NT_STATUS_HASH_NOT_SUPPORTED);
+                       }
                }
-               samba_SHA512_Final(req->preauth->sha512_value, &sctx);
+
+               gnutls_hash_deinit(hash_hnd, req->preauth->sha512_value);
 
                req->preauth = NULL;
        }
@@ -3065,13 +3906,20 @@ NTSTATUS smbd_smb2_request_done_ex(struct smbd_smb2_request *req,
        struct iovec *outbody_v;
        struct iovec *outdyn_v;
        uint32_t next_command_ofs;
+       uint64_t mid;
 
-       DEBUG(10,("smbd_smb2_request_done_ex: "
-                 "idx[%d] status[%s] body[%u] dyn[%s:%u] at %s\n",
-                 req->current_idx, nt_errstr(status), (unsigned int)body.length,
-                 dyn ? "yes": "no",
+       outhdr = SMBD_SMB2_OUT_HDR_PTR(req);
+       mid = BVAL(outhdr, SMB2_HDR_MESSAGE_ID);
+
+       DBG_DEBUG("mid [%"PRIu64"] idx[%d] status[%s] "
+                 "body[%u] dyn[%s:%u] at %s\n",
+                 mid,
+                 req->current_idx,
+                 nt_errstr(status),
+                 (unsigned int)body.length,
+                 dyn ? "yes" : "no",
                  (unsigned int)(dyn ? dyn->length : 0),
-                 location));
+                 location);
 
        if (body.length < 2) {
                return smbd_smb2_request_error(req, NT_STATUS_INTERNAL_ERROR);
@@ -3081,7 +3929,6 @@ NTSTATUS smbd_smb2_request_done_ex(struct smbd_smb2_request *req,
                return smbd_smb2_request_error(req, NT_STATUS_INTERNAL_ERROR);
        }
 
-       outhdr = SMBD_SMB2_OUT_HDR_PTR(req);
        outbody_v = SMBD_SMB2_OUT_BODY_IOV(req);
        outdyn_v = SMBD_SMB2_OUT_DYN_IOV(req);
 
@@ -3169,6 +4016,7 @@ NTSTATUS smbd_smb2_request_done_ex(struct smbd_smb2_request *req,
 
 NTSTATUS smbd_smb2_request_error_ex(struct smbd_smb2_request *req,
                                    NTSTATUS status,
+                                   uint8_t error_context_count,
                                    DATA_BLOB *info,
                                    const char *location)
 {
@@ -3208,6 +4056,7 @@ NTSTATUS smbd_smb2_request_error_ex(struct smbd_smb2_request *req,
        body.data = outhdr + SMB2_HDR_BODY;
        body.length = 8;
        SSVAL(body.data, 0, 9);
+       SCVAL(body.data, 2, error_context_count);
 
        if (info) {
                SIVAL(body.data, 0x04, info->length);
@@ -3231,61 +4080,34 @@ NTSTATUS smbd_smb2_request_error_ex(struct smbd_smb2_request *req,
        return smbd_smb2_request_done_ex(req, status, body, info, __location__);
 }
 
-
-struct smbd_smb2_send_break_state {
+struct smbd_smb2_break_state {
+       struct tevent_req *req;
        struct smbd_smb2_send_queue queue_entry;
        uint8_t nbt_hdr[NBT_HDR_SIZE];
-       uint8_t tf[SMB2_TF_HDR_SIZE];
        uint8_t hdr[SMB2_HDR_BODY];
        struct iovec vector[1+SMBD_SMB2_NUM_IOV_PER_REQ];
-       uint8_t body[1];
 };
 
-static NTSTATUS smbd_smb2_send_break(struct smbXsrv_connection *xconn,
-                                    struct smbXsrv_session *session,
-                                    struct smbXsrv_tcon *tcon,
-                                    const uint8_t *body,
-                                    size_t body_len)
-{
-       struct smbd_smb2_send_break_state *state;
-       bool do_encryption = false;
-       uint64_t session_wire_id = 0;
-       uint64_t nonce_high = 0;
-       uint64_t nonce_low = 0;
+static struct tevent_req *smbd_smb2_break_send(TALLOC_CTX *mem_ctx,
+                                              struct tevent_context *ev,
+                                              struct smbXsrv_connection *xconn,
+                                              uint64_t session_id,
+                                              const uint8_t *body,
+                                              size_t body_len)
+{
+       struct tevent_req *req = NULL;
+       struct smbd_smb2_break_state *state = NULL;
        NTSTATUS status;
-       size_t statelen;
        bool ok;
 
-       if (session != NULL) {
-               session_wire_id = session->global->session_wire_id;
-               do_encryption = session->global->encryption_flags & SMBXSRV_ENCRYPTION_DESIRED;
-               if (tcon->global->encryption_flags & SMBXSRV_ENCRYPTION_DESIRED) {
-                       do_encryption = true;
-               }
-       }
-
-       statelen = offsetof(struct smbd_smb2_send_break_state, body) +
-               body_len;
-
-       state = talloc_zero_size(xconn, statelen);
-       if (state == NULL) {
-               return NT_STATUS_NO_MEMORY;
-       }
-       talloc_set_name_const(state, "struct smbd_smb2_send_break_state");
-
-       if (do_encryption) {
-               status = smb2_get_new_nonce(session,
-                                           &nonce_high,
-                                           &nonce_low);
-               if (!NT_STATUS_IS_OK(status)) {
-                       return status;
-               }
+       req = tevent_req_create(mem_ctx, &state,
+                               struct smbd_smb2_break_state);
+       if (req == NULL) {
+               return NULL;
        }
 
-       SIVAL(state->tf, SMB2_TF_PROTOCOL_ID, SMB2_TF_MAGIC);
-       SBVAL(state->tf, SMB2_TF_NONCE+0, nonce_low);
-       SBVAL(state->tf, SMB2_TF_NONCE+8, nonce_high);
-       SBVAL(state->tf, SMB2_TF_SESSION_ID, session_wire_id);
+       state->req = req;
+       tevent_req_defer_callback(req, ev);
 
        SIVAL(state->hdr, 0,                            SMB2_MAGIC);
        SSVAL(state->hdr, SMB2_HDR_LENGTH,              SMB2_HDR_BODY);
@@ -3296,9 +4118,9 @@ static NTSTATUS smbd_smb2_send_break(struct smbXsrv_connection *xconn,
        SIVAL(state->hdr, SMB2_HDR_FLAGS,               SMB2_HDR_FLAG_REDIRECT);
        SIVAL(state->hdr, SMB2_HDR_NEXT_COMMAND,        0);
        SBVAL(state->hdr, SMB2_HDR_MESSAGE_ID,          UINT64_MAX);
-       SIVAL(state->hdr, SMB2_HDR_PID,         0);
-       SIVAL(state->hdr, SMB2_HDR_TID,         0);
-       SBVAL(state->hdr, SMB2_HDR_SESSION_ID,          0);
+       SIVAL(state->hdr, SMB2_HDR_PID,                 0);
+       SIVAL(state->hdr, SMB2_HDR_TID,                 0);
+       SBVAL(state->hdr, SMB2_HDR_SESSION_ID,          session_id);
        memset(state->hdr+SMB2_HDR_SIGNATURE, 0, 16);
 
        state->vector[0] = (struct iovec) {
@@ -3306,94 +4128,328 @@ static NTSTATUS smbd_smb2_send_break(struct smbXsrv_connection *xconn,
                .iov_len  = sizeof(state->nbt_hdr)
        };
 
-       if (do_encryption) {
-               state->vector[1+SMBD_SMB2_TF_IOV_OFS] = (struct iovec) {
-                       .iov_base = state->tf,
-                       .iov_len  = sizeof(state->tf)
-               };
-       } else {
-               state->vector[1+SMBD_SMB2_TF_IOV_OFS] = (struct iovec) {
-                       .iov_base = NULL,
-                       .iov_len  = 0
-               };
-       }
+       state->vector[1+SMBD_SMB2_TF_IOV_OFS] = (struct iovec) {
+               .iov_base = NULL,
+               .iov_len  = 0
+       };
 
        state->vector[1+SMBD_SMB2_HDR_IOV_OFS] = (struct iovec) {
                .iov_base = state->hdr,
                .iov_len  = sizeof(state->hdr)
        };
 
-       memcpy(state->body, body, body_len);
-
        state->vector[1+SMBD_SMB2_BODY_IOV_OFS] = (struct iovec) {
-               .iov_base = state->body,
-               .iov_len  = body_len /* no sizeof(state->body) .. :-) */
+               .iov_base = discard_const_p(uint8_t, body),
+               .iov_len  = body_len,
        };
 
-       /*
-        * state->vector[1+SMBD_SMB2_DYN_IOV_OFS] is NULL by talloc_zero above
-        */
+       /*
+        * state->vector[1+SMBD_SMB2_DYN_IOV_OFS] is NULL by talloc_zero above
+        */
+
+       ok = smb2_setup_nbt_length(state->vector,
+                                  1 + SMBD_SMB2_NUM_IOV_PER_REQ);
+       if (!ok) {
+               tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+               return tevent_req_post(req, ev);
+       }
+
+       /*
+        * We require TCP acks for this PDU to the client!
+        * We want 5 retransmissions and time out once the
+        * retransmission timeout (rto) has passed 6 times.
+        *
+        * required_acked_bytes gets a dummy value of
+        * UINT64_MAX, as long as it's in xconn->smb2.send_queue,
+        * it'll get the real value when it's moved to
+        * xconn->ack.queue.
+        *
+        * state->queue_entry.ack.req gets completed with
+        * 1.  tevent_req_done(), when all bytes are acked.
+        * 2a. tevent_req_nterror(NT_STATUS_IO_TIMEOUT), when
+        *     the timeout expired before all bytes were acked.
+        * 2b. tevent_req_nterror(transport_error), when the
+        *     connection got a disconnect from the kernel.
+        */
+       state->queue_entry.ack.timeout =
+               timeval_current_ofs_usec(xconn->ack.rto_usecs * 6);
+       state->queue_entry.ack.required_acked_bytes = UINT64_MAX;
+       state->queue_entry.ack.req = req;
+       state->queue_entry.mem_ctx = state;
+       state->queue_entry.vector = state->vector;
+       state->queue_entry.count = ARRAY_SIZE(state->vector);
+       DLIST_ADD_END(xconn->smb2.send_queue, &state->queue_entry);
+       xconn->smb2.send_queue_len++;
+
+       status = smbd_smb2_flush_send_queue(xconn);
+       if (tevent_req_nterror(req, status)) {
+               return tevent_req_post(req, ev);
+       }
+
+       return req;
+}
+
+static NTSTATUS smbd_smb2_break_recv(struct tevent_req *req)
+{
+       return tevent_req_simple_recv_ntstatus(req);
+}
+
+struct smbXsrv_pending_break {
+       struct smbXsrv_pending_break *prev, *next;
+       struct smbXsrv_client *client;
+       bool disable_oplock_break_retries;
+       uint64_t session_id;
+       uint64_t last_channel_id;
+       union {
+               uint8_t generic[1];
+               uint8_t oplock[0x18];
+               uint8_t lease[0x2c];
+       } body;
+       size_t body_len;
+};
+
+static void smbXsrv_pending_break_done(struct tevent_req *subreq);
+
+static struct smbXsrv_pending_break *smbXsrv_pending_break_create(
+               struct smbXsrv_client *client,
+               uint64_t session_id)
+{
+       struct smbXsrv_pending_break *pb = NULL;
+
+       pb = talloc_zero(client, struct smbXsrv_pending_break);
+       if (pb == NULL) {
+               return NULL;
+       }
+       pb->client = client;
+       pb->session_id = session_id;
+       pb->disable_oplock_break_retries = lp_smb2_disable_oplock_break_retry();
+
+       return pb;
+}
+
+static NTSTATUS smbXsrv_pending_break_submit(struct smbXsrv_pending_break *pb);
+
+static NTSTATUS smbXsrv_pending_break_schedule(struct smbXsrv_pending_break *pb)
+{
+       struct smbXsrv_client *client = pb->client;
+       NTSTATUS status;
+
+       DLIST_ADD_END(client->pending_breaks, pb);
+       status = smbXsrv_client_pending_breaks_updated(client);
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+
+       status = smbXsrv_pending_break_submit(pb);
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+
+       return NT_STATUS_OK;
+}
+
+static NTSTATUS smbXsrv_pending_break_submit(struct smbXsrv_pending_break *pb)
+{
+       struct smbXsrv_client *client = pb->client;
+       struct smbXsrv_session *session = NULL;
+       struct smbXsrv_connection *xconn = NULL;
+       struct smbXsrv_connection *oplock_xconn = NULL;
+       struct tevent_req *subreq = NULL;
+       NTSTATUS status;
+
+       if (pb->session_id != 0) {
+               status = get_valid_smbXsrv_session(client,
+                                                  pb->session_id,
+                                                  &session);
+               if (NT_STATUS_EQUAL(status, NT_STATUS_USER_SESSION_DELETED)) {
+                       return NT_STATUS_ABANDONED;
+               }
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
+               }
+
+               if (pb->last_channel_id != 0) {
+                       /*
+                        * This is what current Windows servers
+                        * do, they don't retry on all available
+                        * channels. They only use the last channel.
+                        *
+                        * But it doesn't match the specification in
+                        * [MS-SMB2] "3.3.4.6 Object Store Indicates an
+                        * Oplock Break"
+                        *
+                        * Per default disable_oplock_break_retries is false
+                        * and we behave like the specification.
+                        */
+                       if (pb->disable_oplock_break_retries) {
+                               return NT_STATUS_ABANDONED;
+                       }
+               }
+       }
+
+       for (xconn = client->connections; xconn != NULL; xconn = xconn->next) {
+               if (!NT_STATUS_IS_OK(xconn->transport.status)) {
+                       continue;
+               }
+
+               if (xconn->channel_id == 0) {
+                       /*
+                        * non-multichannel case
+                        */
+                       break;
+               }
+
+               if (session != NULL) {
+                       struct smbXsrv_channel_global0 *c = NULL;
+
+                       /*
+                        * Having a session means we're handling
+                        * an oplock break and we only need to
+                        * use channels available on the
+                        * session.
+                        */
+                       status = smbXsrv_session_find_channel(session, xconn, &c);
+                       if (!NT_STATUS_IS_OK(status)) {
+                               continue;
+                       }
+
+                       /*
+                        * This is what current Windows servers
+                        * do, they don't retry on all available
+                        * channels. They only use the last channel.
+                        *
+                        * But it doesn't match the specification
+                        * in [MS-SMB2] "3.3.4.6 Object Store Indicates an
+                        * Oplock Break"
+                        *
+                        * Per default disable_oplock_break_retries is false
+                        * and we behave like the specification.
+                        */
+                       if (pb->disable_oplock_break_retries) {
+                               oplock_xconn = xconn;
+                               continue;
+                       }
+               }
+
+               if (xconn->channel_id > pb->last_channel_id) {
+                       /*
+                        * multichannel case
+                        */
+                       break;
+               }
+       }
+
+       if (xconn == NULL) {
+               xconn = oplock_xconn;
+       }
+
+       if (xconn == NULL) {
+               /*
+                * If there's no remaining connection available
+                * tell the caller to stop...
+                */
+               return NT_STATUS_ABANDONED;
+       }
+
+       pb->last_channel_id = xconn->channel_id;
 
-       ok = smb2_setup_nbt_length(state->vector,
-                                  1 + SMBD_SMB2_NUM_IOV_PER_REQ);
-       if (!ok) {
-               return NT_STATUS_INVALID_PARAMETER_MIX;
+       subreq = smbd_smb2_break_send(pb,
+                                     client->raw_ev_ctx,
+                                     xconn,
+                                     pb->session_id,
+                                     pb->body.generic,
+                                     pb->body_len);
+       if (subreq == NULL) {
+               return NT_STATUS_NO_MEMORY;
        }
+       tevent_req_set_callback(subreq,
+                               smbXsrv_pending_break_done,
+                               pb);
 
-       if (do_encryption) {
-               DATA_BLOB encryption_key = session->global->encryption_key;
+       return NT_STATUS_OK;
+}
 
-               status = smb2_signing_encrypt_pdu(encryption_key,
-                                       xconn->smb2.server.cipher,
-                                       &state->vector[1+SMBD_SMB2_TF_IOV_OFS],
-                                       SMBD_SMB2_NUM_IOV_PER_REQ);
+static void smbXsrv_pending_break_done(struct tevent_req *subreq)
+{
+       struct smbXsrv_pending_break *pb =
+               tevent_req_callback_data(subreq,
+               struct smbXsrv_pending_break);
+       struct smbXsrv_client *client = pb->client;
+       NTSTATUS status;
+
+       status = smbd_smb2_break_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (!NT_STATUS_IS_OK(status)) {
+               status = smbXsrv_pending_break_submit(pb);
+               if (NT_STATUS_EQUAL(status, NT_STATUS_ABANDONED)) {
+                       /*
+                        * If there's no remaining connection
+                        * there's no need to send a break again.
+                        */
+                       goto remove;
+               }
                if (!NT_STATUS_IS_OK(status)) {
-                       return status;
+                       smbd_server_disconnect_client(client, nt_errstr(status));
+                       return;
                }
+               return;
        }
 
-       state->queue_entry.mem_ctx = state;
-       state->queue_entry.vector = state->vector;
-       state->queue_entry.count = ARRAY_SIZE(state->vector);
-       DLIST_ADD_END(xconn->smb2.send_queue, &state->queue_entry);
-       xconn->smb2.send_queue_len++;
+remove:
+       DLIST_REMOVE(client->pending_breaks, pb);
+       TALLOC_FREE(pb);
 
-       status = smbd_smb2_flush_send_queue(xconn);
+       status = smbXsrv_client_pending_breaks_updated(client);
        if (!NT_STATUS_IS_OK(status)) {
-               return status;
+               smbd_server_disconnect_client(client, nt_errstr(status));
+               return;
        }
-
-       return NT_STATUS_OK;
 }
 
-NTSTATUS smbd_smb2_send_oplock_break(struct smbXsrv_connection *xconn,
-                                    struct smbXsrv_session *session,
-                                    struct smbXsrv_tcon *tcon,
+NTSTATUS smbd_smb2_send_oplock_break(struct smbXsrv_client *client,
                                     struct smbXsrv_open *op,
                                     uint8_t oplock_level)
 {
-       uint8_t body[0x18];
+       struct smbXsrv_pending_break *pb = NULL;
+       uint8_t *body = NULL;
+
+       pb = smbXsrv_pending_break_create(client,
+                                         op->compat->vuid);
+       if (pb == NULL) {
+               return NT_STATUS_NO_MEMORY;
+       }
+       pb->body_len = sizeof(pb->body.oplock);
+       body = pb->body.oplock;
 
-       SSVAL(body, 0x00, sizeof(body));
+       SSVAL(body, 0x00, pb->body_len);
        SCVAL(body, 0x02, oplock_level);
        SCVAL(body, 0x03, 0);           /* reserved */
        SIVAL(body, 0x04, 0);           /* reserved */
        SBVAL(body, 0x08, op->global->open_persistent_id);
        SBVAL(body, 0x10, op->global->open_volatile_id);
 
-       return smbd_smb2_send_break(xconn, NULL, NULL, body, sizeof(body));
+       return smbXsrv_pending_break_schedule(pb);
 }
 
-NTSTATUS smbd_smb2_send_lease_break(struct smbXsrv_connection *xconn,
+NTSTATUS smbd_smb2_send_lease_break(struct smbXsrv_client *client,
                                    uint16_t new_epoch,
                                    uint32_t lease_flags,
                                    struct smb2_lease_key *lease_key,
                                    uint32_t current_lease_state,
                                    uint32_t new_lease_state)
 {
-       uint8_t body[0x2c];
+       struct smbXsrv_pending_break *pb = NULL;
+       uint8_t *body = NULL;
+
+       pb = smbXsrv_pending_break_create(client,
+                                         0); /* no session_id */
+       if (pb == NULL) {
+               return NT_STATUS_NO_MEMORY;
+       }
+       pb->body_len = sizeof(pb->body.lease);
+       body = pb->body.lease;
 
-       SSVAL(body, 0x00, sizeof(body));
+       SSVAL(body, 0x00, pb->body_len);
        SSVAL(body, 0x02, new_epoch);
        SIVAL(body, 0x04, lease_flags);
        SBVAL(body, 0x08, lease_key->data[0]);
@@ -3404,7 +4460,7 @@ NTSTATUS smbd_smb2_send_lease_break(struct smbXsrv_connection *xconn,
        SIVAL(body, 0x24, 0);           /* AccessMaskHint, MUST be 0 */
        SIVAL(body, 0x28, 0);           /* ShareMaskHint, MUST be 0 */
 
-       return smbd_smb2_send_break(xconn, NULL, NULL, body, sizeof(body));
+       return smbXsrv_pending_break_schedule(pb);
 }
 
 static bool is_smb2_recvfile_write(struct smbd_smb2_request_read_state *state)
@@ -3481,6 +4537,9 @@ static bool is_smb2_recvfile_write(struct smbd_smb2_request_read_state *state)
        if (IS_PRINT(fsp->conn)) {
                return false;
        }
+       if (fsp_is_alternate_stream(fsp)) {
+               return false;
+       }
 
        DEBUG(10,("Doing recvfile write len = %u\n",
                (unsigned int)(state->pktfull - state->pktlen)));
@@ -3490,8 +4549,8 @@ static bool is_smb2_recvfile_write(struct smbd_smb2_request_read_state *state)
 
 static NTSTATUS smbd_smb2_request_next_incoming(struct smbXsrv_connection *xconn)
 {
-       struct smbd_server_connection *sconn = xconn->client->sconn;
        struct smbd_smb2_request_read_state *state = &xconn->smb2.request_read_state;
+       struct smbd_smb2_request *req = NULL;
        size_t max_send_queue_len;
        size_t cur_send_queue_len;
 
@@ -3523,21 +4582,29 @@ static NTSTATUS smbd_smb2_request_next_incoming(struct smbXsrv_connection *xconn
        }
 
        /* ask for the next request */
-       ZERO_STRUCTP(state);
-       state->req = smbd_smb2_request_allocate(xconn);
-       if (state->req == NULL) {
+       req = smbd_smb2_request_allocate(xconn);
+       if (req == NULL) {
                return NT_STATUS_NO_MEMORY;
        }
-       state->req->sconn = sconn;
-       state->req->xconn = xconn;
-       state->min_recv_size = lp_min_receive_file_size();
+       *state = (struct smbd_smb2_request_read_state) {
+               .req = req,
+               .min_recv_size = lp_min_receive_file_size(),
+               ._vector = {
+                       [0] = (struct iovec) {
+                               .iov_base = (void *)state->hdr.nbt,
+                               .iov_len = NBT_HDR_SIZE,
+                       },
+               },
+               .vector = state->_vector,
+               .count = 1,
+       };
 
        TEVENT_FD_READABLE(xconn->transport.fde);
 
        return NT_STATUS_OK;
 }
 
-void smbd_smb2_process_negprot(struct smbXsrv_connection *xconn,
+NTSTATUS smbd_smb2_process_negprot(struct smbXsrv_connection *xconn,
                               uint64_t expected_seq_low,
                               const uint8_t *inpdu, size_t size)
 {
@@ -3551,25 +4618,42 @@ void smbd_smb2_process_negprot(struct smbXsrv_connection *xconn,
        status = smbd_initialize_smb2(xconn, expected_seq_low);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
+       }
+
+       /*
+        * If a new connection joins the process, when we're
+        * already in a "pending break cycle", we need to
+        * turn on the ack checker on the new connection.
+        */
+       status = smbXsrv_client_pending_breaks_updated(xconn->client);
+       if (!NT_STATUS_IS_OK(status)) {
+               /*
+                * If there's a problem, we disconnect the whole
+                * client with all connections here!
+                *
+                * Instead of just the new connection.
+                */
+               smbd_server_disconnect_client(xconn->client, nt_errstr(status));
+               return status;
        }
 
        status = smbd_smb2_request_create(xconn, inpdu, size, &req);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
        }
 
        status = smbd_smb2_request_validate(req);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
        }
 
        status = smbd_smb2_request_setup_out(req);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
        }
 
 #ifdef WITH_PROFILE
@@ -3584,16 +4668,17 @@ void smbd_smb2_process_negprot(struct smbXsrv_connection *xconn,
        status = smbd_smb2_request_dispatch(req);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
        }
 
        status = smbd_smb2_request_next_incoming(xconn);
        if (!NT_STATUS_IS_OK(status)) {
                smbd_server_connection_terminate(xconn, nt_errstr(status));
-               return;
+               return status;
        }
 
        sconn->num_requests++;
+       return NT_STATUS_OK;
 }
 
 static int socket_error_from_errno(int ret,
@@ -3647,7 +4732,40 @@ static int socket_error_from_errno(int ret,
        return sys_errno;
 }
 
-static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
+static NTSTATUS smbd_smb2_advance_send_queue(struct smbXsrv_connection *xconn,
+                                            struct smbd_smb2_send_queue **_e,
+                                            size_t n)
+{
+       struct smbd_smb2_send_queue *e = *_e;
+       bool ok;
+
+       xconn->ack.unacked_bytes += n;
+
+       ok = iov_advance(&e->vector, &e->count, n);
+       if (!ok) {
+               return NT_STATUS_INTERNAL_ERROR;
+       }
+
+       if (e->count > 0) {
+               return NT_STATUS_RETRY;
+       }
+
+       xconn->smb2.send_queue_len--;
+       DLIST_REMOVE(xconn->smb2.send_queue, e);
+
+       if (e->ack.req == NULL) {
+               *_e = NULL;
+               talloc_free(e->mem_ctx);
+               return NT_STATUS_OK;
+       }
+
+       e->ack.required_acked_bytes = xconn->ack.unacked_bytes;
+       DLIST_ADD_END(xconn->ack.queue, e);
+
+       return NT_STATUS_OK;
+}
+
+static NTSTATUS smbd_smb2_flush_with_sendmsg(struct smbXsrv_connection *xconn)
 {
        int ret;
        int err;
@@ -3661,7 +4779,19 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
 
        while (xconn->smb2.send_queue != NULL) {
                struct smbd_smb2_send_queue *e = xconn->smb2.send_queue;
-               bool ok;
+               unsigned sendmsg_flags = 0;
+
+               if (!NT_STATUS_IS_OK(xconn->transport.status)) {
+                       /*
+                        * we're not supposed to do any io,
+                        * just flush all pending stuff.
+                        */
+                       xconn->smb2.send_queue_len--;
+                       DLIST_REMOVE(xconn->smb2.send_queue, e);
+
+                       talloc_free(e->mem_ctx);
+                       continue;
+               }
 
                if (e->sendfile_header != NULL) {
                        size_t size = 0;
@@ -3698,6 +4828,9 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
 
                        xconn->smb2.send_queue_len--;
                        DLIST_REMOVE(xconn->smb2.send_queue, e);
+
+                       size += e->sendfile_body_size;
+
                        /*
                         * This triggers the sendfile path via
                         * the destructor.
@@ -3705,12 +4838,27 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
                        talloc_free(e->mem_ctx);
 
                        if (!NT_STATUS_IS_OK(status)) {
+                               smbXsrv_connection_disconnect_transport(xconn,
+                                                                       status);
                                return status;
                        }
+                       xconn->ack.unacked_bytes += size;
                        continue;
                }
 
-               ret = writev(xconn->transport.sock, e->vector, e->count);
+               e->msg = (struct msghdr) {
+                       .msg_iov = e->vector,
+                       .msg_iovlen = e->count,
+               };
+
+#ifdef MSG_NOSIGNAL
+               sendmsg_flags |= MSG_NOSIGNAL;
+#endif
+#ifdef MSG_DONTWAIT
+               sendmsg_flags |= MSG_DONTWAIT;
+#endif
+
+               ret = sendmsg(xconn->transport.sock, &e->msg, sendmsg_flags);
                if (ret == 0) {
                        /* propagate end of file */
                        return NT_STATUS_INTERNAL_ERROR;
@@ -3722,23 +4870,35 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
                        return NT_STATUS_OK;
                }
                if (err != 0) {
-                       return map_nt_error_from_unix_common(err);
-               }
-
-               ok = iov_advance(&e->vector, &e->count, ret);
-               if (!ok) {
-                       return NT_STATUS_INTERNAL_ERROR;
+                       status = map_nt_error_from_unix_common(err);
+                       smbXsrv_connection_disconnect_transport(xconn,
+                                                               status);
+                       return status;
                }
 
-               if (e->count > 0) {
-                       /* we have more to write */
+               status = smbd_smb2_advance_send_queue(xconn, &e, ret);
+               if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
+                       /* retry later */
                        TEVENT_FD_WRITEABLE(xconn->transport.fde);
                        return NT_STATUS_OK;
                }
+               if (!NT_STATUS_IS_OK(status)) {
+                       smbXsrv_connection_disconnect_transport(xconn,
+                                                               status);
+                       return status;
+               }
+       }
 
-               xconn->smb2.send_queue_len--;
-               DLIST_REMOVE(xconn->smb2.send_queue, e);
-               talloc_free(e->mem_ctx);
+       return NT_STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
+{
+       NTSTATUS status;
+
+       status = smbd_smb2_flush_with_sendmsg(xconn);
+       if (!NT_STATUS_EQUAL(status, NT_STATUS_MORE_PROCESSING_REQUIRED)) {
+               return status;
        }
 
        /*
@@ -3754,80 +4914,36 @@ static NTSTATUS smbd_smb2_flush_send_queue(struct smbXsrv_connection *xconn)
        return NT_STATUS_OK;
 }
 
-static NTSTATUS smbd_smb2_io_handler(struct smbXsrv_connection *xconn,
-                                    uint16_t fde_flags)
+static NTSTATUS smbd_smb2_advance_incoming(struct smbXsrv_connection *xconn, size_t n)
 {
        struct smbd_server_connection *sconn = xconn->client->sconn;
        struct smbd_smb2_request_read_state *state = &xconn->smb2.request_read_state;
        struct smbd_smb2_request *req = NULL;
        size_t min_recvfile_size = UINT32_MAX;
-       int ret;
-       int err;
-       bool retry;
        NTSTATUS status;
        NTTIME now;
+       bool ok;
 
-       if (!NT_STATUS_IS_OK(xconn->transport.status)) {
-               /*
-                * we're not supposed to do any io
-                */
-               TEVENT_FD_NOT_READABLE(xconn->transport.fde);
-               TEVENT_FD_NOT_WRITEABLE(xconn->transport.fde);
-               return NT_STATUS_OK;
-       }
-
-       if (fde_flags & TEVENT_FD_WRITE) {
-               status = smbd_smb2_flush_send_queue(xconn);
-               if (!NT_STATUS_IS_OK(status)) {
-                       return status;
-               }
-       }
-
-       if (!(fde_flags & TEVENT_FD_READ)) {
-               return NT_STATUS_OK;
-       }
-
-       if (state->req == NULL) {
-               TEVENT_FD_NOT_READABLE(xconn->transport.fde);
-               return NT_STATUS_OK;
+       ok = iov_advance(&state->vector, &state->count, n);
+       if (!ok) {
+               return NT_STATUS_INTERNAL_ERROR;
        }
 
-again:
-       if (!state->hdr.done) {
-               state->hdr.done = true;
-
-               state->vector.iov_base = (void *)state->hdr.nbt;
-               state->vector.iov_len = NBT_HDR_SIZE;
+       if (state->count > 0) {
+               return NT_STATUS_PENDING;
        }
 
-       ret = readv(xconn->transport.sock, &state->vector, 1);
-       if (ret == 0) {
-               /* propagate end of file */
-               return NT_STATUS_END_OF_FILE;
-       }
-       err = socket_error_from_errno(ret, errno, &retry);
-       if (retry) {
-               /* retry later */
-               TEVENT_FD_READABLE(xconn->transport.fde);
-               return NT_STATUS_OK;
-       }
-       if (err != 0) {
-               return map_nt_error_from_unix_common(err);
-       }
+       if (state->pktlen > 0) {
+               if (!state->doing_receivefile) {
+                       /*
+                        * we have all the data.
+                        */
+                       goto got_full;
+               }
 
-       if (ret < state->vector.iov_len) {
-               uint8_t *base;
-               base = (uint8_t *)state->vector.iov_base;
-               base += ret;
-               state->vector.iov_base = (void *)base;
-               state->vector.iov_len -= ret;
-               /* we have more to read */
-               TEVENT_FD_READABLE(xconn->transport.fde);
-               return NT_STATUS_OK;
-       }
+               if (!is_smb2_recvfile_write(state)) {
+                       size_t ofs = state->pktlen;
 
-       if (state->pktlen > 0) {
-               if (state->doing_receivefile && !is_smb2_recvfile_write(state)) {
                        /*
                         * Not a possible receivefile write.
                         * Read the rest of the data.
@@ -3842,18 +4958,20 @@ again:
                                return NT_STATUS_NO_MEMORY;
                        }
 
-                       state->vector.iov_base = (void *)(state->pktbuf +
-                               state->pktlen);
-                       state->vector.iov_len = (state->pktfull -
-                               state->pktlen);
+                       state->_vector[0]  = (struct iovec) {
+                               .iov_base = (void *)(state->pktbuf + ofs),
+                               .iov_len = (state->pktfull - ofs),
+                       };
+                       state->vector = state->_vector;
+                       state->count = 1;
 
                        state->pktlen = state->pktfull;
-                       goto again;
+                       return NT_STATUS_RETRY;
                }
 
                /*
-                * Either this is a receivefile write so we've
-                * done a short read, or if not we have all the data.
+                * This is a receivefile write so we've
+                * done a short read.
                 */
                goto got_full;
        }
@@ -3894,10 +5012,14 @@ again:
                return NT_STATUS_NO_MEMORY;
        }
 
-       state->vector.iov_base = (void *)state->pktbuf;
-       state->vector.iov_len = state->pktlen;
+       state->_vector[0] = (struct iovec) {
+               .iov_base = (void *)state->pktbuf,
+               .iov_len = state->pktlen,
+       };
+       state->vector = state->_vector;
+       state->count = 1;
 
-       goto again;
+       return NT_STATUS_RETRY;
 
 got_full:
 
@@ -3906,15 +5028,22 @@ got_full:
                         state->hdr.nbt[0]));
 
                req = state->req;
-               ZERO_STRUCTP(state);
-               state->req = req;
-               state->min_recv_size = lp_min_receive_file_size();
-               req = NULL;
-               goto again;
+               *state = (struct smbd_smb2_request_read_state) {
+                       .req = req,
+                       .min_recv_size = lp_min_receive_file_size(),
+                       ._vector = {
+                               [0] = (struct iovec) {
+                                       .iov_base = (void *)state->hdr.nbt,
+                                       .iov_len = NBT_HDR_SIZE,
+                               },
+                       },
+                       .vector = state->_vector,
+                       .count = 1,
+               };
+               return NT_STATUS_RETRY;
        }
 
        req = state->req;
-       state->req = NULL;
 
        req->request_time = timeval_current();
        now = timeval_to_nttime(&req->request_time);
@@ -3938,7 +5067,9 @@ got_full:
                req->smb1req->unread_bytes = state->pktfull - state->pktlen;
        }
 
-       ZERO_STRUCTP(state);
+       *state = (struct smbd_smb2_request_read_state) {
+               .req = NULL,
+       };
 
        req->current_idx = 1;
 
@@ -3983,6 +5114,96 @@ got_full:
        return NT_STATUS_OK;
 }
 
+static NTSTATUS smbd_smb2_io_handler(struct smbXsrv_connection *xconn,
+                                    uint16_t fde_flags)
+{
+       struct smbd_smb2_request_read_state *state = &xconn->smb2.request_read_state;
+       unsigned recvmsg_flags = 0;
+       int ret;
+       int err;
+       bool retry;
+       NTSTATUS status;
+
+       if (!NT_STATUS_IS_OK(xconn->transport.status)) {
+               /*
+                * we're not supposed to do any io
+                */
+               TEVENT_FD_NOT_READABLE(xconn->transport.fde);
+               TEVENT_FD_NOT_WRITEABLE(xconn->transport.fde);
+               return NT_STATUS_OK;
+       }
+
+       if (fde_flags & TEVENT_FD_WRITE) {
+               status = smbd_smb2_flush_send_queue(xconn);
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
+               }
+       }
+
+       if (!(fde_flags & TEVENT_FD_READ)) {
+               return NT_STATUS_OK;
+       }
+
+       if (state->req == NULL) {
+               TEVENT_FD_NOT_READABLE(xconn->transport.fde);
+               return NT_STATUS_OK;
+       }
+
+again:
+
+       state->msg = (struct msghdr) {
+               .msg_iov = state->vector,
+               .msg_iovlen = state->count,
+       };
+
+#ifdef MSG_NOSIGNAL
+       recvmsg_flags |= MSG_NOSIGNAL;
+#endif
+#ifdef MSG_DONTWAIT
+       recvmsg_flags |= MSG_DONTWAIT;
+#endif
+
+       ret = recvmsg(xconn->transport.sock, &state->msg, recvmsg_flags);
+       if (ret == 0) {
+               /* propagate end of file */
+               status = NT_STATUS_END_OF_FILE;
+               smbXsrv_connection_disconnect_transport(xconn,
+                                                       status);
+               return status;
+       }
+       err = socket_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               TEVENT_FD_READABLE(xconn->transport.fde);
+               return NT_STATUS_OK;
+       }
+       if (err != 0) {
+               status = map_nt_error_from_unix_common(err);
+               smbXsrv_connection_disconnect_transport(xconn,
+                                                       status);
+               return status;
+       }
+
+       status = smbd_smb2_advance_incoming(xconn, ret);
+       if (NT_STATUS_EQUAL(status, NT_STATUS_PENDING)) {
+               /* we have more to read */
+               TEVENT_FD_READABLE(xconn->transport.fde);
+               return NT_STATUS_OK;
+       }
+       if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
+               /*
+                * smbd_smb2_advance_incoming setup a new vector
+                * that we should try to read immediately.
+                */
+               goto again;
+       }
+       if (!NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+
+       return NT_STATUS_OK;
+}
+
 static void smbd_smb2_connection_handler(struct tevent_context *ev,
                                         struct tevent_fd *fde,
                                         uint16_t flags,