smbd: use async dos_mode_at_send in smbd_smb2_query_directory_send()
authorRalph Boehme <slow@samba.org>
Wed, 25 Jul 2018 17:14:25 +0000 (19:14 +0200)
committerRalph Boehme <slow@samba.org>
Fri, 27 Jul 2018 11:07:15 +0000 (13:07 +0200)
Finally: use the new dos_mode_at_send() in the directory enumeration
loop. This means that fetching the DOS attributes for directory entries
is done asynchronously with regard to the enumeration loop.

As the DOS attribute is typically read from an extended attribute in the
filesystem, this avoids sequentially blocking on IO. If the IO subsystem
is slow servicing these requests, enabling async processing can result in
performance improvements.

A parametric option

  smbd:async dosmode = true | false (default: false)

can be used to enable the new async processing.

Simulating slow IO with usleep(5000) in the synchronous and asynchronous
versions of SMB_VFS_GET_DOS_ATTRIBUTES(), the results of enumerating a
directory with 10,000 files are:

    smbd:async dosmode = no:

        $ time bin/smbclient -U slow%x //localhost/test -c "ls dir\*" > /dev/null
        real    0m59.597s
        user    0m0.024s
        sys     0m0.012s

    smbd:async dosmode = yes:

        $ time bin/smbclient -U slow%x //localhost/test -c "ls dir\*" > /dev/null
        real    0m0.698s
        user    0m0.038s
        sys     0m0.025s

Performance gains in real world workloads depend on whether the actual
IO requests can be merged and parallelized by the kernel. Without such
wins at the IO layer, the async processing may even be slower than the
sync processing due to the additional overhead.

The following parameters can be used to adapt async processing behaviour
for specific workloads and systems:

        aio max threads = X (default: 100)
        smbd:max async dosmode = Y (default: "aio max threads" * 2)

By default we have at most twice the number of async requests in flight
as threads provided by the underlying threadpool. This ensures a worker
thread that finishes a job can directly pick up a new one without going
to sleep.

It may be advisable to reduce the number of threads to avoid scheduling
overhead while also increasing "smbd:max async dosmode".

Note that we disable async processing for certain VFS modules in the VFS
connect function to avoid the overhead of triggering the sync fallback
in dos_mode_at_send(). This is done for VFS modules that implement the
sync SMB_VFS_GET_DOS_ATTRIBUTES(), but not the async version (gpfs), and
for VFS modules that don't share a real filesystem where fchdir() can be
used (ceph, gluster). It is disabled for catia, because we realized that
the catia name translation macros used on
fsps (CATIA_FETCH_FSP_[PRE|POST]_NEXT) have a bug (#13547).

We use threadpool = smb_vfs_ev_glue_tp_chdir_safe() and then
pthreadpool_tevent_max_threads(threadpool) to get the number of maximum
worker threads which matches the pool used by the low level
SMB_VFS_GETXATTRAT_[SEND|RECV] implementation in vfs_default.

This is a terrible abstraction leak that should be removed in the future
by maybe making it possible to ask a VFS function which threadpool it
uses, internally supporting chaining so VFS function FOO that internally
uses BAR can forward the question to BAR.

On a hypothetical system that had a getxattrat(dirfd, path, ...)
syscall and at the same time doesn't support per-thread current working
directories (e.g. FreeBSD doesn't have the latter) but has support for
per-thread-credentials, pthreadpool_tevent_max_threads() on the
tp_chdir_safe threadpool returns 1.

So when hooking the hypothetical getxattrat() into the async
SMB_VFS_GETXATTRAT_[SEND|RECV] implementation in a VFS module, the
implementation could use the tp_path_safe threadpool, but the SMB2
layer would use the wrong threadpool in the call to
pthreadpool_tevent_max_threads(), resulting in no parallelism.

Signed-off-by: Ralph Boehme <slow@samba.org>
Reviewed-by: Stefan Metzmacher <metze@samba.org>
source3/modules/vfs_catia.c
source3/modules/vfs_ceph.c
source3/modules/vfs_glusterfs.c
source3/modules/vfs_gpfs.c
source3/smbd/smb2_query_directory.c

index 12995dda9bfaf87933972d40b9f1804a679223c9..c362be764cc5bc41be30957b94a38a6b63e9e037 100644 (file)
@@ -158,6 +158,19 @@ static NTSTATUS catia_string_replace_allocate(connection_struct *conn,
        return status;
 }
 
+static int catia_connect(struct vfs_handle_struct *handle,
+                        const char *service,
+                        const char *user)
+{
+       /*
+        * Force "smbd:async dosmode" off: the catia name translation macros
+        * used on fsps (CATIA_FETCH_FSP_[PRE|POST]_NEXT) have a bug (#13547).
+        */
+       lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
+       return SMB_VFS_NEXT_CONNECT(handle, service, user);
+}
+
 static DIR *catia_opendir(vfs_handle_struct *handle,
                        const struct smb_filename *smb_fname,
                        const char *mask,
@@ -2405,6 +2418,8 @@ static NTSTATUS catia_set_dos_attributes(struct vfs_handle_struct *handle,
 }
 
 static struct vfs_fn_pointers vfs_catia_fns = {
+       .connect_fn = catia_connect,
+
        /* Directory operations */
        .mkdir_fn = catia_mkdir,
        .rmdir_fn = catia_rmdir,
index 8b709eddc908473a19ef951e89cfa0d7daf886b9..d863c8add5a9a0adcc5e59508911061f65a3bf71 100644 (file)
@@ -132,6 +132,11 @@ static int cephwrap_connect(struct vfs_handle_struct *handle,  const char *servi
        handle->data = cmount;
        cmount_cnt++;
 
+       /*
+        * Unless we have an async implementation of getxattrat turn this off.
+        */
+       lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
        return 0;
 
 err_cm_release:
index 98be3c6d4e2a7bf6123987269380f9098f4fb627..431f6fff48c02d08268003f0df3b981b510af2a0 100644 (file)
@@ -362,6 +362,11 @@ static int vfs_gluster_connect(struct vfs_handle_struct *handle,
         */
        lp_do_parameter(SNUM(handle->conn), "shadow:mountpoint", "/");
 
+       /*
+        * Unless we have an async implementation of getxattrat turn this off.
+        */
+       lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
 done:
        if (ret < 0) {
                if (fs)
index 5f21bc0826d5db49643cf6f7f34becbc3d226acf..982dc19e785fb58a1e97cccfd3e6b5771afdcc77 100644 (file)
@@ -2163,6 +2163,12 @@ static int vfs_gpfs_connect(struct vfs_handle_struct *handle,
                }
        }
 
+       /*
+        * Unless we have an async implementation of get_dos_attributes turn
+        * this off.
+        */
+       lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
        return 0;
 }
 
index aeba134810fa521a2168ac38b15b1486f3499a6b..9052779081783fa534c86ca7a01708b74c6a3acc 100644 (file)
@@ -25,6 +25,7 @@
 #include "trans2.h"
 #include "../lib/util/tevent_ntstatus.h"
 #include "system/filesys.h"
+#include "lib/pthreadpool/pthreadpool_tevent.h"
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_SMB2
@@ -205,7 +206,18 @@ static struct tevent_req *fetch_write_time_send(TALLOC_CTX *mem_ctx,
                                                bool *stop);
 static NTSTATUS fetch_write_time_recv(struct tevent_req *req);
 
+static struct tevent_req *fetch_dos_mode_send(
+       TALLOC_CTX *mem_ctx,
+       struct smb_vfs_ev_glue *evg,
+       struct files_struct *dir_fsp,
+       struct smb_filename **smb_fname,
+       uint32_t info_level,
+       uint8_t *entry_marshall_buf);
+
+static NTSTATUS fetch_dos_mode_recv(struct tevent_req *req);
+
 struct smbd_smb2_query_directory_state {
+       struct smb_vfs_ev_glue *evg;
        struct tevent_context *ev;
        struct smbd_smb2_request *smb2req;
        uint64_t async_sharemode_count;
@@ -225,13 +237,18 @@ struct smbd_smb2_query_directory_state {
        uint32_t dirtype;
        bool dont_descend;
        bool ask_sharemode;
+       bool async_dosmode;
        bool async_ask_sharemode;
        int last_entry_off;
+       struct pthreadpool_tevent *tp_chdir_safe;
+       size_t max_async_dosmode_active;
+       uint32_t async_dosmode_active;
        bool done;
 };
 
 static bool smb2_query_directory_next_entry(struct tevent_req *req);
 static void smb2_query_directory_fetch_write_time_done(struct tevent_req *subreq);
+static void smb2_query_directory_dos_mode_done(struct tevent_req *subreq);
 static void smb2_query_directory_waited(struct tevent_req *subreq);
 
 static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
@@ -260,7 +277,9 @@ static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
        if (req == NULL) {
                return NULL;
        }
+       state->evg = conn->user_vfs_evg;
        state->ev = ev;
+       state->tp_chdir_safe = smb_vfs_ev_glue_tp_chdir_safe(state->evg);
        state->fsp = fsp;
        state->smb2req = smb2req;
        state->in_output_buffer_length = in_output_buffer_length;
@@ -488,12 +507,31 @@ static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
        if (state->info_level != SMB_FIND_FILE_NAMES_INFO) {
                state->ask_sharemode = lp_parm_bool(
                        SNUM(conn), "smbd", "search ask sharemode", true);
+
+               state->async_dosmode = lp_parm_bool(
+                        SNUM(conn), "smbd", "async dosmode", false);
        }
 
        if (state->ask_sharemode && lp_clustering()) {
                state->ask_sharemode = false;
                state->async_ask_sharemode = true;
+       }
 
+       if (state->async_dosmode) {
+               size_t max_threads;
+
+               max_threads = pthreadpool_tevent_max_threads(state->tp_chdir_safe);
+
+               state->max_async_dosmode_active = lp_parm_ulong(
+                       SNUM(conn), "smbd", "max async dosmode",
+                       max_threads * 2);
+
+               if (state->max_async_dosmode_active == 0) {
+                       state->max_async_dosmode_active = 1;
+               }
+       }
+
+       if (state->async_dosmode || state->async_ask_sharemode) {
                /*
                 * Should we only set async_internal
                 * if we're not the last request in
@@ -537,6 +575,7 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
        int space_remaining = state->in_output_buffer_length - off;
        struct file_id file_id;
        NTSTATUS status;
+       bool get_dosmode = !state->async_dosmode;
        bool stop = false;
 
        SMB_ASSERT(space_remaining >= 0);
@@ -551,7 +590,7 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
                                           false, /* requires_resume_key */
                                           state->dont_descend,
                                           state->ask_sharemode,
-                                          true,
+                                          get_dosmode,
                                           8, /* align to 8 bytes */
                                           false, /* no padding */
                                           &state->pdata,
@@ -605,6 +644,36 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
                state->async_sharemode_count++;
        }
 
+       if (state->async_dosmode) {
+               struct tevent_req *subreq = NULL;
+               uint8_t *buf = NULL;
+               size_t outstanding_aio;
+
+               buf = (uint8_t *)state->base_data + state->last_entry_off;
+
+               subreq = fetch_dos_mode_send(state,
+                                            state->evg,
+                                            state->fsp,
+                                            &smb_fname,
+                                            state->info_level,
+                                            buf);
+               if (tevent_req_nomem(subreq, req)) {
+                       return true;
+               }
+               tevent_req_set_callback(subreq,
+                                       smb2_query_directory_dos_mode_done,
+                                       req);
+
+               state->async_dosmode_active++;
+
+               outstanding_aio = pthreadpool_tevent_queued_jobs(
+                                       state->tp_chdir_safe);
+
+               if (outstanding_aio > state->max_async_dosmode_active) {
+                       stop = true;
+               }
+       }
+
        TALLOC_FREE(smb_fname);
 
        state->num++;
@@ -625,6 +694,10 @@ last_entry_done:
                return true;
        }
 
+       if (state->async_dosmode_active > 0) {
+               return true;
+       }
+
        if (state->find_async_delay_usec > 0) {
                struct timeval tv;
                struct tevent_req *subreq = NULL;
@@ -674,6 +747,28 @@ static void smb2_query_directory_fetch_write_time_done(struct tevent_req *subreq
        return;
 }
 
+static void smb2_query_directory_dos_mode_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req =
+               tevent_req_callback_data(subreq,
+               struct tevent_req);
+       struct smbd_smb2_query_directory_state *state =
+               tevent_req_data(req,
+               struct smbd_smb2_query_directory_state);
+       NTSTATUS status;
+
+       status = fetch_dos_mode_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (tevent_req_nterror(req, status)) {
+               return;
+       }
+
+       state->async_dosmode_active--;
+
+       smb2_query_directory_check_next_entry(req);
+       return;
+}
+
 static void smb2_query_directory_check_next_entry(struct tevent_req *req)
 {
        struct smbd_smb2_query_directory_state *state = tevent_req_data(
@@ -687,7 +782,9 @@ static void smb2_query_directory_check_next_entry(struct tevent_req *req)
                return;
        }
 
-       if (state->async_sharemode_count > 0) {
+       if (state->async_sharemode_count > 0 ||
+           state->async_dosmode_active > 0)
+       {
                return;
        }
 
@@ -860,3 +957,127 @@ static NTSTATUS fetch_write_time_recv(struct tevent_req *req)
        tevent_req_received(req);
        return NT_STATUS_OK;
 }
+
+struct fetch_dos_mode_state {
+       struct files_struct *dir_fsp;
+       struct smb_filename *smb_fname;
+       uint32_t info_level;
+       uint8_t *entry_marshall_buf;
+};
+
+static void fetch_dos_mode_done(struct tevent_req *subreq);
+
+static struct tevent_req *fetch_dos_mode_send(
+                       TALLOC_CTX *mem_ctx,
+                       struct smb_vfs_ev_glue *evg,
+                       struct files_struct *dir_fsp,
+                       struct smb_filename **smb_fname,
+                       uint32_t info_level,
+                       uint8_t *entry_marshall_buf)
+{
+       struct tevent_context *ev = smb_vfs_ev_glue_ev_ctx(evg);
+       struct tevent_req *req = NULL;
+       struct fetch_dos_mode_state *state = NULL;
+       struct tevent_req *subreq = NULL;
+
+       req = tevent_req_create(mem_ctx, &state, struct fetch_dos_mode_state);
+       if (req == NULL) {
+               return NULL;
+       }
+       *state = (struct fetch_dos_mode_state) {
+               .dir_fsp = dir_fsp,
+               .info_level = info_level,
+               .entry_marshall_buf = entry_marshall_buf,
+       };
+
+       state->smb_fname = talloc_move(state, smb_fname);
+
+       subreq = dos_mode_at_send(state, evg, dir_fsp, state->smb_fname);
+       if (tevent_req_nomem(subreq, req)) {
+               return tevent_req_post(req, ev);
+       }
+       tevent_req_set_callback(subreq, fetch_dos_mode_done, req);
+
+       return req;
+}
+
+static void fetch_dos_mode_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req =
+               tevent_req_callback_data(subreq,
+               struct tevent_req);
+       struct fetch_dos_mode_state *state =
+               tevent_req_data(req,
+               struct fetch_dos_mode_state);
+       uint32_t dfs_dosmode;
+       uint32_t dosmode;
+       struct timespec btime_ts = {0};
+       off_t dosmode_off;
+       off_t btime_off;
+       NTSTATUS status;
+
+       status = dos_mode_at_recv(subreq, &dosmode);
+       TALLOC_FREE(subreq);
+       if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
+               tevent_req_done(req);
+               return;
+       }
+       if (!NT_STATUS_IS_OK(status)) {
+               tevent_req_nterror(req, status);
+               return;
+       }
+
+       switch (state->info_level) {
+       case SMB_FIND_ID_BOTH_DIRECTORY_INFO:
+       case SMB_FIND_FILE_BOTH_DIRECTORY_INFO:
+       case SMB_FIND_FILE_DIRECTORY_INFO:
+       case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+       case SMB_FIND_ID_FULL_DIRECTORY_INFO:
+               btime_off = 8;
+               dosmode_off = 56;
+               break;
+
+       default:
+               DBG_ERR("Unsupported info_level [%u]\n", state->info_level);
+               tevent_req_nterror(req, NT_STATUS_INVALID_LEVEL);
+               return;
+       }
+
+
+       dfs_dosmode = IVAL(state->entry_marshall_buf, dosmode_off);
+       if (dfs_dosmode == 0) {
+               /*
+                * Only store the fetched DOS mode if the marshalled field is
+                * still 0, i.e. not already populated by the lower layer for
+                * a DFS link in smbd_dirptr_lanman2_mode_fn().
+                */
+               SIVAL(state->entry_marshall_buf, dosmode_off, dosmode);
+       }
+
+       btime_ts = get_create_timespec(state->dir_fsp->conn,
+                                      NULL,
+                                      state->smb_fname);
+       if (lp_dos_filetime_resolution(SNUM(state->dir_fsp->conn))) {
+               dos_filetime_timespec(&btime_ts);
+       }
+
+       put_long_date_timespec(state->dir_fsp->conn->ts_res,
+                              (char *)state->entry_marshall_buf + btime_off,
+                              btime_ts);
+
+       tevent_req_done(req);
+       return;
+}
+
+static NTSTATUS fetch_dos_mode_recv(struct tevent_req *req)
+{
+       NTSTATUS status;
+
+       if (tevent_req_is_nterror(req, &status)) {
+               tevent_req_received(req);
+               return status;
+       }
+
+       tevent_req_received(req);
+       return NT_STATUS_OK;
+}