From 3d280639c4652d6cd35577e333bcd46c2517754c Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 10 Mar 2009 18:02:21 +0100 Subject: [PATCH] Add a vfs_preopen module to hide fs latencies --- docs-xml/manpages-3/vfs_preopen.8.xml | 115 +++++++ source3/Makefile.in | 5 + source3/configure.in | 3 +- source3/modules/vfs_preopen.c | 456 ++++++++++++++++++++++++++ 4 files changed, 578 insertions(+), 1 deletion(-) create mode 100644 docs-xml/manpages-3/vfs_preopen.8.xml create mode 100644 source3/modules/vfs_preopen.c diff --git a/docs-xml/manpages-3/vfs_preopen.8.xml b/docs-xml/manpages-3/vfs_preopen.8.xml new file mode 100644 index 00000000000..a84d4720bb2 --- /dev/null +++ b/docs-xml/manpages-3/vfs_preopen.8.xml @@ -0,0 +1,115 @@ + + + + + + vfs_preopen + 8 + Samba + System Administration tools + 3.3 + + + + vfs_preopen + Hide read latencies for applications reading numbered files + + + + + vfs objects = preopen + + + + + DESCRIPTION + + This VFS module is part of the + samba + 7 suite. + + This module assists applications that want to read numbered + files in sequence with very strict latency requirements. One area + where this happens is in video streaming applications that want to read + one file per frame. + + When you use this module, a number of helper processes are + started that speculatively open files and read a number of bytes to + prime the file system cache, so that later on when the real + application's request comes along, no disk access is necessary. + + This module is stackable. + + + + + + OPTIONS + + + + + preopen:names = /pattern/ + + + preopen:names specifies the file name pattern which should + trigger the preopen helpers to do their work. We assume that + the files are numbered incrementally. 
So if your file names + are numbered FRAME00000.frm FRAME00001.frm and so on you would + list them as preopen:names=/FRAME*.frm/ + + + + + + preopen:num_bytes = BYTES + + + Specifies the number of bytes the helpers should speculatively + read, defaults to 1. + + + + + + preopen:helpers = NUM-PROCS + + + Number of forked helper processes, defaults to 1. + + + + + + preopen:queuelen = NUM-FILES + + + Number of files that should be speculatively opened. Defaults + to the 10 subsequent files. + + + + + + + + + VERSION + This man page is correct for version 3.3 of the Samba suite. + + + + + AUTHOR + + The original Samba software and related utilities + were created by Andrew Tridgell. Samba is now developed + by the Samba Team as an Open Source project similar + to the way the Linux kernel is developed. + + The PREOPEN VFS module was created with contributions from + Volker Lendecke and the developers at IBM. + + + + diff --git a/source3/Makefile.in b/source3/Makefile.in index 6aabcf0c8d2..76fd91a31ee 100644 --- a/source3/Makefile.in +++ b/source3/Makefile.in @@ -667,6 +667,7 @@ VFS_READAHEAD_OBJ = modules/vfs_readahead.o VFS_TSMSM_OBJ = modules/vfs_tsmsm.o VFS_FILEID_OBJ = modules/vfs_fileid.o VFS_AIO_FORK_OBJ = modules/vfs_aio_fork.o +VFS_PREOPEN_OBJ = modules/vfs_preopen.o VFS_SYNCOPS_OBJ = modules/vfs_syncops.o VFS_ACL_XATTR_OBJ = modules/vfs_acl_xattr.o VFS_ACL_TDB_OBJ = modules/vfs_acl_tdb.o @@ -2567,6 +2568,10 @@ bin/aio_fork.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_FORK_OBJ) @echo "Building plugin $@" @$(SHLD_MODULE) $(VFS_AIO_FORK_OBJ) +bin/preopen.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_PREOPEN_OBJ) + @echo "Building plugin $@" + @$(SHLD_MODULE) $(VFS_PREOPEN_OBJ) + bin/acl_xattr.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_ACL_XATTR_OBJ) @echo "Building plugin $@" @$(SHLD_MODULE) $(VFS_ACL_XATTR_OBJ) diff --git a/source3/configure.in b/source3/configure.in index e48ff345540..2af1545d581 100644 --- a/source3/configure.in +++ b/source3/configure.in @@ -417,7 +417,7 @@ dnl 
These have to be built static: default_static_modules="pdb_smbpasswd pdb_tdbsam pdb_wbc_sam rpc_lsarpc rpc_samr rpc_winreg rpc_initshutdown rpc_dssetup rpc_wkssvc rpc_svcctl rpc_ntsvcs rpc_netlogon rpc_netdfs rpc_srvsvc rpc_spoolss2 rpc_eventlog auth_sam auth_unix auth_winbind auth_wbc auth_server auth_domain auth_builtin auth_netlogond vfs_default nss_info_template" dnl These are preferably build shared, and static if dlopen() is not available -default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer" +default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer vfs_preopen" if test "x$developer" = xyes; then default_static_modules="$default_static_modules rpc_rpcecho" @@ -6185,6 +6185,7 @@ SMB_MODULE(vfs_readahead, \$(VFS_READAHEAD_OBJ), "bin/readahead.$SHLIBEXT", VFS) SMB_MODULE(vfs_tsmsm, \$(VFS_TSMSM_OBJ), "bin/tsmsm.$SHLIBEXT", VFS) SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS) SMB_MODULE(vfs_aio_fork, \$(VFS_AIO_FORK_OBJ), "bin/aio_fork.$SHLIBEXT", VFS) +SMB_MODULE(vfs_preopen, \$(VFS_PREOPEN_OBJ), "bin/preopen.$SHLIBEXT", VFS) SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS) SMB_MODULE(vfs_zfsacl, \$(VFS_ZFSACL_OBJ), "bin/zfsacl.$SHLIBEXT", VFS) SMB_MODULE(vfs_notify_fam, \$(VFS_NOTIFY_FAM_OBJ), "bin/notify_fam.$SHLIBEXT", VFS) diff --git a/source3/modules/vfs_preopen.c b/source3/modules/vfs_preopen.c new file mode 100644 index 
00000000000..25b9e7f3e46
--- /dev/null
+++ b/source3/modules/vfs_preopen.c
@@ -0,0 +1,613 @@
+/*
+ * Force a readahead of files by opening them and reading the first bytes
+ *
+ * Copyright (C) Volker Lendecke 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+
+struct preopen_state;
+
+/*
+ * One forked helper process together with the parent's end of the
+ * socketpair used to talk to it.
+ */
+struct preopen_helper {
+	struct preopen_state *state;	/* owning state */
+	struct fd_event *fde;		/* read event on "fd", or NULL */
+	pid_t pid;			/* child pid, or -1 */
+	int fd;				/* parent end of the socketpair, or -1 */
+	bool busy;			/* name outstanding or helper unusable */
+};
+
+/* State attached to the VFS handle by preopen_state_get() */
+struct preopen_state {
+	int num_helpers;
+	struct preopen_helper *helpers;
+
+	size_t to_read;		/* How many bytes to read in children? */
+	int queue_max;
+
+	char *template_fname;	/* Filename to be sent to children */
+	size_t number_start;	/* start offset into "template_fname" */
+	int num_digits;		/* How many digits is the number long? */
+
+	int fnum_sent;		/* last fname sent to children */
+
+	int fnum_queue_end;	/* last fname to be sent, based on
+				 * last open call + preopen:queuelen
+				 */
+
+	name_compare_entry *preopen_names;
+};
+
+/*
+ * Shut down one helper: drop its fd event, close our end of the socket
+ * and kill and reap the child. The helper is left marked busy so that
+ * preopen_queue_run() never picks it again.
+ */
+static void preopen_helper_destroy(struct preopen_helper *c)
+{
+	int status;
+
+	/* Do not leave an event behind that watches a closed fd */
+	TALLOC_FREE(c->fde);
+
+	if (c->fd != -1) {
+		close(c->fd);
+		c->fd = -1;
+	}
+
+	/*
+	 * Only signal a child that was really forked: kill() treats a
+	 * pid of 0 or -1 as a request to signal many processes.
+	 */
+	if (c->pid > 0) {
+		kill(c->pid, SIGKILL);
+		waitpid(c->pid, &status, 0);
+		c->pid = -1;
+	}
+	c->busy = true;
+}
+
+/*
+ * Send file names to idle helpers until everything up to
+ * fnum_queue_end has been sent or all helpers are busy.
+ *
+ * The number inside template_fname is overwritten in place for each
+ * name. snprintf() terminates its output with a NUL byte, so the
+ * character following the digits is saved and put back afterwards.
+ */
+static void preopen_queue_run(struct preopen_state *state)
+{
+	char *pdelimiter;
+	char delimiter;
+
+	if (state->template_fname == NULL) {
+		/*
+		 * preopen_open() drops the template when it cannot
+		 * build or parse a name, but a helper can still report
+		 * back after that. There is nothing to send then.
+		 */
+		return;
+	}
+
+	pdelimiter = state->template_fname + state->number_start
+		+ state->num_digits;
+	delimiter = *pdelimiter;
+
+	while (state->fnum_sent < state->fnum_queue_end) {
+
+		ssize_t written;
+		size_t to_write;
+		int helper;
+
+		for (helper=0; helper<state->num_helpers; helper++) {
+			if (!state->helpers[helper].busy) {
+				break;
+			}
+		}
+		if (helper == state->num_helpers) {
+			/* everyone is busy */
+			return;
+		}
+
+		snprintf(state->template_fname + state->number_start,
+			 state->num_digits + 1,
+			 "%.*lu", state->num_digits,
+			 (long unsigned int)(state->fnum_sent + 1));
+		*pdelimiter = delimiter;
+
+		/*
+		 * Send the whole talloc'ed buffer. The helper reads
+		 * until it sees a NUL byte at the end of its input.
+		 */
+		to_write = talloc_get_size(state->template_fname);
+		written = write_data(state->helpers[helper].fd,
+				     state->template_fname, to_write);
+		state->helpers[helper].busy = true;
+
+		if ((written < 0) || ((size_t)written != to_write)) {
+			preopen_helper_destroy(&state->helpers[helper]);
+		}
+		state->fnum_sent += 1;
+	}
+}
+
+/*
+ * fd event handler in the parent: a helper sent its one byte reply, so
+ * mark it idle and hand out more work. A helper whose socket fails is
+ * destroyed instead.
+ */
+static void preopen_helper_readable(struct event_context *ev,
+				    struct fd_event *fde, uint16_t flags,
+				    void *priv)
+{
+	struct preopen_helper *helper = (struct preopen_helper *)priv;
+	struct preopen_state *state = helper->state;
+	ssize_t nread;
+	char c;
+
+	if ((flags & EVENT_FD_READ) == 0) {
+		return;
+	}
+
+	nread = read(helper->fd, &c, 1);
+	if (nread <= 0) {
+		preopen_helper_destroy(helper);
+		return;
+	}
+
+	helper->busy = false;
+
+	preopen_queue_run(state);
+}
+
+/*
+ * talloc destructor of the state: shut down every helper that still
+ * has an open socket.
+ */
+static int preopen_helpers_destructor(struct preopen_state *c)
+{
+	int i;
+
+	for (i=0; i<c->num_helpers; i++) {
+		if (c->helpers[i].fd == -1) {
+			continue;
+		}
+		preopen_helper_destroy(&c->helpers[i]);
+	}
+
+	/*
+	 * NOTE(review): preopen_names is filled by set_namearray() and
+	 * is not released here - confirm whether it needs freeing.
+	 */
+	return 0;
+}
+
+/*
+ * Child side: read one NUL terminated file name from the parent, open
+ * the file, read up to "to_read" bytes from it and reply with a
+ * single byte.
+ *
+ * "*pnamebuf" is a talloc'ed buffer that is doubled in size whenever
+ * it fills up; the new pointer is stored back for the caller.
+ *
+ * Returns false if talking to the parent fails or the buffer cannot
+ * be enlarged. A file that cannot be opened is not an error.
+ */
+static bool preopen_helper_open_one(int sock_fd, char **pnamebuf,
+				    size_t to_read, void *filebuf)
+{
+	char *namebuf = *pnamebuf;
+	ssize_t nwritten, nread;
+	char c = 0;
+	int fd;
+
+	nread = 0;
+
+	while ((nread == 0) || (namebuf[nread-1] != '\0')) {
+		ssize_t thistime;
+
+		thistime = read(sock_fd, namebuf + nread,
+				talloc_get_size(namebuf) - nread);
+		if (thistime <= 0) {
+			return false;
+		}
+
+		nread += thistime;
+
+		if ((size_t)nread == talloc_get_size(namebuf)) {
+			namebuf = TALLOC_REALLOC_ARRAY(
+				NULL, namebuf, char,
+				talloc_get_size(namebuf) * 2);
+			if (namebuf == NULL) {
+				return false;
+			}
+			*pnamebuf = namebuf;
+		}
+	}
+
+	fd = open(namebuf, O_RDONLY);
+	if (fd == -1) {
+		goto done;
+	}
+	/* The data is not needed, reading it is the whole point */
+	nread = read(fd, filebuf, to_read);
+	close(fd);
+
+ done:
+	/* Tell the parent we are idle again, retrying if interrupted */
+	do {
+		nwritten = write(sock_fd, &c, 1);
+	} while ((nwritten == -1) && (errno == EINTR));
+
+	return (nwritten == 1);
+}
+
+/*
+ * Main loop of a forked helper: serve names arriving on "fd" until
+ * preopen_helper_open_one() fails. Always returns false.
+ */
+static bool preopen_helper(int fd, size_t to_read)
+{
+	char *namebuf;
+	void *readbuf;
+
+	namebuf = TALLOC_ARRAY(NULL, char, 1024);
+	if (namebuf == NULL) {
+		return false;
+	}
+
+	readbuf = talloc_size(NULL, to_read);
+	if (readbuf == NULL) {
+		TALLOC_FREE(namebuf);
+		return false;
+	}
+
+	while (preopen_helper_open_one(fd, &namebuf, to_read, readbuf)) {
+		;
+	}
+
+	TALLOC_FREE(readbuf);
+	TALLOC_FREE(namebuf);
+	return false;
+}
+
+/*
+ * Fork one helper and register a read event for its socket.
+ *
+ * On failure no fd and no child process is left behind, and the
+ * helper stays marked busy so that preopen_queue_run() skips it.
+ */
+static NTSTATUS preopen_init_helper(struct preopen_helper *h)
+{
+	int fdpair[2];
+	NTSTATUS status;
+
+	/* Unusable until everything below has worked */
+	h->fde = NULL;
+	h->pid = -1;
+	h->fd = -1;
+	h->busy = true;
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) == -1) {
+		status = map_nt_error_from_unix(errno);
+		DEBUG(10, ("socketpair() failed: %s\n", strerror(errno)));
+		return status;
+	}
+
+	h->pid = sys_fork();
+
+	if (h->pid == -1) {
+		status = map_nt_error_from_unix(errno);
+		close(fdpair[0]);
+		close(fdpair[1]);
+		return status;
+	}
+
+	if (h->pid == 0) {
+		close(fdpair[0]);
+		preopen_helper(fdpair[1], h->state->to_read);
+		exit(0);
+	}
+	close(fdpair[1]);
+	h->fd = fdpair[0];
+	h->fde = event_add_fd(smbd_event_context(), h->state, h->fd,
+			      EVENT_FD_READ, preopen_helper_readable, h);
+	if (h->fde == NULL) {
+		/* Closes the socket and kills and reaps the child */
+		preopen_helper_destroy(h);
+		return NT_STATUS_NO_MEMORY;
+	}
+	h->busy = false;
+	return NT_STATUS_OK;
+}
+
+/*
+ * Allocate the state on "mem_ctx" and start "num_helpers" helpers.
+ *
+ * A helper that fails to start is only logged; it stays unusable and
+ * the remaining helpers do the work. Returns NT_STATUS_NO_MEMORY if
+ * the state itself cannot be allocated.
+ */
+static NTSTATUS preopen_init_helpers(TALLOC_CTX *mem_ctx, size_t to_read,
+				     int num_helpers, int queue_max,
+				     struct preopen_state **presult)
+{
+	struct preopen_state *result;
+	int i;
+
+	result = talloc(mem_ctx, struct preopen_state);
+	if (result == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	result->num_helpers = num_helpers;
+	result->helpers = TALLOC_ARRAY(result, struct preopen_helper,
+				       num_helpers);
+	if (result->helpers == NULL) {
+		TALLOC_FREE(result);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	/* talloc() does not zero the memory, set every field */
+	result->to_read = to_read;
+	result->queue_max = queue_max;
+	result->template_fname = NULL;
+	result->number_start = 0;
+	result->num_digits = 0;
+	result->fnum_sent = 0;
+	result->fnum_queue_end = 0;
+	result->preopen_names = NULL;
+
+	for (i=0; i<num_helpers; i++) {
+		result->helpers[i].state = result;
+		result->helpers[i].fde = NULL;
+		result->helpers[i].pid = -1;
+		result->helpers[i].fd = -1;
+		result->helpers[i].busy = true;
+	}
+
+	talloc_set_destructor(result, preopen_helpers_destructor);
+
+	for (i=0; i<num_helpers; i++) {
+		NTSTATUS status;
+
+		status = preopen_init_helper(&result->helpers[i]);
+		if (!NT_STATUS_IS_OK(status)) {
+			DEBUG(10, ("could not start preopen helper %d\n",
+				   i));
+		}
+	}
+
+	*presult = result;
+	return NT_STATUS_OK;
+}
+
+/* Free function registered together with the state on the VFS handle */
+static void preopen_free_helpers(void **ptr)
+{
+	TALLOC_FREE(*ptr);
+}
+
+/*
+ * Return the state attached to "handle", creating it on first use
+ * from the preopen:* share parameters.
+ *
+ * Returns NULL if preopen:names is not set or the state cannot be set
+ * up; the caller then just passes the open through.
+ */
+static struct preopen_state *preopen_state_get(vfs_handle_struct *handle)
+{
+	struct preopen_state *state;
+	NTSTATUS status;
+	const char *namelist;
+
+	if (SMB_VFS_HANDLE_TEST_DATA(handle)) {
+		SMB_VFS_HANDLE_GET_DATA(handle, state, struct preopen_state,
+					return NULL);
+		return state;
+	}
+
+	namelist = lp_parm_const_string(SNUM(handle->conn), "preopen", "names",
+					NULL);
+
+	if (namelist == NULL) {
+		return NULL;
+	}
+
+	status = preopen_init_helpers(
+		NULL,
+		lp_parm_int(SNUM(handle->conn), "preopen", "num_bytes", 1),
+		lp_parm_int(SNUM(handle->conn), "preopen", "helpers", 1),
+		lp_parm_int(SNUM(handle->conn), "preopen", "queuelen", 10),
+		&state);
+	if (!NT_STATUS_IS_OK(status)) {
+		return NULL;
+	}
+
+	set_namearray(&state->preopen_names, (char *)namelist);
+
+	if (state->preopen_names == NULL) {
+		TALLOC_FREE(state);
+		return NULL;
+	}
+
+	if (!SMB_VFS_HANDLE_TEST_DATA(handle)) {
+		SMB_VFS_HANDLE_SET_DATA(handle, state, preopen_free_helpers,
+					struct preopen_state, return NULL);
+	}
+
+	return state;
+}
+
+/*
+ * Find the file number in "fname": the first run of at least three
+ * digits in the last path component.
+ *
+ * On success "*pnum" is the number, "*pstart_idx" its offset into
+ * "fname" and "*pnum_digits" its length in characters.
+ *
+ * Returns false if there is no such run or the number cannot be
+ * incremented without wrapping.
+ */
+static bool preopen_parse_fname(const char *fname, unsigned long *pnum,
+				size_t *pstart_idx, int *pnum_digits)
+{
+	const char *p;
+	char *q = NULL;
+	unsigned long num;
+
+	p = strrchr_m(fname, '/');
+	if (p == NULL) {
+		/* no directory part, look at the whole name */
+		p = fname;
+	} else {
+		/* step over the slash */
+		p += 1;
+	}
+
+	while (p[0] != '\0') {
+		/* && stops at the terminating NUL, p[1], p[2] are safe */
+		if (isdigit((unsigned char)p[0]) &&
+		    isdigit((unsigned char)p[1]) &&
+		    isdigit((unsigned char)p[2])) {
+			break;
+		}
+		p += 1;
+	}
+	if (*p == '\0') {
+		/* no digits around */
+		return false;
+	}
+
+	num = strtoul(p, &q, 10);
+
+	if (num+1 < num) {
+		/* overflow */
+		return false;
+	}
+
+	*pnum = num;
+	*pstart_idx = (p - fname);
+	*pnum_digits = (q - p);
+	return true;
+}
+
+/*
+ * VFS open hook. After a successful read-only open of a file matching
+ * preopen:names, ask the helpers to open the files with the following
+ * numbers.
+ *
+ * The return value is always the one of the next module's open.
+ */
+static int preopen_open(vfs_handle_struct *handle, const char *fname,
+			files_struct *fsp, int flags, mode_t mode)
+{
+	struct preopen_state *state;
+	int res;
+	unsigned long num;
+	int fnum;
+
+	DEBUG(10, ("preopen_open called on %s\n", fname));
+
+	state = preopen_state_get(handle);
+	if (state == NULL) {
+		return SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+	}
+
+	res = SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+	if (res == -1) {
+		return -1;
+	}
+
+	if (flags != O_RDONLY) {
+		return res;
+	}
+
+	if (!is_in_path(fname, state->preopen_names, true)) {
+		DEBUG(10, ("%s does not match the preopen:names list\n",
+			   fname));
+		return res;
+	}
+
+	TALLOC_FREE(state->template_fname);
+	state->template_fname = talloc_asprintf(
+		state, "%s/%s", fsp->conn->connectpath, fname);
+
+	if (state->template_fname == NULL) {
+		return res;
+	}
+
+	if (!preopen_parse_fname(state->template_fname, &num,
+				 &state->number_start, &state->num_digits)) {
+		TALLOC_FREE(state->template_fname);
+		return res;
+	}
+
+	if ((state->queue_max < 0) ||
+	    (num > (unsigned long)(INT_MAX - state->queue_max))) {
+		/*
+		 * The queue positions are kept as "int": skip numbers
+		 * (or a queue length) that do not fit.
+		 */
+		TALLOC_FREE(state->template_fname);
+		return res;
+	}
+	fnum = (int)num;
+
+	if (fnum > state->fnum_sent) {
+		/*
+		 * Helpers were too slow, there's no point in reading
+		 * files in helpers that we already read in the
+		 * parent.
+		 */
+		state->fnum_sent = fnum;
+	}
+
+	if ((state->fnum_queue_end != 0) /* Something was started earlier */
+	    && (fnum < (state->fnum_queue_end - state->queue_max))) {
+		/*
+		 * "num" is before the queue we announced. This means
+		 * a new run is started.
+		 */
+		state->fnum_sent = fnum;
+	}
+
+	state->fnum_queue_end = fnum + state->queue_max;
+
+	preopen_queue_run(state);
+
+	return res;
+}
+
+/* VFS operations structure */
+
+static vfs_op_tuple preopen_ops[] = {
+	{SMB_VFS_OP(preopen_open),	SMB_VFS_OP_OPEN,
+	 SMB_VFS_LAYER_TRANSPARENT},
+	{SMB_VFS_OP(NULL),		SMB_VFS_OP_NOOP,
+	 SMB_VFS_LAYER_NOOP}
+};
+
+NTSTATUS vfs_preopen_init(void);
+NTSTATUS vfs_preopen_init(void)
+{
+	return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
+				"preopen", preopen_ops);
+}
-- 
2.34.1