vfs/glusterfs: Change xattr key to match gluster key.
[obnox/samba/samba-obnox.git] / source3 / modules / vfs_glusterfs.c
index 24e1bdaea788ba1abc6433fce10405b0399d96e1..10c3a222608bdb355092fede4871ecd261c8e7af 100644 (file)
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
+/**
+ * @file   vfs_glusterfs.c
+ * @author Anand Avati <avati@redhat.com>
+ * @date   May 2013
+ * @brief  Samba VFS module for glusterfs
+ *
+ * @todo
+ *   - sendfile/recvfile support
+ *
+ * A Samba VFS module for GlusterFS, based on Gluster's libgfapi.
+ * This is a "bottom" vfs module (not something to be stacked on top of
+ * another module), and translates (most) calls to the closest actions
+ * available in libgfapi.
+ *
+ */
+
 #include "includes.h"
 #include "smbd/smbd.h"
 #include <stdio.h>
 #include "api/glfs.h"
 #include "lib/util/dlinklist.h"
+#include "lib/util/tevent_unix.h"
+#ifdef HAVE_SYS_EVENTFD_H
+#include <sys/eventfd.h>
+#endif
+#include <pthread.h>
+#include "smbd/globals.h"
 
 #define DEFAULT_VOLFILE_SERVER "localhost"
 
-/*
-  TODO
-  ----
-  Short term:
-  - AIO support
-  - sendfile/recvfile support
-*/
-
-/* Helpers to provide 'integer' fds */
-
-/* This is global. gfapi's FD operations do not
-   require filesystem context.
-*/
-
-static glfs_fd_t **glfd_fd;
-static int glfd_fd_size;
-static int glfd_fd_used;
-
-static int glfd_fd_store(glfs_fd_t *glfd)
-{
-       int i;
-       void *tmp;
-
-       if (glfd_fd_size == glfd_fd_used) {
-               if (glfd_fd_size >= INT_MAX - 1) {
-                       errno = ENOMEM;
-                       return -1;
-               }
-
-               tmp = talloc_realloc(glfd_fd, glfd_fd, glfs_fd_t *,
-                                    glfd_fd_size + 1);
-               if (tmp == NULL) {
-                       errno = ENOMEM;
-                       return -1;
-               }
-
-               glfd_fd = tmp;
-               glfd_fd[glfd_fd_size] = 0;
-               glfd_fd_size++;
-       }
-
-       for (i = 0; i < glfd_fd_size; i++) {
-               if (glfd_fd[i] == NULL) {
-                       break;
-               }
-       }
-       glfd_fd_used++;
-       glfd_fd[i] = glfd;
-       return i;
-}
-
-static glfs_fd_t *glfd_fd_get(int i)
-{
-       if (i < 0 || i >= glfd_fd_size) {
-               return NULL;
-       }
-       return glfd_fd[i];
-}
-
-static glfs_fd_t *glfd_fd_clear(int i)
-{
-       glfs_fd_t *glfd = NULL;
-
-       if (i < 0 || i >= glfd_fd_size) {
-               return NULL;
-       }
-
-       glfd = glfd_fd[i];
-
-       glfd_fd[i] = 0;
-       glfd_fd_used--;
-       return glfd;
-}
-
-/* Helper to convert stat to stat_ex */
+#ifdef HAVE_EVENTFD
+static pthread_mutex_t lock_req_list = PTHREAD_MUTEX_INITIALIZER;
+static int event_fd = -1;
+static struct tevent_fd *aio_read_event = NULL;
+static struct tevent_req **req_producer_list = NULL;
+static struct tevent_req **req_consumer_list = NULL;
+static uint64_t req_counter = 0;
+#endif
 
+/**
+ * Helper to convert struct stat to struct stat_ex.
+ */
 static void smb_stat_ex_from_stat(struct stat_ex *dst, const struct stat *src)
 {
        ZERO_STRUCTP(dst);
@@ -116,36 +74,31 @@ static void smb_stat_ex_from_stat(struct stat_ex *dst, const struct stat *src)
        dst->st_ex_rdev = src->st_rdev;
        dst->st_ex_size = src->st_size;
        dst->st_ex_atime.tv_sec = src->st_atime;
-#ifdef STAT_HAVE_NSEC
-       dst->st_ex_atime.tv_nsec = src->st_atime_nsec;
-#endif
        dst->st_ex_mtime.tv_sec = src->st_mtime;
-#ifdef STAT_HAVE_NSEC
-       dst->st_ex_mtime.tv_nsec = src->st_mtime_nsec;
-#endif
        dst->st_ex_ctime.tv_sec = src->st_ctime;
-#ifdef STAT_HAVE_NSEC
-       dst->st_ex_ctime.tv_nsec = src->st_ctime_nsec;
-#endif
        dst->st_ex_btime.tv_sec = src->st_mtime;
+       dst->st_ex_blksize = src->st_blksize;
+       dst->st_ex_blocks = src->st_blocks;
 #ifdef STAT_HAVE_NSEC
+       dst->st_ex_atime.tv_nsec = src->st_atime_nsec;
+       dst->st_ex_mtime.tv_nsec = src->st_mtime_nsec;
+       dst->st_ex_ctime.tv_nsec = src->st_ctime_nsec;
        dst->st_ex_btime.tv_nsec = src->st_mtime_nsec;
 #endif
-       dst->st_ex_blksize = src->st_blksize;
-       dst->st_ex_blocks = src->st_blocks;
 }
 
 /* pre-opened glfs_t */
 
 static struct glfs_preopened {
        char *volume;
+       char *connectpath;
        glfs_t *fs;
        int ref;
        struct glfs_preopened *next, *prev;
 } *glfs_preopened;
 
 
-int glfs_set_preopened(const char *volume, glfs_t *fs)
+static int glfs_set_preopened(const char *volume, const char *connectpath, glfs_t *fs)
 {
        struct glfs_preopened *entry = NULL;
 
@@ -162,6 +115,13 @@ int glfs_set_preopened(const char *volume, glfs_t *fs)
                return -1;
        }
 
+       entry->connectpath = talloc_strdup(entry, connectpath);
+       if (entry->connectpath == NULL) {
+               talloc_free(entry);
+               errno = ENOMEM;
+               return -1;
+       }
+
        entry->fs = fs;
        entry->ref = 1;
 
@@ -170,12 +130,14 @@ int glfs_set_preopened(const char *volume, glfs_t *fs)
        return 0;
 }
 
-static glfs_t *glfs_find_preopened(const char *volume)
+static glfs_t *glfs_find_preopened(const char *volume, const char *connectpath)
 {
        struct glfs_preopened *entry = NULL;
 
        for (entry = glfs_preopened; entry; entry = entry->next) {
-               if (strcmp(entry->volume, volume) == 0) {
+               if (strcmp(entry->volume, volume) == 0 &&
+                   strcmp(entry->connectpath, connectpath) == 0)
+               {
                        entry->ref++;
                        return entry->fs;
                }
@@ -209,12 +171,18 @@ static int vfs_gluster_connect(struct vfs_handle_struct *handle,
 {
        const char *volfile_server;
        const char *volume;
-       const char *logfile;
+       char *logfile;
        int loglevel;
-       glfs_t *fs;
-       int ret;
+       glfs_t *fs = NULL;
+       TALLOC_CTX *tmp_ctx;
+       int ret = 0;
 
-       logfile = lp_parm_const_string(SNUM(handle->conn), "glusterfs",
+       tmp_ctx = talloc_new(NULL);
+       if (tmp_ctx == NULL) {
+               ret = -1;
+               goto done;
+       }
+       logfile = lp_parm_talloc_string(tmp_ctx, SNUM(handle->conn), "glusterfs",
                                       "logfile", NULL);
 
        loglevel = lp_parm_int(SNUM(handle->conn), "glusterfs", "loglevel", -1);
@@ -231,27 +199,37 @@ static int vfs_gluster_connect(struct vfs_handle_struct *handle,
                volume = service;
        }
 
-       fs = glfs_find_preopened(volume);
+       fs = glfs_find_preopened(volume, handle->conn->connectpath);
        if (fs) {
-               goto found;
+               goto done;
        }
 
        fs = glfs_new(volume);
        if (fs == NULL) {
-               return -1;
+               ret = -1;
+               goto done;
        }
 
        ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 0);
        if (ret < 0) {
                DEBUG(0, ("Failed to set volfile_server %s\n", volfile_server));
-               glfs_fini(fs);
-               return -1;
+               goto done;
        }
 
        ret = glfs_set_xlator_option(fs, "*-md-cache", "cache-posix-acl",
                                     "true");
        if (ret < 0) {
                DEBUG(0, ("%s: Failed to set xlator options\n", volume));
+               goto done;
+       }
+
+
+       ret = glfs_set_xlator_option(fs, "*-snapview-client",
+                                    "snapdir-entry-path",
+                                    handle->conn->connectpath);
+       if (ret < 0) {
+               DEBUG(0, ("%s: Failed to set xlator option:"
+                         " snapdir-entry-path\n", volume));
-               glfs_fini(fs);
-               return -1;
+               /* 'done' frees tmp_ctx and calls glfs_fini(fs) when ret < 0. */
+               goto done;
        }
@@ -260,30 +238,34 @@ static int vfs_gluster_connect(struct vfs_handle_struct *handle,
        if (ret < 0) {
                DEBUG(0, ("%s: Failed to set logfile %s loglevel %d\n",
                          volume, logfile, loglevel));
-               glfs_fini(fs);
-               return -1;
+               goto done;
        }
 
        ret = glfs_init(fs);
        if (ret < 0) {
                DEBUG(0, ("%s: Failed to initialize volume (%s)\n",
                          volume, strerror(errno)));
-               glfs_fini(fs);
-               return -1;
+               goto done;
        }
 
-       ret = glfs_set_preopened(volume, fs);
+       ret = glfs_set_preopened(volume, handle->conn->connectpath, fs);
        if (ret < 0) {
                DEBUG(0, ("%s: Failed to register volume (%s)\n",
                          volume, strerror(errno)));
-               glfs_fini(fs);
+               goto done;
+       }
+done:
+       talloc_free(tmp_ctx);
+       if (ret < 0) {
+               if (fs)
+                       glfs_fini(fs);
                return -1;
+       } else {
+               DEBUG(0, ("%s: Initialized volume from server %s\n",
+                         volume, volfile_server));
+               handle->data = fs;
+               return 0;
        }
-found:
-       DEBUG(0, ("%s: Initialized volume from server %s\n",
-                 volume, volfile_server));
-       handle->data = fs;
-       return 0;
 }
 
 static void vfs_gluster_disconnect(struct vfs_handle_struct *handle)
@@ -301,7 +283,6 @@ static uint64_t vfs_gluster_disk_free(struct vfs_handle_struct *handle,
                                      uint64_t *dsize_p)
 {
        struct statvfs statvfs = { 0, };
-       uint64_t dfree = 0;
        int ret;
 
        ret = glfs_statvfs(handle->data, path, &statvfs);
@@ -309,19 +290,17 @@ static uint64_t vfs_gluster_disk_free(struct vfs_handle_struct *handle,
                return -1;
        }
 
-       dfree = statvfs.f_bsize * statvfs.f_bavail;
-
        if (bsize_p != NULL) {
-               *bsize_p = statvfs.f_bsize;
+               *bsize_p = (uint64_t)statvfs.f_bsize; /* Block size */
        }
        if (dfree_p != NULL) {
-               *dfree_p = dfree;
+               *dfree_p = (uint64_t)statvfs.f_bavail; /* Available Block units */
        }
        if (dsize_p != NULL) {
-               *dsize_p = statvfs.f_bsize * statvfs.f_blocks;
+               *dsize_p = (uint64_t)statvfs.f_blocks; /* Total Block units */
        }
 
-       return dfree;
+       return (uint64_t)statvfs.f_bavail;
 }
 
 static int vfs_gluster_get_quota(struct vfs_handle_struct *handle,
@@ -401,7 +380,7 @@ static DIR *vfs_gluster_fdopendir(struct vfs_handle_struct *handle,
                                  files_struct *fsp, const char *mask,
                                  uint32 attributes)
 {
-       return (DIR *) glfd_fd_get(fsp->fh->fd);
+       return (DIR *) *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
 }
 
 static int vfs_gluster_closedir(struct vfs_handle_struct *handle, DIR *dirp)
@@ -473,6 +452,7 @@ static int vfs_gluster_open(struct vfs_handle_struct *handle,
                            int flags, mode_t mode)
 {
        glfs_fd_t *glfd;
+       glfs_fd_t **p_tmp;
 
        if (flags & O_DIRECTORY) {
                glfd = glfs_opendir(handle->data, smb_fname->base_name);
@@ -484,31 +464,170 @@ static int vfs_gluster_open(struct vfs_handle_struct *handle,
        }
 
        if (glfd == NULL) {
-               DEBUG(0, ("glfs_{open[dir],creat}(%s) failed: %s\n",
-                         smb_fname->base_name, strerror(errno)));
                return -1;
        }
-
-       return glfd_fd_store(glfd);
+       p_tmp = (glfs_fd_t **)VFS_ADD_FSP_EXTENSION(handle, fsp,
+                                                         glfs_fd_t *, NULL);
+       *p_tmp = glfd;
+       /* An arbitrary value for error reporting, so you know it's us. */
+       return 13371337;
 }
 
 static int vfs_gluster_close(struct vfs_handle_struct *handle,
                             files_struct *fsp)
 {
-       return glfs_close(glfd_fd_clear(fsp->fh->fd));
+       glfs_fd_t *glfd;
+       glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
+       VFS_REMOVE_FSP_EXTENSION(handle, fsp);
+       return glfs_close(glfd);
 }
 
 static ssize_t vfs_gluster_read(struct vfs_handle_struct *handle,
                                files_struct *fsp, void *data, size_t n)
 {
-       return glfs_read(glfd_fd_get(fsp->fh->fd), data, n, 0);
+       return glfs_read(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
 }
 
 static ssize_t vfs_gluster_pread(struct vfs_handle_struct *handle,
                                 files_struct *fsp, void *data, size_t n,
                                 off_t offset)
 {
-       return glfs_pread(glfd_fd_get(fsp->fh->fd), data, n, offset, 0);
+       return glfs_pread(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
+}
+
+struct glusterfs_aio_state {
+       ssize_t ret; /* result of the async op, or -1 if it failed */
+       int err;     /* errno saved by the completion callback, 0 on success */
+};
+
+/*
+ * Completion callback, invoked on a glusterfs thread once a submitted
+ * async I/O has finished. The outcome is recorded in the request state
+ * and Samba is notified of the completion through the eventfd.
+ */
+static void aio_glusterfs_done(glfs_fd_t *fd, ssize_t ret, void *data)
+{
+#ifdef HAVE_EVENTFD
+       struct tevent_req *req = NULL;
+       struct glusterfs_aio_state *state = NULL;
+       int i, sts = 0;
+       uint64_t u = 1;
+       bool notify = false;
+
+       req = talloc_get_type_abort(data, struct tevent_req);
+       state = tevent_req_data(req, struct glusterfs_aio_state);
+
+       if (ret < 0) {
+               state->ret = -1;
+               state->err = errno;
+       } else {
+               state->ret = ret;
+               state->err = 0;
+       }
+
+       /*
+        * Queue the req for the smbd main thread: tevent_req_done() is
+        * not designed for multithreaded use and must not run here.
+        */
+       pthread_mutex_lock (&lock_req_list);
+       for (i = 0 ; i < aio_pending_size ; i++) {
+               if (req_producer_list[i] == NULL) {
+                       req_producer_list[i] = req;
+                       req_counter = req_counter + 1;
+                       /*
+                        * Decide while still holding the lock: reading
+                        * req_counter after unlocking races with other
+                        * callback threads and can lose the wakeup.
+                        */
+                       notify = (req_counter == 1);
+                       break;
+               }
+       }
+       pthread_mutex_unlock (&lock_req_list);
+
+       /* For a bunch of fops notify only once. */
+       if (notify) {
+               sts = write (event_fd, &u, sizeof(uint64_t));
+               if (sts < 0 && errno == EAGAIN) {
+                       DEBUG(0,("\nWRITE: reached max value"));
+               }
+       }
+#endif
+}
+
+#ifdef HAVE_EVENTFD
+/*
+ * tevent handler for event_fd: drains it, swaps producer/consumer lists
+ * under lock_req_list and calls tevent_req_done() on each queued req.
+ */
+static void aio_tevent_fd_done(struct tevent_context *event_ctx,
+                               struct tevent_fd *fde,
+                               uint16 flags, void *data)
+{
+       struct tevent_req **swap = NULL;
+       uint64_t value = 0;
+       int idx = 0, rc = 0;
+
+       rc = read (event_fd, &value, sizeof(uint64_t));
+       if (rc < 0 && errno == EAGAIN)
+               DEBUG(0,("\nREAD: eventfd read failed (%s)",strerror(errno)));
+
+       /* Take over everything queued so far and reset the counter. */
+       pthread_mutex_lock (&lock_req_list);
+       swap = req_consumer_list;
+       req_consumer_list = req_producer_list;
+       req_producer_list = swap;
+       req_counter = 0;
+       pthread_mutex_unlock (&lock_req_list);
+
+       for (idx = 0 ; idx < aio_pending_size ; idx++) {
+               if (req_consumer_list[idx] != NULL) {
+                       tevent_req_done(req_consumer_list[idx]);
+                       req_consumer_list[idx] = 0;
+               }
+       }
+}
+#endif
+
+/*
+ * One-time setup of the eventfd, its tevent handler and the two request
+ * lists used for async I/O. Returns true if AIO is (already) usable.
+ */
+static bool init_gluster_aio(struct vfs_handle_struct *handle)
+{
+#ifdef HAVE_EVENTFD
+       if (event_fd != -1) {
+               return true; /* Already initialized. */
+       }
+       event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+       if (event_fd == -1) {
+               goto fail;
+       }
+       aio_read_event = tevent_add_fd(handle->conn->sconn->ev_ctx, NULL,
+                       event_fd, TEVENT_FD_READ, aio_tevent_fd_done, NULL);
+       if (aio_read_event == NULL) {
+               goto fail;
+       }
+       req_producer_list = talloc_zero_array(NULL, struct tevent_req *,
+                                               aio_pending_size);
+       req_consumer_list = talloc_zero_array(NULL, struct tevent_req *,
+                                               aio_pending_size);
+       if (req_producer_list == NULL || req_consumer_list == NULL) {
+               /* Both lists are indexed without NULL checks later on. */
+               goto fail;
+       }
+
+       return true;
+fail:
+       TALLOC_FREE(aio_read_event);
+       TALLOC_FREE(req_producer_list);
+       TALLOC_FREE(req_consumer_list);
+       if (event_fd != -1) {
+               close(event_fd);
+               event_fd = -1;
+       }
+#endif
+       return false;
+}
 
 static struct tevent_req *vfs_gluster_pread_send(struct vfs_handle_struct
@@ -517,27 +636,46 @@ static struct tevent_req *vfs_gluster_pread_send(struct vfs_handle_struct
                                                 files_struct *fsp, void *data,
                                                 size_t n, off_t offset)
 {
+       struct tevent_req *req = NULL;
+       struct glusterfs_aio_state *state = NULL;
+       int ret = 0;
+
+#ifndef HAVE_EVENTFD
        errno = ENOTSUP;
        return NULL;
-}
+#endif
 
-static ssize_t vfs_gluster_pread_recv(struct tevent_req *req, int *err)
-{
-       errno = ENOTSUP;
-       return -1;
+       req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       if (!init_gluster_aio(handle)) {
+               tevent_req_error(req, EIO);
+               return tevent_req_post(req, ev);
+       }
+       ret = glfs_pread_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
+                               fsp), data, n, offset, 0, aio_glusterfs_done,
+                               req);
+       if (ret < 0) {
+               tevent_req_error(req, -ret);
+               return tevent_req_post(req, ev);
+       }
+
+       return req;
 }
 
 static ssize_t vfs_gluster_write(struct vfs_handle_struct *handle,
                                 files_struct *fsp, const void *data, size_t n)
 {
-       return glfs_write(glfd_fd_get(fsp->fh->fd), data, n, 0);
+       return glfs_write(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
 }
 
 static ssize_t vfs_gluster_pwrite(struct vfs_handle_struct *handle,
                                  files_struct *fsp, const void *data,
                                  size_t n, off_t offset)
 {
-       return glfs_pwrite(glfd_fd_get(fsp->fh->fd), data, n, offset, 0);
+       return glfs_pwrite(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
 }
 
 static struct tevent_req *vfs_gluster_pwrite_send(struct vfs_handle_struct
@@ -547,20 +685,59 @@ static struct tevent_req *vfs_gluster_pwrite_send(struct vfs_handle_struct
                                                  const void *data, size_t n,
                                                  off_t offset)
 {
+       struct tevent_req *req = NULL;
+       struct glusterfs_aio_state *state = NULL;
+       int ret = 0;
+
+#ifndef HAVE_EVENTFD
        errno = ENOTSUP;
        return NULL;
+#endif
+
+       req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
+       if (req == NULL) {
+               return NULL;
+       }
+       if (!init_gluster_aio(handle)) {
+               tevent_req_error(req, EIO);
+               return tevent_req_post(req, ev);
+       }
+       ret = glfs_pwrite_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
+                               fsp), data, n, offset, 0, aio_glusterfs_done,
+                               req);
+       if (ret < 0) {
+               tevent_req_error(req, -ret);
+               return tevent_req_post(req, ev);
+       }
+       return req;
 }
 
-static ssize_t vfs_gluster_pwrite_recv(struct tevent_req *req, int *err)
+static ssize_t vfs_gluster_recv(struct tevent_req *req, int *err)
 {
+       struct glusterfs_aio_state *state = NULL;
+       /* Built without eventfd: async I/O is reported as unsupported. */
+#ifndef HAVE_EVENTFD
        errno = ENOTSUP;
        return -1;
+#endif
+       state = tevent_req_data(req, struct glusterfs_aio_state);
+       if (state == NULL) {
+               return -1;
+       }
+       /* Request-level failure, e.g. tevent_req_error() in a send path. */
+       if (tevent_req_is_unix_error(req, err)) {
+               return -1;
+       }
+       if (state->ret == -1) {
+               *err = state->err; /* errno saved by aio_glusterfs_done() */
+       }
+       return state->ret;
+}
 
 static off_t vfs_gluster_lseek(struct vfs_handle_struct *handle,
                               files_struct *fsp, off_t offset, int whence)
 {
-       return glfs_lseek(glfd_fd_get(fsp->fh->fd), offset, whence);
+       return glfs_lseek(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset, whence);
 }
 
 static ssize_t vfs_gluster_sendfile(struct vfs_handle_struct *handle, int tofd,
@@ -591,7 +768,7 @@ static int vfs_gluster_rename(struct vfs_handle_struct *handle,
 static int vfs_gluster_fsync(struct vfs_handle_struct *handle,
                             files_struct *fsp)
 {
-       return glfs_fsync(glfd_fd_get(fsp->fh->fd));
+       return glfs_fsync(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp));
 }
 
 static struct tevent_req *vfs_gluster_fsync_send(struct vfs_handle_struct
@@ -599,14 +776,38 @@ static struct tevent_req *vfs_gluster_fsync_send(struct vfs_handle_struct
                                                 struct tevent_context *ev,
                                                 files_struct *fsp)
 {
+       struct tevent_req *req = NULL;
+       struct glusterfs_aio_state *state = NULL;
+       int ret = 0;
+
+#ifndef HAVE_EVENTFD
        errno = ENOTSUP;
        return NULL;
+#endif
+
+       req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
+       if (req == NULL) {
+               return NULL;
+       }
+       if (!init_gluster_aio(handle)) {
+               tevent_req_error(req, EIO);
+               return tevent_req_post(req, ev);
+       }
+       ret = glfs_fsync_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
+                               fsp), aio_glusterfs_done, req);
+       if (ret < 0) {
+               tevent_req_error(req, -ret);
+               return tevent_req_post(req, ev);
+       }
+       return req;
 }
 
 static int vfs_gluster_fsync_recv(struct tevent_req *req, int *err)
 {
-       errno = ENOTSUP;
-       return -1;
+       /*
+        * Use implicit conversion ssize_t->int
+        */
+       return vfs_gluster_recv(req, err);
 }
 
 static int vfs_gluster_stat(struct vfs_handle_struct *handle,
@@ -632,7 +833,7 @@ static int vfs_gluster_fstat(struct vfs_handle_struct *handle,
        struct stat st;
        int ret;
 
-       ret = glfs_fstat(glfd_fd_get(fsp->fh->fd), &st);
+       ret = glfs_fstat(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), &st);
        if (ret == 0) {
                smb_stat_ex_from_stat(sbuf, &st);
        }
@@ -682,7 +883,7 @@ static int vfs_gluster_chmod(struct vfs_handle_struct *handle,
 static int vfs_gluster_fchmod(struct vfs_handle_struct *handle,
                              files_struct *fsp, mode_t mode)
 {
-       return glfs_fchmod(glfd_fd_get(fsp->fh->fd), mode);
+       return glfs_fchmod(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), mode);
 }
 
 static int vfs_gluster_chown(struct vfs_handle_struct *handle,
@@ -694,7 +895,7 @@ static int vfs_gluster_chown(struct vfs_handle_struct *handle,
 static int vfs_gluster_fchown(struct vfs_handle_struct *handle,
                              files_struct *fsp, uid_t uid, gid_t gid)
 {
-       return glfs_fchown(glfd_fd_get(fsp->fh->fd), uid, gid);
+       return glfs_fchown(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), uid, gid);
 }
 
 static int vfs_gluster_lchown(struct vfs_handle_struct *handle,
@@ -713,12 +914,12 @@ static char *vfs_gluster_getwd(struct vfs_handle_struct *handle)
        char *cwd;
        char *ret;
 
-       cwd = calloc(1, PATH_MAX + 1);
+       cwd = SMB_CALLOC_ARRAY(char, PATH_MAX);
        if (cwd == NULL) {
                return NULL;
        }
 
-       ret = glfs_getcwd(handle->data, cwd, PATH_MAX);
+       ret = glfs_getcwd(handle->data, cwd, PATH_MAX - 1);
        if (ret == 0) {
                free(cwd);
        }
@@ -731,10 +932,28 @@ static int vfs_gluster_ntimes(struct vfs_handle_struct *handle,
 {
        struct timespec times[2];
 
-       times[0].tv_sec = ft->atime.tv_sec;
-       times[0].tv_nsec = ft->atime.tv_nsec;
-       times[1].tv_sec = ft->mtime.tv_sec;
-       times[1].tv_nsec = ft->mtime.tv_nsec;
+       if (null_timespec(ft->atime)) {
+               times[0].tv_sec = smb_fname->st.st_ex_atime.tv_sec;
+               times[0].tv_nsec = smb_fname->st.st_ex_atime.tv_nsec;
+       } else {
+               times[0].tv_sec = ft->atime.tv_sec;
+               times[0].tv_nsec = ft->atime.tv_nsec;
+       }
+
+       if (null_timespec(ft->mtime)) {
+               times[1].tv_sec = smb_fname->st.st_ex_mtime.tv_sec;
+               times[1].tv_nsec = smb_fname->st.st_ex_mtime.tv_nsec;
+       } else {
+               times[1].tv_sec = ft->mtime.tv_sec;
+               times[1].tv_nsec = ft->mtime.tv_nsec;
+       }
+
+       if ((timespec_compare(&times[0],
+                             &smb_fname->st.st_ex_atime) == 0) &&
+           (timespec_compare(&times[1],
+                             &smb_fname->st.st_ex_mtime) == 0)) {
+               return 0;
+       }
 
        return glfs_utimens(handle->data, smb_fname->base_name, times);
 }
@@ -742,7 +961,7 @@ static int vfs_gluster_ntimes(struct vfs_handle_struct *handle,
 static int vfs_gluster_ftruncate(struct vfs_handle_struct *handle,
                                 files_struct *fsp, off_t offset)
 {
-       return glfs_ftruncate(glfd_fd_get(fsp->fh->fd), offset);
+       return glfs_ftruncate(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset);
 }
 
 static int vfs_gluster_fallocate(struct vfs_handle_struct *handle,
@@ -773,7 +992,7 @@ static bool vfs_gluster_lock(struct vfs_handle_struct *handle,
        flock.l_len = count;
        flock.l_pid = 0;
 
-       ret = glfs_posix_lock(glfd_fd_get(fsp->fh->fd), op, &flock);
+       ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), op, &flock);
 
        if (op == F_GETLK) {
                /* lock query, true if someone else has locked */
@@ -820,7 +1039,7 @@ static bool vfs_gluster_getlock(struct vfs_handle_struct *handle,
        flock.l_len = *pcount;
        flock.l_pid = 0;
 
-       ret = glfs_posix_lock(glfd_fd_get(fsp->fh->fd), F_GETLK, &flock);
+       ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), F_GETLK, &flock);
 
        if (ret == -1) {
                return false;
@@ -891,11 +1110,13 @@ static int vfs_gluster_get_real_filename(struct vfs_handle_struct *handle,
        }
 
        snprintf(key_buf, NAME_MAX + 64,
-                "user.glusterfs.get_real_filename:%s", name);
+                "glusterfs.get_real_filename:%s", name);
 
        ret = glfs_getxattr(handle->data, path, key_buf, val_buf, NAME_MAX + 1);
-       if (ret == -1 && errno == ENODATA) {
-               errno = EOPNOTSUPP;
+       if (ret == -1) {
+               if (errno == ENODATA) {
+                       errno = EOPNOTSUPP;
+               }
                return -1;
        }
 
@@ -926,7 +1147,7 @@ static ssize_t vfs_gluster_fgetxattr(struct vfs_handle_struct *handle,
                                     files_struct *fsp, const char *name,
                                     void *value, size_t size)
 {
-       return glfs_fgetxattr(glfd_fd_get(fsp->fh->fd), name, value, size);
+       return glfs_fgetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size);
 }
 
 static ssize_t vfs_gluster_listxattr(struct vfs_handle_struct *handle,
@@ -939,7 +1160,7 @@ static ssize_t vfs_gluster_flistxattr(struct vfs_handle_struct *handle,
                                      files_struct *fsp, char *list,
                                      size_t size)
 {
-       return glfs_flistxattr(glfd_fd_get(fsp->fh->fd), list, size);
+       return glfs_flistxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), list, size);
 }
 
 static int vfs_gluster_removexattr(struct vfs_handle_struct *handle,
@@ -951,7 +1172,7 @@ static int vfs_gluster_removexattr(struct vfs_handle_struct *handle,
 static int vfs_gluster_fremovexattr(struct vfs_handle_struct *handle,
                                    files_struct *fsp, const char *name)
 {
-       return glfs_fremovexattr(glfd_fd_get(fsp->fh->fd), name);
+       return glfs_fremovexattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name);
 }
 
 static int vfs_gluster_setxattr(struct vfs_handle_struct *handle,
@@ -965,7 +1186,7 @@ static int vfs_gluster_fsetxattr(struct vfs_handle_struct *handle,
                                 files_struct *fsp, const char *name,
                                 const void *value, size_t size, int flags)
 {
-       return glfs_fsetxattr(glfd_fd_get(fsp->fh->fd), name, value, size,
+       return glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size,
                              flags);
 }
 
@@ -993,13 +1214,36 @@ static int vfs_gluster_set_offline(struct vfs_handle_struct *handle,
        return -1;
 }
 
-/* Posix ACL Operations */
+/*
+  Gluster ACL Format:
+
+  Size = 4 (header) + N * 8 (entry)
+
+  Offset  Size    Field (Little Endian)
+  -------------------------------------
+  0-3     4-byte  Version
+
+  4-5     2-byte  Entry-1 tag
+  6-7     2-byte  Entry-1 perm
+  8-11    4-byte  Entry-1 id
+
+  12-13   2-byte  Entry-2 tag
+  14-15   2-byte  Entry-2 perm
+  16-19   4-byte  Entry-2 id
+
+  ...
 
+ */
+
+/* header version */
 #define GLUSTER_ACL_VERSION 2
+
+/* perm bits */
 #define GLUSTER_ACL_READ    0x04
 #define GLUSTER_ACL_WRITE   0x02
 #define GLUSTER_ACL_EXECUTE 0x01
 
+/* tag values */
 #define GLUSTER_ACL_UNDEFINED_TAG  0x00
 #define GLUSTER_ACL_USER_OBJ       0x01
 #define GLUSTER_ACL_USER           0x02
@@ -1010,58 +1254,84 @@ static int vfs_gluster_set_offline(struct vfs_handle_struct *handle,
 
 #define GLUSTER_ACL_UNDEFINED_ID  (-1)
 
-struct gluster_ace {
-       uint16_t tag;
-       uint16_t perm;
-       uint32_t id;
-};
+#define GLUSTER_ACL_HEADER_SIZE    4
+#define GLUSTER_ACL_ENTRY_SIZE     8
 
-struct gluster_acl_header {
-       uint32_t version;
-       struct gluster_ace entries[];
-};
+#define GLUSTER_ACL_SIZE(n)       (GLUSTER_ACL_HEADER_SIZE + ((n) * GLUSTER_ACL_ENTRY_SIZE))
+
+/*
+ * Build the minimal three-entry ACL (USER_OBJ, GROUP_OBJ, OTHER) from the
+ * permission bits in mode->st_mode. Returns NULL with errno = ENOMEM on
+ * allocation failure.
+ */
+static SMB_ACL_T mode_to_smb_acls(const struct stat *mode, TALLOC_CTX *mem_ctx)
+{
+       struct smb_acl_t *result;
+       int count = 3;
+
+       result = sys_acl_init(mem_ctx);
+       if (!result) {
+               errno = ENOMEM;
+               return NULL;
+       }
+       result->acl = talloc_array(result, struct smb_acl_entry, count);
+       if (!result->acl) {
+               errno = ENOMEM;
+               talloc_free(result);
+               return NULL;
+       }
+       result->count = count;
+
+       result->acl[0].a_type = SMB_ACL_USER_OBJ;
+       result->acl[0].a_perm = (mode->st_mode & S_IRWXU) >> 6;
+       result->acl[1].a_type = SMB_ACL_GROUP_OBJ;
+       result->acl[1].a_perm = (mode->st_mode & S_IRWXG) >> 3;
+       result->acl[2].a_type = SMB_ACL_OTHER;
+       result->acl[2].a_perm = mode->st_mode & S_IRWXO;
+
+       return result;
+}
 
 static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
                                    TALLOC_CTX *mem_ctx)
 {
        int count;
        size_t size;
-       struct gluster_ace *ace;
        struct smb_acl_entry *smb_ace;
-       struct gluster_acl_header *hdr;
        struct smb_acl_t *result;
        int i;
+       int offset;
        uint16_t tag;
        uint16_t perm;
        uint32_t id;
 
        size = xattr_size;
 
-       if (size < sizeof(*hdr)) {
-               /* ACL should be at least as big as the header */
+       if (size < GLUSTER_ACL_HEADER_SIZE) {
+               /* ACL should be at least as big as the header (4 bytes) */
                errno = EINVAL;
                return NULL;
        }
 
-       size -= sizeof(*hdr);
+       size -= GLUSTER_ACL_HEADER_SIZE; /* size of header = 4 bytes */
 
-       if (size % sizeof(*ace)) {
+       if (size % GLUSTER_ACL_ENTRY_SIZE) {
                /* Size of entries must strictly be a multiple of
-                  size of an ACE
+                  size of an ACE (8 bytes)
                */
                errno = EINVAL;
                return NULL;
        }
 
-       count = size / sizeof(*ace);
-
-       hdr = (void *)buf;
+       count = size / GLUSTER_ACL_ENTRY_SIZE;
 
-       if (ntohl(hdr->version) != GLUSTER_ACL_VERSION) {
+       /* Version is the first 4 bytes of the ACL */
+       if (IVAL(buf, 0) != GLUSTER_ACL_VERSION) {
                DEBUG(0, ("Unknown gluster ACL version: %d\n",
-                         ntohl(hdr->version)));
+                         IVAL(buf, 0)));
                return NULL;
        }
+       offset = GLUSTER_ACL_HEADER_SIZE;
 
        result = sys_acl_init(mem_ctx);
        if (!result) {
@@ -1079,10 +1349,19 @@ static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
        result->count = count;
 
        smb_ace = result->acl;
-       ace = hdr->entries;
 
        for (i = 0; i < count; i++) {
-               tag = ntohs(ace->tag);
+               /* TAG is the first 2 bytes of an entry */
+               tag = SVAL(buf, offset);
+               offset += 2;
+
+               /* PERM is the next 2 bytes of an entry */
+               perm = SVAL(buf, offset);
+               offset += 2;
+
+               /* ID is the last 4 bytes of an entry */
+               id = IVAL(buf, offset);
+               offset += 4;
 
                switch(tag) {
                case GLUSTER_ACL_USER:
@@ -1108,7 +1387,6 @@ static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
                        return NULL;
                }
 
-               id = ntohl(ace->id);
 
                switch(smb_ace->a_type) {
                case SMB_ACL_USER:
@@ -1121,8 +1399,6 @@ static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
                        break;
                }
 
-               perm = ntohs(ace->perm);
-
                smb_ace->a_perm = 0;
                smb_ace->a_perm |=
                        ((perm & GLUSTER_ACL_READ) ? SMB_ACL_READ : 0);
@@ -1131,7 +1407,6 @@ static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
                smb_ace->a_perm |=
                        ((perm & GLUSTER_ACL_EXECUTE) ? SMB_ACL_EXECUTE : 0);
 
-               ace++;
                smb_ace++;
        }
 
@@ -1139,21 +1414,54 @@ static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
 }
 
 
+static int gluster_ace_cmp(const void *left, const void *right)
+{
+       uint16_t tag_left, tag_right;
+       uint32_t id_left, id_right;
+
+       /*
+         qsort() comparator for 8-byte gluster ACL entries.
+         Sorting precedence:
+
+          - Smaller TAG values must be earlier.
+
+          - Within same TAG, smaller identifiers must be earlier, E.g:
+            UID 0 entry must be earlier than UID 200
+            GID 17 entry must be earlier than GID 19
+       */
+
+       /* TAG is the first element in the entry */
+       tag_left = SVAL(left, 0);
+       tag_right = SVAL(right, 0);
+       if (tag_left != tag_right) {
+               return (tag_left < tag_right) ? -1 : 1;
+       }
+
+       /* ID is the third element in the entry, after two short
+          integers (tag and perm), i.e. at offset 4.  Compare rather
+          than subtract: a uint32_t difference above INT_MAX would
+          wrap and flip the sign when converted to int. */
+       id_left = IVAL(left, 4);
+       id_right = IVAL(right, 4);
+
+       return (id_left > id_right) - (id_left < id_right);
+}
+
+
 static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
 {
        ssize_t size;
-       struct gluster_ace *ace;
        struct smb_acl_entry *smb_ace;
-       struct gluster_acl_header *hdr;
        int i;
        int count;
        uint16_t tag;
        uint16_t perm;
        uint32_t id;
+       int offset;
 
        count = theacl->count;
 
-       size = sizeof(*hdr) + (count * sizeof(*ace));
+       size = GLUSTER_ACL_HEADER_SIZE + (count * GLUSTER_ACL_ENTRY_SIZE);
        if (!buf) {
                return size;
        }
@@ -1163,13 +1471,14 @@ static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
                return -1;
        }
 
-       hdr = (void *)buf;
-       ace = hdr->entries;
        smb_ace = theacl->acl;
 
-       hdr->version = htonl(GLUSTER_ACL_VERSION);
+       /* Version is the first 4 bytes of the ACL */
+       SIVAL(buf, 0, GLUSTER_ACL_VERSION);
+       offset = GLUSTER_ACL_HEADER_SIZE;
 
        for (i = 0; i < count; i++) {
+               /* Calculate tag */
                switch(smb_ace->a_type) {
                case SMB_ACL_USER:
                        tag = GLUSTER_ACL_USER;
@@ -1196,8 +1505,8 @@ static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
                        return -1;
                }
 
-               ace->tag = ntohs(tag);
 
+               /* Calculate id */
                switch(smb_ace->a_type) {
                case SMB_ACL_USER:
                        id = smb_ace->info.user.uid;
@@ -1210,20 +1519,36 @@ static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
                        break;
                }
 
-               ace->id = ntohl(id);
+               /* Calculate perm */
+               perm = 0;
 
-               ace->perm = 0;
-               ace->perm |=
+               perm |=
                        ((smb_ace->a_perm & SMB_ACL_READ) ? GLUSTER_ACL_READ : 0);
-               ace->perm |=
+               perm |=
                        ((smb_ace->a_perm & SMB_ACL_WRITE) ? GLUSTER_ACL_WRITE : 0);
-               ace->perm |=
+               perm |=
                        ((smb_ace->a_perm & SMB_ACL_EXECUTE) ? GLUSTER_ACL_EXECUTE : 0);
 
-               ace++;
+
+               /* TAG is the first 2 bytes of an entry */
+               SSVAL(buf, offset, tag);
+               offset += 2;
+
+               /* PERM is the next 2 bytes of an entry */
+               SSVAL(buf, offset, perm);
+               offset += 2;
+
+               /* ID is the last 4 bytes of an entry */
+               SIVAL(buf, offset, id);
+               offset += 4;
+
                smb_ace++;
        }
 
+       /* Skip the header, sort @count number of 8-byte entries */
+       qsort(buf+GLUSTER_ACL_HEADER_SIZE, count, GLUSTER_ACL_ENTRY_SIZE,
+             gluster_ace_cmp);
+
        return size;
 }
 
@@ -1234,9 +1559,10 @@ static SMB_ACL_T vfs_gluster_sys_acl_get_file(struct vfs_handle_struct *handle,
                                              TALLOC_CTX *mem_ctx)
 {
        struct smb_acl_t *result;
+       struct stat st;
        char *buf;
-       char *key;
-       ssize_t ret;
+       const char *key;
+       ssize_t ret, size = GLUSTER_ACL_SIZE(20);
 
        switch (type) {
        case SMB_ACL_TYPE_ACCESS:
@@ -1250,13 +1576,34 @@ static SMB_ACL_T vfs_gluster_sys_acl_get_file(struct vfs_handle_struct *handle,
                return NULL;
        }
 
-       ret = glfs_getxattr(handle->data, path_p, key, 0, 0);
-       if (ret <= 0) {
+       buf = alloca(size);
+       if (!buf) {
                return NULL;
        }
 
-       buf = alloca(ret);
-       ret = glfs_getxattr(handle->data, path_p, key, buf, ret);
+       ret = glfs_getxattr(handle->data, path_p, key, buf, size);
+       if (ret == -1 && errno == ERANGE) {
+               ret = glfs_getxattr(handle->data, path_p, key, 0, 0);
+               if (ret > 0) {
+                       buf = alloca(ret);
+                       if (!buf) {
+                               return NULL;
+                       }
+                       ret = glfs_getxattr(handle->data, path_p, key, buf, ret);
+               }
+       }
+
+       /* If the ACL xattr does not exist at all (ENODATA), fall back
+        * to a mode-to-acl mapping built from the stat information */
+
+       if (ret == -1 && errno == ENODATA) {
+               ret = glfs_stat(handle->data, path_p, &st);
+               if (ret == 0) {
+                       result = mode_to_smb_acls(&st, mem_ctx);
+                       return result;
+               }
+       }
+
        if (ret <= 0) {
                return NULL;
        }
@@ -1271,18 +1618,42 @@ static SMB_ACL_T vfs_gluster_sys_acl_get_fd(struct vfs_handle_struct *handle,
                                            TALLOC_CTX *mem_ctx)
 {
        struct smb_acl_t *result;
-       int ret;
+       struct stat st;
+       ssize_t ret, size = GLUSTER_ACL_SIZE(20);
        char *buf;
+       glfs_fd_t *glfd;
 
-       ret = glfs_fgetxattr(glfd_fd_get(fsp->fh->fd),
-                            "system.posix_acl_access", 0, 0);
-       if (ret <= 0) {
+       glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
+
+       buf = alloca(size);
+       if (!buf) {
                return NULL;
        }
 
-       buf = alloca(ret);
-       ret = glfs_fgetxattr(glfd_fd_get(fsp->fh->fd),
-                            "system.posix_acl_access", buf, ret);
+       ret = glfs_fgetxattr(glfd, "system.posix_acl_access", buf, size);
+       if (ret == -1 && errno == ERANGE) {
+               ret = glfs_fgetxattr(glfd, "system.posix_acl_access", 0, 0);
+               if (ret > 0) {
+                       buf = alloca(ret);
+                       if (!buf) {
+                               return NULL;
+                       }
+                       ret = glfs_fgetxattr(glfd, "system.posix_acl_access",
+                                            buf, ret);
+               }
+       }
+
+       /* If the ACL xattr does not exist at all (ENODATA), fall back
+        * to a mode-to-acl mapping built from the fstat information */
+
+       if (ret == -1 && errno == ENODATA) {
+               ret = glfs_fstat(glfd, &st);
+               if (ret == 0) {
+                       result = mode_to_smb_acls(&st, mem_ctx);
+                       return result;
+               }
+       }
+
        if (ret <= 0) {
                return NULL;
        }
@@ -1298,7 +1669,7 @@ static int vfs_gluster_sys_acl_set_file(struct vfs_handle_struct *handle,
                                        SMB_ACL_T theacl)
 {
        int ret;
-       char *key;
+       const char *key;
        char *buf;
        ssize_t size;
 
@@ -1332,7 +1703,6 @@ static int vfs_gluster_sys_acl_set_fd(struct vfs_handle_struct *handle,
                                      SMB_ACL_T theacl)
 {
        int ret;
-       char *key;
        char *buf;
        ssize_t size;
 
@@ -1344,7 +1714,7 @@ static int vfs_gluster_sys_acl_set_fd(struct vfs_handle_struct *handle,
                return -1;
        }
 
-       ret = glfs_fsetxattr(glfd_fd_get(fsp->fh->fd),
+       ret = glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp),
                             "system.posix_acl_access", buf, size, 0);
        return ret;
 }
@@ -1390,11 +1760,11 @@ static struct vfs_fn_pointers glusterfs_fns = {
        .read_fn = vfs_gluster_read,
        .pread_fn = vfs_gluster_pread,
        .pread_send_fn = vfs_gluster_pread_send,
-       .pread_recv_fn = vfs_gluster_pread_recv,
+       .pread_recv_fn = vfs_gluster_recv,
        .write_fn = vfs_gluster_write,
        .pwrite_fn = vfs_gluster_pwrite,
        .pwrite_send_fn = vfs_gluster_pwrite_send,
-       .pwrite_recv_fn = vfs_gluster_pwrite_recv,
+       .pwrite_recv_fn = vfs_gluster_recv,
        .lseek_fn = vfs_gluster_lseek,
        .sendfile_fn = vfs_gluster_sendfile,
        .recvfile_fn = vfs_gluster_recvfile,