vfs_glusterfs: Implement AIO support
[sfrench/samba-autobuild/.git] / source3 / modules / vfs_glusterfs.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Wrap GlusterFS GFAPI calls in vfs functions.
5
6    Copyright (c) 2013 Anand Avati <avati@redhat.com>
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 /**
23  * @file   vfs_glusterfs.c
24  * @author Anand Avati <avati@redhat.com>
25  * @date   May 2013
26  * @brief  Samba VFS module for glusterfs
27  *
28  * @todo
29  *   - sendfile/recvfile support
30  *
31  * A Samba VFS module for GlusterFS, based on Gluster's libgfapi.
32  * This is a "bottom" vfs module (not something to be stacked on top of
33  * another module), and translates (most) calls to the closest actions
34  * available in libgfapi.
35  *
36  */
37
38 #include "includes.h"
39 #include "smbd/smbd.h"
40 #include <stdio.h>
41 #include "api/glfs.h"
42 #include "lib/util/dlinklist.h"
43 #include "lib/util/tevent_unix.h"
44 #ifdef HAVE_SYS_EVENTFD_H
45 #include <sys/eventfd.h>
46 #endif
47 #include <pthread.h>
48 #include "smbd/globals.h"
49
/* Volfile server used when "glusterfs:volfile_server" is not configured. */
#define DEFAULT_VOLFILE_SERVER "localhost"

#ifdef HAVE_EVENTFD
/*
 * State shared between the glusterfs AIO completion callback and the
 * tevent handler that finishes the requests.
 */

/* eventfd used to signal completions; stays -1 until AIO is set up. */
static int event_fd = -1;

/* tevent fd event registered on event_fd. */
static struct tevent_fd *aio_read_event = NULL;

/* Guards both request lists and req_counter. */
static pthread_mutex_t lock_req_list = PTHREAD_MUTEX_INITIALIZER;

/* Completed requests are stored here by the completion callback. */
static struct tevent_req **req_producer_list = NULL;

/* List drained by the tevent handler; swapped with the producer list. */
static struct tevent_req **req_consumer_list = NULL;

/* Requests stored in the producer list since the last swap. */
static uint64_t req_counter = 0;
#endif
60
61 /**
62  * Helper to convert struct stat to struct stat_ex.
63  */
64 static void smb_stat_ex_from_stat(struct stat_ex *dst, const struct stat *src)
65 {
66         ZERO_STRUCTP(dst);
67
68         dst->st_ex_dev = src->st_dev;
69         dst->st_ex_ino = src->st_ino;
70         dst->st_ex_mode = src->st_mode;
71         dst->st_ex_nlink = src->st_nlink;
72         dst->st_ex_uid = src->st_uid;
73         dst->st_ex_gid = src->st_gid;
74         dst->st_ex_rdev = src->st_rdev;
75         dst->st_ex_size = src->st_size;
76         dst->st_ex_atime.tv_sec = src->st_atime;
77         dst->st_ex_mtime.tv_sec = src->st_mtime;
78         dst->st_ex_ctime.tv_sec = src->st_ctime;
79         dst->st_ex_btime.tv_sec = src->st_mtime;
80         dst->st_ex_blksize = src->st_blksize;
81         dst->st_ex_blocks = src->st_blocks;
82 #ifdef STAT_HAVE_NSEC
83         dst->st_ex_atime.tv_nsec = src->st_atime_nsec;
84         dst->st_ex_mtime.tv_nsec = src->st_mtime_nsec;
85         dst->st_ex_ctime.tv_nsec = src->st_ctime_nsec;
86         dst->st_ex_btime.tv_nsec = src->st_mtime_nsec;
87 #endif
88 }
89
90 /* pre-opened glfs_t */
91
92 static struct glfs_preopened {
93         char *volume;
94         char *connectpath;
95         glfs_t *fs;
96         int ref;
97         struct glfs_preopened *next, *prev;
98 } *glfs_preopened;
99
100
101 static int glfs_set_preopened(const char *volume, const char *connectpath, glfs_t *fs)
102 {
103         struct glfs_preopened *entry = NULL;
104
105         entry = talloc_zero(NULL, struct glfs_preopened);
106         if (!entry) {
107                 errno = ENOMEM;
108                 return -1;
109         }
110
111         entry->volume = talloc_strdup(entry, volume);
112         if (!entry->volume) {
113                 talloc_free(entry);
114                 errno = ENOMEM;
115                 return -1;
116         }
117
118         entry->connectpath = talloc_strdup(entry, connectpath);
119         if (entry->connectpath == NULL) {
120                 talloc_free(entry);
121                 errno = ENOMEM;
122                 return -1;
123         }
124
125         entry->fs = fs;
126         entry->ref = 1;
127
128         DLIST_ADD(glfs_preopened, entry);
129
130         return 0;
131 }
132
133 static glfs_t *glfs_find_preopened(const char *volume, const char *connectpath)
134 {
135         struct glfs_preopened *entry = NULL;
136
137         for (entry = glfs_preopened; entry; entry = entry->next) {
138                 if (strcmp(entry->volume, volume) == 0 &&
139                     strcmp(entry->connectpath, connectpath) == 0)
140                 {
141                         entry->ref++;
142                         return entry->fs;
143                 }
144         }
145
146         return NULL;
147 }
148
149 static void glfs_clear_preopened(glfs_t *fs)
150 {
151         struct glfs_preopened *entry = NULL;
152
153         for (entry = glfs_preopened; entry; entry = entry->next) {
154                 if (entry->fs == fs) {
155                         if (--entry->ref)
156                                 return;
157
158                         DLIST_REMOVE(glfs_preopened, entry);
159
160                         glfs_fini(entry->fs);
161                         talloc_free(entry);
162                 }
163         }
164 }
165
166 /* Disk Operations */
167
168 static int vfs_gluster_connect(struct vfs_handle_struct *handle,
169                                const char *service,
170                                const char *user)
171 {
172         const char *volfile_server;
173         const char *volume;
174         char *logfile;
175         int loglevel;
176         glfs_t *fs = NULL;
177         TALLOC_CTX *tmp_ctx;
178         int ret = 0;
179
180         tmp_ctx = talloc_new(NULL);
181         if (tmp_ctx == NULL) {
182                 ret = -1;
183                 goto done;
184         }
185         logfile = lp_parm_talloc_string(tmp_ctx, SNUM(handle->conn), "glusterfs",
186                                        "logfile", NULL);
187
188         loglevel = lp_parm_int(SNUM(handle->conn), "glusterfs", "loglevel", -1);
189
190         volfile_server = lp_parm_const_string(SNUM(handle->conn), "glusterfs",
191                                                "volfile_server", NULL);
192         if (volfile_server == NULL) {
193                 volfile_server = DEFAULT_VOLFILE_SERVER;
194         }
195
196         volume = lp_parm_const_string(SNUM(handle->conn), "glusterfs", "volume",
197                                       NULL);
198         if (volume == NULL) {
199                 volume = service;
200         }
201
202         fs = glfs_find_preopened(volume, handle->conn->connectpath);
203         if (fs) {
204                 goto done;
205         }
206
207         fs = glfs_new(volume);
208         if (fs == NULL) {
209                 ret = -1;
210                 goto done;
211         }
212
213         ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 0);
214         if (ret < 0) {
215                 DEBUG(0, ("Failed to set volfile_server %s\n", volfile_server));
216                 goto done;
217         }
218
219         ret = glfs_set_xlator_option(fs, "*-md-cache", "cache-posix-acl",
220                                      "true");
221         if (ret < 0) {
222                 DEBUG(0, ("%s: Failed to set xlator options\n", volume));
223                 goto done;
224         }
225
226
227         ret = glfs_set_xlator_option(fs, "*-snapview-client",
228                                      "snapdir-entry-path",
229                                      handle->conn->connectpath);
230         if (ret < 0) {
231                 DEBUG(0, ("%s: Failed to set xlator option:"
232                           " snapdir-entry-path\n", volume));
233                 glfs_fini(fs);
234                 return -1;
235         }
236
237         ret = glfs_set_logging(fs, logfile, loglevel);
238         if (ret < 0) {
239                 DEBUG(0, ("%s: Failed to set logfile %s loglevel %d\n",
240                           volume, logfile, loglevel));
241                 goto done;
242         }
243
244         ret = glfs_init(fs);
245         if (ret < 0) {
246                 DEBUG(0, ("%s: Failed to initialize volume (%s)\n",
247                           volume, strerror(errno)));
248                 goto done;
249         }
250
251         ret = glfs_set_preopened(volume, handle->conn->connectpath, fs);
252         if (ret < 0) {
253                 DEBUG(0, ("%s: Failed to register volume (%s)\n",
254                           volume, strerror(errno)));
255                 goto done;
256         }
257 done:
258         talloc_free(tmp_ctx);
259         if (ret < 0) {
260                 if (fs)
261                         glfs_fini(fs);
262                 return -1;
263         } else {
264                 DEBUG(0, ("%s: Initialized volume from server %s\n",
265                          volume, volfile_server));
266                 handle->data = fs;
267                 return 0;
268         }
269 }
270
271 static void vfs_gluster_disconnect(struct vfs_handle_struct *handle)
272 {
273         glfs_t *fs = NULL;
274
275         fs = handle->data;
276
277         glfs_clear_preopened(fs);
278 }
279
280 static uint64_t vfs_gluster_disk_free(struct vfs_handle_struct *handle,
281                                       const char *path, bool small_query,
282                                       uint64_t *bsize_p, uint64_t *dfree_p,
283                                       uint64_t *dsize_p)
284 {
285         struct statvfs statvfs = { 0, };
286         int ret;
287
288         ret = glfs_statvfs(handle->data, path, &statvfs);
289         if (ret < 0) {
290                 return -1;
291         }
292
293         if (bsize_p != NULL) {
294                 *bsize_p = (uint64_t)statvfs.f_bsize; /* Block size */
295         }
296         if (dfree_p != NULL) {
297                 *dfree_p = (uint64_t)statvfs.f_bavail; /* Available Block units */
298         }
299         if (dsize_p != NULL) {
300                 *dsize_p = (uint64_t)statvfs.f_blocks; /* Total Block units */
301         }
302
303         return (uint64_t)statvfs.f_bavail;
304 }
305
306 static int vfs_gluster_get_quota(struct vfs_handle_struct *handle,
307                                  enum SMB_QUOTA_TYPE qtype, unid_t id,
308                                  SMB_DISK_QUOTA *qt)
309 {
310         errno = ENOSYS;
311         return -1;
312 }
313
314 static int
315 vfs_gluster_set_quota(struct vfs_handle_struct *handle,
316                       enum SMB_QUOTA_TYPE qtype, unid_t id, SMB_DISK_QUOTA *qt)
317 {
318         errno = ENOSYS;
319         return -1;
320 }
321
322 static int vfs_gluster_statvfs(struct vfs_handle_struct *handle,
323                                const char *path,
324                                struct vfs_statvfs_struct *vfs_statvfs)
325 {
326         struct statvfs statvfs = { 0, };
327         int ret;
328
329         ret = glfs_statvfs(handle->data, path, &statvfs);
330         if (ret < 0) {
331                 DEBUG(0, ("glfs_statvfs(%s) failed: %s\n",
332                           path, strerror(errno)));
333                 return -1;
334         }
335
336         ZERO_STRUCTP(vfs_statvfs);
337
338         vfs_statvfs->OptimalTransferSize = statvfs.f_frsize;
339         vfs_statvfs->BlockSize = statvfs.f_bsize;
340         vfs_statvfs->TotalBlocks = statvfs.f_blocks;
341         vfs_statvfs->BlocksAvail = statvfs.f_bfree;
342         vfs_statvfs->UserBlocksAvail = statvfs.f_bavail;
343         vfs_statvfs->TotalFileNodes = statvfs.f_files;
344         vfs_statvfs->FreeFileNodes = statvfs.f_ffree;
345         vfs_statvfs->FsIdentifier = statvfs.f_fsid;
346         vfs_statvfs->FsCapabilities =
347             FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
348
349         return ret;
350 }
351
352 static uint32_t vfs_gluster_fs_capabilities(struct vfs_handle_struct *handle,
353                                             enum timestamp_set_resolution *p_ts_res)
354 {
355         uint32_t caps = FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
356
357 #ifdef STAT_HAVE_NSEC
358         *p_ts_res = TIMESTAMP_SET_NT_OR_BETTER;
359 #endif
360
361         return caps;
362 }
363
364 static DIR *vfs_gluster_opendir(struct vfs_handle_struct *handle,
365                                 const char *path, const char *mask,
366                                 uint32 attributes)
367 {
368         glfs_fd_t *fd;
369
370         fd = glfs_opendir(handle->data, path);
371         if (fd == NULL) {
372                 DEBUG(0, ("glfs_opendir(%s) failed: %s\n",
373                           path, strerror(errno)));
374         }
375
376         return (DIR *) fd;
377 }
378
379 static DIR *vfs_gluster_fdopendir(struct vfs_handle_struct *handle,
380                                   files_struct *fsp, const char *mask,
381                                   uint32 attributes)
382 {
383         return (DIR *) *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
384 }
385
386 static int vfs_gluster_closedir(struct vfs_handle_struct *handle, DIR *dirp)
387 {
388         return glfs_closedir((void *)dirp);
389 }
390
391 static struct dirent *vfs_gluster_readdir(struct vfs_handle_struct *handle,
392                                           DIR *dirp, SMB_STRUCT_STAT *sbuf)
393 {
394         static char direntbuf[512];
395         int ret;
396         struct stat stat;
397         struct dirent *dirent = 0;
398
399         if (sbuf != NULL) {
400                 ret = glfs_readdirplus_r((void *)dirp, &stat, (void *)direntbuf,
401                                          &dirent);
402         } else {
403                 ret = glfs_readdir_r((void *)dirp, (void *)direntbuf, &dirent);
404         }
405
406         if ((ret < 0) || (dirent == NULL)) {
407                 return NULL;
408         }
409
410         if (sbuf != NULL) {
411                 smb_stat_ex_from_stat(sbuf, &stat);
412         }
413
414         return dirent;
415 }
416
417 static long vfs_gluster_telldir(struct vfs_handle_struct *handle, DIR *dirp)
418 {
419         return glfs_telldir((void *)dirp);
420 }
421
422 static void vfs_gluster_seekdir(struct vfs_handle_struct *handle, DIR *dirp,
423                                 long offset)
424 {
425         glfs_seekdir((void *)dirp, offset);
426 }
427
428 static void vfs_gluster_rewinddir(struct vfs_handle_struct *handle, DIR *dirp)
429 {
430         glfs_seekdir((void *)dirp, 0);
431 }
432
/*
 * Nothing needs to be prepared before a search: intentionally a no-op.
 */
static void vfs_gluster_init_search_op(struct vfs_handle_struct *handle,
				       DIR *dirp)
{
}
438
439 static int vfs_gluster_mkdir(struct vfs_handle_struct *handle, const char *path,
440                              mode_t mode)
441 {
442         return glfs_mkdir(handle->data, path, mode);
443 }
444
445 static int vfs_gluster_rmdir(struct vfs_handle_struct *handle, const char *path)
446 {
447         return glfs_rmdir(handle->data, path);
448 }
449
450 static int vfs_gluster_open(struct vfs_handle_struct *handle,
451                             struct smb_filename *smb_fname, files_struct *fsp,
452                             int flags, mode_t mode)
453 {
454         glfs_fd_t *glfd;
455         glfs_fd_t **p_tmp;
456
457         if (flags & O_DIRECTORY) {
458                 glfd = glfs_opendir(handle->data, smb_fname->base_name);
459         } else if (flags & O_CREAT) {
460                 glfd = glfs_creat(handle->data, smb_fname->base_name, flags,
461                                   mode);
462         } else {
463                 glfd = glfs_open(handle->data, smb_fname->base_name, flags);
464         }
465
466         if (glfd == NULL) {
467                 return -1;
468         }
469         p_tmp = (glfs_fd_t **)VFS_ADD_FSP_EXTENSION(handle, fsp,
470                                                           glfs_fd_t *, NULL);
471         *p_tmp = glfd;
472         /* An arbitrary value for error reporting, so you know its us. */
473         return 13371337;
474 }
475
476 static int vfs_gluster_close(struct vfs_handle_struct *handle,
477                              files_struct *fsp)
478 {
479         glfs_fd_t *glfd;
480         glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
481         VFS_REMOVE_FSP_EXTENSION(handle, fsp);
482         return glfs_close(glfd);
483 }
484
485 static ssize_t vfs_gluster_read(struct vfs_handle_struct *handle,
486                                 files_struct *fsp, void *data, size_t n)
487 {
488         return glfs_read(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
489 }
490
491 static ssize_t vfs_gluster_pread(struct vfs_handle_struct *handle,
492                                  files_struct *fsp, void *data, size_t n,
493                                  off_t offset)
494 {
495         return glfs_pread(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
496 }
497
/*
 * Per-request state of an asynchronous gluster operation, filled in by
 * the completion callback and read back by the recv function.
 */
struct glusterfs_aio_state {
	ssize_t ret;	/* result of the operation, -1 on failure */
	int err;	/* errno value when ret is -1, otherwise 0 */
};
502
503 /*
504  * This function is the callback that will be called on glusterfs
505  * threads once the async IO submitted is complete. To notify
506  * Samba of the completion we use eventfd mechanism.
507  */
508 static void aio_glusterfs_done(glfs_fd_t *fd, ssize_t ret, void *data)
509 {
510 #if HAVE_EVENTFD
511         struct tevent_req *req = NULL;
512         struct glusterfs_aio_state *state = NULL;
513         int i, sts = 0;
514         uint64_t u = 1;
515
516         req = talloc_get_type_abort(data, struct tevent_req);
517         state = tevent_req_data(req, struct glusterfs_aio_state);
518
519         if (ret < 0) {
520                 state->ret = -1;
521                 state->err = errno;
522         } else {
523                 state->ret = ret;
524                 state->err = 0;
525         }
526
527         /*
528          * Store the reqs that needs to be completed by calling
529          * tevent_req_done(). tevent_req_done() cannot be called
530          * here, as it is not designed to be executed in the
531          * multithread environment, tevent_req_done() should be
532          * executed from the smbd main thread.
533          */
534         pthread_mutex_lock (&lock_req_list);
535         {
536                 for (i = 0 ; i < aio_pending_size ; i++) {
537                         if(!req_producer_list[i]) {
538                                 req_producer_list[i] = req;
539                                 req_counter = req_counter + 1;
540                                 break;
541                         }
542                 }
543         }
544         pthread_mutex_unlock (&lock_req_list);
545
546         /*
547          * For a bunch of fops notify only once
548          */
549         if (req_counter == 1) {
550                 sts = write (event_fd, &u, sizeof(uint64_t));
551                 if (sts < 0 && errno == EAGAIN)
552                         DEBUG(0,("\nWRITE: reached max value"));
553         }
554         return;
555 #endif
556 }
557
558 #ifdef HAVE_EVENTFD
559 static void aio_tevent_fd_done(struct tevent_context *event_ctx,
560                                 struct tevent_fd *fde,
561                                 uint16 flags, void *data)
562 {
563         struct tevent_req *req = NULL;
564         struct tevent_req **temp = NULL;
565         int i = 0, sts = 0;
566         uint64_t u = 0;
567
568         sts = read (event_fd, &u, sizeof(uint64_t));
569         if (sts < 0 && errno == EAGAIN)
570                 DEBUG(0,("\nREAD: eventfd read failed (%s)",strerror(errno)));
571
572         pthread_mutex_lock (&lock_req_list);
573         {
574                 temp = req_producer_list;
575                 req_producer_list = req_consumer_list;
576                 req_consumer_list = temp;
577                 req_counter = 0;
578         }
579         pthread_mutex_unlock (&lock_req_list);
580
581         for (i = 0 ; i < aio_pending_size ; i++) {
582                 req = req_consumer_list[i];
583                 if (req) {
584                         tevent_req_done(req);
585                         req_consumer_list[i] = 0;
586                 }
587         }
588         return;
589 }
590 #endif
591
/*
 * Set up the AIO completion machinery on first use: the eventfd, the
 * two request lists and the tevent fd handler.
 *
 * Returns true when AIO is ready (including when it was already set
 * up). Returns false when any step fails, after undoing the partial
 * setup, and always when eventfd support is not compiled in.
 */
static bool init_gluster_aio(struct vfs_handle_struct *handle)
{
#ifdef HAVE_EVENTFD
	if (event_fd != -1) {
		/*
		 * Already initialized.
		 */
		return true;
	}

	event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (event_fd == -1) {
		goto fail;
	}

	/*
	 * Allocate the lists before the handler is registered and check
	 * both results: the callbacks index into them unconditionally.
	 */
	req_producer_list = talloc_zero_array(NULL, struct tevent_req *,
						aio_pending_size);
	req_consumer_list = talloc_zero_array(NULL, struct tevent_req *,
						aio_pending_size);
	if (req_producer_list == NULL || req_consumer_list == NULL) {
		goto fail;
	}

	aio_read_event = tevent_add_fd(handle->conn->sconn->ev_ctx,
					NULL,
					event_fd,
					TEVENT_FD_READ,
					aio_tevent_fd_done,
					NULL);
	if (aio_read_event == NULL) {
		goto fail;
	}

	return true;
fail:
	TALLOC_FREE(aio_read_event);
	TALLOC_FREE(req_producer_list);
	TALLOC_FREE(req_consumer_list);
	if (event_fd != -1) {
		close(event_fd);
		event_fd = -1;
	}
#endif
	return false;
}
632
633 static struct tevent_req *vfs_gluster_pread_send(struct vfs_handle_struct
634                                                  *handle, TALLOC_CTX *mem_ctx,
635                                                  struct tevent_context *ev,
636                                                  files_struct *fsp, void *data,
637                                                  size_t n, off_t offset)
638 {
639         struct tevent_req *req = NULL;
640         struct glusterfs_aio_state *state = NULL;
641         int ret = 0;
642
643 #ifndef HAVE_EVENTFD
644         errno = ENOTSUP;
645         return NULL;
646 #endif
647
648         req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
649         if (req == NULL) {
650                 return NULL;
651         }
652
653         if (!init_gluster_aio(handle)) {
654                 tevent_req_error(req, EIO);
655                 return tevent_req_post(req, ev);
656         }
657         ret = glfs_pread_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
658                                 fsp), data, n, offset, 0, aio_glusterfs_done,
659                                 req);
660         if (ret < 0) {
661                 tevent_req_error(req, -ret);
662                 return tevent_req_post(req, ev);
663         }
664
665         return req;
666 }
667
668 static ssize_t vfs_gluster_write(struct vfs_handle_struct *handle,
669                                  files_struct *fsp, const void *data, size_t n)
670 {
671         return glfs_write(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
672 }
673
674 static ssize_t vfs_gluster_pwrite(struct vfs_handle_struct *handle,
675                                   files_struct *fsp, const void *data,
676                                   size_t n, off_t offset)
677 {
678         return glfs_pwrite(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
679 }
680
681 static struct tevent_req *vfs_gluster_pwrite_send(struct vfs_handle_struct
682                                                   *handle, TALLOC_CTX *mem_ctx,
683                                                   struct tevent_context *ev,
684                                                   files_struct *fsp,
685                                                   const void *data, size_t n,
686                                                   off_t offset)
687 {
688         struct tevent_req *req = NULL;
689         struct glusterfs_aio_state *state = NULL;
690         int ret = 0;
691
692 #ifndef HAVE_EVENTFD
693         errno = ENOTSUP;
694         return NULL;
695 #endif
696
697         req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
698         if (req == NULL) {
699                 return NULL;
700         }
701         if (!init_gluster_aio(handle)) {
702                 tevent_req_error(req, EIO);
703                 return tevent_req_post(req, ev);
704         }
705         ret = glfs_pwrite_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
706                                 fsp), data, n, offset, 0, aio_glusterfs_done,
707                                 req);
708         if (ret < 0) {
709                 tevent_req_error(req, -ret);
710                 return tevent_req_post(req, ev);
711         }
712         return req;
713 }
714
715 static ssize_t vfs_gluster_recv(struct tevent_req *req, int *err)
716 {
717         struct glusterfs_aio_state *state = NULL;
718
719 #ifndef HAVE_EVENTFD
720         errno = ENOTSUP;
721         return -1;
722 #endif
723         state = tevent_req_data(req, struct glusterfs_aio_state);
724         if (state == NULL) {
725                 return -1;
726         }
727
728         if (tevent_req_is_unix_error(req, err)) {
729                 return -1;
730         }
731         if (state->ret == -1) {
732                 *err = state->err;
733         }
734         return state->ret;
735 }
736
737 static off_t vfs_gluster_lseek(struct vfs_handle_struct *handle,
738                                files_struct *fsp, off_t offset, int whence)
739 {
740         return glfs_lseek(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset, whence);
741 }
742
743 static ssize_t vfs_gluster_sendfile(struct vfs_handle_struct *handle, int tofd,
744                                     files_struct *fromfsp,
745                                     const DATA_BLOB *hdr,
746                                     off_t offset, size_t n)
747 {
748         errno = ENOTSUP;
749         return -1;
750 }
751
752 static ssize_t vfs_gluster_recvfile(struct vfs_handle_struct *handle,
753                                     int fromfd, files_struct *tofsp,
754                                     off_t offset, size_t n)
755 {
756         errno = ENOTSUP;
757         return -1;
758 }
759
760 static int vfs_gluster_rename(struct vfs_handle_struct *handle,
761                               const struct smb_filename *smb_fname_src,
762                               const struct smb_filename *smb_fname_dst)
763 {
764         return glfs_rename(handle->data, smb_fname_src->base_name,
765                            smb_fname_dst->base_name);
766 }
767
768 static int vfs_gluster_fsync(struct vfs_handle_struct *handle,
769                              files_struct *fsp)
770 {
771         return glfs_fsync(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp));
772 }
773
774 static struct tevent_req *vfs_gluster_fsync_send(struct vfs_handle_struct
775                                                  *handle, TALLOC_CTX *mem_ctx,
776                                                  struct tevent_context *ev,
777                                                  files_struct *fsp)
778 {
779         struct tevent_req *req = NULL;
780         struct glusterfs_aio_state *state = NULL;
781         int ret = 0;
782
783 #ifndef HAVE_EVENTFD
784         errno = ENOTSUP;
785         return NULL;
786 #endif
787
788         req = tevent_req_create(mem_ctx, &state, struct glusterfs_aio_state);
789         if (req == NULL) {
790                 return NULL;
791         }
792         if (!init_gluster_aio(handle)) {
793                 tevent_req_error(req, EIO);
794                 return tevent_req_post(req, ev);
795         }
796         ret = glfs_fsync_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
797                                 fsp), aio_glusterfs_done, req);
798         if (ret < 0) {
799                 tevent_req_error(req, -ret);
800                 return tevent_req_post(req, ev);
801         }
802         return req;
803 }
804
/*
 * Collect the result of an asynchronous fsync. Shares the generic recv
 * logic; its ssize_t result is narrowed to int on return.
 */
static int vfs_gluster_fsync_recv(struct tevent_req *req, int *err)
{
	ssize_t result = vfs_gluster_recv(req, err);

	/*
	 * Use implicit conversion ssize_t->int
	 */
	return result;
}
812
813 static int vfs_gluster_stat(struct vfs_handle_struct *handle,
814                             struct smb_filename *smb_fname)
815 {
816         struct stat st;
817         int ret;
818
819         ret = glfs_stat(handle->data, smb_fname->base_name, &st);
820         if (ret == 0) {
821                 smb_stat_ex_from_stat(&smb_fname->st, &st);
822         }
823         if (ret < 0 && errno != ENOENT) {
824                 DEBUG(0, ("glfs_stat(%s) failed: %s\n",
825                           smb_fname->base_name, strerror(errno)));
826         }
827         return ret;
828 }
829
830 static int vfs_gluster_fstat(struct vfs_handle_struct *handle,
831                              files_struct *fsp, SMB_STRUCT_STAT *sbuf)
832 {
833         struct stat st;
834         int ret;
835
836         ret = glfs_fstat(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), &st);
837         if (ret == 0) {
838                 smb_stat_ex_from_stat(sbuf, &st);
839         }
840         if (ret < 0) {
841                 DEBUG(0, ("glfs_fstat(%d) failed: %s\n",
842                           fsp->fh->fd, strerror(errno)));
843         }
844         return ret;
845 }
846
847 static int vfs_gluster_lstat(struct vfs_handle_struct *handle,
848                              struct smb_filename *smb_fname)
849 {
850         struct stat st;
851         int ret;
852
853         ret = glfs_lstat(handle->data, smb_fname->base_name, &st);
854         if (ret == 0) {
855                 smb_stat_ex_from_stat(&smb_fname->st, &st);
856         }
857         if (ret < 0 && errno != ENOENT) {
858                 DEBUG(0, ("glfs_lstat(%s) failed: %s\n",
859                           smb_fname->base_name, strerror(errno)));
860         }
861         return ret;
862 }
863
864 static uint64_t vfs_gluster_get_alloc_size(struct vfs_handle_struct *handle,
865                                            files_struct *fsp,
866                                            const SMB_STRUCT_STAT *sbuf)
867 {
868         return sbuf->st_ex_blocks * 512;
869 }
870
871 static int vfs_gluster_unlink(struct vfs_handle_struct *handle,
872                               const struct smb_filename *smb_fname)
873 {
874         return glfs_unlink(handle->data, smb_fname->base_name);
875 }
876
877 static int vfs_gluster_chmod(struct vfs_handle_struct *handle,
878                              const char *path, mode_t mode)
879 {
880         return glfs_chmod(handle->data, path, mode);
881 }
882
883 static int vfs_gluster_fchmod(struct vfs_handle_struct *handle,
884                               files_struct *fsp, mode_t mode)
885 {
886         return glfs_fchmod(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), mode);
887 }
888
889 static int vfs_gluster_chown(struct vfs_handle_struct *handle,
890                              const char *path, uid_t uid, gid_t gid)
891 {
892         return glfs_chown(handle->data, path, uid, gid);
893 }
894
895 static int vfs_gluster_fchown(struct vfs_handle_struct *handle,
896                               files_struct *fsp, uid_t uid, gid_t gid)
897 {
898         return glfs_fchown(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), uid, gid);
899 }
900
901 static int vfs_gluster_lchown(struct vfs_handle_struct *handle,
902                               const char *path, uid_t uid, gid_t gid)
903 {
904         return glfs_lchown(handle->data, path, uid, gid);
905 }
906
907 static int vfs_gluster_chdir(struct vfs_handle_struct *handle, const char *path)
908 {
909         return glfs_chdir(handle->data, path);
910 }
911
912 static char *vfs_gluster_getwd(struct vfs_handle_struct *handle)
913 {
914         char *cwd;
915         char *ret;
916
917         cwd = SMB_CALLOC_ARRAY(char, PATH_MAX);
918         if (cwd == NULL) {
919                 return NULL;
920         }
921
922         ret = glfs_getcwd(handle->data, cwd, PATH_MAX - 1);
923         if (ret == 0) {
924                 free(cwd);
925         }
926         return ret;
927 }
928
929 static int vfs_gluster_ntimes(struct vfs_handle_struct *handle,
930                               const struct smb_filename *smb_fname,
931                               struct smb_file_time *ft)
932 {
933         struct timespec times[2];
934
935         if (null_timespec(ft->atime)) {
936                 times[0].tv_sec = smb_fname->st.st_ex_atime.tv_sec;
937                 times[0].tv_nsec = smb_fname->st.st_ex_atime.tv_nsec;
938         } else {
939                 times[0].tv_sec = ft->atime.tv_sec;
940                 times[0].tv_nsec = ft->atime.tv_nsec;
941         }
942
943         if (null_timespec(ft->mtime)) {
944                 times[1].tv_sec = smb_fname->st.st_ex_mtime.tv_sec;
945                 times[1].tv_nsec = smb_fname->st.st_ex_mtime.tv_nsec;
946         } else {
947                 times[1].tv_sec = ft->mtime.tv_sec;
948                 times[1].tv_nsec = ft->mtime.tv_nsec;
949         }
950
951         if ((timespec_compare(&times[0],
952                               &smb_fname->st.st_ex_atime) == 0) &&
953             (timespec_compare(&times[1],
954                               &smb_fname->st.st_ex_mtime) == 0)) {
955                 return 0;
956         }
957
958         return glfs_utimens(handle->data, smb_fname->base_name, times);
959 }
960
961 static int vfs_gluster_ftruncate(struct vfs_handle_struct *handle,
962                                  files_struct *fsp, off_t offset)
963 {
964         return glfs_ftruncate(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset);
965 }
966
967 static int vfs_gluster_fallocate(struct vfs_handle_struct *handle,
968                                  struct files_struct *fsp,
969                                  enum vfs_fallocate_mode mode,
970                                  off_t offset, off_t len)
971 {
972         errno = ENOTSUP;
973         return -1;
974 }
975
976 static char *vfs_gluster_realpath(struct vfs_handle_struct *handle,
977                                   const char *path)
978 {
979         return glfs_realpath(handle->data, path, 0);
980 }
981
982 static bool vfs_gluster_lock(struct vfs_handle_struct *handle,
983                              files_struct *fsp, int op, off_t offset,
984                              off_t count, int type)
985 {
986         struct flock flock = { 0, };
987         int ret;
988
989         flock.l_type = type;
990         flock.l_whence = SEEK_SET;
991         flock.l_start = offset;
992         flock.l_len = count;
993         flock.l_pid = 0;
994
995         ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), op, &flock);
996
997         if (op == F_GETLK) {
998                 /* lock query, true if someone else has locked */
999                 if ((ret != -1) &&
1000                     (flock.l_type != F_UNLCK) &&
1001                     (flock.l_pid != 0) && (flock.l_pid != getpid()))
1002                         return true;
1003                 /* not me */
1004                 return false;
1005         }
1006
1007         if (ret == -1) {
1008                 return false;
1009         }
1010
1011         return true;
1012 }
1013
1014 static int vfs_gluster_kernel_flock(struct vfs_handle_struct *handle,
1015                                     files_struct *fsp, uint32 share_mode,
1016                                     uint32_t access_mask)
1017 {
1018         errno = ENOSYS;
1019         return -1;
1020 }
1021
1022 static int vfs_gluster_linux_setlease(struct vfs_handle_struct *handle,
1023                                       files_struct *fsp, int leasetype)
1024 {
1025         errno = ENOSYS;
1026         return -1;
1027 }
1028
1029 static bool vfs_gluster_getlock(struct vfs_handle_struct *handle,
1030                                 files_struct *fsp, off_t *poffset,
1031                                 off_t *pcount, int *ptype, pid_t *ppid)
1032 {
1033         struct flock flock = { 0, };
1034         int ret;
1035
1036         flock.l_type = *ptype;
1037         flock.l_whence = SEEK_SET;
1038         flock.l_start = *poffset;
1039         flock.l_len = *pcount;
1040         flock.l_pid = 0;
1041
1042         ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), F_GETLK, &flock);
1043
1044         if (ret == -1) {
1045                 return false;
1046         }
1047
1048         *ptype = flock.l_type;
1049         *poffset = flock.l_start;
1050         *pcount = flock.l_len;
1051         *ppid = flock.l_pid;
1052
1053         return true;
1054 }
1055
1056 static int vfs_gluster_symlink(struct vfs_handle_struct *handle,
1057                                const char *oldpath, const char *newpath)
1058 {
1059         return glfs_symlink(handle->data, oldpath, newpath);
1060 }
1061
1062 static int vfs_gluster_readlink(struct vfs_handle_struct *handle,
1063                                 const char *path, char *buf, size_t bufsiz)
1064 {
1065         return glfs_readlink(handle->data, path, buf, bufsiz);
1066 }
1067
1068 static int vfs_gluster_link(struct vfs_handle_struct *handle,
1069                             const char *oldpath, const char *newpath)
1070 {
1071         return glfs_link(handle->data, oldpath, newpath);
1072 }
1073
1074 static int vfs_gluster_mknod(struct vfs_handle_struct *handle, const char *path,
1075                              mode_t mode, SMB_DEV_T dev)
1076 {
1077         return glfs_mknod(handle->data, path, mode, dev);
1078 }
1079
1080 static NTSTATUS vfs_gluster_notify_watch(struct vfs_handle_struct *handle,
1081                                          struct sys_notify_context *ctx,
1082                                          const char *path, uint32_t *filter,
1083                                          uint32_t *subdir_filter,
1084                                          void (*callback) (struct sys_notify_context *ctx,
1085                                                            void *private_data,
1086                                                            struct notify_event *ev),
1087                                          void *private_data, void *handle_p)
1088 {
1089         return NT_STATUS_NOT_IMPLEMENTED;
1090 }
1091
/**
 * chflags hook (unsupported).
 *
 * Always sets errno to ENOSYS and returns -1 without looking at any
 * argument.
 */
static int vfs_gluster_chflags(struct vfs_handle_struct *handle,
                               const char *path, unsigned int flags)
{
        const int failure = -1;

        errno = ENOSYS;
        return failure;
}
1098
1099 static int vfs_gluster_get_real_filename(struct vfs_handle_struct *handle,
1100                                          const char *path, const char *name,
1101                                          TALLOC_CTX *mem_ctx, char **found_name)
1102 {
1103         int ret;
1104         char key_buf[NAME_MAX + 64];
1105         char val_buf[NAME_MAX + 1];
1106
1107         if (strlen(name) >= NAME_MAX) {
1108                 errno = ENAMETOOLONG;
1109                 return -1;
1110         }
1111
1112         snprintf(key_buf, NAME_MAX + 64,
1113                  "user.glusterfs.get_real_filename:%s", name);
1114
1115         ret = glfs_getxattr(handle->data, path, key_buf, val_buf, NAME_MAX + 1);
1116         if (ret == -1) {
1117                 if (errno == ENODATA) {
1118                         errno = EOPNOTSUPP;
1119                 }
1120                 return -1;
1121         }
1122
1123         *found_name = talloc_strdup(mem_ctx, val_buf);
1124         if (found_name[0] == NULL) {
1125                 errno = ENOMEM;
1126                 return -1;
1127         }
1128         return 0;
1129 }
1130
1131 static const char *vfs_gluster_connectpath(struct vfs_handle_struct *handle,
1132                                            const char *filename)
1133 {
1134         return handle->conn->connectpath;
1135 }
1136
1137 /* EA Operations */
1138
1139 static ssize_t vfs_gluster_getxattr(struct vfs_handle_struct *handle,
1140                                     const char *path, const char *name,
1141                                     void *value, size_t size)
1142 {
1143         return glfs_getxattr(handle->data, path, name, value, size);
1144 }
1145
1146 static ssize_t vfs_gluster_fgetxattr(struct vfs_handle_struct *handle,
1147                                      files_struct *fsp, const char *name,
1148                                      void *value, size_t size)
1149 {
1150         return glfs_fgetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size);
1151 }
1152
1153 static ssize_t vfs_gluster_listxattr(struct vfs_handle_struct *handle,
1154                                      const char *path, char *list, size_t size)
1155 {
1156         return glfs_listxattr(handle->data, path, list, size);
1157 }
1158
1159 static ssize_t vfs_gluster_flistxattr(struct vfs_handle_struct *handle,
1160                                       files_struct *fsp, char *list,
1161                                       size_t size)
1162 {
1163         return glfs_flistxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), list, size);
1164 }
1165
1166 static int vfs_gluster_removexattr(struct vfs_handle_struct *handle,
1167                                    const char *path, const char *name)
1168 {
1169         return glfs_removexattr(handle->data, path, name);
1170 }
1171
1172 static int vfs_gluster_fremovexattr(struct vfs_handle_struct *handle,
1173                                     files_struct *fsp, const char *name)
1174 {
1175         return glfs_fremovexattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name);
1176 }
1177
1178 static int vfs_gluster_setxattr(struct vfs_handle_struct *handle,
1179                                 const char *path, const char *name,
1180                                 const void *value, size_t size, int flags)
1181 {
1182         return glfs_setxattr(handle->data, path, name, value, size, flags);
1183 }
1184
1185 static int vfs_gluster_fsetxattr(struct vfs_handle_struct *handle,
1186                                  files_struct *fsp, const char *name,
1187                                  const void *value, size_t size, int flags)
1188 {
1189         return glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size,
1190                               flags);
1191 }
1192
1193 /* AIO Operations */
1194
1195 static bool vfs_gluster_aio_force(struct vfs_handle_struct *handle,
1196                                   files_struct *fsp)
1197 {
1198         return false;
1199 }
1200
1201 /* Offline Operations */
1202
1203 static bool vfs_gluster_is_offline(struct vfs_handle_struct *handle,
1204                                    const struct smb_filename *fname,
1205                                    SMB_STRUCT_STAT *sbuf)
1206 {
1207         return false;
1208 }
1209
/**
 * set_offline hook (unsupported).
 *
 * Always sets errno to ENOTSUP and returns -1 without looking at any
 * argument.
 */
static int vfs_gluster_set_offline(struct vfs_handle_struct *handle,
                                   const struct smb_filename *fname)
{
        const int failure = -1;

        errno = ENOTSUP;
        return failure;
}
1216
1217 /*
1218   Gluster ACL Format:
1219
1220   Size = 4 (header) + N * 8 (entry)
1221
1222   Offset  Size    Field (Little Endian)
1223   -------------------------------------
1224   0-3     4-byte  Version
1225
1226   4-5     2-byte  Entry-1 tag
1227   6-7     2-byte  Entry-1 perm
1228   8-11    4-byte  Entry-1 id
1229
1230   12-13   2-byte  Entry-2 tag
1231   14-15   2-byte  Entry-2 perm
1232   16-19   4-byte  Entry-2 id
1233
1234   ...
1235
1236  */
1237
/* header version */
#define GLUSTER_ACL_VERSION 2

/* perm bits */
#define GLUSTER_ACL_READ    0x04
#define GLUSTER_ACL_WRITE   0x02
#define GLUSTER_ACL_EXECUTE 0x01

/* tag values */
#define GLUSTER_ACL_UNDEFINED_TAG  0x00
#define GLUSTER_ACL_USER_OBJ       0x01
#define GLUSTER_ACL_USER           0x02
#define GLUSTER_ACL_GROUP_OBJ      0x04
#define GLUSTER_ACL_GROUP          0x08
#define GLUSTER_ACL_MASK           0x10
#define GLUSTER_ACL_OTHER          0x20

/* id stored for entries whose tag carries no uid/gid */
#define GLUSTER_ACL_UNDEFINED_ID  (-1)

/* sizes in bytes of the version header and of one entry */
#define GLUSTER_ACL_HEADER_SIZE    4
#define GLUSTER_ACL_ENTRY_SIZE     8

/*
 * Total size in bytes of an ACL holding n entries.  The argument is
 * parenthesized so that expressions such as GLUSTER_ACL_SIZE(a + b)
 * expand correctly.
 */
#define GLUSTER_ACL_SIZE(n)       (GLUSTER_ACL_HEADER_SIZE + ((n) * GLUSTER_ACL_ENTRY_SIZE))
1261
1262 static SMB_ACL_T mode_to_smb_acls(const struct stat *mode, TALLOC_CTX *mem_ctx)
1263 {
1264         struct smb_acl_t *result;
1265         int count;
1266
1267         count = 3;
1268         result = sys_acl_init(mem_ctx);
1269         if (!result) {
1270                 errno = ENOMEM;
1271                 return NULL;
1272         }
1273
1274         result->acl = talloc_array(result, struct smb_acl_entry, count);
1275         if (!result->acl) {
1276                 errno = ENOMEM;
1277                 talloc_free(result);
1278                 return NULL;
1279         }
1280
1281         result->count = count;
1282
1283         result->acl[0].a_type = SMB_ACL_USER_OBJ;
1284         result->acl[0].a_perm = (mode->st_mode & S_IRWXU) >> 6;;
1285
1286         result->acl[1].a_type = SMB_ACL_GROUP_OBJ;
1287         result->acl[1].a_perm = (mode->st_mode & S_IRWXG) >> 3;;
1288
1289         result->acl[2].a_type = SMB_ACL_OTHER;
1290         result->acl[2].a_perm = mode->st_mode & S_IRWXO;;
1291
1292         return result;
1293 }
1294
1295 static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
1296                                     TALLOC_CTX *mem_ctx)
1297 {
1298         int count;
1299         size_t size;
1300         struct smb_acl_entry *smb_ace;
1301         struct smb_acl_t *result;
1302         int i;
1303         int offset;
1304         uint16_t tag;
1305         uint16_t perm;
1306         uint32_t id;
1307
1308         size = xattr_size;
1309
1310         if (size < GLUSTER_ACL_HEADER_SIZE) {
1311                 /* ACL should be at least as big as the header (4 bytes) */
1312                 errno = EINVAL;
1313                 return NULL;
1314         }
1315
1316         size -= GLUSTER_ACL_HEADER_SIZE; /* size of header = 4 bytes */
1317
1318         if (size % GLUSTER_ACL_ENTRY_SIZE) {
1319                 /* Size of entries must strictly be a multiple of
1320                    size of an ACE (8 bytes)
1321                 */
1322                 errno = EINVAL;
1323                 return NULL;
1324         }
1325
1326         count = size / GLUSTER_ACL_ENTRY_SIZE;
1327
1328         /* Version is the first 4 bytes of the ACL */
1329         if (IVAL(buf, 0) != GLUSTER_ACL_VERSION) {
1330                 DEBUG(0, ("Unknown gluster ACL version: %d\n",
1331                           IVAL(buf, 0)));
1332                 return NULL;
1333         }
1334         offset = GLUSTER_ACL_HEADER_SIZE;
1335
1336         result = sys_acl_init(mem_ctx);
1337         if (!result) {
1338                 errno = ENOMEM;
1339                 return NULL;
1340         }
1341
1342         result->acl = talloc_array(result, struct smb_acl_entry, count);
1343         if (!result->acl) {
1344                 errno = ENOMEM;
1345                 talloc_free(result);
1346                 return NULL;
1347         }
1348
1349         result->count = count;
1350
1351         smb_ace = result->acl;
1352
1353         for (i = 0; i < count; i++) {
1354                 /* TAG is the first 2 bytes of an entry */
1355                 tag = SVAL(buf, offset);
1356                 offset += 2;
1357
1358                 /* PERM is the next 2 bytes of an entry */
1359                 perm = SVAL(buf, offset);
1360                 offset += 2;
1361
1362                 /* ID is the last 4 bytes of an entry */
1363                 id = IVAL(buf, offset);
1364                 offset += 4;
1365
1366                 switch(tag) {
1367                 case GLUSTER_ACL_USER:
1368                         smb_ace->a_type = SMB_ACL_USER;
1369                         break;
1370                 case GLUSTER_ACL_USER_OBJ:
1371                         smb_ace->a_type = SMB_ACL_USER_OBJ;
1372                         break;
1373                 case GLUSTER_ACL_GROUP:
1374                         smb_ace->a_type = SMB_ACL_GROUP;
1375                         break;
1376                 case GLUSTER_ACL_GROUP_OBJ:
1377                         smb_ace->a_type = SMB_ACL_GROUP_OBJ;
1378                         break;
1379                 case GLUSTER_ACL_OTHER:
1380                         smb_ace->a_type = SMB_ACL_OTHER;
1381                         break;
1382                 case GLUSTER_ACL_MASK:
1383                         smb_ace->a_type = SMB_ACL_MASK;
1384                         break;
1385                 default:
1386                         DEBUG(0, ("unknown tag type %d\n", (unsigned int) tag));
1387                         return NULL;
1388                 }
1389
1390
1391                 switch(smb_ace->a_type) {
1392                 case SMB_ACL_USER:
1393                         smb_ace->info.user.uid = id;
1394                         break;
1395                 case SMB_ACL_GROUP:
1396                         smb_ace->info.group.gid = id;
1397                         break;
1398                 default:
1399                         break;
1400                 }
1401
1402                 smb_ace->a_perm = 0;
1403                 smb_ace->a_perm |=
1404                         ((perm & GLUSTER_ACL_READ) ? SMB_ACL_READ : 0);
1405                 smb_ace->a_perm |=
1406                         ((perm & GLUSTER_ACL_WRITE) ? SMB_ACL_WRITE : 0);
1407                 smb_ace->a_perm |=
1408                         ((perm & GLUSTER_ACL_EXECUTE) ? SMB_ACL_EXECUTE : 0);
1409
1410                 smb_ace++;
1411         }
1412
1413         return result;
1414 }
1415
1416
/**
 * qsort() comparator for 8-byte Gluster ACL entries.
 *
 * Sorting precedence:
 *
 *  - Smaller TAG values must be earlier.
 *
 *  - Within same TAG, smaller identifiers must be earlier, E.g:
 *    UID 0 entry must be earlier than UID 200
 *    GID 17 entry must be earlier than GID 19
 *
 * The ordering is computed with explicit comparisons.  Subtracting two
 * 32-bit unsigned ids and returning the result as int yields the wrong
 * sign whenever the ids differ by more than INT_MAX.
 *
 * @return negative, zero or positive as @left sorts before, equal to or
 *         after @right.
 */
static int gluster_ace_cmp(const void *left, const void *right)
{
        uint16_t tag_left, tag_right;
        uint32_t id_left, id_right;

        /* TAG is the first element in the entry */
        tag_left = SVAL(left, 0);
        tag_right = SVAL(right, 0);

        if (tag_left != tag_right) {
                return (tag_left < tag_right) ? -1 : 1;
        }

        /* ID is the third element in the entry, after two short
           integers (tag and perm), i.e at offset 4.
        */
        id_left = IVAL(left, 4);
        id_right = IVAL(right, 4);

        if (id_left == id_right) {
                return 0;
        }

        return (id_left < id_right) ? -1 : 1;
}
1449
1450
1451 static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
1452 {
1453         ssize_t size;
1454         struct smb_acl_entry *smb_ace;
1455         int i;
1456         int count;
1457         uint16_t tag;
1458         uint16_t perm;
1459         uint32_t id;
1460         int offset;
1461
1462         count = theacl->count;
1463
1464         size = GLUSTER_ACL_HEADER_SIZE + (count * GLUSTER_ACL_ENTRY_SIZE);
1465         if (!buf) {
1466                 return size;
1467         }
1468
1469         if (len < size) {
1470                 errno = ERANGE;
1471                 return -1;
1472         }
1473
1474         smb_ace = theacl->acl;
1475
1476         /* Version is the first 4 bytes of the ACL */
1477         SIVAL(buf, 0, GLUSTER_ACL_VERSION);
1478         offset = GLUSTER_ACL_HEADER_SIZE;
1479
1480         for (i = 0; i < count; i++) {
1481                 /* Calculate tag */
1482                 switch(smb_ace->a_type) {
1483                 case SMB_ACL_USER:
1484                         tag = GLUSTER_ACL_USER;
1485                         break;
1486                 case SMB_ACL_USER_OBJ:
1487                         tag = GLUSTER_ACL_USER_OBJ;
1488                         break;
1489                 case SMB_ACL_GROUP:
1490                         tag = GLUSTER_ACL_GROUP;
1491                         break;
1492                 case SMB_ACL_GROUP_OBJ:
1493                         tag = GLUSTER_ACL_GROUP_OBJ;
1494                         break;
1495                 case SMB_ACL_OTHER:
1496                         tag = GLUSTER_ACL_OTHER;
1497                         break;
1498                 case SMB_ACL_MASK:
1499                         tag = GLUSTER_ACL_MASK;
1500                         break;
1501                 default:
1502                         DEBUG(0, ("Unknown tag value %d\n",
1503                                   smb_ace->a_type));
1504                         errno = EINVAL;
1505                         return -1;
1506                 }
1507
1508
1509                 /* Calculate id */
1510                 switch(smb_ace->a_type) {
1511                 case SMB_ACL_USER:
1512                         id = smb_ace->info.user.uid;
1513                         break;
1514                 case SMB_ACL_GROUP:
1515                         id = smb_ace->info.group.gid;
1516                         break;
1517                 default:
1518                         id = GLUSTER_ACL_UNDEFINED_ID;
1519                         break;
1520                 }
1521
1522                 /* Calculate perm */
1523                 perm = 0;
1524
1525                 perm |=
1526                         ((smb_ace->a_perm & SMB_ACL_READ) ? GLUSTER_ACL_READ : 0);
1527                 perm |=
1528                         ((smb_ace->a_perm & SMB_ACL_WRITE) ? GLUSTER_ACL_WRITE : 0);
1529                 perm |=
1530                         ((smb_ace->a_perm & SMB_ACL_EXECUTE) ? GLUSTER_ACL_EXECUTE : 0);
1531
1532
1533                 /* TAG is the first 2 bytes of an entry */
1534                 SSVAL(buf, offset, tag);
1535                 offset += 2;
1536
1537                 /* PERM is the next 2 bytes of an entry */
1538                 SSVAL(buf, offset, perm);
1539                 offset += 2;
1540
1541                 /* ID is the last 4 bytes of an entry */
1542                 SIVAL(buf, offset, id);
1543                 offset += 4;
1544
1545                 smb_ace++;
1546         }
1547
1548         /* Skip the header, sort @count number of 8-byte entries */
1549         qsort(buf+GLUSTER_ACL_HEADER_SIZE, count, GLUSTER_ACL_ENTRY_SIZE,
1550               gluster_ace_cmp);
1551
1552         return size;
1553 }
1554
1555
1556 static SMB_ACL_T vfs_gluster_sys_acl_get_file(struct vfs_handle_struct *handle,
1557                                               const char *path_p,
1558                                               SMB_ACL_TYPE_T type,
1559                                               TALLOC_CTX *mem_ctx)
1560 {
1561         struct smb_acl_t *result;
1562         struct stat st;
1563         char *buf;
1564         const char *key;
1565         ssize_t ret, size = GLUSTER_ACL_SIZE(20);
1566
1567         switch (type) {
1568         case SMB_ACL_TYPE_ACCESS:
1569                 key = "system.posix_acl_access";
1570                 break;
1571         case SMB_ACL_TYPE_DEFAULT:
1572                 key = "system.posix_acl_default";
1573                 break;
1574         default:
1575                 errno = EINVAL;
1576                 return NULL;
1577         }
1578
1579         buf = alloca(size);
1580         if (!buf) {
1581                 return NULL;
1582         }
1583
1584         ret = glfs_getxattr(handle->data, path_p, key, buf, size);
1585         if (ret == -1 && errno == ERANGE) {
1586                 ret = glfs_getxattr(handle->data, path_p, key, 0, 0);
1587                 if (ret > 0) {
1588                         buf = alloca(ret);
1589                         if (!buf) {
1590                                 return NULL;
1591                         }
1592                         ret = glfs_getxattr(handle->data, path_p, key, buf, ret);
1593                 }
1594         }
1595
1596         /* retrieving the ACL from the xattr has finally failed, do a
1597          * mode-to-acl mapping */
1598
1599         if (ret == -1 && errno == ENODATA) {
1600                 ret = glfs_stat(handle->data, path_p, &st);
1601                 if (ret == 0) {
1602                         result = mode_to_smb_acls(&st, mem_ctx);
1603                         return result;
1604                 }
1605         }
1606
1607         if (ret <= 0) {
1608                 return NULL;
1609         }
1610
1611         result = gluster_to_smb_acl(buf, ret, mem_ctx);
1612
1613         return result;
1614 }
1615
1616 static SMB_ACL_T vfs_gluster_sys_acl_get_fd(struct vfs_handle_struct *handle,
1617                                             struct files_struct *fsp,
1618                                             TALLOC_CTX *mem_ctx)
1619 {
1620         struct smb_acl_t *result;
1621         struct stat st;
1622         ssize_t ret, size = GLUSTER_ACL_SIZE(20);
1623         char *buf;
1624         glfs_fd_t *glfd;
1625
1626         glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
1627
1628         buf = alloca(size);
1629         if (!buf) {
1630                 return NULL;
1631         }
1632
1633         ret = glfs_fgetxattr(glfd, "system.posix_acl_access", buf, size);
1634         if (ret == -1 && errno == ERANGE) {
1635                 ret = glfs_fgetxattr(glfd, "system.posix_acl_access", 0, 0);
1636                 if (ret > 0) {
1637                         buf = alloca(ret);
1638                         if (!buf) {
1639                                 return NULL;
1640                         }
1641                         ret = glfs_fgetxattr(glfd, "system.posix_acl_access",
1642                                              buf, ret);
1643                 }
1644         }
1645
1646         /* retrieving the ACL from the xattr has finally failed, do a
1647          * mode-to-acl mapping */
1648
1649         if (ret == -1 && errno == ENODATA) {
1650                 ret = glfs_fstat(glfd, &st);
1651                 if (ret == 0) {
1652                         result = mode_to_smb_acls(&st, mem_ctx);
1653                         return result;
1654                 }
1655         }
1656
1657         if (ret <= 0) {
1658                 return NULL;
1659         }
1660
1661         result = gluster_to_smb_acl(buf, ret, mem_ctx);
1662
1663         return result;
1664 }
1665
1666 static int vfs_gluster_sys_acl_set_file(struct vfs_handle_struct *handle,
1667                                         const char *name,
1668                                         SMB_ACL_TYPE_T acltype,
1669                                         SMB_ACL_T theacl)
1670 {
1671         int ret;
1672         const char *key;
1673         char *buf;
1674         ssize_t size;
1675
1676         switch (acltype) {
1677         case SMB_ACL_TYPE_ACCESS:
1678                 key = "system.posix_acl_access";
1679                 break;
1680         case SMB_ACL_TYPE_DEFAULT:
1681                 key = "system.posix_acl_default";
1682                 break;
1683         default:
1684                 errno = EINVAL;
1685                 return -1;
1686         }
1687
1688         size = smb_to_gluster_acl(theacl, 0, 0);
1689         buf = alloca(size);
1690
1691         size = smb_to_gluster_acl(theacl, buf, size);
1692         if (size == -1) {
1693                 return -1;
1694         }
1695
1696         ret = glfs_setxattr(handle->data, name, key, buf, size, 0);
1697
1698         return ret;
1699 }
1700
1701 static int vfs_gluster_sys_acl_set_fd(struct vfs_handle_struct *handle,
1702                                       struct files_struct *fsp,
1703                                       SMB_ACL_T theacl)
1704 {
1705         int ret;
1706         char *buf;
1707         ssize_t size;
1708
1709         size = smb_to_gluster_acl(theacl, 0, 0);
1710         buf = alloca(size);
1711
1712         size = smb_to_gluster_acl(theacl, buf, size);
1713         if (size == -1) {
1714                 return -1;
1715         }
1716
1717         ret = glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp),
1718                              "system.posix_acl_access", buf, size, 0);
1719         return ret;
1720 }
1721
1722 static int vfs_gluster_sys_acl_delete_def_file(struct vfs_handle_struct *handle,
1723                                                const char *path)
1724 {
1725         return glfs_removexattr(handle->data, path, "system.posix_acl_default");
1726 }
1727
1728 static struct vfs_fn_pointers glusterfs_fns = {
1729
1730         /* Disk Operations */
1731
1732         .connect_fn = vfs_gluster_connect,
1733         .disconnect_fn = vfs_gluster_disconnect,
1734         .disk_free_fn = vfs_gluster_disk_free,
1735         .get_quota_fn = vfs_gluster_get_quota,
1736         .set_quota_fn = vfs_gluster_set_quota,
1737         .statvfs_fn = vfs_gluster_statvfs,
1738         .fs_capabilities_fn = vfs_gluster_fs_capabilities,
1739
1740         .get_dfs_referrals_fn = NULL,
1741
1742         /* Directory Operations */
1743
1744         .opendir_fn = vfs_gluster_opendir,
1745         .fdopendir_fn = vfs_gluster_fdopendir,
1746         .readdir_fn = vfs_gluster_readdir,
1747         .seekdir_fn = vfs_gluster_seekdir,
1748         .telldir_fn = vfs_gluster_telldir,
1749         .rewind_dir_fn = vfs_gluster_rewinddir,
1750         .mkdir_fn = vfs_gluster_mkdir,
1751         .rmdir_fn = vfs_gluster_rmdir,
1752         .closedir_fn = vfs_gluster_closedir,
1753         .init_search_op_fn = vfs_gluster_init_search_op,
1754
1755         /* File Operations */
1756
1757         .open_fn = vfs_gluster_open,
1758         .create_file_fn = NULL,
1759         .close_fn = vfs_gluster_close,
1760         .read_fn = vfs_gluster_read,
1761         .pread_fn = vfs_gluster_pread,
1762         .pread_send_fn = vfs_gluster_pread_send,
1763         .pread_recv_fn = vfs_gluster_recv,
1764         .write_fn = vfs_gluster_write,
1765         .pwrite_fn = vfs_gluster_pwrite,
1766         .pwrite_send_fn = vfs_gluster_pwrite_send,
1767         .pwrite_recv_fn = vfs_gluster_recv,
1768         .lseek_fn = vfs_gluster_lseek,
1769         .sendfile_fn = vfs_gluster_sendfile,
1770         .recvfile_fn = vfs_gluster_recvfile,
1771         .rename_fn = vfs_gluster_rename,
1772         .fsync_fn = vfs_gluster_fsync,
1773         .fsync_send_fn = vfs_gluster_fsync_send,
1774         .fsync_recv_fn = vfs_gluster_fsync_recv,
1775
1776         .stat_fn = vfs_gluster_stat,
1777         .fstat_fn = vfs_gluster_fstat,
1778         .lstat_fn = vfs_gluster_lstat,
1779         .get_alloc_size_fn = vfs_gluster_get_alloc_size,
1780         .unlink_fn = vfs_gluster_unlink,
1781
1782         .chmod_fn = vfs_gluster_chmod,
1783         .fchmod_fn = vfs_gluster_fchmod,
1784         .chown_fn = vfs_gluster_chown,
1785         .fchown_fn = vfs_gluster_fchown,
1786         .lchown_fn = vfs_gluster_lchown,
1787         .chdir_fn = vfs_gluster_chdir,
1788         .getwd_fn = vfs_gluster_getwd,
1789         .ntimes_fn = vfs_gluster_ntimes,
1790         .ftruncate_fn = vfs_gluster_ftruncate,
1791         .fallocate_fn = vfs_gluster_fallocate,
1792         .lock_fn = vfs_gluster_lock,
1793         .kernel_flock_fn = vfs_gluster_kernel_flock,
1794         .linux_setlease_fn = vfs_gluster_linux_setlease,
1795         .getlock_fn = vfs_gluster_getlock,
1796         .symlink_fn = vfs_gluster_symlink,
1797         .readlink_fn = vfs_gluster_readlink,
1798         .link_fn = vfs_gluster_link,
1799         .mknod_fn = vfs_gluster_mknod,
1800         .realpath_fn = vfs_gluster_realpath,
1801         .notify_watch_fn = vfs_gluster_notify_watch,
1802         .chflags_fn = vfs_gluster_chflags,
1803         .file_id_create_fn = NULL,
1804         .copy_chunk_send_fn = NULL,
1805         .copy_chunk_recv_fn = NULL,
1806         .streaminfo_fn = NULL,
1807         .get_real_filename_fn = vfs_gluster_get_real_filename,
1808         .connectpath_fn = vfs_gluster_connectpath,
1809
1810         .brl_lock_windows_fn = NULL,
1811         .brl_unlock_windows_fn = NULL,
1812         .brl_cancel_windows_fn = NULL,
1813         .strict_lock_fn = NULL,
1814         .strict_unlock_fn = NULL,
1815         .translate_name_fn = NULL,
1816         .fsctl_fn = NULL,
1817
1818         /* NT ACL Operations */
1819         .fget_nt_acl_fn = NULL,
1820         .get_nt_acl_fn = NULL,
1821         .fset_nt_acl_fn = NULL,
1822         .audit_file_fn = NULL,
1823
1824         /* Posix ACL Operations */
1825         .chmod_acl_fn = NULL,   /* passthrough to default */
1826         .fchmod_acl_fn = NULL,  /* passthrough to default */
1827         .sys_acl_get_file_fn = vfs_gluster_sys_acl_get_file,
1828         .sys_acl_get_fd_fn = vfs_gluster_sys_acl_get_fd,
1829         .sys_acl_blob_get_file_fn = posix_sys_acl_blob_get_file,
1830         .sys_acl_blob_get_fd_fn = posix_sys_acl_blob_get_fd,
1831         .sys_acl_set_file_fn = vfs_gluster_sys_acl_set_file,
1832         .sys_acl_set_fd_fn = vfs_gluster_sys_acl_set_fd,
1833         .sys_acl_delete_def_file_fn = vfs_gluster_sys_acl_delete_def_file,
1834
1835         /* EA Operations */
1836         .getxattr_fn = vfs_gluster_getxattr,
1837         .fgetxattr_fn = vfs_gluster_fgetxattr,
1838         .listxattr_fn = vfs_gluster_listxattr,
1839         .flistxattr_fn = vfs_gluster_flistxattr,
1840         .removexattr_fn = vfs_gluster_removexattr,
1841         .fremovexattr_fn = vfs_gluster_fremovexattr,
1842         .setxattr_fn = vfs_gluster_setxattr,
1843         .fsetxattr_fn = vfs_gluster_fsetxattr,
1844
1845         /* AIO Operations */
1846         .aio_force_fn = vfs_gluster_aio_force,
1847
1848         /* Offline Operations */
1849         .is_offline_fn = vfs_gluster_is_offline,
1850         .set_offline_fn = vfs_gluster_set_offline,
1851
1852         /* Durable handle Operations */
1853         .durable_cookie_fn = NULL,
1854         .durable_disconnect_fn = NULL,
1855         .durable_reconnect_fn = NULL,
1856 };
1857
1858 NTSTATUS vfs_glusterfs_init(void);
1859 NTSTATUS vfs_glusterfs_init(void)
1860 {
1861         return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
1862                                 "glusterfs", &glusterfs_fns);
1863 }