vfs/glusterfs: measure libglusterfs aio function call duration
[nivanova/samba-autobuild/.git] / source3 / modules / vfs_glusterfs.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Wrap GlusterFS GFAPI calls in vfs functions.
5
6    Copyright (c) 2013 Anand Avati <avati@redhat.com>
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 /**
23  * @file   vfs_glusterfs.c
24  * @author Anand Avati <avati@redhat.com>
25  * @date   May 2013
26  * @brief  Samba VFS module for glusterfs
27  *
28  * @todo
29  *   - sendfile/recvfile support
30  *
31  * A Samba VFS module for GlusterFS, based on Gluster's libgfapi.
32  * This is a "bottom" vfs module (not something to be stacked on top of
33  * another module), and translates (most) calls to the closest actions
34  * available in libgfapi.
35  *
36  */
37
38 #include "includes.h"
39 #include "smbd/smbd.h"
40 #include <stdio.h>
41 #include "api/glfs.h"
42 #include "lib/util/dlinklist.h"
43 #include "lib/util/tevent_unix.h"
44 #include "lib/tevent/tevent_internal.h"
45 #include "smbd/globals.h"
46 #include "lib/util/sys_rw.h"
47
48 #define DEFAULT_VOLFILE_SERVER "localhost"
49
50 static int read_fd = -1;
51 static int write_fd = -1;
52 static struct tevent_fd *aio_read_event = NULL;
53
54 /**
55  * Helper to convert struct stat to struct stat_ex.
56  */
57 static void smb_stat_ex_from_stat(struct stat_ex *dst, const struct stat *src)
58 {
59         ZERO_STRUCTP(dst);
60
61         dst->st_ex_dev = src->st_dev;
62         dst->st_ex_ino = src->st_ino;
63         dst->st_ex_mode = src->st_mode;
64         dst->st_ex_nlink = src->st_nlink;
65         dst->st_ex_uid = src->st_uid;
66         dst->st_ex_gid = src->st_gid;
67         dst->st_ex_rdev = src->st_rdev;
68         dst->st_ex_size = src->st_size;
69         dst->st_ex_atime.tv_sec = src->st_atime;
70         dst->st_ex_mtime.tv_sec = src->st_mtime;
71         dst->st_ex_ctime.tv_sec = src->st_ctime;
72         dst->st_ex_btime.tv_sec = src->st_mtime;
73         dst->st_ex_blksize = src->st_blksize;
74         dst->st_ex_blocks = src->st_blocks;
75 #ifdef STAT_HAVE_NSEC
76         dst->st_ex_atime.tv_nsec = src->st_atime_nsec;
77         dst->st_ex_mtime.tv_nsec = src->st_mtime_nsec;
78         dst->st_ex_ctime.tv_nsec = src->st_ctime_nsec;
79         dst->st_ex_btime.tv_nsec = src->st_mtime_nsec;
80 #endif
81 }
82
83 /* pre-opened glfs_t */
84
85 static struct glfs_preopened {
86         char *volume;
87         char *connectpath;
88         glfs_t *fs;
89         int ref;
90         struct glfs_preopened *next, *prev;
91 } *glfs_preopened;
92
93
94 static int glfs_set_preopened(const char *volume, const char *connectpath, glfs_t *fs)
95 {
96         struct glfs_preopened *entry = NULL;
97
98         entry = talloc_zero(NULL, struct glfs_preopened);
99         if (!entry) {
100                 errno = ENOMEM;
101                 return -1;
102         }
103
104         entry->volume = talloc_strdup(entry, volume);
105         if (!entry->volume) {
106                 talloc_free(entry);
107                 errno = ENOMEM;
108                 return -1;
109         }
110
111         entry->connectpath = talloc_strdup(entry, connectpath);
112         if (entry->connectpath == NULL) {
113                 talloc_free(entry);
114                 errno = ENOMEM;
115                 return -1;
116         }
117
118         entry->fs = fs;
119         entry->ref = 1;
120
121         DLIST_ADD(glfs_preopened, entry);
122
123         return 0;
124 }
125
126 static glfs_t *glfs_find_preopened(const char *volume, const char *connectpath)
127 {
128         struct glfs_preopened *entry = NULL;
129
130         for (entry = glfs_preopened; entry; entry = entry->next) {
131                 if (strcmp(entry->volume, volume) == 0 &&
132                     strcmp(entry->connectpath, connectpath) == 0)
133                 {
134                         entry->ref++;
135                         return entry->fs;
136                 }
137         }
138
139         return NULL;
140 }
141
142 static void glfs_clear_preopened(glfs_t *fs)
143 {
144         struct glfs_preopened *entry = NULL;
145
146         for (entry = glfs_preopened; entry; entry = entry->next) {
147                 if (entry->fs == fs) {
148                         if (--entry->ref)
149                                 return;
150
151                         DLIST_REMOVE(glfs_preopened, entry);
152
153                         glfs_fini(entry->fs);
154                         talloc_free(entry);
155                 }
156         }
157 }
158
159 /* Disk Operations */
160
161 static int vfs_gluster_connect(struct vfs_handle_struct *handle,
162                                const char *service,
163                                const char *user)
164 {
165         const char *volfile_server;
166         const char *volume;
167         char *logfile;
168         int loglevel;
169         glfs_t *fs = NULL;
170         TALLOC_CTX *tmp_ctx;
171         int ret = 0;
172
173         tmp_ctx = talloc_new(NULL);
174         if (tmp_ctx == NULL) {
175                 ret = -1;
176                 goto done;
177         }
178         logfile = lp_parm_talloc_string(tmp_ctx, SNUM(handle->conn), "glusterfs",
179                                        "logfile", NULL);
180
181         loglevel = lp_parm_int(SNUM(handle->conn), "glusterfs", "loglevel", -1);
182
183         volfile_server = lp_parm_const_string(SNUM(handle->conn), "glusterfs",
184                                                "volfile_server", NULL);
185         if (volfile_server == NULL) {
186                 volfile_server = DEFAULT_VOLFILE_SERVER;
187         }
188
189         volume = lp_parm_const_string(SNUM(handle->conn), "glusterfs", "volume",
190                                       NULL);
191         if (volume == NULL) {
192                 volume = service;
193         }
194
195         fs = glfs_find_preopened(volume, handle->conn->connectpath);
196         if (fs) {
197                 goto done;
198         }
199
200         fs = glfs_new(volume);
201         if (fs == NULL) {
202                 ret = -1;
203                 goto done;
204         }
205
206         ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 0);
207         if (ret < 0) {
208                 DEBUG(0, ("Failed to set volfile_server %s\n", volfile_server));
209                 goto done;
210         }
211
212         ret = glfs_set_xlator_option(fs, "*-md-cache", "cache-posix-acl",
213                                      "true");
214         if (ret < 0) {
215                 DEBUG(0, ("%s: Failed to set xlator options\n", volume));
216                 goto done;
217         }
218
219
220         ret = glfs_set_xlator_option(fs, "*-snapview-client",
221                                      "snapdir-entry-path",
222                                      handle->conn->connectpath);
223         if (ret < 0) {
224                 DEBUG(0, ("%s: Failed to set xlator option:"
225                           " snapdir-entry-path\n", volume));
226                 glfs_fini(fs);
227                 return -1;
228         }
229
230         ret = glfs_set_logging(fs, logfile, loglevel);
231         if (ret < 0) {
232                 DEBUG(0, ("%s: Failed to set logfile %s loglevel %d\n",
233                           volume, logfile, loglevel));
234                 goto done;
235         }
236
237         ret = glfs_init(fs);
238         if (ret < 0) {
239                 DEBUG(0, ("%s: Failed to initialize volume (%s)\n",
240                           volume, strerror(errno)));
241                 goto done;
242         }
243
244         ret = glfs_set_preopened(volume, handle->conn->connectpath, fs);
245         if (ret < 0) {
246                 DEBUG(0, ("%s: Failed to register volume (%s)\n",
247                           volume, strerror(errno)));
248                 goto done;
249         }
250 done:
251         talloc_free(tmp_ctx);
252         if (ret < 0) {
253                 if (fs)
254                         glfs_fini(fs);
255                 return -1;
256         } else {
257                 DEBUG(0, ("%s: Initialized volume from server %s\n",
258                          volume, volfile_server));
259                 handle->data = fs;
260                 return 0;
261         }
262 }
263
264 static void vfs_gluster_disconnect(struct vfs_handle_struct *handle)
265 {
266         glfs_t *fs = NULL;
267
268         fs = handle->data;
269
270         glfs_clear_preopened(fs);
271 }
272
273 static uint64_t vfs_gluster_disk_free(struct vfs_handle_struct *handle,
274                                       const char *path, uint64_t *bsize_p,
275                                       uint64_t *dfree_p, uint64_t *dsize_p)
276 {
277         struct statvfs statvfs = { 0, };
278         int ret;
279
280         ret = glfs_statvfs(handle->data, path, &statvfs);
281         if (ret < 0) {
282                 return -1;
283         }
284
285         if (bsize_p != NULL) {
286                 *bsize_p = (uint64_t)statvfs.f_bsize; /* Block size */
287         }
288         if (dfree_p != NULL) {
289                 *dfree_p = (uint64_t)statvfs.f_bavail; /* Available Block units */
290         }
291         if (dsize_p != NULL) {
292                 *dsize_p = (uint64_t)statvfs.f_blocks; /* Total Block units */
293         }
294
295         return (uint64_t)statvfs.f_bavail;
296 }
297
298 static int vfs_gluster_get_quota(struct vfs_handle_struct *handle,
299                                  const char *path,
300                                  enum SMB_QUOTA_TYPE qtype, unid_t id,
301                                  SMB_DISK_QUOTA *qt)
302 {
303         errno = ENOSYS;
304         return -1;
305 }
306
307 static int
308 vfs_gluster_set_quota(struct vfs_handle_struct *handle,
309                       enum SMB_QUOTA_TYPE qtype, unid_t id, SMB_DISK_QUOTA *qt)
310 {
311         errno = ENOSYS;
312         return -1;
313 }
314
315 static int vfs_gluster_statvfs(struct vfs_handle_struct *handle,
316                                const char *path,
317                                struct vfs_statvfs_struct *vfs_statvfs)
318 {
319         struct statvfs statvfs = { 0, };
320         int ret;
321
322         ret = glfs_statvfs(handle->data, path, &statvfs);
323         if (ret < 0) {
324                 DEBUG(0, ("glfs_statvfs(%s) failed: %s\n",
325                           path, strerror(errno)));
326                 return -1;
327         }
328
329         ZERO_STRUCTP(vfs_statvfs);
330
331         vfs_statvfs->OptimalTransferSize = statvfs.f_frsize;
332         vfs_statvfs->BlockSize = statvfs.f_bsize;
333         vfs_statvfs->TotalBlocks = statvfs.f_blocks;
334         vfs_statvfs->BlocksAvail = statvfs.f_bfree;
335         vfs_statvfs->UserBlocksAvail = statvfs.f_bavail;
336         vfs_statvfs->TotalFileNodes = statvfs.f_files;
337         vfs_statvfs->FreeFileNodes = statvfs.f_ffree;
338         vfs_statvfs->FsIdentifier = statvfs.f_fsid;
339         vfs_statvfs->FsCapabilities =
340             FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
341
342         return ret;
343 }
344
345 static uint32_t vfs_gluster_fs_capabilities(struct vfs_handle_struct *handle,
346                                             enum timestamp_set_resolution *p_ts_res)
347 {
348         uint32_t caps = FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
349
350 #ifdef STAT_HAVE_NSEC
351         *p_ts_res = TIMESTAMP_SET_NT_OR_BETTER;
352 #endif
353
354         return caps;
355 }
356
357 static DIR *vfs_gluster_opendir(struct vfs_handle_struct *handle,
358                                 const char *path, const char *mask,
359                                 uint32_t attributes)
360 {
361         glfs_fd_t *fd;
362
363         fd = glfs_opendir(handle->data, path);
364         if (fd == NULL) {
365                 DEBUG(0, ("glfs_opendir(%s) failed: %s\n",
366                           path, strerror(errno)));
367         }
368
369         return (DIR *) fd;
370 }
371
372 static DIR *vfs_gluster_fdopendir(struct vfs_handle_struct *handle,
373                                   files_struct *fsp, const char *mask,
374                                   uint32_t attributes)
375 {
376         return (DIR *) *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
377 }
378
/**
 * Close a directory handle; @dirp is passed to glfs_closedir().
 */
static int vfs_gluster_closedir(struct vfs_handle_struct *handle, DIR *dirp)
{
	int rc = glfs_closedir((void *)dirp);

	return rc;
}
383
384 static struct dirent *vfs_gluster_readdir(struct vfs_handle_struct *handle,
385                                           DIR *dirp, SMB_STRUCT_STAT *sbuf)
386 {
387         static char direntbuf[512];
388         int ret;
389         struct stat stat;
390         struct dirent *dirent = 0;
391
392         if (sbuf != NULL) {
393                 ret = glfs_readdirplus_r((void *)dirp, &stat, (void *)direntbuf,
394                                          &dirent);
395         } else {
396                 ret = glfs_readdir_r((void *)dirp, (void *)direntbuf, &dirent);
397         }
398
399         if ((ret < 0) || (dirent == NULL)) {
400                 return NULL;
401         }
402
403         if (sbuf != NULL) {
404                 smb_stat_ex_from_stat(sbuf, &stat);
405         }
406
407         return dirent;
408 }
409
/**
 * Return the current position of the directory stream via glfs_telldir().
 */
static long vfs_gluster_telldir(struct vfs_handle_struct *handle, DIR *dirp)
{
	long position = glfs_telldir((void *)dirp);

	return position;
}
414
/**
 * Move the directory stream to @offset via glfs_seekdir().
 */
static void vfs_gluster_seekdir(struct vfs_handle_struct *handle, DIR *dirp,
				long offset)
{
	void *glfs_dir = (void *)dirp;

	glfs_seekdir(glfs_dir, offset);
}
420
/**
 * Rewind the directory stream by seeking to offset 0.
 */
static void vfs_gluster_rewinddir(struct vfs_handle_struct *handle, DIR *dirp)
{
	const long start_of_dir = 0;

	glfs_seekdir((void *)dirp, start_of_dir);
}
425
/**
 * Search-op initialisation hook; intentionally does nothing.
 */
static void vfs_gluster_init_search_op(struct vfs_handle_struct *handle,
				       DIR *dirp)
{
	/* Nothing to do. */
}
431
432 static int vfs_gluster_mkdir(struct vfs_handle_struct *handle,
433                              const struct smb_filename *smb_fname,
434                              mode_t mode)
435 {
436         return glfs_mkdir(handle->data, smb_fname->base_name, mode);
437 }
438
439 static int vfs_gluster_rmdir(struct vfs_handle_struct *handle,
440                         const struct smb_filename *smb_fname)
441 {
442         return glfs_rmdir(handle->data, smb_fname->base_name);
443 }
444
445 static int vfs_gluster_open(struct vfs_handle_struct *handle,
446                             struct smb_filename *smb_fname, files_struct *fsp,
447                             int flags, mode_t mode)
448 {
449         glfs_fd_t *glfd;
450         glfs_fd_t **p_tmp;
451
452         if (flags & O_DIRECTORY) {
453                 glfd = glfs_opendir(handle->data, smb_fname->base_name);
454         } else if (flags & O_CREAT) {
455                 glfd = glfs_creat(handle->data, smb_fname->base_name, flags,
456                                   mode);
457         } else {
458                 glfd = glfs_open(handle->data, smb_fname->base_name, flags);
459         }
460
461         if (glfd == NULL) {
462                 return -1;
463         }
464         p_tmp = (glfs_fd_t **)VFS_ADD_FSP_EXTENSION(handle, fsp,
465                                                           glfs_fd_t *, NULL);
466         *p_tmp = glfd;
467         /* An arbitrary value for error reporting, so you know its us. */
468         return 13371337;
469 }
470
471 static int vfs_gluster_close(struct vfs_handle_struct *handle,
472                              files_struct *fsp)
473 {
474         glfs_fd_t *glfd;
475         glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
476         VFS_REMOVE_FSP_EXTENSION(handle, fsp);
477         return glfs_close(glfd);
478 }
479
480 static ssize_t vfs_gluster_read(struct vfs_handle_struct *handle,
481                                 files_struct *fsp, void *data, size_t n)
482 {
483         return glfs_read(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
484 }
485
486 static ssize_t vfs_gluster_pread(struct vfs_handle_struct *handle,
487                                  files_struct *fsp, void *data, size_t n,
488                                  off_t offset)
489 {
490         return glfs_pread(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
491 }
492
493 struct glusterfs_aio_state;
494
495 struct glusterfs_aio_wrapper {
496         struct glusterfs_aio_state *state;
497 };
498
499 struct glusterfs_aio_state {
500         ssize_t ret;
501         struct tevent_req *req;
502         bool cancelled;
503         struct vfs_aio_state vfs_aio_state;
504         struct timespec start;
505 };
506
507 static int aio_wrapper_destructor(struct glusterfs_aio_wrapper *wrap)
508 {
509         wrap->state->cancelled = true;
510
511         return 0;
512 }
513
514 /*
515  * This function is the callback that will be called on glusterfs
516  * threads once the async IO submitted is complete. To notify
517  * Samba of the completion we use a pipe based queue.
518  */
519 static void aio_glusterfs_done(glfs_fd_t *fd, ssize_t ret, void *data)
520 {
521         struct glusterfs_aio_state *state = NULL;
522         int sts = 0;
523         struct timespec end;
524
525         state = (struct glusterfs_aio_state *)data;
526
527         clock_gettime_mono(&end);
528
529         if (ret < 0) {
530                 state->ret = -1;
531                 state->vfs_aio_state.error = errno;
532         } else {
533                 state->ret = ret;
534         }
535         state->vfs_aio_state.duration = nsec_time_diff(&end, &state->start);
536
537         /*
538          * Write the state pointer to glusterfs_aio_state to the
539          * pipe, so we can call tevent_req_done() from the main thread,
540          * because tevent_req_done() is not designed to be executed in
541          * the multithread environment, so tevent_req_done() must be
542          * executed from the smbd main thread.
543          *
544          * write(2) on pipes with sizes under _POSIX_PIPE_BUF
545          * in size is atomic, without this, the use op pipes in this
546          * code would not work.
547          *
548          * sys_write is a thin enough wrapper around write(2)
549          * that we can trust it here.
550          */
551
552         sts = sys_write(write_fd, &state, sizeof(struct glusterfs_aio_state *));
553         if (sts < 0) {
554                 DEBUG(0,("\nWrite to pipe failed (%s)", strerror(errno)));
555         }
556
557         return;
558 }
559
560 /*
561  * Read each req off the pipe and process it.
562  */
563 static void aio_tevent_fd_done(struct tevent_context *event_ctx,
564                                 struct tevent_fd *fde,
565                                 uint16_t flags, void *data)
566 {
567         struct tevent_req *req = NULL;
568         struct glusterfs_aio_state *state = NULL;
569         int sts = 0;
570
571         /*
572          * read(2) on pipes is atomic if the needed data is available
573          * in the pipe, per SUS and POSIX.  Because we always write
574          * to the pipe in sizeof(struct tevent_req *) chunks, we can
575          * always read in those chunks, atomically.
576          *
577          * sys_read is a thin enough wrapper around read(2) that we
578          * can trust it here.
579          */
580
581         sts = sys_read(read_fd, &state, sizeof(struct glusterfs_aio_state *));
582
583         if (sts < 0) {
584                 DEBUG(0,("\nRead from pipe failed (%s)", strerror(errno)));
585         }
586
587         /* if we've cancelled the op, there is no req, so just clean up. */
588         if (state->cancelled == true) {
589                 TALLOC_FREE(state);
590                 return;
591         }
592
593         req = state->req;
594
595         if (req) {
596                 tevent_req_done(req);
597         }
598         return;
599 }
600
601 static bool init_gluster_aio(struct vfs_handle_struct *handle)
602 {
603         int fds[2];
604         int ret = -1;
605
606         if (read_fd != -1) {
607                 /*
608                  * Already initialized.
609                  */
610                 return true;
611         }
612
613         ret = pipe(fds);
614         if (ret == -1) {
615                 goto fail;
616         }
617
618         read_fd = fds[0];
619         write_fd = fds[1];
620
621         aio_read_event = tevent_add_fd(handle->conn->sconn->ev_ctx,
622                                         NULL,
623                                         read_fd,
624                                         TEVENT_FD_READ,
625                                         aio_tevent_fd_done,
626                                         NULL);
627         if (aio_read_event == NULL) {
628                 goto fail;
629         }
630
631         return true;
632 fail:
633         TALLOC_FREE(aio_read_event);
634         if (read_fd != -1) {
635                 close(read_fd);
636                 close(write_fd);
637                 read_fd = -1;
638                 write_fd = -1;
639         }
640         return false;
641 }
642
643 static struct glusterfs_aio_state *aio_state_create(TALLOC_CTX *mem_ctx)
644 {
645         struct tevent_req *req = NULL;
646         struct glusterfs_aio_state *state = NULL;
647         struct glusterfs_aio_wrapper *wrapper = NULL;
648
649         req = tevent_req_create(mem_ctx, &wrapper, struct glusterfs_aio_wrapper);
650
651         if (req == NULL) {
652                 return NULL;
653         }
654
655         state = talloc_zero(NULL, struct glusterfs_aio_state);
656
657         if (state == NULL) {
658                 TALLOC_FREE(req);
659                 return NULL;
660         }
661
662         talloc_set_destructor(wrapper, aio_wrapper_destructor);
663         state->cancelled = false;
664         state->req = req;
665
666         wrapper->state = state;
667
668         return state;
669 }
670
671 static struct tevent_req *vfs_gluster_pread_send(struct vfs_handle_struct
672                                                   *handle, TALLOC_CTX *mem_ctx,
673                                                   struct tevent_context *ev,
674                                                   files_struct *fsp,
675                                                   void *data, size_t n,
676                                                   off_t offset)
677 {
678         struct glusterfs_aio_state *state = NULL;
679         struct tevent_req *req = NULL;
680         int ret = 0;
681
682         state = aio_state_create(mem_ctx);
683
684         if (state == NULL) {
685                 return NULL;
686         }
687
688         req = state->req;
689
690         if (!init_gluster_aio(handle)) {
691                 tevent_req_error(req, EIO);
692                 return tevent_req_post(req, ev);
693         }
694
695         clock_gettime_mono(&state->start);
696         ret = glfs_pread_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
697                                 fsp), data, n, offset, 0, aio_glusterfs_done,
698                                 state);
699         if (ret < 0) {
700                 tevent_req_error(req, -ret);
701                 return tevent_req_post(req, ev);
702         }
703
704         return req;
705 }
706
707 static struct tevent_req *vfs_gluster_pwrite_send(struct vfs_handle_struct
708                                                   *handle, TALLOC_CTX *mem_ctx,
709                                                   struct tevent_context *ev,
710                                                   files_struct *fsp,
711                                                   const void *data, size_t n,
712                                                   off_t offset)
713 {
714         struct glusterfs_aio_state *state = NULL;
715         struct tevent_req *req = NULL;
716         int ret = 0;
717
718         state = aio_state_create(mem_ctx);
719
720         if (state == NULL) {
721                 return NULL;
722         }
723
724         req = state->req;
725
726         if (!init_gluster_aio(handle)) {
727                 tevent_req_error(req, EIO);
728                 return tevent_req_post(req, ev);
729         }
730
731         clock_gettime_mono(&state->start);
732         ret = glfs_pwrite_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
733                                 fsp), data, n, offset, 0, aio_glusterfs_done,
734                                 state);
735         if (ret < 0) {
736                 tevent_req_error(req, -ret);
737                 return tevent_req_post(req, ev);
738         }
739
740         return req;
741 }
742
743 static ssize_t vfs_gluster_recv(struct tevent_req *req,
744                                 struct vfs_aio_state *vfs_aio_state)
745 {
746         struct glusterfs_aio_state *state = NULL;
747         struct glusterfs_aio_wrapper *wrapper = NULL;
748         int ret = 0;
749
750         wrapper = tevent_req_data(req, struct glusterfs_aio_wrapper);
751
752         if (wrapper == NULL) {
753                 return -1;
754         }
755
756         state = wrapper->state;
757
758         if (state == NULL) {
759                 return -1;
760         }
761
762         if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
763                 return -1;
764         }
765
766         *vfs_aio_state = state->vfs_aio_state;
767         ret = state->ret;
768
769         /* Clean up the state, it is in a NULL context. */
770
771         TALLOC_FREE(state);
772
773         return ret;
774 }
775
776 static ssize_t vfs_gluster_write(struct vfs_handle_struct *handle,
777                                  files_struct *fsp, const void *data, size_t n)
778 {
779         return glfs_write(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, 0);
780 }
781
782 static ssize_t vfs_gluster_pwrite(struct vfs_handle_struct *handle,
783                                   files_struct *fsp, const void *data,
784                                   size_t n, off_t offset)
785 {
786         return glfs_pwrite(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), data, n, offset, 0);
787 }
788
789 static off_t vfs_gluster_lseek(struct vfs_handle_struct *handle,
790                                files_struct *fsp, off_t offset, int whence)
791 {
792         return glfs_lseek(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset, whence);
793 }
794
795 static ssize_t vfs_gluster_sendfile(struct vfs_handle_struct *handle, int tofd,
796                                     files_struct *fromfsp,
797                                     const DATA_BLOB *hdr,
798                                     off_t offset, size_t n)
799 {
800         errno = ENOTSUP;
801         return -1;
802 }
803
804 static ssize_t vfs_gluster_recvfile(struct vfs_handle_struct *handle,
805                                     int fromfd, files_struct *tofsp,
806                                     off_t offset, size_t n)
807 {
808         errno = ENOTSUP;
809         return -1;
810 }
811
812 static int vfs_gluster_rename(struct vfs_handle_struct *handle,
813                               const struct smb_filename *smb_fname_src,
814                               const struct smb_filename *smb_fname_dst)
815 {
816         return glfs_rename(handle->data, smb_fname_src->base_name,
817                            smb_fname_dst->base_name);
818 }
819
820 static int vfs_gluster_fsync(struct vfs_handle_struct *handle,
821                              files_struct *fsp)
822 {
823         return glfs_fsync(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp));
824 }
825
826 static struct tevent_req *vfs_gluster_fsync_send(struct vfs_handle_struct
827                                                  *handle, TALLOC_CTX *mem_ctx,
828                                                  struct tevent_context *ev,
829                                                  files_struct *fsp)
830 {
831         struct tevent_req *req = NULL;
832         struct glusterfs_aio_state *state = NULL;
833         int ret = 0;
834
835         state = aio_state_create(mem_ctx);
836
837         if (state == NULL) {
838                 return NULL;
839         }
840
841         req = state->req;
842
843         if (!init_gluster_aio(handle)) {
844                 tevent_req_error(req, EIO);
845                 return tevent_req_post(req, ev);
846         }
847
848         clock_gettime_mono(&state->start);
849         ret = glfs_fsync_async(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle,
850                                 fsp), aio_glusterfs_done, req);
851         if (ret < 0) {
852                 tevent_req_error(req, -ret);
853                 return tevent_req_post(req, ev);
854         }
855         return req;
856 }
857
/**
 * Collect the result of an asynchronous fsync via vfs_gluster_recv().
 */
static int vfs_gluster_fsync_recv(struct tevent_req *req,
				  struct vfs_aio_state *vfs_aio_state)
{
	ssize_t result = vfs_gluster_recv(req, vfs_aio_state);

	/* Narrow ssize_t to the int this hook returns. */
	return (int)result;
}
866
867 static int vfs_gluster_stat(struct vfs_handle_struct *handle,
868                             struct smb_filename *smb_fname)
869 {
870         struct stat st;
871         int ret;
872
873         ret = glfs_stat(handle->data, smb_fname->base_name, &st);
874         if (ret == 0) {
875                 smb_stat_ex_from_stat(&smb_fname->st, &st);
876         }
877         if (ret < 0 && errno != ENOENT) {
878                 DEBUG(0, ("glfs_stat(%s) failed: %s\n",
879                           smb_fname->base_name, strerror(errno)));
880         }
881         return ret;
882 }
883
884 static int vfs_gluster_fstat(struct vfs_handle_struct *handle,
885                              files_struct *fsp, SMB_STRUCT_STAT *sbuf)
886 {
887         struct stat st;
888         int ret;
889
890         ret = glfs_fstat(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), &st);
891         if (ret == 0) {
892                 smb_stat_ex_from_stat(sbuf, &st);
893         }
894         if (ret < 0) {
895                 DEBUG(0, ("glfs_fstat(%d) failed: %s\n",
896                           fsp->fh->fd, strerror(errno)));
897         }
898         return ret;
899 }
900
901 static int vfs_gluster_lstat(struct vfs_handle_struct *handle,
902                              struct smb_filename *smb_fname)
903 {
904         struct stat st;
905         int ret;
906
907         ret = glfs_lstat(handle->data, smb_fname->base_name, &st);
908         if (ret == 0) {
909                 smb_stat_ex_from_stat(&smb_fname->st, &st);
910         }
911         if (ret < 0 && errno != ENOENT) {
912                 DEBUG(0, ("glfs_lstat(%s) failed: %s\n",
913                           smb_fname->base_name, strerror(errno)));
914         }
915         return ret;
916 }
917
918 static uint64_t vfs_gluster_get_alloc_size(struct vfs_handle_struct *handle,
919                                            files_struct *fsp,
920                                            const SMB_STRUCT_STAT *sbuf)
921 {
922         return sbuf->st_ex_blocks * 512;
923 }
924
925 static int vfs_gluster_unlink(struct vfs_handle_struct *handle,
926                               const struct smb_filename *smb_fname)
927 {
928         return glfs_unlink(handle->data, smb_fname->base_name);
929 }
930
931 static int vfs_gluster_chmod(struct vfs_handle_struct *handle,
932                              const char *path, mode_t mode)
933 {
934         return glfs_chmod(handle->data, path, mode);
935 }
936
937 static int vfs_gluster_fchmod(struct vfs_handle_struct *handle,
938                               files_struct *fsp, mode_t mode)
939 {
940         return glfs_fchmod(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), mode);
941 }
942
943 static int vfs_gluster_chown(struct vfs_handle_struct *handle,
944                              const char *path, uid_t uid, gid_t gid)
945 {
946         return glfs_chown(handle->data, path, uid, gid);
947 }
948
949 static int vfs_gluster_fchown(struct vfs_handle_struct *handle,
950                               files_struct *fsp, uid_t uid, gid_t gid)
951 {
952         return glfs_fchown(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), uid, gid);
953 }
954
955 static int vfs_gluster_lchown(struct vfs_handle_struct *handle,
956                               const char *path, uid_t uid, gid_t gid)
957 {
958         return glfs_lchown(handle->data, path, uid, gid);
959 }
960
961 static int vfs_gluster_chdir(struct vfs_handle_struct *handle, const char *path)
962 {
963         return glfs_chdir(handle->data, path);
964 }
965
966 static char *vfs_gluster_getwd(struct vfs_handle_struct *handle)
967 {
968         char *cwd;
969         char *ret;
970
971         cwd = SMB_CALLOC_ARRAY(char, PATH_MAX);
972         if (cwd == NULL) {
973                 return NULL;
974         }
975
976         ret = glfs_getcwd(handle->data, cwd, PATH_MAX - 1);
977         if (ret == 0) {
978                 free(cwd);
979         }
980         return ret;
981 }
982
983 static int vfs_gluster_ntimes(struct vfs_handle_struct *handle,
984                               const struct smb_filename *smb_fname,
985                               struct smb_file_time *ft)
986 {
987         struct timespec times[2];
988
989         if (null_timespec(ft->atime)) {
990                 times[0].tv_sec = smb_fname->st.st_ex_atime.tv_sec;
991                 times[0].tv_nsec = smb_fname->st.st_ex_atime.tv_nsec;
992         } else {
993                 times[0].tv_sec = ft->atime.tv_sec;
994                 times[0].tv_nsec = ft->atime.tv_nsec;
995         }
996
997         if (null_timespec(ft->mtime)) {
998                 times[1].tv_sec = smb_fname->st.st_ex_mtime.tv_sec;
999                 times[1].tv_nsec = smb_fname->st.st_ex_mtime.tv_nsec;
1000         } else {
1001                 times[1].tv_sec = ft->mtime.tv_sec;
1002                 times[1].tv_nsec = ft->mtime.tv_nsec;
1003         }
1004
1005         if ((timespec_compare(&times[0],
1006                               &smb_fname->st.st_ex_atime) == 0) &&
1007             (timespec_compare(&times[1],
1008                               &smb_fname->st.st_ex_mtime) == 0)) {
1009                 return 0;
1010         }
1011
1012         return glfs_utimens(handle->data, smb_fname->base_name, times);
1013 }
1014
1015 static int vfs_gluster_ftruncate(struct vfs_handle_struct *handle,
1016                                  files_struct *fsp, off_t offset)
1017 {
1018         return glfs_ftruncate(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), offset);
1019 }
1020
/**
 * fallocate is not implemented by this module.
 *
 * @return always -1, with errno set to ENOTSUP.
 */
static int vfs_gluster_fallocate(struct vfs_handle_struct *handle,
				 struct files_struct *fsp,
				 uint32_t mode,
				 off_t offset, off_t len)
{
	const int rc = -1;

	/* TODO: add support using glfs_fallocate() and glfs_zerofill() */
	errno = ENOTSUP;
	return rc;
}
1030
1031 static char *vfs_gluster_realpath(struct vfs_handle_struct *handle,
1032                                   const char *path)
1033 {
1034         return glfs_realpath(handle->data, path, 0);
1035 }
1036
1037 static bool vfs_gluster_lock(struct vfs_handle_struct *handle,
1038                              files_struct *fsp, int op, off_t offset,
1039                              off_t count, int type)
1040 {
1041         struct flock flock = { 0, };
1042         int ret;
1043
1044         flock.l_type = type;
1045         flock.l_whence = SEEK_SET;
1046         flock.l_start = offset;
1047         flock.l_len = count;
1048         flock.l_pid = 0;
1049
1050         ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), op, &flock);
1051
1052         if (op == F_GETLK) {
1053                 /* lock query, true if someone else has locked */
1054                 if ((ret != -1) &&
1055                     (flock.l_type != F_UNLCK) &&
1056                     (flock.l_pid != 0) && (flock.l_pid != getpid()))
1057                         return true;
1058                 /* not me */
1059                 return false;
1060         }
1061
1062         if (ret == -1) {
1063                 return false;
1064         }
1065
1066         return true;
1067 }
1068
1069 static int vfs_gluster_kernel_flock(struct vfs_handle_struct *handle,
1070                                     files_struct *fsp, uint32_t share_mode,
1071                                     uint32_t access_mask)
1072 {
1073         errno = ENOSYS;
1074         return -1;
1075 }
1076
1077 static int vfs_gluster_linux_setlease(struct vfs_handle_struct *handle,
1078                                       files_struct *fsp, int leasetype)
1079 {
1080         errno = ENOSYS;
1081         return -1;
1082 }
1083
1084 static bool vfs_gluster_getlock(struct vfs_handle_struct *handle,
1085                                 files_struct *fsp, off_t *poffset,
1086                                 off_t *pcount, int *ptype, pid_t *ppid)
1087 {
1088         struct flock flock = { 0, };
1089         int ret;
1090
1091         flock.l_type = *ptype;
1092         flock.l_whence = SEEK_SET;
1093         flock.l_start = *poffset;
1094         flock.l_len = *pcount;
1095         flock.l_pid = 0;
1096
1097         ret = glfs_posix_lock(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), F_GETLK, &flock);
1098
1099         if (ret == -1) {
1100                 return false;
1101         }
1102
1103         *ptype = flock.l_type;
1104         *poffset = flock.l_start;
1105         *pcount = flock.l_len;
1106         *ppid = flock.l_pid;
1107
1108         return true;
1109 }
1110
1111 static int vfs_gluster_symlink(struct vfs_handle_struct *handle,
1112                                const char *oldpath, const char *newpath)
1113 {
1114         return glfs_symlink(handle->data, oldpath, newpath);
1115 }
1116
1117 static int vfs_gluster_readlink(struct vfs_handle_struct *handle,
1118                                 const char *path, char *buf, size_t bufsiz)
1119 {
1120         return glfs_readlink(handle->data, path, buf, bufsiz);
1121 }
1122
1123 static int vfs_gluster_link(struct vfs_handle_struct *handle,
1124                             const char *oldpath, const char *newpath)
1125 {
1126         return glfs_link(handle->data, oldpath, newpath);
1127 }
1128
1129 static int vfs_gluster_mknod(struct vfs_handle_struct *handle, const char *path,
1130                              mode_t mode, SMB_DEV_T dev)
1131 {
1132         return glfs_mknod(handle->data, path, mode, dev);
1133 }
1134
/**
 * chflags is not implemented by this module.
 *
 * @return always -1, with errno set to ENOSYS.
 */
static int vfs_gluster_chflags(struct vfs_handle_struct *handle,
			       const char *path, unsigned int flags)
{
	const int rc = -1;

	errno = ENOSYS;
	return rc;
}
1141
1142 static int vfs_gluster_get_real_filename(struct vfs_handle_struct *handle,
1143                                          const char *path, const char *name,
1144                                          TALLOC_CTX *mem_ctx, char **found_name)
1145 {
1146         int ret;
1147         char key_buf[NAME_MAX + 64];
1148         char val_buf[NAME_MAX + 1];
1149
1150         if (strlen(name) >= NAME_MAX) {
1151                 errno = ENAMETOOLONG;
1152                 return -1;
1153         }
1154
1155         snprintf(key_buf, NAME_MAX + 64,
1156                  "glusterfs.get_real_filename:%s", name);
1157
1158         ret = glfs_getxattr(handle->data, path, key_buf, val_buf, NAME_MAX + 1);
1159         if (ret == -1) {
1160                 if (errno == ENODATA) {
1161                         errno = EOPNOTSUPP;
1162                 }
1163                 return -1;
1164         }
1165
1166         *found_name = talloc_strdup(mem_ctx, val_buf);
1167         if (found_name[0] == NULL) {
1168                 errno = ENOMEM;
1169                 return -1;
1170         }
1171         return 0;
1172 }
1173
1174 static const char *vfs_gluster_connectpath(struct vfs_handle_struct *handle,
1175                                            const char *filename)
1176 {
1177         return handle->conn->connectpath;
1178 }
1179
1180 /* EA Operations */
1181
1182 static ssize_t vfs_gluster_getxattr(struct vfs_handle_struct *handle,
1183                                     const char *path, const char *name,
1184                                     void *value, size_t size)
1185 {
1186         return glfs_getxattr(handle->data, path, name, value, size);
1187 }
1188
1189 static ssize_t vfs_gluster_fgetxattr(struct vfs_handle_struct *handle,
1190                                      files_struct *fsp, const char *name,
1191                                      void *value, size_t size)
1192 {
1193         return glfs_fgetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size);
1194 }
1195
1196 static ssize_t vfs_gluster_listxattr(struct vfs_handle_struct *handle,
1197                                      const char *path, char *list, size_t size)
1198 {
1199         return glfs_listxattr(handle->data, path, list, size);
1200 }
1201
1202 static ssize_t vfs_gluster_flistxattr(struct vfs_handle_struct *handle,
1203                                       files_struct *fsp, char *list,
1204                                       size_t size)
1205 {
1206         return glfs_flistxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), list, size);
1207 }
1208
1209 static int vfs_gluster_removexattr(struct vfs_handle_struct *handle,
1210                                    const char *path, const char *name)
1211 {
1212         return glfs_removexattr(handle->data, path, name);
1213 }
1214
1215 static int vfs_gluster_fremovexattr(struct vfs_handle_struct *handle,
1216                                     files_struct *fsp, const char *name)
1217 {
1218         return glfs_fremovexattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name);
1219 }
1220
1221 static int vfs_gluster_setxattr(struct vfs_handle_struct *handle,
1222                                 const char *path, const char *name,
1223                                 const void *value, size_t size, int flags)
1224 {
1225         return glfs_setxattr(handle->data, path, name, value, size, flags);
1226 }
1227
1228 static int vfs_gluster_fsetxattr(struct vfs_handle_struct *handle,
1229                                  files_struct *fsp, const char *name,
1230                                  const void *value, size_t size, int flags)
1231 {
1232         return glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp), name, value, size,
1233                               flags);
1234 }
1235
1236 /* AIO Operations */
1237
1238 static bool vfs_gluster_aio_force(struct vfs_handle_struct *handle,
1239                                   files_struct *fsp)
1240 {
1241         return false;
1242 }
1243
1244 /* Offline Operations */
1245
1246 static bool vfs_gluster_is_offline(struct vfs_handle_struct *handle,
1247                                    const struct smb_filename *fname,
1248                                    SMB_STRUCT_STAT *sbuf)
1249 {
1250         return false;
1251 }
1252
/**
 * Marking files offline is not implemented by this module.
 *
 * @return always -1, with errno set to ENOTSUP.
 */
static int vfs_gluster_set_offline(struct vfs_handle_struct *handle,
				   const struct smb_filename *fname)
{
	const int rc = -1;

	errno = ENOTSUP;
	return rc;
}
1259
1260 /*
1261   Gluster ACL Format:
1262
1263   Size = 4 (header) + N * 8 (entry)
1264
1265   Offset  Size    Field (Little Endian)
1266   -------------------------------------
1267   0-3     4-byte  Version
1268
1269   4-5     2-byte  Entry-1 tag
1270   6-7     2-byte  Entry-1 perm
1271   8-11    4-byte  Entry-1 id
1272
1273   12-13   2-byte  Entry-2 tag
1274   14-15   2-byte  Entry-2 perm
1275   16-19   4-byte  Entry-2 id
1276
1277   ...
1278
1279  */
1280
/* header version */
#define GLUSTER_ACL_VERSION 2

/* perm bits */
#define GLUSTER_ACL_READ    0x04
#define GLUSTER_ACL_WRITE   0x02
#define GLUSTER_ACL_EXECUTE 0x01

/* tag values */
#define GLUSTER_ACL_UNDEFINED_TAG  0x00
#define GLUSTER_ACL_USER_OBJ       0x01
#define GLUSTER_ACL_USER           0x02
#define GLUSTER_ACL_GROUP_OBJ      0x04
#define GLUSTER_ACL_GROUP          0x08
#define GLUSTER_ACL_MASK           0x10
#define GLUSTER_ACL_OTHER          0x20

/* id stored for entries whose tag carries no uid/gid */
#define GLUSTER_ACL_UNDEFINED_ID  (-1)

/* sizes in bytes of the ACL header and of a single entry */
#define GLUSTER_ACL_HEADER_SIZE    4
#define GLUSTER_ACL_ENTRY_SIZE     8

/*
 * Total size in bytes of an ACL with n entries. The argument is
 * parenthesised so that expressions such as GLUSTER_ACL_SIZE(a + b)
 * expand correctly.
 */
#define GLUSTER_ACL_SIZE(n)       (GLUSTER_ACL_HEADER_SIZE + ((n) * GLUSTER_ACL_ENTRY_SIZE))
1304
1305 static SMB_ACL_T mode_to_smb_acls(const struct stat *mode, TALLOC_CTX *mem_ctx)
1306 {
1307         struct smb_acl_t *result;
1308         int count;
1309
1310         count = 3;
1311         result = sys_acl_init(mem_ctx);
1312         if (!result) {
1313                 errno = ENOMEM;
1314                 return NULL;
1315         }
1316
1317         result->acl = talloc_array(result, struct smb_acl_entry, count);
1318         if (!result->acl) {
1319                 errno = ENOMEM;
1320                 talloc_free(result);
1321                 return NULL;
1322         }
1323
1324         result->count = count;
1325
1326         result->acl[0].a_type = SMB_ACL_USER_OBJ;
1327         result->acl[0].a_perm = (mode->st_mode & S_IRWXU) >> 6;;
1328
1329         result->acl[1].a_type = SMB_ACL_GROUP_OBJ;
1330         result->acl[1].a_perm = (mode->st_mode & S_IRWXG) >> 3;;
1331
1332         result->acl[2].a_type = SMB_ACL_OTHER;
1333         result->acl[2].a_perm = mode->st_mode & S_IRWXO;;
1334
1335         return result;
1336 }
1337
1338 static SMB_ACL_T gluster_to_smb_acl(const char *buf, size_t xattr_size,
1339                                     TALLOC_CTX *mem_ctx)
1340 {
1341         int count;
1342         size_t size;
1343         struct smb_acl_entry *smb_ace;
1344         struct smb_acl_t *result;
1345         int i;
1346         int offset;
1347         uint16_t tag;
1348         uint16_t perm;
1349         uint32_t id;
1350
1351         size = xattr_size;
1352
1353         if (size < GLUSTER_ACL_HEADER_SIZE) {
1354                 /* ACL should be at least as big as the header (4 bytes) */
1355                 errno = EINVAL;
1356                 return NULL;
1357         }
1358
1359         size -= GLUSTER_ACL_HEADER_SIZE; /* size of header = 4 bytes */
1360
1361         if (size % GLUSTER_ACL_ENTRY_SIZE) {
1362                 /* Size of entries must strictly be a multiple of
1363                    size of an ACE (8 bytes)
1364                 */
1365                 errno = EINVAL;
1366                 return NULL;
1367         }
1368
1369         count = size / GLUSTER_ACL_ENTRY_SIZE;
1370
1371         /* Version is the first 4 bytes of the ACL */
1372         if (IVAL(buf, 0) != GLUSTER_ACL_VERSION) {
1373                 DEBUG(0, ("Unknown gluster ACL version: %d\n",
1374                           IVAL(buf, 0)));
1375                 return NULL;
1376         }
1377         offset = GLUSTER_ACL_HEADER_SIZE;
1378
1379         result = sys_acl_init(mem_ctx);
1380         if (!result) {
1381                 errno = ENOMEM;
1382                 return NULL;
1383         }
1384
1385         result->acl = talloc_array(result, struct smb_acl_entry, count);
1386         if (!result->acl) {
1387                 errno = ENOMEM;
1388                 talloc_free(result);
1389                 return NULL;
1390         }
1391
1392         result->count = count;
1393
1394         smb_ace = result->acl;
1395
1396         for (i = 0; i < count; i++) {
1397                 /* TAG is the first 2 bytes of an entry */
1398                 tag = SVAL(buf, offset);
1399                 offset += 2;
1400
1401                 /* PERM is the next 2 bytes of an entry */
1402                 perm = SVAL(buf, offset);
1403                 offset += 2;
1404
1405                 /* ID is the last 4 bytes of an entry */
1406                 id = IVAL(buf, offset);
1407                 offset += 4;
1408
1409                 switch(tag) {
1410                 case GLUSTER_ACL_USER:
1411                         smb_ace->a_type = SMB_ACL_USER;
1412                         break;
1413                 case GLUSTER_ACL_USER_OBJ:
1414                         smb_ace->a_type = SMB_ACL_USER_OBJ;
1415                         break;
1416                 case GLUSTER_ACL_GROUP:
1417                         smb_ace->a_type = SMB_ACL_GROUP;
1418                         break;
1419                 case GLUSTER_ACL_GROUP_OBJ:
1420                         smb_ace->a_type = SMB_ACL_GROUP_OBJ;
1421                         break;
1422                 case GLUSTER_ACL_OTHER:
1423                         smb_ace->a_type = SMB_ACL_OTHER;
1424                         break;
1425                 case GLUSTER_ACL_MASK:
1426                         smb_ace->a_type = SMB_ACL_MASK;
1427                         break;
1428                 default:
1429                         DEBUG(0, ("unknown tag type %d\n", (unsigned int) tag));
1430                         return NULL;
1431                 }
1432
1433
1434                 switch(smb_ace->a_type) {
1435                 case SMB_ACL_USER:
1436                         smb_ace->info.user.uid = id;
1437                         break;
1438                 case SMB_ACL_GROUP:
1439                         smb_ace->info.group.gid = id;
1440                         break;
1441                 default:
1442                         break;
1443                 }
1444
1445                 smb_ace->a_perm = 0;
1446                 smb_ace->a_perm |=
1447                         ((perm & GLUSTER_ACL_READ) ? SMB_ACL_READ : 0);
1448                 smb_ace->a_perm |=
1449                         ((perm & GLUSTER_ACL_WRITE) ? SMB_ACL_WRITE : 0);
1450                 smb_ace->a_perm |=
1451                         ((perm & GLUSTER_ACL_EXECUTE) ? SMB_ACL_EXECUTE : 0);
1452
1453                 smb_ace++;
1454         }
1455
1456         return result;
1457 }
1458
1459
/**
 * qsort() comparator for 8-byte gluster ACL entries.
 *
 * Sorting precedence:
 *
 *  - Smaller TAG values must be earlier.
 *
 *  - Within same TAG, smaller identifiers must be earlier, E.g:
 *    UID 0 entry must be earlier than UID 200
 *    GID 17 entry must be earlier than GID 19
 *
 * Ids are compared as unsigned 32-bit values with explicit relational
 * tests. Subtracting them and truncating to int gives the wrong sign
 * once two ids differ by 2^31 or more.
 *
 * @return negative, zero or positive when @left sorts before, equal to
 *         or after @right.
 */
static int gluster_ace_cmp(const void *left, const void *right)
{
	uint16_t tag_left, tag_right;
	uint32_t id_left, id_right;

	/* TAG is the first element in the entry */
	tag_left = SVAL(left, 0);
	tag_right = SVAL(right, 0);

	if (tag_left != tag_right) {
		return (tag_left < tag_right) ? -1 : 1;
	}

	/* ID is the third element in the entry, after two short
	   integers (tag and perm), i.e at offset 4.
	*/
	id_left = IVAL(left, 4);
	id_right = IVAL(right, 4);

	if (id_left < id_right) {
		return -1;
	}
	if (id_left > id_right) {
		return 1;
	}
	return 0;
}
1492
1493
1494 static ssize_t smb_to_gluster_acl(SMB_ACL_T theacl, char *buf, size_t len)
1495 {
1496         ssize_t size;
1497         struct smb_acl_entry *smb_ace;
1498         int i;
1499         int count;
1500         uint16_t tag;
1501         uint16_t perm;
1502         uint32_t id;
1503         int offset;
1504
1505         count = theacl->count;
1506
1507         size = GLUSTER_ACL_HEADER_SIZE + (count * GLUSTER_ACL_ENTRY_SIZE);
1508         if (!buf) {
1509                 return size;
1510         }
1511
1512         if (len < size) {
1513                 errno = ERANGE;
1514                 return -1;
1515         }
1516
1517         smb_ace = theacl->acl;
1518
1519         /* Version is the first 4 bytes of the ACL */
1520         SIVAL(buf, 0, GLUSTER_ACL_VERSION);
1521         offset = GLUSTER_ACL_HEADER_SIZE;
1522
1523         for (i = 0; i < count; i++) {
1524                 /* Calculate tag */
1525                 switch(smb_ace->a_type) {
1526                 case SMB_ACL_USER:
1527                         tag = GLUSTER_ACL_USER;
1528                         break;
1529                 case SMB_ACL_USER_OBJ:
1530                         tag = GLUSTER_ACL_USER_OBJ;
1531                         break;
1532                 case SMB_ACL_GROUP:
1533                         tag = GLUSTER_ACL_GROUP;
1534                         break;
1535                 case SMB_ACL_GROUP_OBJ:
1536                         tag = GLUSTER_ACL_GROUP_OBJ;
1537                         break;
1538                 case SMB_ACL_OTHER:
1539                         tag = GLUSTER_ACL_OTHER;
1540                         break;
1541                 case SMB_ACL_MASK:
1542                         tag = GLUSTER_ACL_MASK;
1543                         break;
1544                 default:
1545                         DEBUG(0, ("Unknown tag value %d\n",
1546                                   smb_ace->a_type));
1547                         errno = EINVAL;
1548                         return -1;
1549                 }
1550
1551
1552                 /* Calculate id */
1553                 switch(smb_ace->a_type) {
1554                 case SMB_ACL_USER:
1555                         id = smb_ace->info.user.uid;
1556                         break;
1557                 case SMB_ACL_GROUP:
1558                         id = smb_ace->info.group.gid;
1559                         break;
1560                 default:
1561                         id = GLUSTER_ACL_UNDEFINED_ID;
1562                         break;
1563                 }
1564
1565                 /* Calculate perm */
1566                 perm = 0;
1567
1568                 perm |=
1569                         ((smb_ace->a_perm & SMB_ACL_READ) ? GLUSTER_ACL_READ : 0);
1570                 perm |=
1571                         ((smb_ace->a_perm & SMB_ACL_WRITE) ? GLUSTER_ACL_WRITE : 0);
1572                 perm |=
1573                         ((smb_ace->a_perm & SMB_ACL_EXECUTE) ? GLUSTER_ACL_EXECUTE : 0);
1574
1575
1576                 /* TAG is the first 2 bytes of an entry */
1577                 SSVAL(buf, offset, tag);
1578                 offset += 2;
1579
1580                 /* PERM is the next 2 bytes of an entry */
1581                 SSVAL(buf, offset, perm);
1582                 offset += 2;
1583
1584                 /* ID is the last 4 bytes of an entry */
1585                 SIVAL(buf, offset, id);
1586                 offset += 4;
1587
1588                 smb_ace++;
1589         }
1590
1591         /* Skip the header, sort @count number of 8-byte entries */
1592         qsort(buf+GLUSTER_ACL_HEADER_SIZE, count, GLUSTER_ACL_ENTRY_SIZE,
1593               gluster_ace_cmp);
1594
1595         return size;
1596 }
1597
1598
1599 static SMB_ACL_T vfs_gluster_sys_acl_get_file(struct vfs_handle_struct *handle,
1600                                               const char *path_p,
1601                                               SMB_ACL_TYPE_T type,
1602                                               TALLOC_CTX *mem_ctx)
1603 {
1604         struct smb_acl_t *result;
1605         struct stat st;
1606         char *buf;
1607         const char *key;
1608         ssize_t ret, size = GLUSTER_ACL_SIZE(20);
1609
1610         switch (type) {
1611         case SMB_ACL_TYPE_ACCESS:
1612                 key = "system.posix_acl_access";
1613                 break;
1614         case SMB_ACL_TYPE_DEFAULT:
1615                 key = "system.posix_acl_default";
1616                 break;
1617         default:
1618                 errno = EINVAL;
1619                 return NULL;
1620         }
1621
1622         buf = alloca(size);
1623         if (!buf) {
1624                 return NULL;
1625         }
1626
1627         ret = glfs_getxattr(handle->data, path_p, key, buf, size);
1628         if (ret == -1 && errno == ERANGE) {
1629                 ret = glfs_getxattr(handle->data, path_p, key, 0, 0);
1630                 if (ret > 0) {
1631                         buf = alloca(ret);
1632                         if (!buf) {
1633                                 return NULL;
1634                         }
1635                         ret = glfs_getxattr(handle->data, path_p, key, buf, ret);
1636                 }
1637         }
1638
1639         /* retrieving the ACL from the xattr has finally failed, do a
1640          * mode-to-acl mapping */
1641
1642         if (ret == -1 && errno == ENODATA) {
1643                 ret = glfs_stat(handle->data, path_p, &st);
1644                 if (ret == 0) {
1645                         result = mode_to_smb_acls(&st, mem_ctx);
1646                         return result;
1647                 }
1648         }
1649
1650         if (ret <= 0) {
1651                 return NULL;
1652         }
1653
1654         result = gluster_to_smb_acl(buf, ret, mem_ctx);
1655
1656         return result;
1657 }
1658
1659 static SMB_ACL_T vfs_gluster_sys_acl_get_fd(struct vfs_handle_struct *handle,
1660                                             struct files_struct *fsp,
1661                                             TALLOC_CTX *mem_ctx)
1662 {
1663         struct smb_acl_t *result;
1664         struct stat st;
1665         ssize_t ret, size = GLUSTER_ACL_SIZE(20);
1666         char *buf;
1667         glfs_fd_t *glfd;
1668
1669         glfd = *(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp);
1670
1671         buf = alloca(size);
1672         if (!buf) {
1673                 return NULL;
1674         }
1675
1676         ret = glfs_fgetxattr(glfd, "system.posix_acl_access", buf, size);
1677         if (ret == -1 && errno == ERANGE) {
1678                 ret = glfs_fgetxattr(glfd, "system.posix_acl_access", 0, 0);
1679                 if (ret > 0) {
1680                         buf = alloca(ret);
1681                         if (!buf) {
1682                                 return NULL;
1683                         }
1684                         ret = glfs_fgetxattr(glfd, "system.posix_acl_access",
1685                                              buf, ret);
1686                 }
1687         }
1688
1689         /* retrieving the ACL from the xattr has finally failed, do a
1690          * mode-to-acl mapping */
1691
1692         if (ret == -1 && errno == ENODATA) {
1693                 ret = glfs_fstat(glfd, &st);
1694                 if (ret == 0) {
1695                         result = mode_to_smb_acls(&st, mem_ctx);
1696                         return result;
1697                 }
1698         }
1699
1700         if (ret <= 0) {
1701                 return NULL;
1702         }
1703
1704         result = gluster_to_smb_acl(buf, ret, mem_ctx);
1705
1706         return result;
1707 }
1708
1709 static int vfs_gluster_sys_acl_set_file(struct vfs_handle_struct *handle,
1710                                         const char *name,
1711                                         SMB_ACL_TYPE_T acltype,
1712                                         SMB_ACL_T theacl)
1713 {
1714         int ret;
1715         const char *key;
1716         char *buf;
1717         ssize_t size;
1718
1719         switch (acltype) {
1720         case SMB_ACL_TYPE_ACCESS:
1721                 key = "system.posix_acl_access";
1722                 break;
1723         case SMB_ACL_TYPE_DEFAULT:
1724                 key = "system.posix_acl_default";
1725                 break;
1726         default:
1727                 errno = EINVAL;
1728                 return -1;
1729         }
1730
1731         size = smb_to_gluster_acl(theacl, 0, 0);
1732         buf = alloca(size);
1733
1734         size = smb_to_gluster_acl(theacl, buf, size);
1735         if (size == -1) {
1736                 return -1;
1737         }
1738
1739         ret = glfs_setxattr(handle->data, name, key, buf, size, 0);
1740
1741         return ret;
1742 }
1743
1744 static int vfs_gluster_sys_acl_set_fd(struct vfs_handle_struct *handle,
1745                                       struct files_struct *fsp,
1746                                       SMB_ACL_T theacl)
1747 {
1748         int ret;
1749         char *buf;
1750         ssize_t size;
1751
1752         size = smb_to_gluster_acl(theacl, 0, 0);
1753         buf = alloca(size);
1754
1755         size = smb_to_gluster_acl(theacl, buf, size);
1756         if (size == -1) {
1757                 return -1;
1758         }
1759
1760         ret = glfs_fsetxattr(*(glfs_fd_t **)VFS_FETCH_FSP_EXTENSION(handle, fsp),
1761                              "system.posix_acl_access", buf, size, 0);
1762         return ret;
1763 }
1764
1765 static int vfs_gluster_sys_acl_delete_def_file(struct vfs_handle_struct *handle,
1766                                                const char *path)
1767 {
1768         return glfs_removexattr(handle->data, path, "system.posix_acl_default");
1769 }
1770
1771 static struct vfs_fn_pointers glusterfs_fns = {
1772
1773         /* Disk Operations */
1774
1775         .connect_fn = vfs_gluster_connect,
1776         .disconnect_fn = vfs_gluster_disconnect,
1777         .disk_free_fn = vfs_gluster_disk_free,
1778         .get_quota_fn = vfs_gluster_get_quota,
1779         .set_quota_fn = vfs_gluster_set_quota,
1780         .statvfs_fn = vfs_gluster_statvfs,
1781         .fs_capabilities_fn = vfs_gluster_fs_capabilities,
1782
1783         .get_dfs_referrals_fn = NULL,
1784
1785         /* Directory Operations */
1786
1787         .opendir_fn = vfs_gluster_opendir,
1788         .fdopendir_fn = vfs_gluster_fdopendir,
1789         .readdir_fn = vfs_gluster_readdir,
1790         .seekdir_fn = vfs_gluster_seekdir,
1791         .telldir_fn = vfs_gluster_telldir,
1792         .rewind_dir_fn = vfs_gluster_rewinddir,
1793         .mkdir_fn = vfs_gluster_mkdir,
1794         .rmdir_fn = vfs_gluster_rmdir,
1795         .closedir_fn = vfs_gluster_closedir,
1796         .init_search_op_fn = vfs_gluster_init_search_op,
1797
1798         /* File Operations */
1799
1800         .open_fn = vfs_gluster_open,
1801         .create_file_fn = NULL,
1802         .close_fn = vfs_gluster_close,
1803         .read_fn = vfs_gluster_read,
1804         .pread_fn = vfs_gluster_pread,
1805         .pread_send_fn = vfs_gluster_pread_send,
1806         .pread_recv_fn = vfs_gluster_recv,
1807         .write_fn = vfs_gluster_write,
1808         .pwrite_fn = vfs_gluster_pwrite,
1809         .pwrite_send_fn = vfs_gluster_pwrite_send,
1810         .pwrite_recv_fn = vfs_gluster_recv,
1811         .lseek_fn = vfs_gluster_lseek,
1812         .sendfile_fn = vfs_gluster_sendfile,
1813         .recvfile_fn = vfs_gluster_recvfile,
1814         .rename_fn = vfs_gluster_rename,
1815         .fsync_fn = vfs_gluster_fsync,
1816         .fsync_send_fn = vfs_gluster_fsync_send,
1817         .fsync_recv_fn = vfs_gluster_fsync_recv,
1818
1819         .stat_fn = vfs_gluster_stat,
1820         .fstat_fn = vfs_gluster_fstat,
1821         .lstat_fn = vfs_gluster_lstat,
1822         .get_alloc_size_fn = vfs_gluster_get_alloc_size,
1823         .unlink_fn = vfs_gluster_unlink,
1824
1825         .chmod_fn = vfs_gluster_chmod,
1826         .fchmod_fn = vfs_gluster_fchmod,
1827         .chown_fn = vfs_gluster_chown,
1828         .fchown_fn = vfs_gluster_fchown,
1829         .lchown_fn = vfs_gluster_lchown,
1830         .chdir_fn = vfs_gluster_chdir,
1831         .getwd_fn = vfs_gluster_getwd,
1832         .ntimes_fn = vfs_gluster_ntimes,
1833         .ftruncate_fn = vfs_gluster_ftruncate,
1834         .fallocate_fn = vfs_gluster_fallocate,
1835         .lock_fn = vfs_gluster_lock,
1836         .kernel_flock_fn = vfs_gluster_kernel_flock,
1837         .linux_setlease_fn = vfs_gluster_linux_setlease,
1838         .getlock_fn = vfs_gluster_getlock,
1839         .symlink_fn = vfs_gluster_symlink,
1840         .readlink_fn = vfs_gluster_readlink,
1841         .link_fn = vfs_gluster_link,
1842         .mknod_fn = vfs_gluster_mknod,
1843         .realpath_fn = vfs_gluster_realpath,
1844         .chflags_fn = vfs_gluster_chflags,
1845         .file_id_create_fn = NULL,
1846         .copy_chunk_send_fn = NULL,
1847         .copy_chunk_recv_fn = NULL,
1848         .streaminfo_fn = NULL,
1849         .get_real_filename_fn = vfs_gluster_get_real_filename,
1850         .connectpath_fn = vfs_gluster_connectpath,
1851
1852         .brl_lock_windows_fn = NULL,
1853         .brl_unlock_windows_fn = NULL,
1854         .brl_cancel_windows_fn = NULL,
1855         .strict_lock_fn = NULL,
1856         .strict_unlock_fn = NULL,
1857         .translate_name_fn = NULL,
1858         .fsctl_fn = NULL,
1859
1860         /* NT ACL Operations */
1861         .fget_nt_acl_fn = NULL,
1862         .get_nt_acl_fn = NULL,
1863         .fset_nt_acl_fn = NULL,
1864         .audit_file_fn = NULL,
1865
1866         /* Posix ACL Operations */
1867         .chmod_acl_fn = NULL,   /* passthrough to default */
1868         .fchmod_acl_fn = NULL,  /* passthrough to default */
1869         .sys_acl_get_file_fn = vfs_gluster_sys_acl_get_file,
1870         .sys_acl_get_fd_fn = vfs_gluster_sys_acl_get_fd,
1871         .sys_acl_blob_get_file_fn = posix_sys_acl_blob_get_file,
1872         .sys_acl_blob_get_fd_fn = posix_sys_acl_blob_get_fd,
1873         .sys_acl_set_file_fn = vfs_gluster_sys_acl_set_file,
1874         .sys_acl_set_fd_fn = vfs_gluster_sys_acl_set_fd,
1875         .sys_acl_delete_def_file_fn = vfs_gluster_sys_acl_delete_def_file,
1876
1877         /* EA Operations */
1878         .getxattr_fn = vfs_gluster_getxattr,
1879         .fgetxattr_fn = vfs_gluster_fgetxattr,
1880         .listxattr_fn = vfs_gluster_listxattr,
1881         .flistxattr_fn = vfs_gluster_flistxattr,
1882         .removexattr_fn = vfs_gluster_removexattr,
1883         .fremovexattr_fn = vfs_gluster_fremovexattr,
1884         .setxattr_fn = vfs_gluster_setxattr,
1885         .fsetxattr_fn = vfs_gluster_fsetxattr,
1886
1887         /* AIO Operations */
1888         .aio_force_fn = vfs_gluster_aio_force,
1889
1890         /* Offline Operations */
1891         .is_offline_fn = vfs_gluster_is_offline,
1892         .set_offline_fn = vfs_gluster_set_offline,
1893
1894         /* Durable handle Operations */
1895         .durable_cookie_fn = NULL,
1896         .durable_disconnect_fn = NULL,
1897         .durable_reconnect_fn = NULL,
1898 };
1899
1900 NTSTATUS vfs_glusterfs_init(void);
1901 NTSTATUS vfs_glusterfs_init(void)
1902 {
1903         return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
1904                                 "glusterfs", &glusterfs_fns);
1905 }