vfs_ceph: Fix a usage in comments
[samba.git] / source3 / modules / vfs_ceph.c
1 /*
2    Unix SMB/CIFS implementation.
3    Wrap disk only vfs functions to sidestep dodgy compilers.
4    Copyright (C) Tim Potter 1998
5    Copyright (C) Jeremy Allison 2007
6    Copyright (C) Brian Chrisman 2011 <bchrisman@gmail.com>
7    Copyright (C) Richard Sharpe 2011 <realrichardsharpe@gmail.com>
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /*
24  * This VFS only works with the libcephfs.so user-space client. It is not needed
25  * if you are using the kernel client or the FUSE client.
26  *
27  * Add the following smb.conf parameter to each share that will be hosted on
28  * Ceph:
29  *
30  *   vfs objects = [any others you need go here] ceph
31  */
32
33 #include "includes.h"
34 #include "smbd/smbd.h"
35 #include "system/filesys.h"
36 #include <dirent.h>
37 #include <sys/statvfs.h>
38 #include "cephfs/libcephfs.h"
39 #include "smbprofile.h"
40 #include "modules/posixacl_xattr.h"
41 #include "lib/util/tevent_unix.h"
42
43 #undef DBGC_CLASS
44 #define DBGC_CLASS DBGC_VFS
45
/*
 * Fallback used when the libcephfs headers do not define version macros:
 * pack (major, minor, extra) into one comparable integer and report the
 * library as version 0.0.0. Every macro argument is parenthesised so that
 * expression arguments (e.g. "a | b") are evaluated before being shifted.
 */
#ifndef LIBCEPHFS_VERSION
#define LIBCEPHFS_VERSION(maj, min, extra) \
	(((maj) << 16) + ((min) << 8) + (extra))
#define LIBCEPHFS_VERSION_CODE LIBCEPHFS_VERSION(0, 0, 0)
#endif
50
/*
 * Use %llu whenever we have a 64bit unsigned int, and cast the argument
 * with llu(). The argument is parenthesised so the cast applies to the
 * whole expression rather than only to its first operand.
 */
#define llu(_var) ((long long unsigned)(_var))
55
/*
 * Note, libcephfs's return code model is to return -errno! So we have to
 * convert to what Samba expects, which is to set errno to -return and
 * return -1.
 *
 * WRAP_RETURN(res) always returns from the calling function:
 *   - errno is cleared first;
 *   - if res is negative, errno is set to -res and -1 is returned;
 *   - otherwise res itself is returned.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe after an unbraced "if"), and _res is parenthesised so
 * that expression arguments keep their intended precedence. Note that
 * _res is still evaluated more than once: pass a plain variable or
 * constant, not an expression with side effects.
 */
#define WRAP_RETURN(_res) \
	do { \
		errno = 0; \
		if ((_res) < 0) { \
			errno = -(_res); \
			return -1; \
		} \
		return (_res); \
	} while (0)
67
/*
 * Cache of cephfs connections ("virtual mounts").
 *
 * Each share stores its mount on handle->data, but the mount objects
 * themselves are shared through this list so that several shares using
 * the same cephfs do not each spawn their own mount.
 *
 * An entry is identified by its 'cookie', a string assembled from the
 * identifying smb.conf parameters of the share.
 */

static struct cephmount_cached {
	/* identifying string built from smb.conf parameters */
	char *cookie;
	/* number of users currently sharing this mount */
	uint32_t count;
	/* the libcephfs mount handle being shared */
	struct ceph_mount_info *mount;
	/* doubly linked list pointers */
	struct cephmount_cached *next;
	struct cephmount_cached *prev;
} *cephmount_cached;
84
85 static int cephmount_cache_add(const char *cookie,
86                                struct ceph_mount_info *mount)
87 {
88         struct cephmount_cached *entry = NULL;
89
90         entry = talloc_zero(NULL, struct cephmount_cached);
91         if (entry == NULL) {
92                 errno = ENOMEM;
93                 return -1;
94         }
95
96         entry->cookie = talloc_strdup(entry, cookie);
97         if (entry->cookie == NULL) {
98                 talloc_free(entry);
99                 errno = ENOMEM;
100                 return -1;
101         }
102
103         entry->mount = mount;
104         entry->count = 1;
105
106         DBG_DEBUG("adding mount cache entry for %s\n", entry->cookie);
107         DLIST_ADD(cephmount_cached, entry);
108         return 0;
109 }
110
111 static struct ceph_mount_info *cephmount_cache_update(const char *cookie)
112 {
113         struct cephmount_cached *entry = NULL;
114
115         for (entry = cephmount_cached; entry; entry = entry->next) {
116                 if (strcmp(entry->cookie, cookie) == 0) {
117                         entry->count++;
118                         DBG_DEBUG("updated mount cache: count is [%"
119                                   PRIu32 "]\n", entry->count);
120                         return entry->mount;
121                 }
122         }
123
124         errno = ENOENT;
125         return NULL;
126 }
127
128 static int cephmount_cache_remove(struct ceph_mount_info *mount)
129 {
130         struct cephmount_cached *entry = NULL;
131
132         for (entry = cephmount_cached; entry; entry = entry->next) {
133                 if (entry->mount == mount) {
134                         if (--entry->count) {
135                                 DBG_DEBUG("updated mount cache: count is [%"
136                                           PRIu32 "]\n", entry->count);
137                                 return entry->count;
138                         }
139
140                         DBG_DEBUG("removing mount cache entry for %s\n",
141                                   entry->cookie);
142                         DLIST_REMOVE(cephmount_cached, entry);
143                         talloc_free(entry);
144                         return 0;
145                 }
146         }
147         errno = ENOENT;
148         return -1;
149 }
150
151 static char *cephmount_get_cookie(TALLOC_CTX * mem_ctx, const int snum)
152 {
153         const char *conf_file =
154             lp_parm_const_string(snum, "ceph", "config_file", ".");
155         const char *user_id = lp_parm_const_string(snum, "ceph", "user_id", "");
156         const char *fsname =
157             lp_parm_const_string(snum, "ceph", "filesystem", "");
158         return talloc_asprintf(mem_ctx, "(%s/%s/%s)", conf_file, user_id,
159                                fsname);
160 }
161
/*
 * Select which cephfs file system a not-yet-mounted handle should use.
 *
 * ceph_select_filesystem was added in ceph 'nautilus' (v14), so older
 * libcephfs versions lack it. At the time the original comment was written
 * (Feb 2023) all upstream-supported ceph versions provided it.
 *
 * Returns the result of ceph_select_filesystem when the build has it
 * (HAVE_CEPH_SELECT_FILESYSTEM), otherwise -ENOTSUP.
 */
static int cephmount_select_fs(struct ceph_mount_info *mnt, const char *fsname)
{
	int rc = -ENOTSUP;

#if defined(HAVE_CEPH_SELECT_FILESYSTEM)
	DBG_DEBUG("[CEPH] calling: ceph_select_filesystem with %s\n", fsname);
	rc = ceph_select_filesystem(mnt, fsname);
#else
	DBG_ERR("[CEPH] ceph_select_filesystem not available\n");
#endif
	return rc;
}
178
/*
 * Create, configure and mount a new libcephfs handle for a share.
 *
 * Steps, in order: ceph_create, ceph_conf_read_file, a ceph_conf_get of
 * "log file", enabling POSIX ACLs, disabling fuse_default_permissions,
 * optional file system selection ("ceph:filesystem"), then ceph_mount.
 *
 * Returns the mounted handle, or NULL on failure. libcephfs reports errors
 * as -errno, so errno is set to the negated return code of the failing
 * call. If any step after ceph_create fails the handle is released.
 */
static struct ceph_mount_info *cephmount_mount_fs(const int snum)
{
	struct ceph_mount_info *cmount = NULL;
	char logfile[256];
	int rc;
	/* if config_file and/or user_id are NULL, ceph will use defaults */
	const char *conf_file =
	    lp_parm_const_string(snum, "ceph", "config_file", NULL);
	const char *user_id =
	    lp_parm_const_string(snum, "ceph", "user_id", NULL);
	const char *fsname =
	    lp_parm_const_string(snum, "ceph", "filesystem", NULL);

	DBG_DEBUG("[CEPH] calling: ceph_create\n");
	rc = ceph_create(&cmount, user_id);
	if (rc != 0) {
		/* Nothing to release yet. */
		errno = -rc;
		return NULL;
	}

	DBG_DEBUG("[CEPH] calling: ceph_conf_read_file with %s\n",
		  (conf_file != NULL ? conf_file : "default path"));
	rc = ceph_conf_read_file(cmount, conf_file);
	if (rc != 0) {
		goto release;
	}

	DBG_DEBUG("[CEPH] calling: ceph_conf_get\n");
	rc = ceph_conf_get(cmount, "log file", logfile, sizeof(logfile));
	if (rc < 0) {
		goto release;
	}

	/* libcephfs disables POSIX ACL support by default, enable it... */
	rc = ceph_conf_set(cmount, "client_acl_type", "posix_acl");
	if (rc < 0) {
		goto release;
	}

	/* tell libcephfs to perform local permission checks */
	rc = ceph_conf_set(cmount, "fuse_default_permissions", "false");
	if (rc < 0) {
		goto release;
	}

	/*
	 * select a cephfs file system to use:
	 * In ceph, multiple file system support has been stable since
	 * 'pacific'. Permit different shares to access different file
	 * systems.
	 */
	if (fsname != NULL) {
		rc = cephmount_select_fs(cmount, fsname);
		if (rc < 0) {
			goto release;
		}
	}

	DBG_DEBUG("[CEPH] calling: ceph_mount\n");
	rc = ceph_mount(cmount, NULL);
	if (rc >= 0) {
		/* Mounted. A non-zero code is still mirrored into errno. */
		if (rc != 0) {
			errno = -rc;
		}
		return cmount;
	}

release:
	/* Every path reaching this label carries a non-zero rc. */
	ceph_release(cmount);
	cmount = NULL;
	DBG_DEBUG("[CEPH] Error mounting fs: %s\n", strerror(-rc));
	/* Ceph returns -errno. */
	errno = -rc;
	return cmount;
}
253
254 /* Check for NULL pointer parameters in cephwrap_* functions */
255
256 /* We don't want to have NULL function pointers lying around.  Someone
257    is sure to try and execute them.  These stubs are used to prevent
258    this possibility. */
259
260 static int cephwrap_connect(struct vfs_handle_struct *handle,
261                             const char *service, const char *user)
262 {
263         int ret = 0;
264         struct ceph_mount_info *cmount = NULL;
265         int snum = SNUM(handle->conn);
266         char *cookie = cephmount_get_cookie(handle, snum);
267         if (cookie == NULL) {
268                 return -1;
269         }
270
271         cmount = cephmount_cache_update(cookie);
272         if (cmount != NULL) {
273                 goto connect_ok;
274         }
275
276         cmount = cephmount_mount_fs(snum);
277         if (cmount == NULL) {
278                 ret = -1;
279                 goto connect_fail;
280         }
281         ret = cephmount_cache_add(cookie, cmount);
282         if (ret) {
283                 goto connect_fail;
284         }
285
286       connect_ok:
287         handle->data = cmount;
288         /*
289          * Unless we have an async implementation of getxattrat turn this off.
290          */
291         lp_do_parameter(SNUM(handle->conn), "smbd async dosmode", "false");
292       connect_fail:
293         talloc_free(cookie);
294         return ret;
295 }
296
297 static void cephwrap_disconnect(struct vfs_handle_struct *handle)
298 {
299         int ret = cephmount_cache_remove(handle->data);
300         if (ret < 0) {
301                 DBG_ERR("failed to remove ceph mount from cache: %s\n",
302                         strerror(errno));
303                 return;
304         }
305         if (ret > 0) {
306                 DBG_DEBUG("mount cache entry still in use\n");
307                 return;
308         }
309
310         ret = ceph_unmount(handle->data);
311         if (ret < 0) {
312                 DBG_ERR("[CEPH] failed to unmount: %s\n", strerror(-ret));
313         }
314
315         ret = ceph_release(handle->data);
316         if (ret < 0) {
317                 DBG_ERR("[CEPH] failed to release: %s\n", strerror(-ret));
318         }
319         handle->data = NULL;
320 }
321
322 /* Disk operations */
323
324 static uint64_t cephwrap_disk_free(struct vfs_handle_struct *handle,
325                                 const struct smb_filename *smb_fname,
326                                 uint64_t *bsize,
327                                 uint64_t *dfree,
328                                 uint64_t *dsize)
329 {
330         struct statvfs statvfs_buf = { 0 };
331         int ret;
332
333         if (!(ret = ceph_statfs(handle->data, smb_fname->base_name,
334                         &statvfs_buf))) {
335                 /*
336                  * Provide all the correct values.
337                  */
338                 *bsize = statvfs_buf.f_bsize;
339                 *dfree = statvfs_buf.f_bavail;
340                 *dsize = statvfs_buf.f_blocks;
341                 DBG_DEBUG("[CEPH] bsize: %llu, dfree: %llu, dsize: %llu\n",
342                         llu(*bsize), llu(*dfree), llu(*dsize));
343                 return *dfree;
344         } else {
345                 DBG_DEBUG("[CEPH] ceph_statfs returned %d\n", ret);
346                 WRAP_RETURN(ret);
347         }
348 }
349
350 static int cephwrap_get_quota(struct vfs_handle_struct *handle,
351                                 const struct smb_filename *smb_fname,
352                                 enum SMB_QUOTA_TYPE qtype,
353                                 unid_t id,
354                                 SMB_DISK_QUOTA *qt)
355 {
356         /* libcephfs: Ceph does not implement this */
357 #if 0
358 /* was ifdef HAVE_SYS_QUOTAS */
359         int ret;
360
361         ret = ceph_get_quota(handle->conn->connectpath, qtype, id, qt);
362
363         if (ret) {
364                 errno = -ret;
365                 ret = -1;
366         }
367
368         return ret;
369 #else
370         errno = ENOSYS;
371         return -1;
372 #endif
373 }
374
375 static int cephwrap_set_quota(struct vfs_handle_struct *handle,  enum SMB_QUOTA_TYPE qtype, unid_t id, SMB_DISK_QUOTA *qt)
376 {
377         /* libcephfs: Ceph does not implement this */
378 #if 0
379 /* was ifdef HAVE_SYS_QUOTAS */
380         int ret;
381
382         ret = ceph_set_quota(handle->conn->connectpath, qtype, id, qt);
383         if (ret) {
384                 errno = -ret;
385                 ret = -1;
386         }
387
388         return ret;
389 #else
390         WRAP_RETURN(-ENOSYS);
391 #endif
392 }
393
394 static int cephwrap_statvfs(struct vfs_handle_struct *handle,
395                             const struct smb_filename *smb_fname,
396                             struct vfs_statvfs_struct *statbuf)
397 {
398         struct statvfs statvfs_buf = { 0 };
399         int ret;
400
401         ret = ceph_statfs(handle->data, smb_fname->base_name, &statvfs_buf);
402         if (ret < 0) {
403                 WRAP_RETURN(ret);
404         }
405
406         statbuf->OptimalTransferSize = statvfs_buf.f_frsize;
407         statbuf->BlockSize = statvfs_buf.f_bsize;
408         statbuf->TotalBlocks = statvfs_buf.f_blocks;
409         statbuf->BlocksAvail = statvfs_buf.f_bfree;
410         statbuf->UserBlocksAvail = statvfs_buf.f_bavail;
411         statbuf->TotalFileNodes = statvfs_buf.f_files;
412         statbuf->FreeFileNodes = statvfs_buf.f_ffree;
413         statbuf->FsIdentifier = statvfs_buf.f_fsid;
414         DBG_DEBUG("[CEPH] f_bsize: %ld, f_blocks: %ld, f_bfree: %ld, f_bavail: %ld\n",
415                 (long int)statvfs_buf.f_bsize, (long int)statvfs_buf.f_blocks,
416                 (long int)statvfs_buf.f_bfree, (long int)statvfs_buf.f_bavail);
417
418         return ret;
419 }
420
421 static uint32_t cephwrap_fs_capabilities(struct vfs_handle_struct *handle,
422                                          enum timestamp_set_resolution *p_ts_res)
423 {
424         uint32_t caps = FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
425
426         *p_ts_res = TIMESTAMP_SET_NT_OR_BETTER;
427
428         return caps;
429 }
430
431 /* Directory operations */
432
433 static DIR *cephwrap_fdopendir(struct vfs_handle_struct *handle,
434                                struct files_struct *fsp,
435                                const char *mask,
436                                uint32_t attributes)
437 {
438         int ret = 0;
439         struct ceph_dir_result *result = NULL;
440         DBG_DEBUG("[CEPH] fdopendir(%p, %p)\n", handle, fsp);
441
442         ret = ceph_opendir(handle->data, fsp->fsp_name->base_name, &result);
443         if (ret < 0) {
444                 result = NULL;
445                 errno = -ret; /* We return result which is NULL in this case */
446         }
447
448         DBG_DEBUG("[CEPH] fdopendir(...) = %d\n", ret);
449         return (DIR *) result;
450 }
451
452 static struct dirent *cephwrap_readdir(struct vfs_handle_struct *handle,
453                                        struct files_struct *dirfsp,
454                                        DIR *dirp)
455 {
456         struct dirent *result = NULL;
457
458         DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp);
459         result = ceph_readdir(handle->data, (struct ceph_dir_result *) dirp);
460         DBG_DEBUG("[CEPH] readdir(...) = %p\n", result);
461
462         return result;
463 }
464
465 static void cephwrap_rewinddir(struct vfs_handle_struct *handle, DIR *dirp)
466 {
467         DBG_DEBUG("[CEPH] rewinddir(%p, %p)\n", handle, dirp);
468         ceph_rewinddir(handle->data, (struct ceph_dir_result *) dirp);
469 }
470
471 static int cephwrap_mkdirat(struct vfs_handle_struct *handle,
472                         files_struct *dirfsp,
473                         const struct smb_filename *smb_fname,
474                         mode_t mode)
475 {
476         int result = -1;
477 #ifdef HAVE_CEPH_MKDIRAT
478         int dirfd = fsp_get_pathref_fd(dirfsp);
479
480         DBG_DEBUG("[CEPH] mkdirat(%p, %d, %s)\n",
481                   handle,
482                   dirfd,
483                   smb_fname->base_name);
484
485         result = ceph_mkdirat(handle->data, dirfd, smb_fname->base_name, mode);
486
487         DBG_DEBUG("[CEPH] mkdirat(...) = %d\n", result);
488
489         WRAP_RETURN(result);
490 #else
491         struct smb_filename *full_fname = NULL;
492
493         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
494                                                 dirfsp,
495                                                 smb_fname);
496         if (full_fname == NULL) {
497                 return -1;
498         }
499
500         DBG_DEBUG("[CEPH] mkdir(%p, %s)\n",
501                   handle, smb_fname_str_dbg(full_fname));
502
503         result = ceph_mkdir(handle->data, full_fname->base_name, mode);
504
505         TALLOC_FREE(full_fname);
506
507         WRAP_RETURN(result);
508 #endif
509 }
510
511 static int cephwrap_closedir(struct vfs_handle_struct *handle, DIR *dirp)
512 {
513         int result;
514
515         DBG_DEBUG("[CEPH] closedir(%p, %p)\n", handle, dirp);
516         result = ceph_closedir(handle->data, (struct ceph_dir_result *) dirp);
517         DBG_DEBUG("[CEPH] closedir(...) = %d\n", result);
518         WRAP_RETURN(result);
519 }
520
521 /* File operations */
522
523 static int cephwrap_openat(struct vfs_handle_struct *handle,
524                            const struct files_struct *dirfsp,
525                            const struct smb_filename *smb_fname,
526                            files_struct *fsp,
527                            const struct vfs_open_how *how)
528 {
529         int flags = how->flags;
530         mode_t mode = how->mode;
531         struct smb_filename *name = NULL;
532         bool have_opath = false;
533         bool became_root = false;
534         int result = -ENOENT;
535 #ifdef HAVE_CEPH_OPENAT
536         int dirfd = -1;
537 #endif
538
539         if (how->resolve != 0) {
540                 errno = ENOSYS;
541                 return -1;
542         }
543
544         if (smb_fname->stream_name) {
545                 goto out;
546         }
547
548 #ifdef O_PATH
549         have_opath = true;
550         if (fsp->fsp_flags.is_pathref) {
551                 flags |= O_PATH;
552         }
553 #endif
554
555 #ifdef HAVE_CEPH_OPENAT
556         dirfd = fsp_get_pathref_fd(dirfsp);
557
558         DBG_DEBUG("[CEPH] openat(%p, %d, %p, %d, %d)\n",
559                   handle, dirfd, fsp, flags, mode);
560
561         if (fsp->fsp_flags.is_pathref && !have_opath) {
562                 become_root();
563                 became_root = true;
564         }
565
566         result = ceph_openat(handle->data,
567                              dirfd,
568                              smb_fname->base_name,
569                              flags,
570                              mode);
571
572 #else
573         if (fsp_get_pathref_fd(dirfsp) != AT_FDCWD) {
574                 name = full_path_from_dirfsp_atname(talloc_tos(),
575                                                     dirfsp,
576                                                     smb_fname);
577                 if (name == NULL) {
578                         return -1;
579                 }
580                 smb_fname = name;
581         }
582
583         DBG_DEBUG("[CEPH] openat(%p, %s, %p, %d, %d)\n", handle,
584                   smb_fname_str_dbg(smb_fname), fsp, flags, mode);
585
586         if (fsp->fsp_flags.is_pathref && !have_opath) {
587                 become_root();
588                 became_root = true;
589         }
590
591         result = ceph_open(handle->data, smb_fname->base_name, flags, mode);
592 #endif
593         if (became_root) {
594                 unbecome_root();
595         }
596 out:
597         TALLOC_FREE(name);
598         fsp->fsp_flags.have_proc_fds = false;
599         DBG_DEBUG("[CEPH] open(...) = %d\n", result);
600         WRAP_RETURN(result);
601 }
602
603 static int cephwrap_close(struct vfs_handle_struct *handle, files_struct *fsp)
604 {
605         int result;
606
607         DBG_DEBUG("[CEPH] close(%p, %p)\n", handle, fsp);
608         result = ceph_close(handle->data, fsp_get_pathref_fd(fsp));
609         DBG_DEBUG("[CEPH] close(...) = %d\n", result);
610
611         WRAP_RETURN(result);
612 }
613
614 static ssize_t cephwrap_pread(struct vfs_handle_struct *handle, files_struct *fsp, void *data,
615                         size_t n, off_t offset)
616 {
617         ssize_t result;
618
619         DBG_DEBUG("[CEPH] pread(%p, %p, %p, %llu, %llu)\n", handle, fsp, data, llu(n), llu(offset));
620
621         result = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset);
622         DBG_DEBUG("[CEPH] pread(...) = %llu\n", llu(result));
623         WRAP_RETURN(result);
624 }
625
626 struct cephwrap_pread_state {
627         ssize_t bytes_read;
628         struct vfs_aio_state vfs_aio_state;
629 };
630
631 /*
632  * Fake up an async ceph read by calling the synchronous API.
633  */
634 static struct tevent_req *cephwrap_pread_send(struct vfs_handle_struct *handle,
635                                               TALLOC_CTX *mem_ctx,
636                                               struct tevent_context *ev,
637                                               struct files_struct *fsp,
638                                               void *data,
639                                               size_t n, off_t offset)
640 {
641         struct tevent_req *req = NULL;
642         struct cephwrap_pread_state *state = NULL;
643         int ret = -1;
644
645         DBG_DEBUG("[CEPH] %s\n", __func__);
646         req = tevent_req_create(mem_ctx, &state, struct cephwrap_pread_state);
647         if (req == NULL) {
648                 return NULL;
649         }
650
651         ret = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset);
652         if (ret < 0) {
653                 /* ceph returns -errno on error. */
654                 tevent_req_error(req, -ret);
655                 return tevent_req_post(req, ev);
656         }
657
658         state->bytes_read = ret;
659         tevent_req_done(req);
660         /* Return and schedule the completion of the call. */
661         return tevent_req_post(req, ev);
662 }
663
664 static ssize_t cephwrap_pread_recv(struct tevent_req *req,
665                                    struct vfs_aio_state *vfs_aio_state)
666 {
667         struct cephwrap_pread_state *state =
668                 tevent_req_data(req, struct cephwrap_pread_state);
669
670         DBG_DEBUG("[CEPH] %s\n", __func__);
671         if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
672                 return -1;
673         }
674         *vfs_aio_state = state->vfs_aio_state;
675         return state->bytes_read;
676 }
677
678 static ssize_t cephwrap_pwrite(struct vfs_handle_struct *handle, files_struct *fsp, const void *data,
679                         size_t n, off_t offset)
680 {
681         ssize_t result;
682
683         DBG_DEBUG("[CEPH] pwrite(%p, %p, %p, %llu, %llu)\n", handle, fsp, data, llu(n), llu(offset));
684         result = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset);
685         DBG_DEBUG("[CEPH] pwrite(...) = %llu\n", llu(result));
686         WRAP_RETURN(result);
687 }
688
689 struct cephwrap_pwrite_state {
690         ssize_t bytes_written;
691         struct vfs_aio_state vfs_aio_state;
692 };
693
694 /*
695  * Fake up an async ceph write by calling the synchronous API.
696  */
697 static struct tevent_req *cephwrap_pwrite_send(struct vfs_handle_struct *handle,
698                                                TALLOC_CTX *mem_ctx,
699                                                struct tevent_context *ev,
700                                                struct files_struct *fsp,
701                                                const void *data,
702                                                size_t n, off_t offset)
703 {
704         struct tevent_req *req = NULL;
705         struct cephwrap_pwrite_state *state = NULL;
706         int ret = -1;
707
708         DBG_DEBUG("[CEPH] %s\n", __func__);
709         req = tevent_req_create(mem_ctx, &state, struct cephwrap_pwrite_state);
710         if (req == NULL) {
711                 return NULL;
712         }
713
714         ret = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset);
715         if (ret < 0) {
716                 /* ceph returns -errno on error. */
717                 tevent_req_error(req, -ret);
718                 return tevent_req_post(req, ev);
719         }
720
721         state->bytes_written = ret;
722         tevent_req_done(req);
723         /* Return and schedule the completion of the call. */
724         return tevent_req_post(req, ev);
725 }
726
727 static ssize_t cephwrap_pwrite_recv(struct tevent_req *req,
728                                     struct vfs_aio_state *vfs_aio_state)
729 {
730         struct cephwrap_pwrite_state *state =
731                 tevent_req_data(req, struct cephwrap_pwrite_state);
732
733         DBG_DEBUG("[CEPH] %s\n", __func__);
734         if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
735                 return -1;
736         }
737         *vfs_aio_state = state->vfs_aio_state;
738         return state->bytes_written;
739 }
740
741 static off_t cephwrap_lseek(struct vfs_handle_struct *handle, files_struct *fsp, off_t offset, int whence)
742 {
743         off_t result = 0;
744
745         DBG_DEBUG("[CEPH] cephwrap_lseek\n");
746         result = ceph_lseek(handle->data, fsp_get_io_fd(fsp), offset, whence);
747         WRAP_RETURN(result);
748 }
749
750 static ssize_t cephwrap_sendfile(struct vfs_handle_struct *handle, int tofd, files_struct *fromfsp, const DATA_BLOB *hdr,
751                         off_t offset, size_t n)
752 {
753         /*
754          * We cannot support sendfile because libcephfs is in user space.
755          */
756         DBG_DEBUG("[CEPH] cephwrap_sendfile\n");
757         errno = ENOTSUP;
758         return -1;
759 }
760
761 static ssize_t cephwrap_recvfile(struct vfs_handle_struct *handle,
762                         int fromfd,
763                         files_struct *tofsp,
764                         off_t offset,
765                         size_t n)
766 {
767         /*
768          * We cannot support recvfile because libcephfs is in user space.
769          */
770         DBG_DEBUG("[CEPH] cephwrap_recvfile\n");
771         errno=ENOTSUP;
772         return -1;
773 }
774
775 static int cephwrap_renameat(struct vfs_handle_struct *handle,
776                         files_struct *srcfsp,
777                         const struct smb_filename *smb_fname_src,
778                         files_struct *dstfsp,
779                         const struct smb_filename *smb_fname_dst)
780 {
781         struct smb_filename *full_fname_src = NULL;
782         struct smb_filename *full_fname_dst = NULL;
783         int result = -1;
784
785         DBG_DEBUG("[CEPH] cephwrap_renameat\n");
786         if (smb_fname_src->stream_name || smb_fname_dst->stream_name) {
787                 errno = ENOENT;
788                 return result;
789         }
790
791         full_fname_src = full_path_from_dirfsp_atname(talloc_tos(),
792                                                   srcfsp,
793                                                   smb_fname_src);
794         if (full_fname_src == NULL) {
795                 errno = ENOMEM;
796                 return -1;
797         }
798         full_fname_dst = full_path_from_dirfsp_atname(talloc_tos(),
799                                                   dstfsp,
800                                                   smb_fname_dst);
801         if (full_fname_dst == NULL) {
802                 TALLOC_FREE(full_fname_src);
803                 errno = ENOMEM;
804                 return -1;
805         }
806
807         result = ceph_rename(handle->data,
808                              full_fname_src->base_name,
809                              full_fname_dst->base_name);
810
811         TALLOC_FREE(full_fname_src);
812         TALLOC_FREE(full_fname_dst);
813
814         WRAP_RETURN(result);
815 }
816
817 /*
818  * Fake up an async ceph fsync by calling the synchronous API.
819  */
820
821 static struct tevent_req *cephwrap_fsync_send(struct vfs_handle_struct *handle,
822                                         TALLOC_CTX *mem_ctx,
823                                         struct tevent_context *ev,
824                                         files_struct *fsp)
825 {
826         struct tevent_req *req = NULL;
827         struct vfs_aio_state *state = NULL;
828         int ret = -1;
829
830         DBG_DEBUG("[CEPH] cephwrap_fsync_send\n");
831
832         req = tevent_req_create(mem_ctx, &state, struct vfs_aio_state);
833         if (req == NULL) {
834                 return NULL;
835         }
836
837         /* Make sync call. */
838         ret = ceph_fsync(handle->data, fsp_get_io_fd(fsp), false);
839
840         if (ret != 0) {
841                 /* ceph_fsync returns -errno on error. */
842                 tevent_req_error(req, -ret);
843                 return tevent_req_post(req, ev);
844         }
845
846         /* Mark it as done. */
847         tevent_req_done(req);
848         /* Return and schedule the completion of the call. */
849         return tevent_req_post(req, ev);
850 }
851
852 static int cephwrap_fsync_recv(struct tevent_req *req,
853                                 struct vfs_aio_state *vfs_aio_state)
854 {
855         struct vfs_aio_state *state =
856                 tevent_req_data(req, struct vfs_aio_state);
857
858         DBG_DEBUG("[CEPH] cephwrap_fsync_recv\n");
859
860         if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
861                 return -1;
862         }
863         *vfs_aio_state = *state;
864         return 0;
865 }
866
867 #define SAMBA_STATX_ATTR_MASK   (CEPH_STATX_BASIC_STATS|CEPH_STATX_BTIME)
868
869 static void init_stat_ex_from_ceph_statx(struct stat_ex *dst, const struct ceph_statx *stx)
870 {
871         DBG_DEBUG("[CEPH]\tstx = {dev = %llx, ino = %llu, mode = 0x%x, "
872                   "nlink = %llu, uid = %d, gid = %d, rdev = %llx, size = %llu, "
873                   "blksize = %llu, blocks = %llu, atime = %llu, mtime = %llu, "
874                   "ctime = %llu, btime = %llu}\n",
875                   llu(stx->stx_dev), llu(stx->stx_ino), stx->stx_mode,
876                   llu(stx->stx_nlink), stx->stx_uid, stx->stx_gid,
877                   llu(stx->stx_rdev), llu(stx->stx_size), llu(stx->stx_blksize),
878                   llu(stx->stx_blocks), llu(stx->stx_atime.tv_sec),
879                   llu(stx->stx_mtime.tv_sec), llu(stx->stx_ctime.tv_sec),
880                   llu(stx->stx_btime.tv_sec));
881
882         if ((stx->stx_mask & SAMBA_STATX_ATTR_MASK) != SAMBA_STATX_ATTR_MASK) {
883                 DBG_WARNING("%s: stx->stx_mask is incorrect (wanted %x, got %x)\n",
884                                 __func__, SAMBA_STATX_ATTR_MASK, stx->stx_mask);
885         }
886
887         dst->st_ex_dev = stx->stx_dev;
888         dst->st_ex_rdev = stx->stx_rdev;
889         dst->st_ex_ino = stx->stx_ino;
890         dst->st_ex_mode = stx->stx_mode;
891         dst->st_ex_uid = stx->stx_uid;
892         dst->st_ex_gid = stx->stx_gid;
893         dst->st_ex_size = stx->stx_size;
894         dst->st_ex_nlink = stx->stx_nlink;
895         dst->st_ex_atime = stx->stx_atime;
896         dst->st_ex_btime = stx->stx_btime;
897         dst->st_ex_ctime = stx->stx_ctime;
898         dst->st_ex_mtime = stx->stx_mtime;
899         dst->st_ex_blksize = stx->stx_blksize;
900         dst->st_ex_blocks = stx->stx_blocks;
901 }
902
903 static int cephwrap_stat(struct vfs_handle_struct *handle,
904                         struct smb_filename *smb_fname)
905 {
906         int result = -1;
907         struct ceph_statx stx = { 0 };
908
909         DBG_DEBUG("[CEPH] stat(%p, %s)\n", handle, smb_fname_str_dbg(smb_fname));
910
911         if (smb_fname->stream_name) {
912                 errno = ENOENT;
913                 return result;
914         }
915
916         result = ceph_statx(handle->data, smb_fname->base_name, &stx,
917                                 SAMBA_STATX_ATTR_MASK, 0);
918         DBG_DEBUG("[CEPH] statx(...) = %d\n", result);
919         if (result < 0) {
920                 WRAP_RETURN(result);
921         }
922
923         init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
924         DBG_DEBUG("[CEPH] mode = 0x%x\n", smb_fname->st.st_ex_mode);
925         return result;
926 }
927
928 static int cephwrap_fstat(struct vfs_handle_struct *handle, files_struct *fsp, SMB_STRUCT_STAT *sbuf)
929 {
930         int result = -1;
931         struct ceph_statx stx = { 0 };
932         int fd = fsp_get_pathref_fd(fsp);
933
934         DBG_DEBUG("[CEPH] fstat(%p, %d)\n", handle, fd);
935         result = ceph_fstatx(handle->data, fd, &stx,
936                                 SAMBA_STATX_ATTR_MASK, 0);
937         DBG_DEBUG("[CEPH] fstat(...) = %d\n", result);
938         if (result < 0) {
939                 WRAP_RETURN(result);
940         }
941
942         init_stat_ex_from_ceph_statx(sbuf, &stx);
943         DBG_DEBUG("[CEPH] mode = 0x%x\n", sbuf->st_ex_mode);
944         return result;
945 }
946
947 static int cephwrap_lstat(struct vfs_handle_struct *handle,
948                          struct smb_filename *smb_fname)
949 {
950         int result = -1;
951         struct ceph_statx stx = { 0 };
952
953         DBG_DEBUG("[CEPH] lstat(%p, %s)\n", handle, smb_fname_str_dbg(smb_fname));
954
955         if (smb_fname->stream_name) {
956                 errno = ENOENT;
957                 return result;
958         }
959
960         result = ceph_statx(handle->data, smb_fname->base_name, &stx,
961                                 SAMBA_STATX_ATTR_MASK, AT_SYMLINK_NOFOLLOW);
962         DBG_DEBUG("[CEPH] lstat(...) = %d\n", result);
963         if (result < 0) {
964                 WRAP_RETURN(result);
965         }
966
967         init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
968         return result;
969 }
970
971 static int cephwrap_fntimes(struct vfs_handle_struct *handle,
972                             files_struct *fsp,
973                             struct smb_file_time *ft)
974 {
975         struct ceph_statx stx = { 0 };
976         int result;
977         int mask = 0;
978
979         if (!is_omit_timespec(&ft->atime)) {
980                 stx.stx_atime = ft->atime;
981                 mask |= CEPH_SETATTR_ATIME;
982         }
983         if (!is_omit_timespec(&ft->mtime)) {
984                 stx.stx_mtime = ft->mtime;
985                 mask |= CEPH_SETATTR_MTIME;
986         }
987         if (!is_omit_timespec(&ft->create_time)) {
988                 stx.stx_btime = ft->create_time;
989                 mask |= CEPH_SETATTR_BTIME;
990         }
991
992         if (!mask) {
993                 return 0;
994         }
995
996         if (!fsp->fsp_flags.is_pathref) {
997                 /*
998                  * We can use an io_fd to set xattrs.
999                  */
1000                 result = ceph_fsetattrx(handle->data,
1001                                         fsp_get_io_fd(fsp),
1002                                         &stx,
1003                                         mask);
1004         } else {
1005                 /*
1006                  * This is no longer a handle based call.
1007                  */
1008                 result = ceph_setattrx(handle->data,
1009                                        fsp->fsp_name->base_name,
1010                                        &stx,
1011                                        mask,
1012                                        0);
1013         }
1014
1015         DBG_DEBUG("[CEPH] ntimes(%p, %s, {%ld, %ld, %ld, %ld}) = %d\n",
1016                   handle, fsp_str_dbg(fsp), ft->mtime.tv_sec, ft->atime.tv_sec,
1017                   ft->ctime.tv_sec, ft->create_time.tv_sec, result);
1018
1019         return result;
1020 }
1021
1022 static int cephwrap_unlinkat(struct vfs_handle_struct *handle,
1023                         struct files_struct *dirfsp,
1024                         const struct smb_filename *smb_fname,
1025                         int flags)
1026 {
1027         int result = -1;
1028 #ifdef HAVE_CEPH_UNLINKAT
1029         int dirfd = fsp_get_pathref_fd(dirfsp);
1030
1031         DBG_DEBUG("[CEPH] unlinkat(%p, %d, %s)\n",
1032                   handle,
1033                   dirfd,
1034                   smb_fname_str_dbg(smb_fname));
1035
1036         if (smb_fname->stream_name) {
1037                 errno = ENOENT;
1038                 return result;
1039         }
1040
1041         result = ceph_unlinkat(handle->data,
1042                                dirfd,
1043                                smb_fname->base_name,
1044                                flags);
1045         DBG_DEBUG("[CEPH] unlinkat(...) = %d\n", result);
1046         WRAP_RETURN(result);
1047 #else
1048         struct smb_filename *full_fname = NULL;
1049
1050         DBG_DEBUG("[CEPH] unlink(%p, %s)\n",
1051                 handle,
1052                 smb_fname_str_dbg(smb_fname));
1053
1054         if (smb_fname->stream_name) {
1055                 errno = ENOENT;
1056                 return result;
1057         }
1058
1059         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1060                                                   dirfsp,
1061                                                   smb_fname);
1062         if (full_fname == NULL) {
1063                 return -1;
1064         }
1065
1066         if (flags & AT_REMOVEDIR) {
1067                 result = ceph_rmdir(handle->data, full_fname->base_name);
1068         } else {
1069                 result = ceph_unlink(handle->data, full_fname->base_name);
1070         }
1071         TALLOC_FREE(full_fname);
1072         DBG_DEBUG("[CEPH] unlink(...) = %d\n", result);
1073         WRAP_RETURN(result);
1074 #endif
1075 }
1076
1077 static int cephwrap_fchmod(struct vfs_handle_struct *handle, files_struct *fsp, mode_t mode)
1078 {
1079         int result;
1080
1081         DBG_DEBUG("[CEPH] fchmod(%p, %p, %d)\n", handle, fsp, mode);
1082         if (!fsp->fsp_flags.is_pathref) {
1083                 /*
1084                  * We can use an io_fd to change permissions.
1085                  */
1086                 result = ceph_fchmod(handle->data, fsp_get_io_fd(fsp), mode);
1087         } else {
1088                 /*
1089                  * This is no longer a handle based call.
1090                  */
1091                 result = ceph_chmod(handle->data,
1092                                     fsp->fsp_name->base_name,
1093                                     mode);
1094         }
1095         DBG_DEBUG("[CEPH] fchmod(...) = %d\n", result);
1096         WRAP_RETURN(result);
1097 }
1098
1099 static int cephwrap_fchown(struct vfs_handle_struct *handle, files_struct *fsp, uid_t uid, gid_t gid)
1100 {
1101         int result;
1102
1103         DBG_DEBUG("[CEPH] fchown(%p, %p, %d, %d)\n", handle, fsp, uid, gid);
1104         if (!fsp->fsp_flags.is_pathref) {
1105                 /*
1106                  * We can use an io_fd to change ownership.
1107                  */
1108                 result = ceph_fchown(handle->data,
1109                                      fsp_get_io_fd(fsp),
1110                                      uid,
1111                                      gid);
1112         } else {
1113                 /*
1114                  * This is no longer a handle based call.
1115                  */
1116                 result = ceph_chown(handle->data,
1117                                     fsp->fsp_name->base_name,
1118                                     uid,
1119                                     gid);
1120         }
1121
1122         DBG_DEBUG("[CEPH] fchown(...) = %d\n", result);
1123         WRAP_RETURN(result);
1124 }
1125
1126 static int cephwrap_lchown(struct vfs_handle_struct *handle,
1127                         const struct smb_filename *smb_fname,
1128                         uid_t uid,
1129                         gid_t gid)
1130 {
1131         int result;
1132         DBG_DEBUG("[CEPH] lchown(%p, %s, %d, %d)\n", handle, smb_fname->base_name, uid, gid);
1133         result = ceph_lchown(handle->data, smb_fname->base_name, uid, gid);
1134         DBG_DEBUG("[CEPH] lchown(...) = %d\n", result);
1135         WRAP_RETURN(result);
1136 }
1137
1138 static int cephwrap_chdir(struct vfs_handle_struct *handle,
1139                         const struct smb_filename *smb_fname)
1140 {
1141         int result = -1;
1142         DBG_DEBUG("[CEPH] chdir(%p, %s)\n", handle, smb_fname->base_name);
1143         result = ceph_chdir(handle->data, smb_fname->base_name);
1144         DBG_DEBUG("[CEPH] chdir(...) = %d\n", result);
1145         WRAP_RETURN(result);
1146 }
1147
1148 static struct smb_filename *cephwrap_getwd(struct vfs_handle_struct *handle,
1149                         TALLOC_CTX *ctx)
1150 {
1151         const char *cwd = ceph_getcwd(handle->data);
1152         DBG_DEBUG("[CEPH] getwd(%p) = %s\n", handle, cwd);
1153         return synthetic_smb_fname(ctx,
1154                                 cwd,
1155                                 NULL,
1156                                 NULL,
1157                                 0,
1158                                 0);
1159 }
1160
1161 static int strict_allocate_ftruncate(struct vfs_handle_struct *handle, files_struct *fsp, off_t len)
1162 {
1163         off_t space_to_write;
1164         int result;
1165         NTSTATUS status;
1166         SMB_STRUCT_STAT *pst;
1167
1168         status = vfs_stat_fsp(fsp);
1169         if (!NT_STATUS_IS_OK(status)) {
1170                 return -1;
1171         }
1172         pst = &fsp->fsp_name->st;
1173
1174 #ifdef S_ISFIFO
1175         if (S_ISFIFO(pst->st_ex_mode))
1176                 return 0;
1177 #endif
1178
1179         if (pst->st_ex_size == len)
1180                 return 0;
1181
1182         /* Shrink - just ftruncate. */
1183         if (pst->st_ex_size > len) {
1184                 result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len);
1185                 WRAP_RETURN(result);
1186         }
1187
1188         space_to_write = len - pst->st_ex_size;
1189         result = ceph_fallocate(handle->data, fsp_get_io_fd(fsp), 0, pst->st_ex_size,
1190                                 space_to_write);
1191         WRAP_RETURN(result);
1192 }
1193
1194 static int cephwrap_ftruncate(struct vfs_handle_struct *handle, files_struct *fsp, off_t len)
1195 {
1196         int result = -1;
1197
1198         DBG_DEBUG("[CEPH] ftruncate(%p, %p, %llu\n", handle, fsp, llu(len));
1199
1200         if (lp_strict_allocate(SNUM(fsp->conn))) {
1201                 return strict_allocate_ftruncate(handle, fsp, len);
1202         }
1203
1204         result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len);
1205         WRAP_RETURN(result);
1206 }
1207
1208 static int cephwrap_fallocate(struct vfs_handle_struct *handle,
1209                               struct files_struct *fsp,
1210                               uint32_t mode,
1211                               off_t offset,
1212                               off_t len)
1213 {
1214         int result;
1215
1216         DBG_DEBUG("[CEPH] fallocate(%p, %p, %u, %llu, %llu\n",
1217                   handle, fsp, mode, llu(offset), llu(len));
1218         /* unsupported mode flags are rejected by libcephfs */
1219         result = ceph_fallocate(handle->data, fsp_get_io_fd(fsp), mode, offset, len);
1220         DBG_DEBUG("[CEPH] fallocate(...) = %d\n", result);
1221         WRAP_RETURN(result);
1222 }
1223
1224 static bool cephwrap_lock(struct vfs_handle_struct *handle, files_struct *fsp, int op, off_t offset, off_t count, int type)
1225 {
1226         DBG_DEBUG("[CEPH] lock\n");
1227         return true;
1228 }
1229
1230 static int cephwrap_filesystem_sharemode(struct vfs_handle_struct *handle,
1231                                          files_struct *fsp,
1232                                          uint32_t share_access,
1233                                          uint32_t access_mask)
1234 {
1235         DBG_ERR("[CEPH] filesystem sharemodes unsupported! Consider setting "
1236                 "\"kernel share modes = no\"\n");
1237
1238         errno = ENOSYS;
1239         return -1;
1240 }
1241
1242 static int cephwrap_fcntl(vfs_handle_struct *handle,
1243                           files_struct *fsp, int cmd, va_list cmd_arg)
1244 {
1245         /*
1246          * SMB_VFS_FCNTL() is currently only called by vfs_set_blocking() to
1247          * clear O_NONBLOCK, etc for LOCK_MAND and FIFOs. Ignore it.
1248          */
1249         if (cmd == F_GETFL) {
1250                 return 0;
1251         } else if (cmd == F_SETFL) {
1252                 va_list dup_cmd_arg;
1253                 int opt;
1254
1255                 va_copy(dup_cmd_arg, cmd_arg);
1256                 opt = va_arg(dup_cmd_arg, int);
1257                 va_end(dup_cmd_arg);
1258                 if (opt == 0) {
1259                         return 0;
1260                 }
1261                 DBG_ERR("unexpected fcntl SETFL(%d)\n", opt);
1262                 goto err_out;
1263         }
1264         DBG_ERR("unexpected fcntl: %d\n", cmd);
1265 err_out:
1266         errno = EINVAL;
1267         return -1;
1268 }
1269
1270 static bool cephwrap_getlock(struct vfs_handle_struct *handle, files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype, pid_t *ppid)
1271 {
1272         DBG_DEBUG("[CEPH] getlock returning false and errno=0\n");
1273
1274         errno = 0;
1275         return false;
1276 }
1277
1278 /*
1279  * We cannot let this fall through to the default, because the file might only
1280  * be accessible from libcephfs (which is a user-space client) but the fd might
1281  * be for some file the kernel knows about.
1282  */
1283 static int cephwrap_linux_setlease(struct vfs_handle_struct *handle, files_struct *fsp,
1284                                 int leasetype)
1285 {
1286         int result = -1;
1287
1288         DBG_DEBUG("[CEPH] linux_setlease\n");
1289         errno = ENOSYS;
1290         return result;
1291 }
1292
1293 static int cephwrap_symlinkat(struct vfs_handle_struct *handle,
1294                 const struct smb_filename *link_target,
1295                 struct files_struct *dirfsp,
1296                 const struct smb_filename *new_smb_fname)
1297 {
1298         int result = -1;
1299 #ifdef HAVE_CEPH_SYMLINKAT
1300         int dirfd = fsp_get_pathref_fd(dirfsp);
1301
1302         DBG_DEBUG("[CEPH] symlinkat(%p, %s, %d, %s)\n",
1303                   handle,
1304                   link_target->base_name,
1305                   dirfd,
1306                   new_smb_fname->base_name);
1307
1308         result = ceph_symlinkat(handle->data,
1309                                 link_target->base_name,
1310                                 dirfd,
1311                                 new_smb_fname->base_name);
1312         DBG_DEBUG("[CEPH] symlinkat(...) = %d\n", result);
1313         WRAP_RETURN(result);
1314 #else
1315         struct smb_filename *full_fname = NULL;
1316
1317         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1318                                                 dirfsp,
1319                                                 new_smb_fname);
1320         if (full_fname == NULL) {
1321                 return -1;
1322         }
1323
1324         DBG_DEBUG("[CEPH] symlink(%p, %s, %s)\n", handle,
1325                         link_target->base_name,
1326                         full_fname->base_name);
1327
1328         result = ceph_symlink(handle->data,
1329                         link_target->base_name,
1330                         full_fname->base_name);
1331         TALLOC_FREE(full_fname);
1332         DBG_DEBUG("[CEPH] symlink(...) = %d\n", result);
1333         WRAP_RETURN(result);
1334 #endif
1335 }
1336
1337 static int cephwrap_readlinkat(struct vfs_handle_struct *handle,
1338                 const struct files_struct *dirfsp,
1339                 const struct smb_filename *smb_fname,
1340                 char *buf,
1341                 size_t bufsiz)
1342 {
1343         int result = -1;
1344 #ifdef HAVE_CEPH_READLINKAT
1345         int dirfd = fsp_get_pathref_fd(dirfsp);
1346
1347         DBG_DEBUG("[CEPH] readlinkat(%p, %d, %s, %p, %llu)\n",
1348                   handle,
1349                   dirfd,
1350                   smb_fname->base_name,
1351                   buf,
1352                   llu(bufsiz));
1353
1354         result = ceph_readlinkat(handle->data,
1355                                  dirfd,
1356                                  smb_fname->base_name,
1357                                  buf,
1358                                  bufsiz);
1359
1360         DBG_DEBUG("[CEPH] readlinkat(...) = %d\n", result);
1361         WRAP_RETURN(result);
1362 #else
1363         struct smb_filename *full_fname = NULL;
1364
1365         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1366                                                 dirfsp,
1367                                                 smb_fname);
1368         if (full_fname == NULL) {
1369                 return -1;
1370         }
1371
1372         DBG_DEBUG("[CEPH] readlink(%p, %s, %p, %llu)\n", handle,
1373                         full_fname->base_name, buf, llu(bufsiz));
1374
1375         result = ceph_readlink(handle->data, full_fname->base_name, buf, bufsiz);
1376         TALLOC_FREE(full_fname);
1377         DBG_DEBUG("[CEPH] readlink(...) = %d\n", result);
1378         WRAP_RETURN(result);
1379 #endif
1380 }
1381
1382 static int cephwrap_linkat(struct vfs_handle_struct *handle,
1383                 files_struct *srcfsp,
1384                 const struct smb_filename *old_smb_fname,
1385                 files_struct *dstfsp,
1386                 const struct smb_filename *new_smb_fname,
1387                 int flags)
1388 {
1389         struct smb_filename *full_fname_old = NULL;
1390         struct smb_filename *full_fname_new = NULL;
1391         int result = -1;
1392
1393         full_fname_old = full_path_from_dirfsp_atname(talloc_tos(),
1394                                         srcfsp,
1395                                         old_smb_fname);
1396         if (full_fname_old == NULL) {
1397                 return -1;
1398         }
1399         full_fname_new = full_path_from_dirfsp_atname(talloc_tos(),
1400                                         dstfsp,
1401                                         new_smb_fname);
1402         if (full_fname_new == NULL) {
1403                 TALLOC_FREE(full_fname_old);
1404                 return -1;
1405         }
1406
1407         DBG_DEBUG("[CEPH] link(%p, %s, %s)\n", handle,
1408                         full_fname_old->base_name,
1409                         full_fname_new->base_name);
1410
1411         result = ceph_link(handle->data,
1412                                 full_fname_old->base_name,
1413                                 full_fname_new->base_name);
1414         DBG_DEBUG("[CEPH] link(...) = %d\n", result);
1415         TALLOC_FREE(full_fname_old);
1416         TALLOC_FREE(full_fname_new);
1417         WRAP_RETURN(result);
1418 }
1419
1420 static int cephwrap_mknodat(struct vfs_handle_struct *handle,
1421                 files_struct *dirfsp,
1422                 const struct smb_filename *smb_fname,
1423                 mode_t mode,
1424                 SMB_DEV_T dev)
1425 {
1426         struct smb_filename *full_fname = NULL;
1427         int result = -1;
1428
1429         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1430                                                 dirfsp,
1431                                                 smb_fname);
1432         if (full_fname == NULL) {
1433                 return -1;
1434         }
1435
1436         DBG_DEBUG("[CEPH] mknodat(%p, %s)\n", handle, full_fname->base_name);
1437         result = ceph_mknod(handle->data, full_fname->base_name, mode, dev);
1438         DBG_DEBUG("[CEPH] mknodat(...) = %d\n", result);
1439
1440         TALLOC_FREE(full_fname);
1441
1442         WRAP_RETURN(result);
1443 }
1444
1445 /*
1446  * This is a simple version of real-path ... a better version is needed to
1447  * ask libcephfs about symbolic links.
1448  */
1449 static struct smb_filename *cephwrap_realpath(struct vfs_handle_struct *handle,
1450                                 TALLOC_CTX *ctx,
1451                                 const struct smb_filename *smb_fname)
1452 {
1453         char *result = NULL;
1454         const char *path = smb_fname->base_name;
1455         size_t len = strlen(path);
1456         struct smb_filename *result_fname = NULL;
1457         int r = -1;
1458
1459         if (len && (path[0] == '/')) {
1460                 r = asprintf(&result, "%s", path);
1461         } else if ((len >= 2) && (path[0] == '.') && (path[1] == '/')) {
1462                 if (len == 2) {
1463                         r = asprintf(&result, "%s",
1464                                         handle->conn->cwd_fsp->fsp_name->base_name);
1465                 } else {
1466                         r = asprintf(&result, "%s/%s",
1467                                         handle->conn->cwd_fsp->fsp_name->base_name, &path[2]);
1468                 }
1469         } else {
1470                 r = asprintf(&result, "%s/%s",
1471                                 handle->conn->cwd_fsp->fsp_name->base_name, path);
1472         }
1473
1474         if (r < 0) {
1475                 return NULL;
1476         }
1477
1478         DBG_DEBUG("[CEPH] realpath(%p, %s) = %s\n", handle, path, result);
1479         result_fname = synthetic_smb_fname(ctx,
1480                                 result,
1481                                 NULL,
1482                                 NULL,
1483                                 0,
1484                                 0);
1485         SAFE_FREE(result);
1486         return result_fname;
1487 }
1488
1489
/*
 * File flags are not implemented by this module: always fails with
 * errno = ENOSYS.
 */
static int cephwrap_fchflags(struct vfs_handle_struct *handle,
			     struct files_struct *fsp,
			     unsigned int flags)
{
	errno = ENOSYS;

	return -1;
}
1497
1498 static NTSTATUS cephwrap_get_real_filename_at(
1499         struct vfs_handle_struct *handle,
1500         struct files_struct *dirfsp,
1501         const char *name,
1502         TALLOC_CTX *mem_ctx,
1503         char **found_name)
1504 {
1505         /*
1506          * Don't fall back to get_real_filename so callers can differentiate
1507          * between a full directory scan and an actual case-insensitive stat.
1508          */
1509         return NT_STATUS_NOT_SUPPORTED;
1510 }
1511
1512 static const char *cephwrap_connectpath(
1513         struct vfs_handle_struct *handle,
1514         const struct files_struct *dirfsp,
1515         const struct smb_filename *smb_fname)
1516 {
1517         return handle->conn->connectpath;
1518 }
1519
1520 /****************************************************************
1521  Extended attribute operations.
1522 *****************************************************************/
1523
1524 static ssize_t cephwrap_fgetxattr(struct vfs_handle_struct *handle,
1525                                   struct files_struct *fsp,
1526                                   const char *name,
1527                                   void *value,
1528                                   size_t size)
1529 {
1530         int ret;
1531         DBG_DEBUG("[CEPH] fgetxattr(%p, %p, %s, %p, %llu)\n",
1532                   handle,
1533                   fsp,
1534                   name,
1535                   value,
1536                   llu(size));
1537         if (!fsp->fsp_flags.is_pathref) {
1538                 ret = ceph_fgetxattr(handle->data,
1539                                      fsp_get_io_fd(fsp),
1540                                      name,
1541                                      value,
1542                                      size);
1543         } else {
1544                 ret = ceph_getxattr(handle->data,
1545                                     fsp->fsp_name->base_name,
1546                                     name,
1547                                     value,
1548                                     size);
1549         }
1550         DBG_DEBUG("[CEPH] fgetxattr(...) = %d\n", ret);
1551         if (ret < 0) {
1552                 WRAP_RETURN(ret);
1553         }
1554         return (ssize_t)ret;
1555 }
1556
1557 static ssize_t cephwrap_flistxattr(struct vfs_handle_struct *handle, struct files_struct *fsp, char *list, size_t size)
1558 {
1559         int ret;
1560         DBG_DEBUG("[CEPH] flistxattr(%p, %p, %p, %llu)\n",
1561                   handle, fsp, list, llu(size));
1562         if (!fsp->fsp_flags.is_pathref) {
1563                 /*
1564                  * We can use an io_fd to list xattrs.
1565                  */
1566                 ret = ceph_flistxattr(handle->data,
1567                                         fsp_get_io_fd(fsp),
1568                                         list,
1569                                         size);
1570         } else {
1571                 /*
1572                  * This is no longer a handle based call.
1573                  */
1574                 ret = ceph_listxattr(handle->data,
1575                                         fsp->fsp_name->base_name,
1576                                         list,
1577                                         size);
1578         }
1579         DBG_DEBUG("[CEPH] flistxattr(...) = %d\n", ret);
1580         if (ret < 0) {
1581                 WRAP_RETURN(ret);
1582         }
1583         return (ssize_t)ret;
1584 }
1585
1586 static int cephwrap_fremovexattr(struct vfs_handle_struct *handle, struct files_struct *fsp, const char *name)
1587 {
1588         int ret;
1589         DBG_DEBUG("[CEPH] fremovexattr(%p, %p, %s)\n", handle, fsp, name);
1590         if (!fsp->fsp_flags.is_pathref) {
1591                 /*
1592                  * We can use an io_fd to remove xattrs.
1593                  */
1594                 ret = ceph_fremovexattr(handle->data, fsp_get_io_fd(fsp), name);
1595         } else {
1596                 /*
1597                  * This is no longer a handle based call.
1598                  */
1599                 ret = ceph_removexattr(handle->data,
1600                                         fsp->fsp_name->base_name,
1601                                         name);
1602         }
1603         DBG_DEBUG("[CEPH] fremovexattr(...) = %d\n", ret);
1604         WRAP_RETURN(ret);
1605 }
1606
1607 static int cephwrap_fsetxattr(struct vfs_handle_struct *handle, struct files_struct *fsp, const char *name, const void *value, size_t size, int flags)
1608 {
1609         int ret;
1610         DBG_DEBUG("[CEPH] fsetxattr(%p, %p, %s, %p, %llu, %d)\n", handle, fsp, name, value, llu(size), flags);
1611         if (!fsp->fsp_flags.is_pathref) {
1612                 /*
1613                  * We can use an io_fd to set xattrs.
1614                  */
1615                 ret = ceph_fsetxattr(handle->data,
1616                                 fsp_get_io_fd(fsp),
1617                                 name,
1618                                 value,
1619                                 size,
1620                                 flags);
1621         } else {
1622                 /*
1623                  * This is no longer a handle based call.
1624                  */
1625                 ret = ceph_setxattr(handle->data,
1626                                 fsp->fsp_name->base_name,
1627                                 name,
1628                                 value,
1629                                 size,
1630                                 flags);
1631         }
1632         DBG_DEBUG("[CEPH] fsetxattr(...) = %d\n", ret);
1633         WRAP_RETURN(ret);
1634 }
1635
/*
 * AIO is not supported yet: always answers false and sets
 * errno = ENOTSUP.
 */
static bool cephwrap_aio_force(struct vfs_handle_struct *handle,
			       struct files_struct *fsp)
{
	DBG_DEBUG("[CEPH] cephwrap_aio_force(%p, %p) = false (errno = ENOTSUP)\n", handle, fsp);

	errno = ENOTSUP;

	return false;
}
1647
1648 static NTSTATUS cephwrap_create_dfs_pathat(struct vfs_handle_struct *handle,
1649                                 struct files_struct *dirfsp,
1650                                 const struct smb_filename *smb_fname,
1651                                 const struct referral *reflist,
1652                                 size_t referral_count)
1653 {
1654         TALLOC_CTX *frame = talloc_stackframe();
1655         NTSTATUS status = NT_STATUS_NO_MEMORY;
1656         int ret;
1657         char *msdfs_link = NULL;
1658         struct smb_filename *full_fname = NULL;
1659
1660         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1661                                                 dirfsp,
1662                                                 smb_fname);
1663         if (full_fname == NULL) {
1664                 goto out;
1665         }
1666
1667         /* Form the msdfs_link contents */
1668         msdfs_link = msdfs_link_string(frame,
1669                                         reflist,
1670                                         referral_count);
1671         if (msdfs_link == NULL) {
1672                 goto out;
1673         }
1674
1675         ret = ceph_symlink(handle->data,
1676                         msdfs_link,
1677                         full_fname->base_name);
1678         if (ret == 0) {
1679                 status = NT_STATUS_OK;
1680         } else {
1681                 status = map_nt_error_from_unix(-ret);
1682         }
1683
1684   out:
1685
1686         DBG_DEBUG("[CEPH] create_dfs_pathat(%s) = %s\n",
1687                         full_fname != NULL ? full_fname->base_name : "",
1688                         nt_errstr(status));
1689
1690         TALLOC_FREE(frame);
1691         return status;
1692 }
1693
1694 /*
1695  * Read and return the contents of a DFS redirect given a
1696  * pathname. A caller can pass in NULL for ppreflist and
1697  * preferral_count but still determine if this was a
1698  * DFS redirect point by getting NT_STATUS_OK back
1699  * without incurring the overhead of reading and parsing
1700  * the referral contents.
1701  */
1702
1703 static NTSTATUS cephwrap_read_dfs_pathat(struct vfs_handle_struct *handle,
1704                                 TALLOC_CTX *mem_ctx,
1705                                 struct files_struct *dirfsp,
1706                                 struct smb_filename *smb_fname,
1707                                 struct referral **ppreflist,
1708                                 size_t *preferral_count)
1709 {
1710         NTSTATUS status = NT_STATUS_NO_MEMORY;
1711         size_t bufsize;
1712         char *link_target = NULL;
1713         int referral_len;
1714         bool ok;
1715 #if defined(HAVE_BROKEN_READLINK)
1716         char link_target_buf[PATH_MAX];
1717 #else
1718         char link_target_buf[7];
1719 #endif
1720         struct ceph_statx stx = { 0 };
1721         struct smb_filename *full_fname = NULL;
1722         int ret;
1723
1724         if (is_named_stream(smb_fname)) {
1725                 status = NT_STATUS_OBJECT_NAME_NOT_FOUND;
1726                 goto err;
1727         }
1728
1729         if (ppreflist == NULL && preferral_count == NULL) {
1730                 /*
1731                  * We're only checking if this is a DFS
1732                  * redirect. We don't need to return data.
1733                  */
1734                 bufsize = sizeof(link_target_buf);
1735                 link_target = link_target_buf;
1736         } else {
1737                 bufsize = PATH_MAX;
1738                 link_target = talloc_array(mem_ctx, char, bufsize);
1739                 if (!link_target) {
1740                         goto err;
1741                 }
1742         }
1743
1744         full_fname = full_path_from_dirfsp_atname(talloc_tos(),
1745                                                   dirfsp,
1746                                                   smb_fname);
1747         if (full_fname == NULL) {
1748                 status = NT_STATUS_NO_MEMORY;
1749                 goto err;
1750         }
1751
1752         ret = ceph_statx(handle->data,
1753                          full_fname->base_name,
1754                          &stx,
1755                          SAMBA_STATX_ATTR_MASK,
1756                          AT_SYMLINK_NOFOLLOW);
1757         if (ret < 0) {
1758                 status = map_nt_error_from_unix(-ret);
1759                 goto err;
1760         }
1761
1762         referral_len = ceph_readlink(handle->data,
1763                                 full_fname->base_name,
1764                                 link_target,
1765                                 bufsize - 1);
1766         if (referral_len < 0) {
1767                 /* ceph errors are -errno. */
1768                 if (-referral_len == EINVAL) {
1769                         DBG_INFO("%s is not a link.\n",
1770                                 full_fname->base_name);
1771                         status = NT_STATUS_OBJECT_TYPE_MISMATCH;
1772                 } else {
1773                         status = map_nt_error_from_unix(-referral_len);
1774                         DBG_ERR("Error reading "
1775                                 "msdfs link %s: %s\n",
1776                                 full_fname->base_name,
1777                         strerror(errno));
1778                 }
1779                 goto err;
1780         }
1781         link_target[referral_len] = '\0';
1782
1783         DBG_INFO("%s -> %s\n",
1784                         full_fname->base_name,
1785                         link_target);
1786
1787         if (!strnequal(link_target, "msdfs:", 6)) {
1788                 status = NT_STATUS_OBJECT_TYPE_MISMATCH;
1789                 goto err;
1790         }
1791
1792         if (ppreflist == NULL && preferral_count == NULL) {
1793                 /* Early return for checking if this is a DFS link. */
1794                 TALLOC_FREE(full_fname);
1795                 init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
1796                 return NT_STATUS_OK;
1797         }
1798
1799         ok = parse_msdfs_symlink(mem_ctx,
1800                         lp_msdfs_shuffle_referrals(SNUM(handle->conn)),
1801                         link_target,
1802                         ppreflist,
1803                         preferral_count);
1804
1805         if (ok) {
1806                 init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
1807                 status = NT_STATUS_OK;
1808         } else {
1809                 status = NT_STATUS_NO_MEMORY;
1810         }
1811
1812   err:
1813
1814         if (link_target != link_target_buf) {
1815                 TALLOC_FREE(link_target);
1816         }
1817         TALLOC_FREE(full_fname);
1818         return status;
1819 }
1820
1821 static struct vfs_fn_pointers ceph_fns = {
1822         /* Disk operations */
1823
1824         .connect_fn = cephwrap_connect,
1825         .disconnect_fn = cephwrap_disconnect,
1826         .disk_free_fn = cephwrap_disk_free,
1827         .get_quota_fn = cephwrap_get_quota,
1828         .set_quota_fn = cephwrap_set_quota,
1829         .statvfs_fn = cephwrap_statvfs,
1830         .fs_capabilities_fn = cephwrap_fs_capabilities,
1831
1832         /* Directory operations */
1833
1834         .fdopendir_fn = cephwrap_fdopendir,
1835         .readdir_fn = cephwrap_readdir,
1836         .rewind_dir_fn = cephwrap_rewinddir,
1837         .mkdirat_fn = cephwrap_mkdirat,
1838         .closedir_fn = cephwrap_closedir,
1839
1840         /* File operations */
1841
1842         .create_dfs_pathat_fn = cephwrap_create_dfs_pathat,
1843         .read_dfs_pathat_fn = cephwrap_read_dfs_pathat,
1844         .openat_fn = cephwrap_openat,
1845         .close_fn = cephwrap_close,
1846         .pread_fn = cephwrap_pread,
1847         .pread_send_fn = cephwrap_pread_send,
1848         .pread_recv_fn = cephwrap_pread_recv,
1849         .pwrite_fn = cephwrap_pwrite,
1850         .pwrite_send_fn = cephwrap_pwrite_send,
1851         .pwrite_recv_fn = cephwrap_pwrite_recv,
1852         .lseek_fn = cephwrap_lseek,
1853         .sendfile_fn = cephwrap_sendfile,
1854         .recvfile_fn = cephwrap_recvfile,
1855         .renameat_fn = cephwrap_renameat,
1856         .fsync_send_fn = cephwrap_fsync_send,
1857         .fsync_recv_fn = cephwrap_fsync_recv,
1858         .stat_fn = cephwrap_stat,
1859         .fstat_fn = cephwrap_fstat,
1860         .lstat_fn = cephwrap_lstat,
1861         .unlinkat_fn = cephwrap_unlinkat,
1862         .fchmod_fn = cephwrap_fchmod,
1863         .fchown_fn = cephwrap_fchown,
1864         .lchown_fn = cephwrap_lchown,
1865         .chdir_fn = cephwrap_chdir,
1866         .getwd_fn = cephwrap_getwd,
1867         .fntimes_fn = cephwrap_fntimes,
1868         .ftruncate_fn = cephwrap_ftruncate,
1869         .fallocate_fn = cephwrap_fallocate,
1870         .lock_fn = cephwrap_lock,
1871         .filesystem_sharemode_fn = cephwrap_filesystem_sharemode,
1872         .fcntl_fn = cephwrap_fcntl,
1873         .linux_setlease_fn = cephwrap_linux_setlease,
1874         .getlock_fn = cephwrap_getlock,
1875         .symlinkat_fn = cephwrap_symlinkat,
1876         .readlinkat_fn = cephwrap_readlinkat,
1877         .linkat_fn = cephwrap_linkat,
1878         .mknodat_fn = cephwrap_mknodat,
1879         .realpath_fn = cephwrap_realpath,
1880         .fchflags_fn = cephwrap_fchflags,
1881         .get_real_filename_at_fn = cephwrap_get_real_filename_at,
1882         .connectpath_fn = cephwrap_connectpath,
1883
1884         /* EA operations. */
1885         .getxattrat_send_fn = vfs_not_implemented_getxattrat_send,
1886         .getxattrat_recv_fn = vfs_not_implemented_getxattrat_recv,
1887         .fgetxattr_fn = cephwrap_fgetxattr,
1888         .flistxattr_fn = cephwrap_flistxattr,
1889         .fremovexattr_fn = cephwrap_fremovexattr,
1890         .fsetxattr_fn = cephwrap_fsetxattr,
1891
1892         /* Posix ACL Operations */
1893         .sys_acl_get_fd_fn = posixacl_xattr_acl_get_fd,
1894         .sys_acl_blob_get_fd_fn = posix_sys_acl_blob_get_fd,
1895         .sys_acl_set_fd_fn = posixacl_xattr_acl_set_fd,
1896         .sys_acl_delete_def_fd_fn = posixacl_xattr_acl_delete_def_fd,
1897
1898         /* aio operations */
1899         .aio_force_fn = cephwrap_aio_force,
1900 };
1901
1902 static_decl_vfs;
1903 NTSTATUS vfs_ceph_init(TALLOC_CTX *ctx)
1904 {
1905         return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
1906                                 "ceph", &ceph_fns);
1907 }