2 * Module to make use of awesome Btrfs features
4 * Copyright (C) David Disseldorp 2011-2013
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include <linux/ioctl.h>
22 #include <sys/ioctl.h>
27 #include "system/filesys.h"
29 #include "smbd/smbd.h"
30 #include "smbd/globals.h"
31 #include "librpc/gen_ndr/smbXsrv.h"
32 #include "librpc/gen_ndr/ioctl.h"
33 #include "lib/util/tevent_ntstatus.h"
34 #include "offload_token.h"
/*
 * Report the filesystem capability bits for a Btrfs backed share.
 *
 * The capability mask is obtained from the next VFS module and then
 * extended with FILE_FILE_COMPRESSION and FILE_SUPPORTS_BLOCK_REFCOUNTING.
 * The combined mask is returned.
 *
 * NOTE(review): the timestamp resolution is fetched into the local ts_res,
 * but no statement copying it to *_ts_res is visible in this listing -
 * confirm that the out-parameter is filled in before returning.
 */
36 static uint32_t btrfs_fs_capabilities(struct vfs_handle_struct *handle,
37 enum timestamp_set_resolution *_ts_res)
39 uint32_t fs_capabilities;
40 enum timestamp_set_resolution ts_res;
42 /* inherit default capabilities, expose compression support */
43 fs_capabilities = SMB_VFS_NEXT_FS_CAPABILITIES(handle, &ts_res);
44 fs_capabilities |= (FILE_FILE_COMPRESSION
45 | FILE_SUPPORTS_BLOCK_REFCOUNTING);
48 return fs_capabilities;
/*
 * Snapshot naming. SHADOW_COPY_PATH_FORMAT is used as a strftime() pattern
 * when a snapshot name is generated and as a strptime() pattern when a
 * snapshot path is validated before deletion.
 */
51 #define SHADOW_COPY_PREFIX "@GMT-" /* vfs_shadow_copy format */
52 #define SHADOW_COPY_PATH_FORMAT "@GMT-%Y.%m.%d-%H.%M.%S"
/*
 * Local definitions of the Btrfs ioctl interface used by this module.
 * BTRFS_SUBVOL_RDONLY is placed in the flags member of the snapshot create
 * argument when a read-only snapshot is requested. The two *_NAME_MAX
 * values size the name[] buffers of the argument structures below.
 */
54 #define BTRFS_SUBVOL_RDONLY (1ULL << 1)
55 #define BTRFS_SUBVOL_NAME_MAX 4039
56 #define BTRFS_PATH_NAME_MAX 4087
/*
 * Argument for BTRFS_IOC_SNAP_CREATE_V2. This file sets the members fd,
 * transid, flags, unused and name; only name is visible in this listing.
 */
57 struct btrfs_ioctl_vol_args_v2 {
62 char name[BTRFS_SUBVOL_NAME_MAX + 1];
/*
 * Argument for BTRFS_IOC_SNAP_DESTROY. This file sets the members fd and
 * name; only name is visible in this listing.
 */
64 struct btrfs_ioctl_vol_args {
66 char name[BTRFS_PATH_NAME_MAX + 1];
/*
 * Argument for BTRFS_IOC_CLONE_RANGE. This file sets the members src_fd,
 * src_offset, src_length and dest_offset; none are visible in this listing.
 */
69 struct btrfs_ioctl_clone_range_args {
/*
 * ioctl request codes, built with _IOW() from the Btrfs magic number and
 * the command numbers 13 (clone range), 15 (snapshot destroy) and
 * 23 (snapshot create, v2 argument).
 */
76 #define BTRFS_IOCTL_MAGIC 0x94
77 #define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
78 struct btrfs_ioctl_clone_range_args)
79 #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
80 struct btrfs_ioctl_vol_args)
81 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
82 struct btrfs_ioctl_vol_args_v2)
/*
 * File-scope offload token context. It is handed to the
 * vfs_offload_token_db_store_fsp() and vfs_offload_token_db_fetch_fsp()
 * calls made by the offload read and write paths in this file.
 */
84 static struct vfs_offload_ctx *btrfs_offload_ctx;
/*
 * Per-request state of an offload read. Besides the handle member shown
 * here, the offload read code also accesses a token member (token.length
 * and token.data); its declaration is not visible in this listing.
 */
86 struct btrfs_offload_read_state {
87 struct vfs_handle_struct *handle;
/* completion callback for an offload read forwarded to the next module */
92 static void btrfs_offload_read_done(struct tevent_req *subreq);
/*
 * Start an asynchronous offload read.
 *
 * FSCTL_DUP_EXTENTS_TO_FILE is answered locally: a token blob is created,
 * the fsp is stored in the offload token database and the request is
 * completed immediately. Every other fsctl is forwarded to the next VFS
 * module, with btrfs_offload_read_done() installed as completion callback.
 *
 * Failures are reported through the returned request (tevent_req_nterror /
 * tevent_req_nomem followed by tevent_req_post).
 *
 * NOTE(review): part of the parameter list is not visible in this listing;
 * the body additionally uses mem_ctx, fsp, fsctl, ttl, offset and to_copy.
 */
94 static struct tevent_req *btrfs_offload_read_send(
96 struct tevent_context *ev,
97 struct vfs_handle_struct *handle,
104 struct tevent_req *req = NULL;
105 struct tevent_req *subreq = NULL;
106 struct btrfs_offload_read_state *state = NULL;
109 req = tevent_req_create(mem_ctx, &state,
110 struct btrfs_offload_read_state);
114 *state = (struct btrfs_offload_read_state) {
/* set up the offload token context (remaining arguments not visible) */
119 status = vfs_offload_token_ctx_init(fsp->conn->sconn->client,
121 if (tevent_req_nterror(req, status)) {
122 return tevent_req_post(req, ev);
/* dup-extents requests get their token generated by this module */
125 if (fsctl == FSCTL_DUP_EXTENTS_TO_FILE) {
126 status = vfs_offload_token_create_blob(state, fsp, fsctl,
128 if (tevent_req_nterror(req, status)) {
129 return tevent_req_post(req, ev);
/* remember the fsp in the token database for the later offload write */
132 status = vfs_offload_token_db_store_fsp(btrfs_offload_ctx, fsp,
134 if (tevent_req_nterror(req, status)) {
135 return tevent_req_post(req, ev);
/* nothing asynchronous left to do: finish the request right away */
137 tevent_req_done(req);
138 return tevent_req_post(req, ev);
/* all other fsctls are passed down the VFS stack */
141 subreq = SMB_VFS_NEXT_OFFLOAD_READ_SEND(mem_ctx, ev, handle, fsp,
142 fsctl, ttl, offset, to_copy);
143 if (tevent_req_nomem(subreq, req)) {
144 return tevent_req_post(req, ev);
146 tevent_req_set_callback(subreq, btrfs_offload_read_done, req);
/*
 * Completion callback for an offload read that was forwarded to the next
 * VFS module. The result is collected with SMB_VFS_NEXT_OFFLOAD_READ_RECV,
 * an entry is then stored in the offload token database, and the parent
 * request is marked done. A failure of either step is reported on the
 * parent request via tevent_req_nterror().
 */
150 static void btrfs_offload_read_done(struct tevent_req *subreq)
/* the parent request was registered as callback data of the subrequest */
152 struct tevent_req *req = tevent_req_callback_data(
153 subreq, struct tevent_req);
154 struct btrfs_offload_read_state *state = tevent_req_data(
155 req, struct btrfs_offload_read_state);
/* fetch the result of the next module (remaining arguments not visible) */
158 status = SMB_VFS_NEXT_OFFLOAD_READ_RECV(subreq,
163 if (tevent_req_nterror(req, status)) {
/* record the token database entry (remaining arguments not visible) */
167 status = vfs_offload_token_db_store_fsp(btrfs_offload_ctx,
170 if (tevent_req_nterror(req, status)) {
174 tevent_req_done(req);
/*
 * Collect the result of an offload read.
 *
 * If the request failed, the request is marked received (the error return
 * itself is on a line not visible in this listing). Otherwise the token
 * length is copied to the caller and the token data is moved onto mem_ctx
 * with talloc_move(), which also clears the pointer held in the state.
 * The request is marked received on the success path as well.
 */
178 static NTSTATUS btrfs_offload_read_recv(struct tevent_req *req,
179 struct vfs_handle_struct *handle,
183 struct btrfs_offload_read_state *state = tevent_req_data(
184 req, struct btrfs_offload_read_state);
187 if (tevent_req_is_nterror(req, &status)) {
188 tevent_req_received(req);
/* hand the token over to the caller's talloc context */
192 token->length = state->token.length;
193 token->data = talloc_move(mem_ctx, &state->token.data);
195 tevent_req_received(req);
/*
 * Per-request state of an offload write (server-side copy). The code in
 * this file also reads and writes a copied member holding the number of
 * bytes copied; its declaration is not visible in this listing.
 */
199 struct btrfs_cc_state {
200 struct vfs_handle_struct *handle;
202 struct tevent_req *subreq; /* non-null if passed to next VFS fn */
/* completion callback for an offload write forwarded to the next module */
204 static void btrfs_offload_write_done(struct tevent_req *subreq);
/*
 * Start an asynchronous offload write (server-side copy).
 *
 * The source fsp is looked up in the offload token database. The copy is
 * then either forwarded to the next VFS module, or performed here with
 * BTRFS_IOC_CLONE_RANGE issued on the destination file descriptor. If the
 * clone ioctl fails, the request is also forwarded to the next VFS module.
 * On a successful clone the whole requested length is reported as copied
 * and the request completes immediately.
 *
 * Errors are reported through the returned request:
 *  - NT_STATUS_INVALID_VIEW_SIZE if the source file is smaller than
 *    src_off + num
 *  - NT_STATUS_INTERNAL_ERROR if either fsp has no op
 *  - NT_STATUS_FILE_LOCK_CONFLICT if a strict lock check fails
 *  - any failure status of the token lookup, handle check or stat
 *
 * NOTE(review): part of the parameter list is not visible in this listing;
 * the body additionally uses mem_ctx, fsctl, dest_off and num.
 */
206 static struct tevent_req *btrfs_offload_write_send(struct vfs_handle_struct *handle,
208 struct tevent_context *ev,
211 off_t transfer_offset,
212 struct files_struct *dest_fsp,
216 struct tevent_req *req;
217 struct btrfs_cc_state *cc_state;
218 struct btrfs_ioctl_clone_range_args cr_args;
219 struct lock_struct src_lck;
220 struct lock_struct dest_lck;
221 off_t src_off = transfer_offset;
222 files_struct *src_fsp = NULL;
224 bool handle_offload_write = true;
225 bool do_locking = false;
228 req = tevent_req_create(mem_ctx, &cc_state, struct btrfs_cc_state);
233 cc_state->handle = handle;
/* resolve the source file from the offload token */
235 status = vfs_offload_token_db_fetch_fsp(btrfs_offload_ctx,
237 if (tevent_req_nterror(req, status)) {
238 return tevent_req_post(req, ev);
/*
 * Per-fsctl policy. The statements of the individual cases are only
 * partly visible here; presumably the copychunk cases enable locking and
 * unknown fsctls are handed to the next module - TODO confirm.
 */
242 case FSCTL_SRV_COPYCHUNK:
243 case FSCTL_SRV_COPYCHUNK_WRITE:
247 case FSCTL_DUP_EXTENTS_TO_FILE:
248 /* dup extents does not use locking */
252 handle_offload_write = false;
258 * With a @src_length of zero, BTRFS_IOC_CLONE_RANGE clones
259 * all data from @src_offset->EOF! This is certainly not what
260 * the caller expects, and not what vfs_default does.
262 handle_offload_write = false;
/* not handled here: let the next VFS module perform the copy */
265 if (!handle_offload_write) {
266 cc_state->subreq = SMB_VFS_NEXT_OFFLOAD_WRITE_SEND(handle,
274 if (tevent_req_nomem(cc_state->subreq, req)) {
275 return tevent_req_post(req, ev);
277 tevent_req_set_callback(cc_state->subreq,
278 btrfs_offload_write_done,
/* validate the source/destination handle pair for this fsctl */
283 status = vfs_offload_token_check_handles(
284 fsctl, src_fsp, dest_fsp);
285 if (!NT_STATUS_IS_OK(status)) {
286 tevent_req_nterror(req, status);
287 return tevent_req_post(req, ev);
/* refresh the source stat info so the size check below is current */
290 status = vfs_stat_fsp(src_fsp);
291 if (tevent_req_nterror(req, status)) {
292 return tevent_req_post(req, ev);
/* the requested range must lie completely inside the source file */
295 if (src_fsp->fsp_name->st.st_ex_size < src_off + num) {
296 /* [MS-SMB2] Handling a Server-Side Data Copy Request */
297 tevent_req_nterror(req, NT_STATUS_INVALID_VIEW_SIZE);
298 return tevent_req_post(req, ev);
/* both handles need an op: op->global->open_persistent_id is read below */
301 if (src_fsp->op == NULL || dest_fsp->op == NULL) {
302 tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
303 return tevent_req_post(req, ev);
/*
 * Byte-range lock checks on source and destination.
 * NOTE(review): presumably guarded by do_locking; the guarding condition
 * is not visible in this listing.
 */
307 init_strict_lock_struct(src_fsp,
308 src_fsp->op->global->open_persistent_id,
313 init_strict_lock_struct(dest_fsp,
314 dest_fsp->op->global->open_persistent_id,
320 if (!SMB_VFS_STRICT_LOCK_CHECK(src_fsp->conn, src_fsp, &src_lck)) {
321 tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
322 return tevent_req_post(req, ev);
324 if (!SMB_VFS_STRICT_LOCK_CHECK(dest_fsp->conn, dest_fsp, &dest_lck)) {
325 tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
326 return tevent_req_post(req, ev);
/* describe the source range; the ioctl is issued on the destination fd */
330 ZERO_STRUCT(cr_args);
331 cr_args.src_fd = src_fsp->fh->fd;
332 cr_args.src_offset = (uint64_t)src_off;
333 cr_args.dest_offset = (uint64_t)dest_off;
334 cr_args.src_length = (uint64_t)num;
336 ret = ioctl(dest_fsp->fh->fd, BTRFS_IOC_CLONE_RANGE, &cr_args);
339 * BTRFS_IOC_CLONE_RANGE only supports 'sectorsize' aligned
340 * cloning. Which is 4096 by default, therefore fall back to
341 * manual read/write on failure.
343 DEBUG(5, ("BTRFS_IOC_CLONE_RANGE failed: %s, length %llu, "
344 "src fd: %lld off: %llu, dest fd: %d off: %llu\n",
346 (unsigned long long)cr_args.src_length,
347 (long long)cr_args.src_fd,
348 (unsigned long long)cr_args.src_offset,
350 (unsigned long long)cr_args.dest_offset));
351 cc_state->subreq = SMB_VFS_NEXT_OFFLOAD_WRITE_SEND(handle,
359 if (tevent_req_nomem(cc_state->subreq, req)) {
360 return tevent_req_post(req, ev);
362 /* wait for subreq completion */
363 tevent_req_set_callback(cc_state->subreq,
364 btrfs_offload_write_done,
369 DEBUG(5, ("BTRFS_IOC_CLONE_RANGE returned %d\n", ret));
370 /* BTRFS_IOC_CLONE_RANGE is all or nothing */
371 cc_state->copied = num;
372 tevent_req_done(req);
373 return tevent_req_post(req, ev);
/*
 * Completion callback for an offload write handled by the next VFS module.
 * The result is collected with SMB_VFS_NEXT_OFFLOAD_WRITE_RECV (remaining
 * arguments not visible in this listing); a failure status is set on the
 * parent request, otherwise the parent request is marked done.
 */
376 /* only used if the request is passed through to next VFS module */
377 static void btrfs_offload_write_done(struct tevent_req *subreq)
379 struct tevent_req *req = tevent_req_callback_data(
380 subreq, struct tevent_req);
381 struct btrfs_cc_state *cc_state = tevent_req_data(req,
382 struct btrfs_cc_state);
385 status = SMB_VFS_NEXT_OFFLOAD_WRITE_RECV(cc_state->handle,
388 if (tevent_req_nterror(req, status)) {
391 tevent_req_done(req);
/*
 * Collect the result of an offload write.
 *
 * On failure the error is logged at debug level 4 and the request is
 * marked received (the error return itself is on a line not visible in
 * this listing). On success the byte count recorded in the request state
 * is stored in *copied, logged at debug level 10, and the request is
 * marked received.
 */
394 static NTSTATUS btrfs_offload_write_recv(struct vfs_handle_struct *handle,
395 struct tevent_req *req,
399 struct btrfs_cc_state *cc_state = tevent_req_data(req,
400 struct btrfs_cc_state);
402 if (tevent_req_is_nterror(req, &status)) {
403 DEBUG(4, ("server side copy chunk failed: %s\n",
405 tevent_req_received(req);
409 DEBUG(10, ("server side copy chunk copied %llu\n",
410 (unsigned long long)cc_state->copied));
411 *copied = cc_state->copied;
412 tevent_req_received(req);
/*
 * Query whether compression is enabled on a file or directory.
 *
 * The inode flags are read with FS_IOC_GETFLAGS. If FS_COMPR_FL is set,
 * *_compression_fmt becomes COMPRESSION_FORMAT_LZNT1, otherwise
 * COMPRESSION_FORMAT_NONE.
 *
 * Status values assigned or returned on the visible lines:
 *  - NT_STATUS_OK on success
 *  - NT_STATUS_INVALID_PARAMETER if neither a usable fsp nor an smb_fname
 *    was supplied
 *  - NT_STATUS_UNSUCCESSFUL on the path-based open fallback (the exact
 *    conditions are on lines not visible in this listing)
 *  - the mapped errno if FS_IOC_GETFLAGS fails
 */
417 * caller must pass a non-null fsp or smb_fname. If fsp is null, then
418 * fall back to opening the corresponding file to issue the ioctl.
420 static NTSTATUS btrfs_get_compression(struct vfs_handle_struct *handle,
422 struct files_struct *fsp,
423 struct smb_filename *smb_fname,
424 uint16_t *_compression_fmt)
/* prefer the descriptor of an already open file */
433 if ((fsp != NULL) && (fsp->fh->fd != -1)) {
/* otherwise open the path: opendir() for directories, open() for the rest */
435 } else if (smb_fname != NULL) {
436 if (S_ISDIR(smb_fname->st.st_ex_mode)) {
437 dir = opendir(smb_fname->base_name);
439 return NT_STATUS_UNSUCCESSFUL;
444 status = NT_STATUS_UNSUCCESSFUL;
448 fd = open(smb_fname->base_name, O_RDONLY);
450 return NT_STATUS_UNSUCCESSFUL;
455 return NT_STATUS_INVALID_PARAMETER;
458 ret = ioctl(fd, FS_IOC_GETFLAGS, &flags);
460 DEBUG(1, ("FS_IOC_GETFLAGS failed: %s, fd %lld\n",
461 strerror(errno), (long long)fd));
462 status = map_nt_error_from_unix(errno);
/* translate the compression inode flag into an SMB compression format */
465 if (flags & FS_COMPR_FL) {
466 *_compression_fmt = COMPRESSION_FORMAT_LZNT1;
468 *_compression_fmt = COMPRESSION_FORMAT_NONE;
470 status = NT_STATUS_OK;
/*
 * Enable or disable compression on an open file.
 *
 * The inode flags are read with FS_IOC_GETFLAGS, FS_COMPR_FL is cleared or
 * set according to compression_fmt, and the result is written back with
 * FS_IOC_SETFLAGS:
 *  - COMPRESSION_FORMAT_NONE clears FS_COMPR_FL
 *  - COMPRESSION_FORMAT_DEFAULT and COMPRESSION_FORMAT_LZNT1 set it
 *
 * Status values assigned on the visible lines:
 *  - NT_STATUS_OK on success
 *  - NT_STATUS_INVALID_PARAMETER if fsp is NULL, has no open descriptor,
 *    or compression_fmt is none of the formats above
 *  - the mapped errno if either ioctl fails
 */
483 static NTSTATUS btrfs_set_compression(struct vfs_handle_struct *handle,
485 struct files_struct *fsp,
486 uint16_t compression_fmt)
/* an open file descriptor is required to issue the ioctls */
493 if ((fsp == NULL) || (fsp->fh->fd == -1)) {
494 status = NT_STATUS_INVALID_PARAMETER;
/* read-modify-write: start from the current inode flags */
499 ret = ioctl(fd, FS_IOC_GETFLAGS, &flags);
501 DEBUG(1, ("FS_IOC_GETFLAGS failed: %s, fd %d\n",
502 strerror(errno), fd));
503 status = map_nt_error_from_unix(errno);
/* the debug text of each branch names the flag operation it performs */
507 if (compression_fmt == COMPRESSION_FORMAT_NONE) {
508 DEBUG(5, ("clearing compression\n"));
509 flags &= (~FS_COMPR_FL);
510 } else if ((compression_fmt == COMPRESSION_FORMAT_DEFAULT)
511 || (compression_fmt == COMPRESSION_FORMAT_LZNT1)) {
512 DEBUG(5, ("setting compression\n"));
513 flags |= FS_COMPR_FL;
515 DEBUG(1, ("invalid compression format 0x%x\n",
516 (int)compression_fmt));
517 status = NT_STATUS_INVALID_PARAMETER;
/* write the updated flag word back to the inode */
521 ret = ioctl(fd, FS_IOC_SETFLAGS, &flags);
523 DEBUG(1, ("FS_IOC_SETFLAGS failed: %s, fd %d\n",
524 strerror(errno), fd));
525 status = map_nt_error_from_unix(errno);
528 status = NT_STATUS_OK;
/*
 * Decide whether service_path can be snapshotted by this module.
 *
 * If the "btrfs:manipulate snapshots" share parameter is off (its default
 * here is false), the call is passed to the next VFS module. Otherwise the
 * path must be a directory whose inode number equals BTRFS_INODE_SUBVOL;
 * a copy of service_path allocated on mem_ctx then serves as base volume.
 *
 * Status values returned on the visible lines:
 *  - NT_STATUS_NOT_SUPPORTED if stat() fails or the path is not a
 *    subvolume root directory
 *  - NT_STATUS_NO_MEMORY if the path copy cannot be allocated
 */
534 * Check whether a path can be shadow copied. Return the base volume, allowing
535 * the caller to determine if multiple paths lie on the same base volume.
537 #define BTRFS_INODE_SUBVOL 256
538 static NTSTATUS btrfs_snap_check_path(struct vfs_handle_struct *handle,
540 const char *service_path,
546 if (!lp_parm_bool(SNUM(handle->conn),
547 "btrfs", "manipulate snapshots", false)) {
548 DEBUG(2, ("Btrfs snapshot manipulation disabled, passing\n"));
549 return SMB_VFS_NEXT_SNAP_CHECK_PATH(handle, mem_ctx,
550 service_path, base_volume);
553 /* btrfs userspace uses this logic to confirm subvolume */
554 if (stat(service_path, &st) < 0) {
555 return NT_STATUS_NOT_SUPPORTED;
557 if ((st.st_ino != BTRFS_INODE_SUBVOL) || !S_ISDIR(st.st_mode)) {
558 DEBUG(0, ("%s not a btrfs subvolume, snapshots not available\n",
560 return NT_STATUS_NOT_SUPPORTED;
563 /* we "snapshot" the service path itself */
564 base = talloc_strdup(mem_ctx, service_path);
566 return NT_STATUS_NO_MEMORY;
/*
 * Derive where a new snapshot is created and what it is called.
 *
 * *dest_path receives a copy of src_path, so the snapshot is placed
 * inside the source path itself. *subvolume receives the timestamp broken
 * down as GMT and formatted with SHADOW_COPY_PATH_FORMAT. Both strings are
 * allocated on mem_ctx.
 *
 * Status values returned on the visible lines:
 *  - NT_STATUS_UNSUCCESSFUL on the strftime() failure path (the checked
 *    condition is on a line not visible in this listing)
 *  - NT_STATUS_NO_MEMORY if either string cannot be allocated
 */
573 static NTSTATUS btrfs_gen_snap_dest_path(TALLOC_CTX *mem_ctx,
574 const char *src_path,
576 char **dest_path, char **subvolume)
582 gmtime_r(tstamp, &t_gmt);
584 tlen = strftime(time_str, ARRAY_SIZE(time_str),
585 SHADOW_COPY_PATH_FORMAT, &t_gmt);
587 return NT_STATUS_UNSUCCESSFUL;
590 *dest_path = talloc_strdup(mem_ctx, src_path);
591 *subvolume = talloc_strdup(mem_ctx, time_str);
592 if ((*dest_path == NULL) || (*subvolume == NULL)) {
593 return NT_STATUS_NO_MEMORY;
/*
 * Create a snapshot of base_volume with BTRFS_IOC_SNAP_CREATE_V2.
 *
 * If the "btrfs:manipulate snapshots" share parameter is off, the call is
 * passed to the next VFS module. Otherwise the snapshot name is generated
 * from tstamp, the source and destination directories are opened, and the
 * ioctl is issued on the destination directory with the source directory
 * descriptor as argument. The snapshot is read-only unless rw is true.
 *
 * On success *_base_path and *_snap_path are moved onto mem_ctx; all other
 * allocations live on a temporary talloc context that is freed on every
 * exit path shown here.
 *
 * Status values assigned or returned on the visible lines:
 *  - NT_STATUS_NO_MEMORY on allocation failure
 *  - NT_STATUS_INVALID_PARAMETER if the snapshot name does not fit the
 *    ioctl name buffer
 *  - the mapped errno if opendir(), dirfd() or the ioctl fails
 */
599 static NTSTATUS btrfs_snap_create(struct vfs_handle_struct *handle,
601 const char *base_volume,
607 struct btrfs_ioctl_vol_args_v2 ioctl_arg;
612 char *dest_path = NULL;
613 char *dest_subvolume = NULL;
622 if (!lp_parm_bool(SNUM(handle->conn),
623 "btrfs", "manipulate snapshots", false)) {
624 DEBUG(2, ("Btrfs snapshot manipulation disabled, passing\n"));
625 return SMB_VFS_NEXT_SNAP_CREATE(handle, mem_ctx, base_volume,
626 tstamp, rw, _base_path,
/* scratch context: everything below is allocated on it */
630 tmp_ctx = talloc_new(mem_ctx);
631 if (tmp_ctx == NULL) {
632 return NT_STATUS_NO_MEMORY;
635 base_path = talloc_strdup(tmp_ctx, base_volume);
636 if (base_path == NULL) {
637 talloc_free(tmp_ctx);
638 return NT_STATUS_NO_MEMORY;
/* destination directory and timestamp based snapshot name */
641 status = btrfs_gen_snap_dest_path(tmp_ctx, base_volume, tstamp,
642 &dest_path, &dest_subvolume);
643 if (!NT_STATUS_IS_OK(status)) {
644 talloc_free(tmp_ctx);
/* full snapshot path reported to the caller (second argument not visible) */
648 snap_path = talloc_asprintf(tmp_ctx, "%s/%s", dest_path,
650 if (snap_path == NULL) {
651 talloc_free(tmp_ctx);
652 return NT_STATUS_NO_MEMORY;
/* descriptor of the subvolume that gets snapshotted */
655 src_dir = opendir(base_volume);
656 if (src_dir == NULL) {
657 DEBUG(0, ("snap src %s open failed: %s\n",
658 base_volume, strerror(errno)));
659 status = map_nt_error_from_unix(errno);
660 talloc_free(tmp_ctx);
663 src_fd = dirfd(src_dir);
665 status = map_nt_error_from_unix(errno);
667 talloc_free(tmp_ctx);
/* descriptor of the directory that will contain the snapshot */
671 dest_dir = opendir(dest_path);
672 if (dest_dir == NULL) {
673 DEBUG(0, ("snap dest %s open failed: %s\n",
674 dest_path, strerror(errno)));
675 status = map_nt_error_from_unix(errno);
677 talloc_free(tmp_ctx);
680 dest_fd = dirfd(dest_dir);
682 status = map_nt_error_from_unix(errno);
685 talloc_free(tmp_ctx);
689 /* avoid zeroing the entire struct here, name is 4k */
690 ioctl_arg.fd = src_fd;
691 ioctl_arg.transid = 0;
692 ioctl_arg.flags = (rw == false) ? BTRFS_SUBVOL_RDONLY : 0;
693 memset(ioctl_arg.unused, 0, sizeof(ioctl_arg.unused));
/* strlcpy() returns the source length, so len >= size means truncation */
694 len = strlcpy(ioctl_arg.name, dest_subvolume,
695 ARRAY_SIZE(ioctl_arg.name));
696 if (len >= ARRAY_SIZE(ioctl_arg.name)) {
697 DEBUG(1, ("subvolume name too long for SNAP_CREATE ioctl\n"));
700 talloc_free(tmp_ctx);
701 return NT_STATUS_INVALID_PARAMETER;
705 ret = ioctl(dest_fd, BTRFS_IOC_SNAP_CREATE_V2, &ioctl_arg);
709 DEBUG(0, ("%s -> %s(%s) BTRFS_IOC_SNAP_CREATE_V2 failed: %s\n",
710 base_volume, dest_path, dest_subvolume,
711 strerror(saved_errno)));
712 status = map_nt_error_from_unix(saved_errno);
715 talloc_free(tmp_ctx);
718 DEBUG(5, ("%s -> %s(%s) BTRFS_IOC_SNAP_CREATE_V2 done\n",
719 base_volume, dest_path, dest_subvolume));
/* move the results off the scratch context before it is freed */
721 *_base_path = talloc_steal(mem_ctx, base_path);
722 *_snap_path = talloc_steal(mem_ctx, snap_path);
725 talloc_free(tmp_ctx);
/*
 * Delete a snapshot with BTRFS_IOC_SNAP_DESTROY.
 *
 * If the "btrfs:manipulate snapshots" share parameter is off, the call is
 * passed to the next VFS module. Otherwise snap_path is split into its
 * parent directory and its last component, the last component must parse
 * completely as SHADOW_COPY_PATH_FORMAT, and the ioctl is issued on the
 * parent directory with the last component as subvolume name.
 *
 * Status values assigned or returned on the visible lines:
 *  - NT_STATUS_NO_MEMORY on allocation failure
 *  - NT_STATUS_UNSUCCESSFUL if the name does not match the creation format
 *  - NT_STATUS_INVALID_PARAMETER if the name does not fit the ioctl name
 *    buffer
 *  - the mapped errno if opendir(), dirfd() or the ioctl fails
 */
730 static NTSTATUS btrfs_snap_delete(struct vfs_handle_struct *handle,
739 struct btrfs_ioctl_vol_args ioctl_arg;
748 if (!lp_parm_bool(SNUM(handle->conn),
749 "btrfs", "manipulate snapshots", false)) {
750 DEBUG(2, ("Btrfs snapshot manipulation disabled, passing\n"));
751 return SMB_VFS_NEXT_SNAP_DELETE(handle, mem_ctx,
752 base_path, snap_path);
755 tmp_ctx = talloc_new(mem_ctx);
756 if (tmp_ctx == NULL) {
757 return NT_STATUS_NO_MEMORY;
/* two private copies of snap_path, one each for dirname() and basename() */
760 dest_path = talloc_strdup(tmp_ctx, snap_path);
761 if (dest_path == NULL) {
762 talloc_free(tmp_ctx);
763 return NT_STATUS_NO_MEMORY;
765 subvolume = talloc_strdup(tmp_ctx, snap_path);
766 if (subvolume == NULL) {
767 talloc_free(tmp_ctx);
768 return NT_STATUS_NO_MEMORY;
770 dest_path = dirname(dest_path);
771 subvolume = basename(subvolume);
773 /* confirm snap_path matches creation format */
774 tstr = strptime(subvolume, SHADOW_COPY_PATH_FORMAT, &t_gmt);
/* reject both a parse failure and trailing characters after the timestamp */
775 if ((tstr == NULL) || (*tstr != '\0')) {
776 DEBUG(0, ("snapshot path %s does not match creation format\n",
778 talloc_free(tmp_ctx);
779 return NT_STATUS_UNSUCCESSFUL;
/* the destroy ioctl is issued on the directory containing the snapshot */
782 dest_dir = opendir(dest_path);
783 if (dest_dir == NULL) {
784 DEBUG(0, ("snap destroy dest %s open failed: %s\n",
785 dest_path, strerror(errno)));
786 status = map_nt_error_from_unix(errno);
787 talloc_free(tmp_ctx);
790 dest_fd = dirfd(dest_dir);
792 status = map_nt_error_from_unix(errno);
794 talloc_free(tmp_ctx);
798 ioctl_arg.fd = -1; /* not needed */
/* strlcpy() returns the source length, so len >= size means truncation */
799 len = strlcpy(ioctl_arg.name, subvolume, ARRAY_SIZE(ioctl_arg.name));
800 if (len >= ARRAY_SIZE(ioctl_arg.name)) {
801 DEBUG(1, ("subvolume name too long for SNAP_DESTROY ioctl\n"));
803 talloc_free(tmp_ctx);
804 return NT_STATUS_INVALID_PARAMETER;
808 ret = ioctl(dest_fd, BTRFS_IOC_SNAP_DESTROY, &ioctl_arg);
812 DEBUG(0, ("%s(%s) BTRFS_IOC_SNAP_DESTROY failed: %s\n",
813 dest_path, subvolume, strerror(saved_errno)));
814 status = map_nt_error_from_unix(saved_errno);
816 talloc_free(tmp_ctx);
819 DEBUG(5, ("%s(%s) BTRFS_IOC_SNAP_DESTROY done\n",
820 dest_path, subvolume));
823 talloc_free(tmp_ctx);
/*
 * VFS operations implemented by this module: filesystem capabilities,
 * offload read/write, compression get/set and snapshot check/create/delete.
 * The table is registered by vfs_btrfs_init().
 */
827 static struct vfs_fn_pointers btrfs_fns = {
828 .fs_capabilities_fn = btrfs_fs_capabilities,
829 .offload_read_send_fn = btrfs_offload_read_send,
830 .offload_read_recv_fn = btrfs_offload_read_recv,
831 .offload_write_send_fn = btrfs_offload_write_send,
832 .offload_write_recv_fn = btrfs_offload_write_recv,
833 .get_compression_fn = btrfs_get_compression,
834 .set_compression_fn = btrfs_set_compression,
835 .snap_check_path_fn = btrfs_snap_check_path,
836 .snap_create_fn = btrfs_snap_create,
837 .snap_delete_fn = btrfs_snap_delete,
/*
 * Module entry point: registers the btrfs_fns operations table under the
 * module name "btrfs" for SMB_VFS_INTERFACE_VERSION and returns the status
 * of smb_register_vfs(). The ctx argument is not used on the visible lines.
 */
840 NTSTATUS vfs_btrfs_init(TALLOC_CTX *);
841 NTSTATUS vfs_btrfs_init(TALLOC_CTX *ctx)
843 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
844 "btrfs", &btrfs_fns);