s3: VFS: Change SMB_VFS_SYS_ACL_GET_FILE to use const struct smb_filename * instead...
[samba.git] / source3 / smbd / open.c
index d05c9ec199a94aa97e9157beb767ac4547958c7e..8fdc96376d87d3da45ad33423fee09929a15282a 100644 (file)
@@ -4,6 +4,7 @@
    Copyright (C) Andrew Tridgell 1992-1998
    Copyright (C) Jeremy Allison 2001-2004
    Copyright (C) Volker Lendecke 2005
+   Copyright (C) Ralph Boehme 2017
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,6 +22,7 @@
 
 #include "includes.h"
 #include "system/filesys.h"
+#include "lib/util/server_id.h"
 #include "printing.h"
 #include "smbd/smbd.h"
 #include "smbd/globals.h"
@@ -35,6 +37,8 @@
 #include "serverid.h"
 #include "messages.h"
 #include "source3/lib/dbwrap/dbwrap_watch.h"
+#include "locking/leases_db.h"
+#include "librpc/gen_ndr/ndr_leases_db.h"
 
 extern const struct generic_mapping file_generic_mapping;
 
@@ -42,6 +46,13 @@ struct deferred_open_record {
         bool delayed_for_oplocks;
        bool async_open;
         struct file_id id;
+
+       /*
+        * Timer for async opens, needed because they don't use a watch on
+        * a locking.tdb record. This is currently only used for real async
+        * opens and just terminates smbd if the async open times out.
+        */
+       struct tevent_timer *te;
 };
 
 /****************************************************************************
@@ -117,7 +128,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
                return NT_STATUS_OK;
        }
 
-       status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
+       status = SMB_VFS_GET_NT_ACL(conn, smb_fname,
                        (SECINFO_OWNER |
                        SECINFO_GROUP |
                         SECINFO_DACL), talloc_tos(), &sd);
@@ -151,7 +162,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
         * Samba 3.6 and earlier granted execute access even
         * if the ACL did not contain execute rights.
         * Samba 4.0 is more correct and checks it.
-        * The compatibilty mode allows to skip this check
+        * The compatibility mode allows one to skip this check
         * to smoothen upgrades.
         */
        if (lp_acl_allow_execute_always(SNUM(conn))) {
@@ -233,7 +244,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        return NT_STATUS_OK;
 }
 
-static NTSTATUS check_parent_access(struct connection_struct *conn,
+NTSTATUS check_parent_access(struct connection_struct *conn,
                                struct smb_filename *smb_fname,
                                uint32_t access_mask)
 {
@@ -241,6 +252,7 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        char *parent_dir = NULL;
        struct security_descriptor *parent_sd = NULL;
        uint32_t access_granted = 0;
+       struct smb_filename *parent_smb_fname = NULL;
 
        if (!parent_dirname(talloc_tos(),
                                smb_fname->base_name,
@@ -249,6 +261,15 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
                return NT_STATUS_NO_MEMORY;
        }
 
+       parent_smb_fname = synthetic_smb_fname(talloc_tos(),
+                               parent_dir,
+                               NULL,
+                               NULL,
+                               smb_fname->flags);
+       if (parent_smb_fname == NULL) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
        if (get_current_uid(conn) == (uid_t)0) {
                /* I'm sorry sir, I didn't know you were root... */
                DEBUG(10,("check_parent_access: root override "
@@ -259,7 +280,7 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        }
 
        status = SMB_VFS_GET_NT_ACL(conn,
-                               parent_dir,
+                               parent_smb_fname,
                                SECINFO_DACL,
                                    talloc_tos(),
                                &parent_sd);
@@ -342,6 +363,269 @@ static NTSTATUS check_base_file_access(struct connection_struct *conn,
                                        access_mask);
 }
 
+/****************************************************************************
+ Handle differing symlink errnos.
+
+ Normalises the errno values that some platforms report for a symlink
+ related open failure to ELOOP:
+   - ENOTSUP (Tru64; only compiled in when both ENOTSUP and OSF1 are defined)
+   - EFTYPE  (NetBSD; only compiled in when EFTYPE is defined)
+   - EMLINK  (FreeBSD; checked unconditionally)
+ Any other value of err is returned unchanged.
+****************************************************************************/
+
+static int link_errno_convert(int err)
+{
+#if defined(ENOTSUP) && defined(OSF1)
+       /* handle special Tru64 errno */
+       if (err == ENOTSUP) {
+               err = ELOOP;
+       }
+#endif /* ENOTSUP */
+#ifdef EFTYPE
+       /* fix broken NetBSD errno */
+       if (err == EFTYPE) {
+               err = ELOOP;
+       }
+#endif /* EFTYPE */
+       /* fix broken FreeBSD errno */
+       if (err == EMLINK) {
+               err = ELOOP;
+       }
+       return err;
+}
+
+static int non_widelink_open(struct connection_struct *conn,
+                       const char *conn_rootdir,
+                       files_struct *fsp,
+                       struct smb_filename *smb_fname,
+                       int flags,
+                       mode_t mode,
+                       unsigned int link_depth);
+
+/****************************************************************************
+ Follow a symlink in userspace.
+
+ Reads the target of smb_fname->base_name, resolves it to an absolute path
+ and refuses it with EACCES unless it is conn_rootdir itself or lies below
+ conn_rootdir. On acceptance smb_fname->base_name is replaced by the
+ share-relative target and the open is retried through non_widelink_open()
+ from conn_rootdir. link_depth is incremented on every call and ELOOP is
+ returned once it reaches 20.
+
+ Returns the open fd, or -1 with errno set. The working directory that was
+ current on entry is restored before returning (smb_panic if that fails).
+****************************************************************************/
+
+static int process_symlink_open(struct connection_struct *conn,
+                       const char *conn_rootdir,
+                       files_struct *fsp,
+                       struct smb_filename *smb_fname,
+                       int flags,
+                       mode_t mode,
+                       unsigned int link_depth)
+{
+       int fd = -1;
+       char *link_target = NULL;
+       int link_len = -1;
+       char *oldwd = NULL;
+       size_t rootdir_len = 0;
+       char *resolved_name = NULL;
+       const char *rel_path = NULL;
+       char *new_base_name = NULL;
+       bool matched = false;
+       int saved_errno = 0;
+
+       /*
+        * Every failure path records its error in saved_errno instead of
+        * relying on errno: the cleanup at "out" (free, talloc free and
+        * the chdir back to oldwd) is allowed to modify errno.
+        */
+
+       /*
+        * Ensure we don't get stuck in a symlink loop.
+        */
+       link_depth++;
+       if (link_depth >= 20) {
+               saved_errno = ELOOP;
+               goto out;
+       }
+
+       /* Allocate space for the link target. */
+       link_target = talloc_array(talloc_tos(), char, PATH_MAX);
+       if (link_target == NULL) {
+               saved_errno = ENOMEM;
+               goto out;
+       }
+
+       /* Read the link target. */
+       link_len = SMB_VFS_READLINK(conn,
+                               smb_fname->base_name,
+                               link_target,
+                               PATH_MAX - 1);
+       if (link_len == -1) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /* Ensure it's at least null terminated. */
+       link_target[link_len] = '\0';
+
+       /* Convert to an absolute path. */
+       resolved_name = SMB_VFS_REALPATH(conn, link_target);
+       if (resolved_name == NULL) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /*
+        * We know conn_rootdir starts with '/' and
+        * does not end in '/'. FIXME ! Should we
+        * smb_assert this ?
+        */
+       rootdir_len = strlen(conn_rootdir);
+
+       matched = (strncmp(conn_rootdir, resolved_name, rootdir_len) == 0);
+       if (!matched) {
+               saved_errno = EACCES;
+               goto out;
+       }
+
+       /*
+        * Turn into a path relative to the share root.
+        */
+       if (resolved_name[rootdir_len] == '\0') {
+               /* Link to the root of the share. */
+               rel_path = ".";
+       } else if (resolved_name[rootdir_len] == '/') {
+               rel_path = &resolved_name[rootdir_len+1];
+       } else {
+               /* Only a prefix match, e.g. "/share2" against "/share". */
+               saved_errno = EACCES;
+               goto out;
+       }
+
+       /*
+        * Duplicate the relative path: resolved_name is freed on exit,
+        * so base_name must not be left pointing into it. base_name is
+        * only replaced once the copy has succeeded.
+        */
+       new_base_name = talloc_strdup(talloc_tos(), rel_path);
+       if (new_base_name == NULL) {
+               saved_errno = ENOMEM;
+               goto out;
+       }
+       smb_fname->base_name = new_base_name;
+
+       oldwd = vfs_GetWd(talloc_tos(), conn);
+       if (oldwd == NULL) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /* Ensure we operate from the root of the share. */
+       if (vfs_ChDir(conn, conn_rootdir) == -1) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /* And do it all again.. */
+       fd = non_widelink_open(conn,
+                               conn_rootdir,
+                               fsp,
+                               smb_fname,
+                               flags,
+                               mode,
+                               link_depth);
+       if (fd == -1) {
+               saved_errno = errno;
+       }
+
+  out:
+
+       SAFE_FREE(resolved_name);
+       TALLOC_FREE(link_target);
+       if (oldwd != NULL) {
+               int ret = vfs_ChDir(conn, oldwd);
+               if (ret == -1) {
+                       smb_panic("unable to get back to old directory\n");
+               }
+               TALLOC_FREE(oldwd);
+       }
+       if (saved_errno != 0) {
+               errno = saved_errno;
+       }
+       return fd;
+}
+
+/****************************************************************************
+ Non-widelink open.
+
+ Changes into the parent directory of smb_fname->base_name, validates the
+ final path component with check_reduced_name() and opens it with
+ O_NOFOLLOW. If that open fails with ELOOP (after link_errno_convert()),
+ the symlink is followed in userspace via process_symlink_open(), unless
+ this is a POSIX open or lp_follow_symlinks() is false for the share.
+
+ Returns the open fd, or -1 with errno set. The working directory that was
+ current on entry is restored before returning (smb_panic if that fails).
+****************************************************************************/
+
+static int non_widelink_open(struct connection_struct *conn,
+                       const char *conn_rootdir,
+                       files_struct *fsp,
+                       struct smb_filename *smb_fname,
+                       int flags,
+                       mode_t mode,
+                       unsigned int link_depth)
+{
+       NTSTATUS status;
+       int fd = -1;
+       struct smb_filename *smb_fname_rel = NULL;
+       int saved_errno = 0;
+       char *oldwd = NULL;
+       char *parent_dir = NULL;
+       const char *final_component = NULL;
+
+       /*
+        * Every failure path records its error in saved_errno: the
+        * cleanup at "out" (talloc free and the chdir back to oldwd)
+        * is allowed to modify errno.
+        */
+
+       if (!parent_dirname(talloc_tos(),
+                       smb_fname->base_name,
+                       &parent_dir,
+                       &final_component)) {
+               saved_errno = ENOMEM;
+               goto out;
+       }
+
+       oldwd = vfs_GetWd(talloc_tos(), conn);
+       if (oldwd == NULL) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /* Pin parent directory in place. */
+       if (vfs_ChDir(conn, parent_dir) == -1) {
+               saved_errno = errno;
+               goto out;
+       }
+
+       /* Ensure the relative path is below the share. */
+       status = check_reduced_name(conn, parent_dir, final_component);
+       if (!NT_STATUS_IS_OK(status)) {
+               saved_errno = map_errno_from_nt_status(status);
+               goto out;
+       }
+
+       smb_fname_rel = synthetic_smb_fname(talloc_tos(),
+                               final_component,
+                               smb_fname->stream_name,
+                               &smb_fname->st,
+                               smb_fname->flags);
+       if (smb_fname_rel == NULL) {
+               /* Must not hand a NULL name to the VFS open below. */
+               saved_errno = ENOMEM;
+               goto out;
+       }
+
+       flags |= O_NOFOLLOW;
+
+       {
+               /*
+                * Temporarily point fsp->fsp_name at the relative name
+                * for the duration of the open, then put the original
+                * back (presumably so the VFS sees a name valid in the
+                * current directory - confirm against the VFS modules).
+                */
+               struct smb_filename *tmp_name = fsp->fsp_name;
+               fsp->fsp_name = smb_fname_rel;
+               fd = SMB_VFS_OPEN(conn, smb_fname_rel, fsp, flags, mode);
+               fsp->fsp_name = tmp_name;
+       }
+
+       if (fd == -1) {
+               saved_errno = link_errno_convert(errno);
+               if (saved_errno == ELOOP) {
+                       if (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) {
+                               /* Never follow symlinks on posix open. */
+                               goto out;
+                       }
+                       if (!lp_follow_symlinks(SNUM(conn))) {
+                               /* Explicitly no symlinks. */
+                               goto out;
+                       }
+                       /*
+                        * We have a symlink. Follow in userspace
+                        * to ensure it's under the share definition.
+                        */
+                       fd = process_symlink_open(conn,
+                                       conn_rootdir,
+                                       fsp,
+                                       smb_fname_rel,
+                                       flags,
+                                       mode,
+                                       link_depth);
+                       if (fd == -1) {
+                               saved_errno =
+                                       link_errno_convert(errno);
+                       } else {
+                               /*
+                                * The symlink was followed successfully;
+                                * don't report the earlier ELOOP.
+                                */
+                               saved_errno = 0;
+                       }
+               }
+       }
+
+  out:
+
+       TALLOC_FREE(parent_dir);
+       TALLOC_FREE(smb_fname_rel);
+
+       if (oldwd != NULL) {
+               int ret = vfs_ChDir(conn, oldwd);
+               if (ret == -1) {
+                       smb_panic("unable to get back to old directory\n");
+               }
+               TALLOC_FREE(oldwd);
+       }
+       if (saved_errno != 0) {
+               errno = saved_errno;
+       }
+       return fd;
+}
+
 /****************************************************************************
  fd support routines - attempt to do a dos_open.
 ****************************************************************************/
@@ -354,38 +638,39 @@ NTSTATUS fd_open(struct connection_struct *conn,
        struct smb_filename *smb_fname = fsp->fsp_name;
        NTSTATUS status = NT_STATUS_OK;
 
-#ifdef O_NOFOLLOW
-       /* 
+       /*
         * Never follow symlinks on a POSIX client. The
         * client should be doing this.
         */
 
-       if (fsp->posix_open || !lp_follow_symlinks(SNUM(conn))) {
+       if ((fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) || !lp_follow_symlinks(SNUM(conn))) {
                flags |= O_NOFOLLOW;
        }
-#endif
 
-       fsp->fh->fd = SMB_VFS_OPEN(conn, smb_fname, fsp, flags, mode);
-       if (fsp->fh->fd == -1) {
-               int posix_errno = errno;
-#ifdef O_NOFOLLOW
-#if defined(ENOTSUP) && defined(OSF1)
-               /* handle special Tru64 errno */
-               if (errno == ENOTSUP) {
-                       posix_errno = ELOOP;
-               }
-#endif /* ENOTSUP */
-#ifdef EFTYPE
-               /* fix broken NetBSD errno */
-               if (errno == EFTYPE) {
-                       posix_errno = ELOOP;
-               }
-#endif /* EFTYPE */
-               /* fix broken FreeBSD errno */
-               if (errno == EMLINK) {
-                       posix_errno = ELOOP;
+       /* Ensure path is below share definition. */
+       if (!lp_widelinks(SNUM(conn))) {
+               const char *conn_rootdir = SMB_VFS_CONNECTPATH(conn,
+                                               smb_fname->base_name);
+               if (conn_rootdir == NULL) {
+                       return NT_STATUS_NO_MEMORY;
                }
-#endif /* O_NOFOLLOW */
+               /*
+                * Only follow symlinks within a share
+                * definition.
+                */
+               fsp->fh->fd = non_widelink_open(conn,
+                                       conn_rootdir,
+                                       fsp,
+                                       smb_fname,
+                                       flags,
+                                       mode,
+                                       0);
+       } else {
+               fsp->fh->fd = SMB_VFS_OPEN(conn, smb_fname, fsp, flags, mode);
+       }
+
+       if (fsp->fh->fd == -1) {
+               int posix_errno = link_errno_convert(errno);
                status = map_nt_error_from_unix(posix_errno);
                if (errno == EMFILE) {
                        static time_t last_warned = 0L;
@@ -446,8 +731,11 @@ void change_file_owner_to_parent(connection_struct *conn,
        struct smb_filename *smb_fname_parent;
        int ret;
 
-       smb_fname_parent = synthetic_smb_fname(talloc_tos(), inherit_from_dir,
-                                              NULL, NULL);
+       smb_fname_parent = synthetic_smb_fname(talloc_tos(),
+                                       inherit_from_dir,
+                                       NULL,
+                                       NULL,
+                                       0);
        if (smb_fname_parent == NULL) {
                return;
        }
@@ -504,8 +792,11 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
        NTSTATUS status = NT_STATUS_OK;
        int ret;
 
-       smb_fname_parent = synthetic_smb_fname(ctx, inherit_from_dir,
-                                              NULL, NULL);
+       smb_fname_parent = synthetic_smb_fname(ctx,
+                                       inherit_from_dir,
+                                       NULL,
+                                       NULL,
+                                       0);
        if (smb_fname_parent == NULL) {
                return NT_STATUS_NO_MEMORY;
        }
@@ -545,7 +836,7 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
                goto chdir;
        }
 
-       smb_fname_cwd = synthetic_smb_fname(ctx, ".", NULL, NULL);
+       smb_fname_cwd = synthetic_smb_fname(ctx, ".", NULL, NULL, 0);
        if (smb_fname_cwd == NULL) {
                status = NT_STATUS_NO_MEMORY;
                goto chdir;
@@ -581,8 +872,10 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
        }
 
        become_root();
-       ret = SMB_VFS_LCHOWN(conn, ".", smb_fname_parent->st.st_ex_uid,
-                           (gid_t)-1);
+       ret = SMB_VFS_LCHOWN(conn,
+                       smb_fname_cwd,
+                       smb_fname_parent->st.st_ex_uid,
+                       (gid_t)-1);
        unbecome_root();
        if (ret == -1) {
                status = map_nt_error_from_unix(errno);
@@ -619,7 +912,9 @@ static NTSTATUS fd_open_atomic(struct connection_struct *conn,
                        bool *file_created)
 {
        NTSTATUS status = NT_STATUS_UNSUCCESSFUL;
+       NTSTATUS retry_status;
        bool file_existed = VALID_STAT(fsp->fsp_name->st);
+       int curr_flags;
 
        *file_created = false;
 
@@ -651,59 +946,65 @@ static NTSTATUS fd_open_atomic(struct connection_struct *conn,
         * we can never call O_CREAT without O_EXCL. So if
         * we think the file existed, try without O_CREAT|O_EXCL.
         * If we think the file didn't exist, try with
-        * O_CREAT|O_EXCL. Keep bouncing between these two
-        * requests until either the file is created, or
-        * opened. Either way, we keep going until we get
-        * a returnable result (error, or open/create).
+        * O_CREAT|O_EXCL.
+        *
+        * The big problem here is dangling symlinks. Opening
+        * without O_NOFOLLOW means both bad symlink
+        * and missing path return -1, ENOENT from open(). As POSIX
+        * is pathname based it's not possible to tell
+        * the difference between these two cases in a
+        * non-racy way, so change to try only two attempts before
+        * giving up.
+        *
+        * We don't have this problem for the O_NOFOLLOW
+        * case as it just returns NT_STATUS_OBJECT_PATH_NOT_FOUND
+        * mapped from the ELOOP POSIX error.
         */
 
-       while(1) {
-               int curr_flags = flags;
+       curr_flags = flags;
 
-               if (file_existed) {
-                       /* Just try open, do not create. */
-                       curr_flags &= ~(O_CREAT);
-                       status = fd_open(conn, fsp, curr_flags, mode);
-                       if (NT_STATUS_EQUAL(status,
-                                       NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
-                               /*
-                                * Someone deleted it in the meantime.
-                                * Retry with O_EXCL.
-                                */
-                               file_existed = false;
-                               DEBUG(10,("fd_open_atomic: file %s existed. "
-                                       "Retry.\n",
-                                       smb_fname_str_dbg(fsp->fsp_name)));
-                                       continue;
-                       }
-               } else {
-                       /* Try create exclusively, fail if it exists. */
-                       curr_flags |= O_EXCL;
-                       status = fd_open(conn, fsp, curr_flags, mode);
-                       if (NT_STATUS_EQUAL(status,
-                                       NT_STATUS_OBJECT_NAME_COLLISION)) {
-                               /*
-                                * Someone created it in the meantime.
-                                * Retry without O_CREAT.
-                                */
-                               file_existed = true;
-                               DEBUG(10,("fd_open_atomic: file %s "
-                                       "did not exist. Retry.\n",
-                                       smb_fname_str_dbg(fsp->fsp_name)));
-                               continue;
-                       }
-                       if (NT_STATUS_IS_OK(status)) {
-                               /*
-                                * Here we've opened with O_CREAT|O_EXCL
-                                * and got success. We *know* we created
-                                * this file.
-                                */
-                               *file_created = true;
-                       }
+       if (file_existed) {
+               curr_flags &= ~(O_CREAT);
+               retry_status = NT_STATUS_OBJECT_NAME_NOT_FOUND;
+       } else {
+               curr_flags |= O_EXCL;
+               retry_status = NT_STATUS_OBJECT_NAME_COLLISION;
+       }
+
+       status = fd_open(conn, fsp, curr_flags, mode);
+       if (NT_STATUS_IS_OK(status)) {
+               if (!file_existed) {
+                       *file_created = true;
                }
-               /* Create is done, or failed. */
-               break;
+               return NT_STATUS_OK;
+       }
+       if (!NT_STATUS_EQUAL(status, retry_status)) {
+               return status;
+       }
+
+       curr_flags = flags;
+
+       /*
+        * Keep file_existed up to date for clarity.
+        */
+       if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
+               file_existed = false;
+               curr_flags |= O_EXCL;
+               DBG_DEBUG("file %s did not exist. Retry.\n",
+                       smb_fname_str_dbg(fsp->fsp_name));
+       } else {
+               file_existed = true;
+               curr_flags &= ~(O_CREAT);
+               DBG_DEBUG("file %s existed. Retry.\n",
+                       smb_fname_str_dbg(fsp->fsp_name));
+       }
+
+       status = fd_open(conn, fsp, curr_flags, mode);
+
+       if (NT_STATUS_IS_OK(status) && (!file_existed)) {
+               *file_created = true;
        }
+
        return status;
 }
 
@@ -717,8 +1018,8 @@ static NTSTATUS open_file(files_struct *fsp,
                          const char *parent_dir,
                          int flags,
                          mode_t unx_mode,
-                         uint32 access_mask, /* client requested access mask. */
-                         uint32 open_access_mask, /* what we're actually using in the open. */
+                         uint32_t access_mask, /* client requested access mask. */
+                         uint32_t open_access_mask, /* what we're actually using in the open. */
                          bool *p_file_created)
 {
        struct smb_filename *smb_fname = fsp->fsp_name;
@@ -807,6 +1108,7 @@ static NTSTATUS open_file(files_struct *fsp,
                        wild = smb_fname->base_name;
                }
                if ((local_flags & O_CREAT) && !file_existed &&
+                   !(fsp->posix_flags & FSP_POSIX_FLAGS_PATHNAMES) &&
                    ms_has_wild(wild))  {
                        return NT_STATUS_OBJECT_NAME_INVALID;
                }
@@ -819,29 +1121,59 @@ static NTSTATUS open_file(files_struct *fsp,
                                                smb_fname,
                                                false,
                                                access_mask);
-                       } else if (local_flags & O_CREAT){
-                               status = check_parent_access(conn,
-                                               smb_fname,
-                                               SEC_DIR_ADD_FILE);
-                       } else {
-                               /* File didn't exist and no O_CREAT. */
-                               return NT_STATUS_OBJECT_NAME_NOT_FOUND;
+
+                               if (!NT_STATUS_IS_OK(status)) {
+                                       DEBUG(10, ("open_file: "
+                                                  "smbd_check_access_rights "
+                                                  "on file %s returned %s\n",
+                                                  smb_fname_str_dbg(smb_fname),
+                                                  nt_errstr(status)));
+                               }
+
+                               if (!NT_STATUS_IS_OK(status) &&
+                                   !NT_STATUS_EQUAL(status,
+                                       NT_STATUS_OBJECT_NAME_NOT_FOUND))
+                               {
+                                       return status;
+                               }
+
+                               if (NT_STATUS_EQUAL(status,
+                                       NT_STATUS_OBJECT_NAME_NOT_FOUND))
+                               {
+                                       DEBUG(10, ("open_file: "
+                                               "file %s vanished since we "
+                                               "checked for existence.\n",
+                                               smb_fname_str_dbg(smb_fname)));
+                                       file_existed = false;
+                                       SET_STAT_INVALID(fsp->fsp_name->st);
+                               }
                        }
-                       if (!NT_STATUS_IS_OK(status)) {
-                               DEBUG(10,("open_file: "
-                                       "%s on file "
-                                       "%s returned %s\n",
-                                       file_existed ?
-                                               "smbd_check_access_rights" :
-                                               "check_parent_access",
-                                       smb_fname_str_dbg(smb_fname),
-                                       nt_errstr(status) ));
-                               return status;
+
+                       if (!file_existed) {
+                               if (!(local_flags & O_CREAT)) {
+                                       /* File didn't exist and no O_CREAT. */
+                                       return NT_STATUS_OBJECT_NAME_NOT_FOUND;
+                               }
+
+                               status = check_parent_access(conn,
+                                                            smb_fname,
+                                                            SEC_DIR_ADD_FILE);
+                               if (!NT_STATUS_IS_OK(status)) {
+                                       DEBUG(10, ("open_file: "
+                                                  "check_parent_access on "
+                                                  "file %s returned %s\n",
+                                                  smb_fname_str_dbg(smb_fname),
+                                                  nt_errstr(status) ));
+                                       return status;
+                               }
                        }
                }
 
-               /* Actually do the open */
-               status = fd_open_atomic(conn, fsp, local_flags,
+               /*
+                * Actually do the open - if O_TRUNC is needed handle it
+                * below under the share mode lock.
+                */
+               status = fd_open_atomic(conn, fsp, local_flags & ~O_TRUNC,
                                unx_mode, p_file_created);
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(3,("Error opening file %s (%s) (local_flags=%d) "
@@ -850,6 +1182,25 @@ static NTSTATUS open_file(files_struct *fsp,
                        return status;
                }
 
+               if (local_flags & O_NONBLOCK) {
+                       /*
+                        * GPFS can return ETIMEDOUT for pread on
+                        * nonblocking file descriptors when files
+                        * migrated to tape need to be recalled. I
+                        * could imagine this happens elsewhere
+                        * too. With blocking file descriptors this
+                        * does not happen.
+                        */
+                       ret = set_blocking(fsp->fh->fd, true);
+                       if (ret == -1) {
+                               status = map_nt_error_from_unix(errno);
+                               DBG_WARNING("Could not set fd to blocking: "
+                                           "%s\n", strerror(errno));
+                               fd_close(fsp);
+                               return status;
+                       }
+               }
+
                ret = SMB_VFS_FSTAT(fsp, &smb_fname->st);
                if (ret == -1) {
                        /* If we have an fd, this stat should succeed. */
@@ -873,13 +1224,13 @@ static NTSTATUS open_file(files_struct *fsp,
                        /* Inherit the ACL if required */
                        if (lp_inherit_permissions(SNUM(conn))) {
                                inherit_access_posix_acl(conn, parent_dir,
-                                                        smb_fname->base_name,
+                                                        smb_fname,
                                                         unx_mode);
                                need_re_stat = true;
                        }
 
                        /* Change the owner if required. */
-                       if (lp_inherit_owner(SNUM(conn))) {
+                       if (lp_inherit_owner(SNUM(conn)) != INHERIT_OWNER_NO) {
                                change_file_owner_to_parent(conn, parent_dir,
                                                            fsp);
                                need_re_stat = true;
@@ -913,7 +1264,7 @@ static NTSTATUS open_file(files_struct *fsp,
                                access_mask);
 
                if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND) &&
-                               fsp->posix_open &&
+                               (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) &&
                                S_ISLNK(smb_fname->st.st_ex_mode)) {
                        /* This is a POSIX stat open for delete
                         * or rename on a symlink that points
@@ -982,8 +1333,8 @@ static NTSTATUS open_file(files_struct *fsp,
 ****************************************************************************/
 
 static bool share_conflict(struct share_mode_entry *entry,
-                          uint32 access_mask,
-                          uint32 share_access)
+                          uint32_t access_mask,
+                          uint32_t share_access)
 {
        DEBUG(10,("share_conflict: entry->access_mask = 0x%x, "
                  "entry->share_access = 0x%x, "
@@ -1080,6 +1431,11 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                return;
        }
 
+       if (share_entry->op_mid == 0) {
+               /* INTERNAL_OPEN_ONLY */
+               return;
+       }
+
        if (!is_valid_share_mode_entry(share_entry)) {
                return;
        }
@@ -1093,7 +1449,7 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                          "share entry with an open file\n");
        }
 
-       if (((uint16)fsp->oplock_type) != share_entry->op_type) {
+       if (((uint16_t)fsp->oplock_type) != share_entry->op_type) {
                goto panic;
        }
 
@@ -1115,7 +1471,7 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
 }
 #endif
 
-bool is_stat_open(uint32 access_mask)
+bool is_stat_open(uint32_t access_mask)
 {
        const uint32_t stat_open_bits =
                (SYNCHRONIZE_ACCESS|
@@ -1148,16 +1504,16 @@ static bool has_delete_on_close(struct share_mode_lock *lck,
 
 /****************************************************************************
  Deal with share modes
- Invarient: Share mode must be locked on entry and exit.
+ Invariant: Share mode must be locked on entry and exit.
  Returns -1 on error, or number of share modes on success (may be zero).
 ****************************************************************************/
 
 static NTSTATUS open_mode_check(connection_struct *conn,
                                struct share_mode_lock *lck,
-                               uint32 access_mask,
-                               uint32 share_access)
+                               uint32_t access_mask,
+                               uint32_t share_access)
 {
-       int i;
+       uint32_t i;
 
        if(lck->data->num_share_modes == 0) {
                return NT_STATUS_OK;
@@ -1208,25 +1564,30 @@ static NTSTATUS open_mode_check(connection_struct *conn,
  * our client.
  */
 
-static NTSTATUS send_break_message(struct messaging_context *msg_ctx,
+NTSTATUS send_break_message(struct messaging_context *msg_ctx,
                                   const struct share_mode_entry *exclusive,
                                   uint16_t break_to)
 {
        NTSTATUS status;
        char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+       struct server_id_buf tmp;
 
        DEBUG(10, ("Sending break request to PID %s\n",
-                  procid_str_static(&exclusive->pid)));
+                  server_id_str_buf(exclusive->pid, &tmp)));
 
        /* Create the message. */
        share_mode_entry_to_message(msg, exclusive);
 
        /* Overload entry->op_type */
+       /*
+        * This is a truncation from uint32_t to uint16_t, but so far only the
+        * lower 3 bits (LEASE_WRITE/HANDLE/READ) are used anyway.
+        */
        SSVAL(msg,OP_BREAK_MSG_OP_TYPE_OFFSET, break_to);
 
        status = messaging_send_buf(msg_ctx, exclusive->pid,
                                    MSG_SMB_BREAK_REQUEST,
-                                   (uint8 *)msg, sizeof(msg));
+                                   (uint8_t *)msg, sizeof(msg));
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(3, ("Could not send oplock break message: %s\n",
                          nt_errstr(status)));
@@ -1256,6 +1617,11 @@ static bool validate_oplock_types(struct share_mode_lock *lck)
                        continue;
                }
 
+               if (e->op_mid == 0) {
+                       /* INTERNAL_OPEN_ONLY */
+                       continue;
+               }
+
                if (e->op_type == NO_OPLOCK && is_stat_open(e->access_mask)) {
                        /* We ignore stat opens in the table - they
                           always have NO_OPLOCK and never get or
@@ -1336,201 +1702,514 @@ static bool validate_oplock_types(struct share_mode_lock *lck)
 
 static bool delay_for_oplock(files_struct *fsp,
                             int oplock_request,
+                            const struct smb2_lease *lease,
                             struct share_mode_lock *lck,
                             bool have_sharing_violation,
-                            uint32_t create_disposition)
+                            uint32_t create_disposition,
+                            bool first_open_attempt)
 {
        struct share_mode_data *d = lck->data;
-       struct share_mode_entry *entry;
-       uint32_t num_non_stat_opens = 0;
        uint32_t i;
-       uint16_t break_to;
+       bool delay = false;
+       bool will_overwrite;
 
-       if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
+       if ((oplock_request & INTERNAL_OPEN_ONLY) ||
+           is_stat_open(fsp->access_mask)) {
                return false;
        }
+
+       switch (create_disposition) {
+       case FILE_SUPERSEDE:
+       case FILE_OVERWRITE:
+       case FILE_OVERWRITE_IF:
+               will_overwrite = true;
+               break;
+       default:
+               will_overwrite = false;
+               break;
+       }
+
        for (i=0; i<d->num_share_modes; i++) {
                struct share_mode_entry *e = &d->share_modes[i];
-               if (e->op_type == NO_OPLOCK && is_stat_open(e->access_mask)) {
+               struct share_mode_lease *l = NULL;
+               uint32_t e_lease_type = get_lease_type(d, e);
+               uint32_t break_to;
+               uint32_t delay_mask = 0;
+
+               if (e->op_type == LEASE_OPLOCK) {
+                       l = &d->leases[e->lease_idx];
+               }
+
+               if (have_sharing_violation) {
+                       delay_mask = SMB2_LEASE_HANDLE;
+               } else {
+                       delay_mask = SMB2_LEASE_WRITE;
+               }
+
+               break_to = e_lease_type & ~delay_mask;
+
+               if (will_overwrite) {
+                       /*
+                        * we'll decide about SMB2_LEASE_READ later.
+                        *
+                        * Maybe the break will be deferred
+                        */
+                       break_to &= ~SMB2_LEASE_HANDLE;
+               }
+
+               DEBUG(10, ("entry %u: e_lease_type %u, will_overwrite: %u\n",
+                          (unsigned)i, (unsigned)e_lease_type,
+                          (unsigned)will_overwrite));
+
+               if (lease != NULL && l != NULL) {
+                       bool ign;
+
+                       ign = smb2_lease_equal(fsp_client_guid(fsp),
+                                              &lease->lease_key,
+                                              &l->client_guid,
+                                              &l->lease_key);
+                       if (ign) {
+                               continue;
+                       }
+               }
+
+               if ((e_lease_type & ~break_to) == 0) {
+                       if (l != NULL && l->breaking) {
+                               delay = true;
+                       }
                        continue;
                }
-               num_non_stat_opens += 1;
 
-               /*
-                * We found the a non-stat open, which in the exclusive/batch
-                * case will be inspected further down.
-                */
-               entry = e;
+               if (share_mode_stale_pid(d, i)) {
+                       continue;
+               }
+
+               if (will_overwrite) {
+                       /*
+                        * If we break anyway, break to NONE directly.
+                        * Otherwise vfs_set_filelen() will trigger the
+                        * break.
+                        */
+                       break_to &= ~(SMB2_LEASE_READ|SMB2_LEASE_WRITE);
+               }
+
+               if (e->op_type != LEASE_OPLOCK) {
+                       /*
+                        * Oplocks only support breaking to R or NONE.
+                        */
+                       break_to &= ~(SMB2_LEASE_HANDLE|SMB2_LEASE_WRITE);
+               }
+
+               DEBUG(10, ("breaking from %d to %d\n",
+                          (int)e_lease_type, (int)break_to));
+               send_break_message(fsp->conn->sconn->msg_ctx, e,
+                                  break_to);
+               if (e_lease_type & delay_mask) {
+                       delay = true;
+               }
+               if (l != NULL && l->breaking && !first_open_attempt) {
+                       delay = true;
+               }
+               continue;
        }
-       if (num_non_stat_opens == 0) {
-               /*
-                * Nothing to wait for around
-                */
-               return false;
+
+       return delay;
+}
+
+/**
+ * Return lease or oplock state from a share mode
+ **/
+static uint32_t get_lease_type_from_share_mode(const struct share_mode_data *d)
+{
+       uint32_t e_lease_type = 0;
+       uint32_t i;
+
+       for (i=0; i < d->num_share_modes; i++) {
+               struct share_mode_entry *e = &d->share_modes[i];
+
+               e_lease_type |= get_lease_type(d, e);
        }
-       if (num_non_stat_opens != 1) {
-               /*
-                * More than one open around. There can't be any exclusive or
-                * batch left, this is all level2.
-                */
+
+       return e_lease_type;
+}
+
+static bool file_has_brlocks(files_struct *fsp)
+{
+       struct byte_range_lock *br_lck;
+
+       br_lck = brl_get_locks_readonly(fsp);
+       if (!br_lck)
                return false;
+
+       return (brl_num_locks(br_lck) > 0);
+}
+
+int find_share_mode_lease(struct share_mode_data *d,
+                         const struct GUID *client_guid,
+                         const struct smb2_lease_key *key)
+{
+       uint32_t i;
+
+       for (i=0; i<d->num_leases; i++) {
+               struct share_mode_lease *l = &d->leases[i];
+
+               if (smb2_lease_equal(client_guid,
+                                    key,
+                                    &l->client_guid,
+                                    &l->lease_key)) {
+                       return i;
+               }
        }
 
-       if (server_id_is_disconnected(&entry->pid)) {
-               /*
-                * TODO: clean up.
-                * This could be achieved by sending a break message
-                * to ourselves. Special considerations for files
-                * with delete_on_close flag set!
-                *
-                * For now we keep it simple and do not
-                * allow delete on close for durable handles.
-                */
-               return false;
+       return -1;
+}
+
+struct fsp_lease *find_fsp_lease(struct files_struct *new_fsp,
+                                const struct smb2_lease_key *key,
+                                const struct share_mode_lease *l)
+{
+       struct files_struct *fsp;
+
+       /*
+        * TODO: Measure how expensive this loop is with thousands of open
+        * handles...
+        */
+
+       for (fsp = file_find_di_first(new_fsp->conn->sconn, new_fsp->file_id);
+            fsp != NULL;
+            fsp = file_find_di_next(fsp)) {
+
+               if (fsp == new_fsp) {
+                       continue;
+               }
+               if (fsp->oplock_type != LEASE_OPLOCK) {
+                       continue;
+               }
+               if (smb2_lease_key_equal(&fsp->lease->lease.lease_key, key)) {
+                       fsp->lease->ref_count += 1;
+                       return fsp->lease;
+               }
        }
 
-       switch (create_disposition) {
-       case FILE_SUPERSEDE:
-       case FILE_OVERWRITE_IF:
-               break_to = NO_OPLOCK;
-               break;
-       default:
-               break_to = LEVEL_II_OPLOCK;
-               break;
+       /* Not found - must be leased in another smbd. */
+       new_fsp->lease = talloc_zero(new_fsp->conn->sconn, struct fsp_lease);
+       if (new_fsp->lease == NULL) {
+               return NULL;
        }
+       new_fsp->lease->ref_count = 1;
+       new_fsp->lease->sconn = new_fsp->conn->sconn;
+       new_fsp->lease->lease.lease_key = *key;
+       new_fsp->lease->lease.lease_state = l->current_state;
+       /*
+        * We internally treat all leases as V2 and update
+        * the epoch, but when sending breaks it matters if
+        * the requesting lease was v1 or v2.
+        */
+       new_fsp->lease->lease.lease_version = l->lease_version;
+       new_fsp->lease->lease.lease_epoch = l->epoch;
+       return new_fsp->lease;
+}
+
+static NTSTATUS grant_fsp_lease(struct files_struct *fsp,
+                               struct share_mode_lock *lck,
+                               const struct smb2_lease *lease,
+                               uint32_t *p_lease_idx,
+                               uint32_t granted)
+{
+       struct share_mode_data *d = lck->data;
+       const struct GUID *client_guid = fsp_client_guid(fsp);
+       struct share_mode_lease *tmp;
+       NTSTATUS status;
+       int idx;
+
+       idx = find_share_mode_lease(d, client_guid, &lease->lease_key);
 
-       if (have_sharing_violation && (entry->op_type & BATCH_OPLOCK)) {
-               if (share_mode_stale_pid(d, 0)) {
-                       return false;
+       if (idx != -1) {
+               struct share_mode_lease *l = &d->leases[idx];
+               bool do_upgrade;
+               uint32_t existing, requested;
+
+               fsp->lease = find_fsp_lease(fsp, &lease->lease_key, l);
+               if (fsp->lease == NULL) {
+                       DEBUG(1, ("Did not find existing lease for file %s\n",
+                                 fsp_str_dbg(fsp)));
+                       return NT_STATUS_NO_MEMORY;
                }
-               send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
-               return true;
-       }
-       if (have_sharing_violation) {
+
+               *p_lease_idx = idx;
+
                /*
-                * Non-batch exclusive is not broken if we have a sharing
-                * violation
+                * Upgrade only if the requested lease is a strict upgrade.
                 */
-               return false;
-       }
-       if (LEVEL_II_OPLOCK_TYPE(entry->op_type) &&
-           (break_to == NO_OPLOCK)) {
-               if (share_mode_stale_pid(d, 0)) {
-                       return false;
+               existing = l->current_state;
+               requested = lease->lease_state;
+
+               /*
+                * Tricky: This test makes sure that "requested" is a
+                * bitwise superset of "existing" (equality is allowed here).
+                */
+               do_upgrade = ((existing & requested) == existing);
+
+               /*
+                * Upgrade only if there's a change.
+                */
+               do_upgrade &= (granted != existing);
+
+               /*
+                * Upgrade only if other leases don't prevent what was asked
+                * for.
+                */
+               do_upgrade &= (granted == requested);
+
+               /*
+                * only upgrade if we are not in breaking state
+                */
+               do_upgrade &= !l->breaking;
+
+               DEBUG(10, ("existing=%"PRIu32", requested=%"PRIu32", "
+                          "granted=%"PRIu32", do_upgrade=%d\n",
+                          existing, requested, granted, (int)do_upgrade));
+
+               if (do_upgrade) {
+                       l->current_state = granted;
+                       l->epoch += 1;
                }
-               DEBUG(10, ("Asynchronously breaking level2 oplock for "
-                          "create_disposition=%u\n",
-                          (unsigned)create_disposition));
-               send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
-               return false;
+
+               /* Ensure we're in sync with current lease state. */
+               fsp_lease_update(lck, fsp_client_guid(fsp), fsp->lease);
+               return NT_STATUS_OK;
        }
-       if (!EXCLUSIVE_OPLOCK_TYPE(entry->op_type)) {
+
+       /*
+        * Create new lease
+        */
+
+       tmp = talloc_realloc(d, d->leases, struct share_mode_lease,
+                            d->num_leases+1);
+       if (tmp == NULL) {
                /*
-                * No break for NO_OPLOCK or LEVEL2_OPLOCK oplocks
+                * See [MS-SMB2]
                 */
-               return false;
-       }
-       if (share_mode_stale_pid(d, 0)) {
-               return false;
+               return NT_STATUS_INSUFFICIENT_RESOURCES;
+       }
+       d->leases = tmp;
+
+       fsp->lease = talloc_zero(fsp->conn->sconn, struct fsp_lease);
+       if (fsp->lease == NULL) {
+               return NT_STATUS_INSUFFICIENT_RESOURCES;
+       }
+       fsp->lease->ref_count = 1;
+       fsp->lease->sconn = fsp->conn->sconn;
+       fsp->lease->lease.lease_version = lease->lease_version;
+       fsp->lease->lease.lease_key = lease->lease_key;
+       fsp->lease->lease.lease_state = granted;
+       fsp->lease->lease.lease_epoch = lease->lease_epoch + 1;
+
+       *p_lease_idx = d->num_leases;
+
+       d->leases[d->num_leases] = (struct share_mode_lease) {
+               .client_guid = *client_guid,
+               .lease_key = fsp->lease->lease.lease_key,
+               .current_state = fsp->lease->lease.lease_state,
+               .lease_version = fsp->lease->lease.lease_version,
+               .epoch = fsp->lease->lease.lease_epoch,
+       };
+
+       status = leases_db_add(client_guid,
+                              &lease->lease_key,
+                              &fsp->file_id,
+                              fsp->conn->connectpath,
+                              fsp->fsp_name->base_name,
+                              fsp->fsp_name->stream_name);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(10, ("%s: leases_db_add failed: %s\n", __func__,
+                          nt_errstr(status)));
+               TALLOC_FREE(fsp->lease);
+               return NT_STATUS_INSUFFICIENT_RESOURCES;
        }
 
-       send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
-       return true;
+       d->num_leases += 1;
+       d->modified = true;
+
+       return NT_STATUS_OK;
 }
 
-static bool file_has_brlocks(files_struct *fsp)
+static bool is_same_lease(const files_struct *fsp,
+                         const struct share_mode_data *d,
+                         const struct share_mode_entry *e,
+                         const struct smb2_lease *lease)
 {
-       struct byte_range_lock *br_lck;
-
-       br_lck = brl_get_locks_readonly(fsp);
-       if (!br_lck)
+       if (e->op_type != LEASE_OPLOCK) {
                return false;
+       }
+       if (lease == NULL) {
+               return false;
+       }
 
-       return (brl_num_locks(br_lck) > 0);
+       return smb2_lease_equal(fsp_client_guid(fsp),
+                               &lease->lease_key,
+                               &d->leases[e->lease_idx].client_guid,
+                               &d->leases[e->lease_idx].lease_key);
 }
 
-static void grant_fsp_oplock_type(files_struct *fsp,
-                                 struct share_mode_lock *lck,
-                                 int oplock_request)
+static NTSTATUS grant_fsp_oplock_type(struct smb_request *req,
+                                     struct files_struct *fsp,
+                                     struct share_mode_lock *lck,
+                                     int oplock_request,
+                                     struct smb2_lease *lease)
 {
-       bool allow_level2 = (global_client_caps & CAP_LEVEL_II_OPLOCKS) &&
-                           lp_level2_oplocks(SNUM(fsp->conn));
-       bool got_level2_oplock, got_a_none_oplock;
+       struct share_mode_data *d = lck->data;
+       bool got_handle_lease = false;
+       bool got_oplock = false;
        uint32_t i;
-
-       /* Start by granting what the client asked for,
-          but ensure no SAMBA_PRIVATE bits can be set. */
-       fsp->oplock_type = (oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK);
+       uint32_t granted;
+       uint32_t lease_idx = UINT32_MAX;
+       bool ok;
+       NTSTATUS status;
 
        if (oplock_request & INTERNAL_OPEN_ONLY) {
                /* No oplocks on internal open. */
-               fsp->oplock_type = NO_OPLOCK;
+               oplock_request = NO_OPLOCK;
                DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
                        fsp->oplock_type, fsp_str_dbg(fsp)));
-               return;
+       }
+
+       if (oplock_request == LEASE_OPLOCK) {
+               if (lease == NULL) {
+                       /*
+                        * The SMB2 layer should have checked this
+                        */
+                       return NT_STATUS_INTERNAL_ERROR;
+               }
+
+               granted = lease->lease_state;
+
+               if (lp_kernel_oplocks(SNUM(fsp->conn))) {
+                       DEBUG(10, ("No lease granted because kernel oplocks are enabled\n"));
+                       granted = SMB2_LEASE_NONE;
+               }
+               if ((granted & (SMB2_LEASE_READ|SMB2_LEASE_WRITE)) == 0) {
+                       DEBUG(10, ("No read or write lease requested\n"));
+                       granted = SMB2_LEASE_NONE;
+               }
+               if (granted == SMB2_LEASE_WRITE) {
+                       DEBUG(10, ("pure write lease requested\n"));
+                       granted = SMB2_LEASE_NONE;
+               }
+               if (granted == (SMB2_LEASE_WRITE|SMB2_LEASE_HANDLE)) {
+                       DEBUG(10, ("write and handle lease requested\n"));
+                       granted = SMB2_LEASE_NONE;
+               }
+       } else {
+               granted = map_oplock_to_lease_type(
+                       oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK);
        }
 
        if (lp_locking(fsp->conn->params) && file_has_brlocks(fsp)) {
                DEBUG(10,("grant_fsp_oplock_type: file %s has byte range locks\n",
                        fsp_str_dbg(fsp)));
-               fsp->oplock_type = NO_OPLOCK;
+               granted &= ~SMB2_LEASE_READ;
        }
 
-       if (is_stat_open(fsp->access_mask)) {
-               /* Leave the value already set. */
-               DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
-                       fsp->oplock_type, fsp_str_dbg(fsp)));
-               return;
-       }
+       for (i=0; i<d->num_share_modes; i++) {
+               struct share_mode_entry *e = &d->share_modes[i];
+               uint32_t e_lease_type;
 
-       got_level2_oplock = false;
-       got_a_none_oplock = false;
+               e_lease_type = get_lease_type(d, e);
 
-       for (i=0; i<lck->data->num_share_modes; i++) {
-               int op_type = lck->data->share_modes[i].op_type;
+               if ((granted & SMB2_LEASE_WRITE) &&
+                   !is_same_lease(fsp, d, e, lease) &&
+                   !share_mode_stale_pid(d, i)) {
+                       /*
+                        * Can grant only one writer
+                        */
+                       granted &= ~SMB2_LEASE_WRITE;
+               }
 
-               if (LEVEL_II_OPLOCK_TYPE(op_type)) {
-                       got_level2_oplock = true;
+               if ((e_lease_type & SMB2_LEASE_HANDLE) && !got_handle_lease &&
+                   !share_mode_stale_pid(d, i)) {
+                       got_handle_lease = true;
                }
-               if (op_type == NO_OPLOCK) {
-                       got_a_none_oplock = true;
+
+               if ((e->op_type != LEASE_OPLOCK) && !got_oplock &&
+                   !share_mode_stale_pid(d, i)) {
+                       got_oplock = true;
                }
        }
 
-       /*
-        * Match what was requested (fsp->oplock_type) with
-        * what was found in the existing share modes.
-        */
+       if ((granted & SMB2_LEASE_READ) && !(granted & SMB2_LEASE_WRITE)) {
+               bool allow_level2 =
+                       (global_client_caps & CAP_LEVEL_II_OPLOCKS) &&
+                       lp_level2_oplocks(SNUM(fsp->conn));
 
-       if (got_level2_oplock || got_a_none_oplock) {
-               if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
-                       fsp->oplock_type = LEVEL_II_OPLOCK;
+               if (!allow_level2) {
+                       granted = SMB2_LEASE_NONE;
                }
        }
 
-       /*
-        * Don't grant level2 to clients that don't want them
-        * or if we've turned them off.
-        */
-       if (fsp->oplock_type == LEVEL_II_OPLOCK && !allow_level2) {
-               fsp->oplock_type = NO_OPLOCK;
-       }
+       if (oplock_request == LEASE_OPLOCK) {
+               if (got_oplock) {
+                       granted &= ~SMB2_LEASE_HANDLE;
+               }
 
-       if (fsp->oplock_type == LEVEL_II_OPLOCK && !got_level2_oplock) {
-               /*
-                * We're the first level2 oplock. Indicate that in brlock.tdb.
-                */
-               struct byte_range_lock *brl;
+               fsp->oplock_type = LEASE_OPLOCK;
+
+               status = grant_fsp_lease(fsp, lck, lease, &lease_idx,
+                                        granted);
+               if (!NT_STATUS_IS_OK(status)) {
+                       return status;
 
-               brl = brl_get_locks(talloc_tos(), fsp);
-               if (brl != NULL) {
-                       brl_set_have_read_oplocks(brl, true);
-                       TALLOC_FREE(brl);
                }
+               *lease = fsp->lease->lease;
+               DEBUG(10, ("lease_state=%d\n", lease->lease_state));
+       } else {
+               if (got_handle_lease) {
+                       granted = SMB2_LEASE_NONE;
+               }
+
+               switch (granted) {
+               case SMB2_LEASE_READ|SMB2_LEASE_WRITE|SMB2_LEASE_HANDLE:
+                       fsp->oplock_type = BATCH_OPLOCK|EXCLUSIVE_OPLOCK;
+                       break;
+               case SMB2_LEASE_READ|SMB2_LEASE_WRITE:
+                       fsp->oplock_type = EXCLUSIVE_OPLOCK;
+                       break;
+               case SMB2_LEASE_READ|SMB2_LEASE_HANDLE:
+               case SMB2_LEASE_READ:
+                       fsp->oplock_type = LEVEL_II_OPLOCK;
+                       break;
+               default:
+                       fsp->oplock_type = NO_OPLOCK;
+                       break;
+               }
+
+               status = set_file_oplock(fsp);
+               if (!NT_STATUS_IS_OK(status)) {
+                       /*
+                        * Could not get the kernel oplock
+                        */
+                       fsp->oplock_type = NO_OPLOCK;
+               }
+       }
+
+       ok = set_share_mode(lck, fsp, get_current_uid(fsp->conn),
+                           req ? req->mid : 0,
+                           fsp->oplock_type,
+                           lease_idx);
+       if (!ok) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       ok = update_num_read_oplocks(fsp, lck);
+       if (!ok) {
+               del_share_mode(lck, fsp);
+               return NT_STATUS_INTERNAL_ERROR;
        }
 
        DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
                  fsp->oplock_type, fsp_str_dbg(fsp)));
+
+       return NT_STATUS_OK;
 }
 
 static bool request_timed_out(struct timeval request_time,
@@ -1542,62 +2221,114 @@ static bool request_timed_out(struct timeval request_time,
        return (timeval_compare(&end_time, &now) < 0);
 }
 
+static struct deferred_open_record *deferred_open_record_create(
+       bool delayed_for_oplocks,
+       bool async_open,
+       struct file_id id)
+{
+       struct deferred_open_record *record = NULL;
+
+       record = talloc(NULL, struct deferred_open_record);
+       if (record == NULL) {
+               return NULL;
+       }
+
+       *record = (struct deferred_open_record) {
+               .delayed_for_oplocks = delayed_for_oplocks,
+               .async_open = async_open,
+               .id = id,
+       };
+
+       return record;
+}
+
 struct defer_open_state {
-       struct smbd_server_connection *sconn;
+       struct smbXsrv_connection *xconn;
        uint64_t mid;
+       struct file_id file_id;
+       struct timeval request_time;
+       struct timeval timeout;
+       bool kernel_oplock;
+       uint32_t lease_type;
 };
 
 static void defer_open_done(struct tevent_req *req);
 
-/****************************************************************************
- Handle the 1 second delay in returning a SHARING_VIOLATION error.
-****************************************************************************/
-
+/**
+ * Defer an open and watch a locking.tdb record
+ *
+ * This defers an open that gets rescheduled once the locking.tdb record watch
+ * is triggered by a change to the record.
+ *
+ * It is used to defer opens that triggered an oplock break and for the SMB1
+ * sharing violation delay.
+ **/
 static void defer_open(struct share_mode_lock *lck,
                       struct timeval request_time,
                       struct timeval timeout,
                       struct smb_request *req,
-                      struct deferred_open_record *state)
+                      bool delayed_for_oplocks,
+                      bool kernel_oplock,
+                      struct file_id id)
 {
-       DEBUG(10,("defer_open_sharing_error: time [%u.%06u] adding deferred "
-                 "open entry for mid %llu\n",
-                 (unsigned int)request_time.tv_sec,
-                 (unsigned int)request_time.tv_usec,
-                 (unsigned long long)req->mid));
-
-       if (!push_deferred_open_message_smb(req, request_time, timeout,
-                                      state->id, (char *)state, sizeof(*state))) {
+       struct deferred_open_record *open_rec = NULL;
+       struct timeval abs_timeout;
+       struct defer_open_state *watch_state;
+       struct tevent_req *watch_req;
+       bool ok;
+
+       abs_timeout = timeval_sum(&request_time, &timeout);
+
+       DBG_DEBUG("request time [%s] timeout [%s] mid [%" PRIu64 "] "
+                 "delayed_for_oplocks [%s] kernel_oplock [%s] file_id [%s]\n",
+                 timeval_string(talloc_tos(), &request_time, false),
+                 timeval_string(talloc_tos(), &abs_timeout, false),
+                 req->mid,
+                 delayed_for_oplocks ? "yes" : "no",
+                 kernel_oplock ? "yes" : "no",
+                 file_id_string_tos(&id));
+
+       open_rec = deferred_open_record_create(delayed_for_oplocks,
+                                              false,
+                                              id);
+       if (open_rec == NULL) {
                TALLOC_FREE(lck);
-               exit_server("push_deferred_open_message_smb failed");
+               exit_server("talloc failed");
        }
-       if (lck) {
-               struct defer_open_state *watch_state;
-               struct tevent_req *watch_req;
-               bool ret;
 
-               watch_state = talloc(req->sconn, struct defer_open_state);
-               if (watch_state == NULL) {
-                       exit_server("talloc failed");
-               }
-               watch_state->sconn = req->sconn;
-               watch_state->mid = req->mid;
+       watch_state = talloc(open_rec, struct defer_open_state);
+       if (watch_state == NULL) {
+               exit_server("talloc failed");
+       }
+       watch_state->xconn = req->xconn;
+       watch_state->mid = req->mid;
+       watch_state->file_id = lck->data->id;
+       watch_state->request_time = request_time;
+       watch_state->timeout = timeout;
+       watch_state->kernel_oplock = kernel_oplock;
+       watch_state->lease_type = get_lease_type_from_share_mode(lck->data);
+
+       DBG_DEBUG("defering mid %" PRIu64 "\n", req->mid);
 
-               DEBUG(10, ("defering mid %llu\n",
-                          (unsigned long long)req->mid));
+       watch_req = dbwrap_watched_watch_send(watch_state,
+                                             req->sconn->ev_ctx,
+                                             lck->data->record,
+                                             (struct server_id){0});
+       if (watch_req == NULL) {
+               exit_server("Could not watch share mode record");
+       }
+       tevent_req_set_callback(watch_req, defer_open_done, watch_state);
 
-               watch_req = dbwrap_record_watch_send(
-                       watch_state, req->sconn->ev_ctx, lck->data->record,
-                       req->sconn->msg_ctx);
-               if (watch_req == NULL) {
-                       exit_server("Could not watch share mode record");
-               }
-               tevent_req_set_callback(watch_req, defer_open_done,
-                                       watch_state);
+       ok = tevent_req_set_endtime(watch_req, req->sconn->ev_ctx, abs_timeout);
+       if (!ok) {
+               exit_server("tevent_req_set_endtime failed");
+       }
 
-               ret = tevent_req_set_endtime(
-                       watch_req, req->sconn->ev_ctx,
-                       timeval_sum(&request_time, &timeout));
-               SMB_ASSERT(ret);
+       ok = push_deferred_open_message_smb(req, request_time, timeout,
+                                           open_rec->id, open_rec);
+       if (!ok) {
+               TALLOC_FREE(lck);
+               exit_server("push_deferred_open_message_smb failed");
        }
 }
 
@@ -1605,40 +2336,138 @@ static void defer_open_done(struct tevent_req *req)
 {
        struct defer_open_state *state = tevent_req_callback_data(
                req, struct defer_open_state);
+       struct tevent_req *watch_req = NULL;
+       struct share_mode_lock *lck = NULL;
+       bool schedule_req = true;
+       struct timeval timeout;
        NTSTATUS status;
-       bool ret;
+       bool ok;
 
-       status = dbwrap_record_watch_recv(req, talloc_tos(), NULL);
+       status = dbwrap_watched_watch_recv(req, talloc_tos(), NULL, NULL,
+                                         NULL);
        TALLOC_FREE(req);
        if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(5, ("dbwrap_record_watch_recv returned %s\n",
+               DEBUG(5, ("dbwrap_watched_watch_recv returned %s\n",
                          nt_errstr(status)));
                /*
                 * Even if it failed, retry anyway. TODO: We need a way to
                 * tell a re-scheduled open about that error.
                 */
+               if (NT_STATUS_EQUAL(status, NT_STATUS_IO_TIMEOUT) &&
+                   state->kernel_oplock)
+               {
+                       /*
+                        * If we reschedule but the kernel oplock is still held
+                        * we would block in the second open as that will be a
+                        * blocking open attempt.
+                        */
+                       exit_server("Kernel oplock holder didn't "
+                                   "respond to break message");
+               }
+       }
+
+       if (state->kernel_oplock) {
+               lck = get_existing_share_mode_lock(talloc_tos(), state->file_id);
+               if (lck != NULL) {
+                       uint32_t lease_type;
+
+                       lease_type = get_lease_type_from_share_mode(lck->data);
+
+                       if ((lease_type != 0) &&
+                           (lease_type == state->lease_type))
+                       {
+                               DBG_DEBUG("Unchanged lease: %" PRIu32 "\n",
+                                         lease_type);
+                               schedule_req = false;
+                       }
+               }
        }
 
-       DEBUG(10, ("scheduling mid %llu\n", (unsigned long long)state->mid));
+       if (schedule_req) {
+               DBG_DEBUG("scheduling mid %" PRIu64 "\n", state->mid);
 
-       ret = schedule_deferred_open_message_smb(state->sconn, state->mid);
-       SMB_ASSERT(ret);
-       TALLOC_FREE(state);
+               ok = schedule_deferred_open_message_smb(state->xconn,
+                                                       state->mid);
+               if (!ok) {
+                       exit_server("schedule_deferred_open_message_smb failed");
+               }
+               TALLOC_FREE(lck);
+               TALLOC_FREE(state);
+               return;
+       }
+
+       DBG_DEBUG("Keep waiting for oplock release for [%s/%s%s] "
+                 "mid: %" PRIu64 "\n",
+                 lck->data->servicepath,
+                 lck->data->base_name,
+                 lck->data->stream_name ? lck->data->stream_name : "",
+                 state->mid);
+
+       watch_req = dbwrap_watched_watch_send(state,
+                                             state->xconn->ev_ctx,
+                                             lck->data->record,
+                                             (struct server_id){0});
+       if (watch_req == NULL) {
+               exit_server("Could not watch share mode record");
+       }
+       tevent_req_set_callback(watch_req, defer_open_done, state);
+
+       timeout = timeval_sum(&state->request_time, &state->timeout);
+       ok = tevent_req_set_endtime(watch_req, state->xconn->ev_ctx, timeout);
+       if (!ok) {
+               exit_server("tevent_req_set_endtime failed");
+       }
+
+       TALLOC_FREE(lck);
 }
 
+/**
+ * Reschedule an open for immediate execution
+ **/
+static void retry_open(struct timeval request_time,
+                      struct smb_request *req,
+                      struct file_id id)
+{
+       struct deferred_open_record *open_rec = NULL;
+       bool ok;
+
+       DBG_DEBUG("request time [%s] mid [%" PRIu64 "] file_id [%s]\n",
+                 timeval_string(talloc_tos(), &request_time, false),
+                 req->mid,
+                 file_id_string_tos(&id));
+
+       open_rec = deferred_open_record_create(false, false, id);
+       if (open_rec == NULL) {
+               exit_server("talloc failed");
+       }
+
+       ok = push_deferred_open_message_smb(req,
+                                           request_time,
+                                           timeval_set(0, 0),
+                                           id,
+                                           open_rec);
+       if (!ok) {
+               exit_server("push_deferred_open_message_smb failed");
+       }
+
+       ok = schedule_deferred_open_message_smb(req->xconn, req->mid);
+       if (!ok) {
+               exit_server("schedule_deferred_open_message_smb failed");
+       }
+}
 
 /****************************************************************************
  On overwrite open ensure that the attributes match.
 ****************************************************************************/
 
 static bool open_match_attributes(connection_struct *conn,
-                                 uint32 old_dos_attr,
-                                 uint32 new_dos_attr,
+                                 uint32_t old_dos_attr,
+                                 uint32_t new_dos_attr,
                                  mode_t existing_unx_mode,
                                  mode_t new_unx_mode,
                                  mode_t *returned_unx_mode)
 {
-       uint32 noarch_old_dos_attr, noarch_new_dos_attr;
+       uint32_t noarch_old_dos_attr, noarch_new_dos_attr;
 
        noarch_old_dos_attr = (old_dos_attr & ~FILE_ATTRIBUTE_ARCHIVE);
        noarch_new_dos_attr = (new_dos_attr & ~FILE_ATTRIBUTE_ARCHIVE);
@@ -1684,11 +2513,11 @@ static NTSTATUS fcb_or_dos_open(struct smb_request *req,
                                files_struct *fsp_to_dup_into,
                                const struct smb_filename *smb_fname,
                                struct file_id id,
-                               uint16 file_pid,
+                               uint16_t file_pid,
                                uint64_t vuid,
-                               uint32 access_mask,
-                               uint32 share_access,
-                               uint32 create_options)
+                               uint32_t access_mask,
+                               uint32_t share_access,
+                               uint32_t create_options)
 {
        files_struct *fsp;
 
@@ -1740,10 +2569,9 @@ static NTSTATUS fcb_or_dos_open(struct smb_request *req,
 static void schedule_defer_open(struct share_mode_lock *lck,
                                struct file_id id,
                                struct timeval request_time,
-                               struct smb_request *req)
+                               struct smb_request *req,
+                               bool kernel_oplock)
 {
-       struct deferred_open_record state;
-
        /* This is a relative time, added to the absolute
           request_time value to get the absolute timeout time.
           Note that if this is the second or greater time we enter
@@ -1762,38 +2590,54 @@ static void schedule_defer_open(struct share_mode_lock *lck,
 
        timeout = timeval_set(OPLOCK_BREAK_TIMEOUT*2, 0);
 
-       /* Nothing actually uses state.delayed_for_oplocks
-          but it's handy to differentiate in debug messages
-          between a 30 second delay due to oplock break, and
-          a 1 second delay for share mode conflicts. */
-
-       state.delayed_for_oplocks = True;
-       state.async_open = false;
-       state.id = id;
-
-       if (!request_timed_out(request_time, timeout)) {
-               defer_open(lck, request_time, timeout, req, &state);
+       if (request_timed_out(request_time, timeout)) {
+               return;
        }
+
+       defer_open(lck, request_time, timeout, req, true, kernel_oplock, id);
 }
 
 /****************************************************************************
  Reschedule an open call that went asynchronous.
 ****************************************************************************/
 
+static void schedule_async_open_timer(struct tevent_context *ev,
+                                     struct tevent_timer *te,
+                                     struct timeval current_time,
+                                     void *private_data)
+{
+       exit_server("async open timeout");
+}
+
 static void schedule_async_open(struct timeval request_time,
                                struct smb_request *req)
 {
-       struct deferred_open_record state;
-       struct timeval timeout;
+       struct deferred_open_record *open_rec = NULL;
+       struct timeval timeout = timeval_set(20, 0);
+       bool ok;
 
-       timeout = timeval_set(20, 0);
+       if (request_timed_out(request_time, timeout)) {
+               return;
+       }
+
+       open_rec = deferred_open_record_create(false, true, (struct file_id){0});
+       if (open_rec == NULL) {
+               exit_server("deferred_open_record_create failed");
+       }
 
-       ZERO_STRUCT(state);
-       state.delayed_for_oplocks = false;
-       state.async_open = true;
+       ok = push_deferred_open_message_smb(req, request_time, timeout,
+                                           (struct file_id){0}, open_rec);
+       if (!ok) {
+               exit_server("push_deferred_open_message_smb failed");
+       }
 
-       if (!request_timed_out(request_time, timeout)) {
-               defer_open(NULL, request_time, timeout, req, &state);
+       open_rec->te = tevent_add_timer(req->sconn->ev_ctx,
+                                       req,
+                                       timeval_current_ofs(20, 0),
+                                       schedule_async_open_timer,
+                                       open_rec);
+       if (open_rec->te == NULL) {
+               exit_server("tevent_add_timer failed");
        }
 }
 
@@ -1816,7 +2660,7 @@ static NTSTATUS smbd_calculate_maximum_allowed_access(
                return NT_STATUS_OK;
        }
 
-       status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
+       status = SMB_VFS_GET_NT_ACL(conn, smb_fname,
                                    (SECINFO_OWNER |
                                     SECINFO_GROUP |
                                     SECINFO_DACL),
@@ -1881,6 +2725,12 @@ NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
        uint32_t orig_access_mask = access_mask;
        uint32_t rejected_share_access;
 
+       if (access_mask & SEC_MASK_INVALID) {
+               DBG_DEBUG("access_mask [%8x] contains invalid bits\n",
+                         access_mask);
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
        /*
         * Convert GENERIC bits to specific bits.
         */
@@ -1925,11 +2775,9 @@ NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
  Return true if this is a state pointer to an asynchronous create.
 ****************************************************************************/
 
-bool is_deferred_open_async(const void *ptr)
+bool is_deferred_open_async(const struct deferred_open_record *rec)
 {
-       const struct deferred_open_record *state = (const struct deferred_open_record *)ptr;
-
-       return state->async_open;
+       return rec->async_open;
 }
 
 static bool clear_ads(uint32_t create_disposition)
@@ -2002,7 +2850,6 @@ static int disposition_to_open_flags(uint32_t create_disposition)
 }
 
 static int calculate_open_access_flags(uint32_t access_mask,
-                                      int oplock_request,
                                       uint32_t private_flags)
 {
        bool need_write, need_read;
@@ -2038,12 +2885,13 @@ static int calculate_open_access_flags(uint32_t access_mask,
 
 static NTSTATUS open_file_ntcreate(connection_struct *conn,
                            struct smb_request *req,
-                           uint32 access_mask,         /* access bits (FILE_READ_DATA etc.) */
-                           uint32 share_access,        /* share constants (FILE_SHARE_READ etc) */
-                           uint32 create_disposition,  /* FILE_OPEN_IF etc. */
-                           uint32 create_options,      /* options such as delete on close. */
-                           uint32 new_dos_attributes,  /* attributes used for new file. */
+                           uint32_t access_mask,               /* access bits (FILE_READ_DATA etc.) */
+                           uint32_t share_access,      /* share constants (FILE_SHARE_READ etc) */
+                           uint32_t create_disposition,        /* FILE_OPEN_IF etc. */
+                           uint32_t create_options,    /* options such as delete on close. */
+                           uint32_t new_dos_attributes,        /* attributes used for new file. */
                            int oplock_request,         /* internal Samba oplock codes. */
+                           struct smb2_lease *lease,
                                                        /* Information (FILE_EXISTS etc.) */
                            uint32_t private_flags,     /* Samba specific flags. */
                            int *pinfo,
@@ -2061,10 +2909,10 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        mode_t new_unx_mode = (mode_t)0;
        mode_t unx_mode = (mode_t)0;
        int info;
-       uint32 existing_dos_attributes = 0;
+       uint32_t existing_dos_attributes = 0;
        struct timeval request_time = timeval_zero();
        struct share_mode_lock *lck = NULL;
-       uint32 open_access_mask = access_mask;
+       uint32_t open_access_mask = access_mask;
        NTSTATUS status;
        char *parent_dir;
        SMB_STRUCT_STAT saved_stat = smb_fname->st;
@@ -2125,9 +2973,12 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                   create_options, (unsigned int)unx_mode, oplock_request,
                   (unsigned int)private_flags));
 
-       if ((req == NULL) && ((oplock_request & INTERNAL_OPEN_ONLY) == 0)) {
-               DEBUG(0, ("No smb request but not an internal only open!\n"));
-               return NT_STATUS_INTERNAL_ERROR;
+       if (req == NULL) {
+               /* Ensure req == NULL means INTERNAL_OPEN_ONLY */
+               SMB_ASSERT(((oplock_request & INTERNAL_OPEN_ONLY) != 0));
+       } else {
+               /* And req != NULL means no INTERNAL_OPEN_ONLY */
+               SMB_ASSERT(((oplock_request & INTERNAL_OPEN_ONLY) == 0));
        }
 
        /*
@@ -2135,10 +2986,10 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         */
 
        if (req) {
-               void *ptr;
+               struct deferred_open_record *open_rec;
                if (get_deferred_open_message_state(req,
                                &request_time,
-                               &ptr)) {
+                               &open_rec)) {
                        /* Remember the absolute time of the original
                           request with this mid. We'll use it later to
                           see if this has timed out. */
@@ -2146,13 +2997,13 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                        /* If it was an async create retry, the file
                           didn't exist. */
 
-                       if (is_deferred_open_async(ptr)) {
+                       if (is_deferred_open_async(open_rec)) {
                                SET_STAT_INVALID(smb_fname->st);
                                file_existed = false;
                        }
 
                        /* Ensure we don't reprocess this message. */
-                       remove_deferred_open_message_smb(req->sconn, req->mid);
+                       remove_deferred_open_message_smb(req->xconn, req->mid);
 
                        first_open_attempt = false;
                }
@@ -2161,7 +3012,18 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        if (!posix_open) {
                new_dos_attributes &= SAMBA_ATTRIBUTES_MASK;
                if (file_existed) {
-                       existing_dos_attributes = dos_mode(conn, smb_fname);
+                       /*
+                        * Only use stored DOS attributes for checks
+                        * against requested attributes (below via
+                        * open_match_attributes()), cf bug #11992
+                        * for details. -slow
+                        */
+                       uint32_t attr = 0;
+
+                       status = SMB_VFS_GET_DOS_ATTRIBUTES(conn, smb_fname, &attr);
+                       if (NT_STATUS_IS_OK(status)) {
+                               existing_dos_attributes = attr;
+                       }
                }
        }
 
@@ -2173,7 +3035,8 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        /* this is for OS/2 long file names - say we don't support them */
-       if (!lp_posix_pathnames() && strstr(smb_fname->base_name,".+,;=[].")) {
+       if (req != NULL && !req->posix_pathnames &&
+                       strstr(smb_fname->base_name,".+,;=[].")) {
                /* OS/2 Workplace shell fix may be main code stream in a later
                 * release. */
                DEBUG(5,("open_file_ntcreate: OS/2 long filenames are not "
@@ -2285,8 +3148,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         * mean the same thing under DOS and Unix.
         */
 
-       flags = calculate_open_access_flags(access_mask, oplock_request,
-                                           private_flags);
+       flags = calculate_open_access_flags(access_mask, private_flags);
 
        /*
         * Currently we only look at FILE_WRITE_THROUGH for create options.
@@ -2349,10 +3211,9 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        fsp->access_mask = open_access_mask; /* We change this to the
                                              * requested access_mask after
                                              * the open is done. */
-       fsp->posix_open = posix_open;
-
-       /* Ensure no SAMBA_PRIVATE bits can be set. */
-       fsp->oplock_type = (oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK);
+       if (posix_open) {
+               fsp->posix_flags |= FSP_POSIX_FLAGS_ALL;
+       }
 
        if (timeval_is_zero(&request_time)) {
                request_time = fsp->open_time;
@@ -2378,10 +3239,16 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                             open_access_mask, &new_file_created);
 
        if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_NETWORK_BUSY)) {
-               struct deferred_open_record state;
+               bool delay;
 
                /*
-                * EWOULDBLOCK/EAGAIN maps to NETWORK_BUSY.
+                * This handles the kernel oplock case:
+                *
+                * the file has an active kernel oplock and the open() returned
+                * EWOULDBLOCK/EAGAIN which maps to NETWORK_BUSY.
+                *
+                * "Samba locking.tdb oplocks" are handled below after acquiring
+                * the sharemode lock with get_share_mode_lock().
                 */
                if (file_existed && S_ISFIFO(fsp->fsp_name->st.st_ex_mode)) {
                        DEBUG(10, ("FIFO busy\n"));
@@ -2398,11 +3265,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
                lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id);
                if (lck == NULL) {
-                       state.delayed_for_oplocks = false;
-                       state.async_open = false;
-                       state.id = fsp->file_id;
-                       defer_open(NULL, request_time, timeval_set(0, 0),
-                                  req, &state);
+                       retry_open(request_time, req, fsp->file_id);
                        DEBUG(10, ("No share mode lock found after "
                                   "EWOULDBLOCK, retrying sync\n"));
                        return NT_STATUS_SHARING_VIOLATION;
@@ -2412,8 +3275,12 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                        smb_panic("validate_oplock_types failed");
                }
 
-               if (delay_for_oplock(fsp, 0, lck, false, create_disposition)) {
-                       schedule_defer_open(lck, fsp->file_id, request_time, req);
+               delay = delay_for_oplock(fsp, 0, lease, lck, false,
+                                        create_disposition,
+                                        first_open_attempt);
+               if (delay) {
+                       schedule_defer_open(lck, fsp->file_id, request_time,
+                                           req, true);
                        TALLOC_FREE(lck);
                        DEBUG(10, ("Sent oplock break request to kernel "
                                   "oplock holder\n"));
@@ -2424,10 +3291,8 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                 * No oplock from Samba around. Immediately retry with
                 * a blocking open.
                 */
-               state.delayed_for_oplocks = false;
-               state.async_open = false;
-               state.id = fsp->file_id;
-               defer_open(lck, request_time, timeval_set(0, 0), req, &state);
+               retry_open(request_time, req, fsp->file_id);
+
                TALLOC_FREE(lck);
                DEBUG(10, ("No Samba oplock around after EWOULDBLOCK. "
                           "Retrying sync\n"));
@@ -2441,6 +3306,17 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                return fsp_open;
        }
 
+       if (new_file_created) {
+               /*
+                * As we atomically create using O_CREAT|O_EXCL,
+                * then if new_file_created is true, then
+                * file_existed *MUST* have been false (even
+                * if the file was previously detected as being
+                * there).
+                */
+               file_existed = false;
+       }
+
        if (file_existed && !check_same_dev_ino(&saved_stat, &smb_fname->st)) {
                /*
                 * The file did exist, but some other (local or NFS)
@@ -2521,19 +3397,31 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                file_existed = true;
        }
 
-       if ((req != NULL) &&
-           delay_for_oplock(
-                   fsp, oplock_request, lck,
-                   NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION),
-                   create_disposition)) {
-               schedule_defer_open(lck, fsp->file_id, request_time, req);
-               TALLOC_FREE(lck);
-               fd_close(fsp);
-               return NT_STATUS_SHARING_VIOLATION;
+       if (req != NULL) {
+               /*
+                * Handle oplocks, deferring the request if delay_for_oplock()
+                * triggered a break message and we have to wait for the break
+                * response.
+                */
+               bool delay;
+               bool sharing_violation = NT_STATUS_EQUAL(
+                       status, NT_STATUS_SHARING_VIOLATION);
+
+               delay = delay_for_oplock(fsp, oplock_request, lease, lck,
+                                        sharing_violation,
+                                        create_disposition,
+                                        first_open_attempt);
+               if (delay) {
+                       schedule_defer_open(lck, fsp->file_id,
+                                           request_time, req, false);
+                       TALLOC_FREE(lck);
+                       fd_close(fsp);
+                       return NT_STATUS_SHARING_VIOLATION;
+               }
        }
 
        if (!NT_STATUS_IS_OK(status)) {
-               uint32 can_access_mask;
+               uint32_t can_access_mask;
                bool can_access = True;
 
                SMB_ASSERT(NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION));
@@ -2609,7 +3497,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                    !conn->sconn->using_smb2 &&
                    lp_defer_sharing_violations()) {
                        struct timeval timeout;
-                       struct deferred_open_record state;
                        int timeout_usecs;
 
                        /* this is a hack to speed up torture tests
@@ -2628,20 +3515,9 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
                        timeout = timeval_set(0, timeout_usecs);
 
-                       /* Nothing actually uses state.delayed_for_oplocks
-                          but it's handy to differentiate in debug messages
-                          between a 30 second delay due to oplock break, and
-                          a 1 second delay for share mode conflicts. */
-
-                       state.delayed_for_oplocks = False;
-                       state.async_open = false;
-                       state.id = id;
-
-                       if ((req != NULL)
-                           && !request_timed_out(request_time,
-                                                 timeout)) {
-                               defer_open(lck, request_time, timeout,
-                                          req, &state);
+                       if (!request_timed_out(request_time, timeout)) {
+                               defer_open(lck, request_time, timeout, req,
+                                          false, false, id);
                        }
                }
 
@@ -2659,7 +3535,20 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                return status;
        }
 
-       grant_fsp_oplock_type(fsp, lck, oplock_request);
+       /* Should we atomically (to the client at least) truncate ? */
+       if ((!new_file_created) &&
+           (flags2 & O_TRUNC) &&
+           (!S_ISFIFO(fsp->fsp_name->st.st_ex_mode))) {
+               int ret;
+
+               ret = vfs_set_filelen(fsp, 0);
+               if (ret != 0) {
+                       status = map_nt_error_from_unix(errno);
+                       TALLOC_FREE(lck);
+                       fd_close(fsp);
+                       return status;
+               }
+       }
 
        /*
         * We have the share entry *locked*.....
@@ -2668,7 +3557,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        /* Delete streams if create_disposition requires it */
        if (!new_file_created && clear_ads(create_disposition) &&
            !is_ntfs_stream_smb_fname(smb_fname)) {
-               status = delete_all_streams(conn, smb_fname->base_name);
+               status = delete_all_streams(conn, smb_fname);
                if (!NT_STATUS_IS_OK(status)) {
                        TALLOC_FREE(lck);
                        fd_close(fsp);
@@ -2686,6 +3575,15 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
        if (fsp->fh->fd != -1 && lp_kernel_share_modes(SNUM(conn))) {
                int ret_flock;
+               /*
+                * Beware: streams implementing VFS modules may
+                * implement streams in a way that fsp will have the
+                * basefile open in the fsp fd, so lacking a distinct
+                * fd for the stream kernel_flock will apply on the
+                * basefile which is wrong. The actual check is
+                * deferred to the VFS module implementing the
+                * kernel_flock call.
+                */
                ret_flock = SMB_VFS_KERNEL_FLOCK(fsp, share_access, access_mask);
                if(ret_flock == -1 ){
 
@@ -2694,6 +3592,8 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
                        return NT_STATUS_SHARING_VIOLATION;
                }
+
+               fsp->kernel_share_modes_taken = true;
        }
 
        /*
@@ -2720,9 +3620,19 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        if (file_existed) {
-               /* stat opens on existing files don't get oplocks. */
-               if (is_stat_open(open_access_mask)) {
-                       fsp->oplock_type = NO_OPLOCK;
+               /*
+                * stat opens on existing files don't get oplocks.
+                * They can get leases.
+                *
+                * Note that we check for stat open on the *open_access_mask*,
+                * i.e. the access mask we actually used to do the open,
+                * not the one the client asked for (which is in
+                * fsp->access_mask). This is due to the fact that
+                * FILE_OVERWRITE and FILE_OVERWRITE_IF add in O_TRUNC,
+                * which adds FILE_WRITE_DATA to open_access_mask.
+                */
+               if (is_stat_open(open_access_mask) && lease == NULL) {
+                       oplock_request = NO_OPLOCK;
                }
        }
 
@@ -2744,21 +3654,11 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         * Setup the oplock info in both the shared memory and
         * file structs.
         */
-
-       status = set_file_oplock(fsp);
+       status = grant_fsp_oplock_type(req, fsp, lck, oplock_request, lease);
        if (!NT_STATUS_IS_OK(status)) {
-               /*
-                * Could not get the kernel oplock
-                */
-               fsp->oplock_type = NO_OPLOCK;
-       }
-
-       if (!set_share_mode(lck, fsp, get_current_uid(conn),
-                           req ? req->mid : 0,
-                           fsp->oplock_type)) {
                TALLOC_FREE(lck);
                fd_close(fsp);
-               return NT_STATUS_NO_MEMORY;
+               return status;
        }
 
        /* Handle strange delete on close create semantics. */
@@ -2779,8 +3679,8 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        if (info != FILE_WAS_OPENED) {
-               /* Files should be initially set as archive */
-               if (lp_map_archive(SNUM(conn)) ||
+               /* Overwritten files should be initially set as archive */
+               if ((info == FILE_WAS_OVERWRITTEN && lp_map_archive(SNUM(conn))) ||
                    lp_store_dos_attributes(SNUM(conn))) {
                        if (!posix_open) {
                                if (file_set_dosmode(conn, smb_fname,
@@ -2862,42 +3762,9 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        return NT_STATUS_OK;
 }
 
-
-/****************************************************************************
- Open a file for for write to ensure that we can fchmod it.
-****************************************************************************/
-
-NTSTATUS open_file_fchmod(connection_struct *conn,
-                         struct smb_filename *smb_fname,
-                         files_struct **result)
-{
-       if (!VALID_STAT(smb_fname->st)) {
-               return NT_STATUS_INVALID_PARAMETER;
-       }
-
-        return SMB_VFS_CREATE_FILE(
-               conn,                                   /* conn */
-               NULL,                                   /* req */
-               0,                                      /* root_dir_fid */
-               smb_fname,                              /* fname */
-               FILE_WRITE_DATA,                        /* access_mask */
-               (FILE_SHARE_READ | FILE_SHARE_WRITE |   /* share_access */
-                   FILE_SHARE_DELETE),
-               FILE_OPEN,                              /* create_disposition*/
-               0,                                      /* create_options */
-               0,                                      /* file_attributes */
-               INTERNAL_OPEN_ONLY,                     /* oplock_request */
-               0,                                      /* allocation_size */
-               0,                                      /* private_flags */
-               NULL,                                   /* sd */
-               NULL,                                   /* ea_list */
-               result,                                 /* result */
-               NULL);                                  /* pinfo */
-}
-
 static NTSTATUS mkdir_internal(connection_struct *conn,
                               struct smb_filename *smb_dname,
-                              uint32 file_attributes)
+                              uint32_t file_attributes)
 {
        mode_t mode;
        char *parent_dir = NULL;
@@ -2936,7 +3803,7 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
                return status;
        }
 
-       if (SMB_VFS_MKDIR(conn, smb_dname->base_name, mode) != 0) {
+       if (SMB_VFS_MKDIR(conn, smb_dname, mode) != 0) {
                return map_nt_error_from_unix(errno);
        }
 
@@ -2965,7 +3832,7 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
 
        if (lp_inherit_permissions(SNUM(conn))) {
                inherit_access_posix_acl(conn, parent_dir,
-                                        smb_dname->base_name, mode);
+                                        smb_dname, mode);
                need_re_stat = true;
        }
 
@@ -2978,7 +3845,7 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
                 */
                if ((mode & ~(S_IRWXU|S_IRWXG|S_IRWXO)) &&
                    (mode & ~smb_dname->st.st_ex_mode)) {
-                       SMB_VFS_CHMOD(conn, smb_dname->base_name,
+                       SMB_VFS_CHMOD(conn, smb_dname,
                                      (smb_dname->st.st_ex_mode |
                                          (mode & ~smb_dname->st.st_ex_mode)));
                        need_re_stat = true;
@@ -2986,7 +3853,7 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
        }
 
        /* Change the owner if required. */
-       if (lp_inherit_owner(SNUM(conn))) {
+       if (lp_inherit_owner(SNUM(conn)) != INHERIT_OWNER_NO) {
                change_dir_owner_to_parent(conn, parent_dir,
                                           smb_dname->base_name,
                                           &smb_dname->st);
@@ -3014,11 +3881,11 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
 static NTSTATUS open_directory(connection_struct *conn,
                               struct smb_request *req,
                               struct smb_filename *smb_dname,
-                              uint32 access_mask,
-                              uint32 share_access,
-                              uint32 create_disposition,
-                              uint32 create_options,
-                              uint32 file_attributes,
+                              uint32_t access_mask,
+                              uint32_t share_access,
+                              uint32_t create_disposition,
+                              uint32_t create_options,
+                              uint32_t file_attributes,
                               int *pinfo,
                               files_struct **result)
 {
@@ -3028,6 +3895,7 @@ static NTSTATUS open_directory(connection_struct *conn,
        NTSTATUS status;
        struct timespec mtimespec;
        int info = 0;
+       bool ok;
 
        if (is_ntfs_stream_smb_fname(smb_dname)) {
                DEBUG(2, ("open_directory: %s is a stream name!\n",
@@ -3132,6 +4000,25 @@ static NTSTATUS open_directory(connection_struct *conn,
                                                        nt_errstr(status)));
                                                return status;
                                        }
+
+                                       /*
+                                        * If mkdir_internal() returned
+                                        * NT_STATUS_OBJECT_NAME_COLLISION
+                                        * we still must lstat the path.
+                                        */
+
+                                       if (SMB_VFS_LSTAT(conn, smb_dname)
+                                                       == -1) {
+                                               DEBUG(2, ("Could not stat "
+                                                       "directory '%s' just "
+                                                       "opened: %s\n",
+                                                       smb_fname_str_dbg(
+                                                               smb_dname),
+                                                       strerror(errno)));
+                                               return map_nt_error_from_unix(
+                                                               errno);
+                                       }
+
                                        info = FILE_WAS_OPENED;
                                }
                        }
@@ -3196,7 +4083,9 @@ static NTSTATUS open_directory(connection_struct *conn,
        fsp->oplock_type = NO_OPLOCK;
        fsp->sent_oplock_break = NO_BREAK_SENT;
        fsp->is_directory = True;
-       fsp->posix_open = (file_attributes & FILE_FLAG_POSIX_SEMANTICS) ? True : False;
+       if (file_attributes & FILE_FLAG_POSIX_SEMANTICS) {
+               fsp->posix_flags |= FSP_POSIX_FLAGS_ALL;
+       }
        status = fsp_set_smb_fname(fsp, smb_dname);
        if (!NT_STATUS_IS_OK(status)) {
                file_free(req, fsp);
@@ -3245,8 +4134,18 @@ static NTSTATUS open_directory(connection_struct *conn,
                return status;
        }
 
-       /* Ensure there was no race condition. */
-       if (!check_same_stat(&smb_dname->st, &fsp->fsp_name->st)) {
+       if(!S_ISDIR(fsp->fsp_name->st.st_ex_mode)) {
+               DEBUG(5,("open_directory: %s is not a directory !\n",
+                        smb_fname_str_dbg(smb_dname)));
+                fd_close(fsp);
+                file_free(req, fsp);
+               return NT_STATUS_NOT_A_DIRECTORY;
+       }
+
+       /* Ensure there was no race condition.  We need to check
+        * dev/inode but not permissions, as these can change
+        * legitimately */
+       if (!check_same_dev_ino(&smb_dname->st, &fsp->fsp_name->st)) {
                DEBUG(5,("open_directory: stat struct differs for "
                        "directory %s.\n",
                        smb_fname_str_dbg(smb_dname)));
@@ -3284,8 +4183,10 @@ static NTSTATUS open_directory(connection_struct *conn,
                return status;
        }
 
-       if (!set_share_mode(lck, fsp, get_current_uid(conn),
-                           req ? req->mid : 0, NO_OPLOCK)) {
+       ok = set_share_mode(lck, fsp, get_current_uid(conn),
+                           req ? req->mid : 0, NO_OPLOCK,
+                           UINT32_MAX);
+       if (!ok) {
                TALLOC_FREE(lck);
                fd_close(fsp);
                file_free(req, fsp);
@@ -3350,12 +4251,14 @@ NTSTATUS create_directory(connection_struct *conn, struct smb_request *req,
                FILE_DIRECTORY_FILE,                    /* create_options */
                FILE_ATTRIBUTE_DIRECTORY,               /* file_attributes */
                0,                                      /* oplock_request */
+               NULL,                                   /* lease */
                0,                                      /* allocation_size */
                0,                                      /* private_flags */
                NULL,                                   /* sd */
                NULL,                                   /* ea_list */
                &fsp,                                   /* result */
-               NULL);                                  /* pinfo */
+               NULL,                                   /* pinfo */
+               NULL, NULL);                            /* create context */
 
        if (NT_STATUS_IS_OK(status)) {
                close_file(req, fsp, NORMAL_CLOSE);
@@ -3408,8 +4311,11 @@ void msg_file_was_renamed(struct messaging_context *msg,
                stream_name = NULL;
        }
 
-       smb_fname = synthetic_smb_fname(talloc_tos(), base_name,
-                                       stream_name, NULL);
+       smb_fname = synthetic_smb_fname(talloc_tos(),
+                                       base_name,
+                                       stream_name,
+                                       NULL,
+                                       0);
        if (smb_fname == NULL) {
                return;
        }
@@ -3455,8 +4361,8 @@ void msg_file_was_renamed(struct messaging_context *msg,
  * If that works, delete them all by setting the delete on close and close.
  */
 
-NTSTATUS open_streams_for_delete(connection_struct *conn,
-                                       const char *fname)
+static NTSTATUS open_streams_for_delete(connection_struct *conn,
+                                       const struct smb_filename *smb_fname)
 {
        struct stream_struct *stream_info = NULL;
        files_struct **streams = NULL;
@@ -3465,7 +4371,7 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
        TALLOC_CTX *frame = talloc_stackframe();
        NTSTATUS status;
 
-       status = vfs_streaminfo(conn, NULL, fname, talloc_tos(),
+       status = vfs_streaminfo(conn, NULL, smb_fname, talloc_tos(),
                                &num_streams, &stream_info);
 
        if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_IMPLEMENTED)
@@ -3497,30 +4403,34 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
        }
 
        for (i=0; i<num_streams; i++) {
-               struct smb_filename *smb_fname;
+               struct smb_filename *smb_fname_cp;
 
                if (strequal(stream_info[i].name, "::$DATA")) {
                        streams[i] = NULL;
                        continue;
                }
 
-               smb_fname = synthetic_smb_fname(
-                       talloc_tos(), fname, stream_info[i].name, NULL);
-               if (smb_fname == NULL) {
+               smb_fname_cp = synthetic_smb_fname(talloc_tos(),
+                                       smb_fname->base_name,
+                                       stream_info[i].name,
+                                       NULL,
+                                       (smb_fname->flags &
+                                               ~SMB_FILENAME_POSIX_PATH));
+               if (smb_fname_cp == NULL) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
 
-               if (SMB_VFS_STAT(conn, smb_fname) == -1) {
+               if (SMB_VFS_STAT(conn, smb_fname_cp) == -1) {
                        DEBUG(10, ("Unable to stat stream: %s\n",
-                                  smb_fname_str_dbg(smb_fname)));
+                                  smb_fname_str_dbg(smb_fname_cp)));
                }
 
                status = SMB_VFS_CREATE_FILE(
                         conn,                  /* conn */
                         NULL,                  /* req */
                         0,                     /* root_dir_fid */
-                        smb_fname,             /* fname */
+                        smb_fname_cp,          /* fname */
                         DELETE_ACCESS,         /* access_mask */
                         (FILE_SHARE_READ |     /* share_access */
                             FILE_SHARE_WRITE | FILE_SHARE_DELETE),
@@ -3528,22 +4438,24 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
                         0,                     /* create_options */
                         FILE_ATTRIBUTE_NORMAL, /* file_attributes */
                         0,                     /* oplock_request */
+                        NULL,                  /* lease */
                         0,                     /* allocation_size */
                         NTCREATEX_OPTIONS_PRIVATE_STREAM_DELETE, /* private_flags */
                         NULL,                  /* sd */
                         NULL,                  /* ea_list */
                         &streams[i],           /* result */
-                        NULL);                 /* pinfo */
+                        NULL,                  /* pinfo */
+                        NULL, NULL);           /* create context */
 
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(10, ("Could not open stream %s: %s\n",
-                                  smb_fname_str_dbg(smb_fname),
+                                  smb_fname_str_dbg(smb_fname_cp),
                                   nt_errstr(status)));
 
-                       TALLOC_FREE(smb_fname);
+                       TALLOC_FREE(smb_fname_cp);
                        break;
                }
-               TALLOC_FREE(smb_fname);
+               TALLOC_FREE(smb_fname_cp);
        }
 
        /*
@@ -3583,7 +4495,8 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        const struct dom_sid *group_sid = NULL;
        uint32_t security_info_sent = (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL);
        struct security_token *token = fsp->conn->session_info->security_token;
-       bool inherit_owner = lp_inherit_owner(SNUM(fsp->conn));
+       bool inherit_owner =
+           (lp_inherit_owner(SNUM(fsp->conn)) == INHERIT_OWNER_WINDOWS_AND_UNIX);
        bool inheritable_components = false;
        bool try_builtin_administrators = false;
        const struct dom_sid *BA_U_sid = NULL;
@@ -3592,14 +4505,25 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        const struct dom_sid *SY_U_sid = NULL;
        const struct dom_sid *SY_G_sid = NULL;
        size_t size = 0;
+       struct smb_filename *parent_smb_fname = NULL;
 
        if (!parent_dirname(frame, fsp->fsp_name->base_name, &parent_name, NULL)) {
                TALLOC_FREE(frame);
                return NT_STATUS_NO_MEMORY;
        }
+       parent_smb_fname = synthetic_smb_fname(talloc_tos(),
+                                               parent_name,
+                                               NULL,
+                                               NULL,
+                                               fsp->fsp_name->flags);
+
+       if (parent_smb_fname == NULL) {
+               TALLOC_FREE(frame);
+               return NT_STATUS_NO_MEMORY;
+       }
 
        status = SMB_VFS_GET_NT_ACL(fsp->conn,
-                                   parent_name,
+                                   parent_smb_fname,
                                    (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL),
                                    frame,
                                    &parent_desc);
@@ -3767,6 +4691,263 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        return status;
 }
 
+/*
+ * If we already have a lease, it must match the new file id. [MS-SMB2]
+ * 3.3.5.9.8 speaks about INVALID_PARAMETER if an already used lease key is
+ * used for a different file name.
+ */
+
+struct lease_match_state {
+       /* Input parameters, filled in by lease_match() before parsing. */
+       TALLOC_CTX *mem_ctx;            /* passed to leases_db_copy_file_ids() */
+       const char *servicepath;
+       const struct smb_filename *fname;
+       bool file_existed;              /* VALID_STAT(fname->st) */
+       struct file_id id;              /* zeroed if !file_existed */
+       /* Return parameters, set by the lease_match_parser() callback. */
+       uint32_t num_file_ids;          /* number of entries in ids */
+       struct file_id *ids;
+       NTSTATUS match_status;
+};
+
+/*************************************************************
+ File doesn't exist but this lease key+guid is already in use.
+
+ This is only allowable in the dynamic share case where the
+ service path must be different.
+
+ There is a small race condition here in the multi-connection
+ case where a client sends two create calls on different connections,
+ where the file doesn't exist and one smbd creates the leases_db
+ entry first, but this will get fixed by the multichannel cleanup
+ when all identical client_guids get handled by a single smbd.
+**************************************************************/
+
+static void lease_match_parser_new_file(
+       uint32_t num_files,
+       const struct leases_db_file *files,
+       struct lease_match_state *state)
+{
+       uint32_t i;
+
+       for (i = 0; i < num_files; i++) {
+               const struct leases_db_file *f = &files[i];
+               if (strequal(state->servicepath, f->servicepath)) {
+                       state->match_status = NT_STATUS_INVALID_PARAMETER;
+                       return; /* key already used on this service path */
+               }
+       }
+
+       /* Dynamic share case. Copy out the file ids whose leases must break. */
+       state->match_status = leases_db_copy_file_ids(state->mem_ctx,
+                                       num_files,
+                                       files,
+                                       &state->ids);
+       if (!NT_STATUS_IS_OK(state->match_status)) {
+               return; /* copy failed: its status is what gets reported */
+       }
+
+       state->num_file_ids = num_files;
+       state->match_status = NT_STATUS_OPLOCK_NOT_GRANTED;
+       return;
+}
+
+static void lease_match_parser(
+       uint32_t num_files,
+       const struct leases_db_file *files,
+       void *private_data)
+{
+       struct lease_match_state *state =
+               (struct lease_match_state *)private_data; /* from lease_match() */
+       uint32_t i;
+
+       if (!state->file_existed) {
+               /*
+                * Deal with name mismatch or
+                * possible dynamic share case separately
+                * to make code clearer.
+                */
+               lease_match_parser_new_file(num_files,
+                                               files,
+                                               state);
+               return;
+       }
+
+       /* File existed. Start from OK; the first mismatching entry overrides it. */
+       state->match_status = NT_STATUS_OK;
+
+       for (i = 0; i < num_files; i++) {
+               const struct leases_db_file *f = &files[i];
+
+               /* Everything should be the same. */
+               if (!file_id_equal(&state->id, &f->id)) {
+                       /* This should catch all dynamic share cases. */
+                       state->match_status = NT_STATUS_OPLOCK_NOT_GRANTED;
+                       break;
+               }
+               if (!strequal(f->servicepath, state->servicepath)) {
+                       state->match_status = NT_STATUS_INVALID_PARAMETER;
+                       break;
+               }
+               if (!strequal(f->base_name, state->fname->base_name)) {
+                       state->match_status = NT_STATUS_INVALID_PARAMETER;
+                       break;
+               }
+               if (!strequal(f->stream_name, state->fname->stream_name)) {
+                       state->match_status = NT_STATUS_INVALID_PARAMETER;
+                       break;
+               }
+       }
+
+       if (NT_STATUS_IS_OK(state->match_status)) {
+               /*
+                * Common case - just opening another handle on a
+                * file on a non-dynamic share.
+                */
+               return;
+       }
+
+       if (NT_STATUS_EQUAL(state->match_status, NT_STATUS_INVALID_PARAMETER)) {
+               /* Mismatched path. Error back to client. */
+               return;
+       }
+
+       /*
+        * File id mismatch. Dynamic share case NT_STATUS_OPLOCK_NOT_GRANTED.
+        * Don't allow leases.
+        */
+
+       state->match_status = leases_db_copy_file_ids(state->mem_ctx,
+                                       num_files,
+                                       files,
+                                       &state->ids);
+       if (!NT_STATUS_IS_OK(state->match_status)) {
+               return; /* copy failed: its status is what gets reported */
+       }
+
+       state->num_file_ids = num_files;
+       state->match_status = NT_STATUS_OPLOCK_NOT_GRANTED;
+       return;
+}
+
+static NTSTATUS lease_match(connection_struct *conn,
+                           struct smb_request *req,
+                           struct smb2_lease_key *lease_key,
+                           const char *servicepath,
+                           const struct smb_filename *fname,
+                           uint16_t *p_version,
+                           uint16_t *p_epoch)
+{
+       struct smbd_server_connection *sconn = req->sconn; /* NOTE(review): req is not NULL-checked */
+       TALLOC_CTX *tos = talloc_tos();
+       struct lease_match_state state = {
+               .mem_ctx = tos,
+               .servicepath = servicepath,
+               .fname = fname,
+               .match_status = NT_STATUS_OK
+       };
+       uint32_t i;
+       NTSTATUS status;
+
+       state.file_existed = VALID_STAT(fname->st);
+       if (state.file_existed) {
+               state.id = vfs_file_id_from_sbuf(conn, &fname->st);
+       } else {
+               memset(&state.id, '\0', sizeof(state.id));
+       }
+
+       status = leases_db_parse(&sconn->client->connections->smb2.client.guid,
+                                lease_key, lease_match_parser, &state); /* fills state */
+       if (!NT_STATUS_IS_OK(status)) {
+               /*
+                * Not found or error means okay: We can make the lease pass
+                */
+               return NT_STATUS_OK;
+       }
+       if (!NT_STATUS_EQUAL(state.match_status, NT_STATUS_OPLOCK_NOT_GRANTED)) {
+               /*
+                * Anything but NT_STATUS_OPLOCK_NOT_GRANTED, let the caller
+                * deal with it.
+                */
+               return state.match_status;
+       }
+
+       /* We have to break all existing leases. */
+       for (i = 0; i < state.num_file_ids; i++) {
+               struct share_mode_lock *lck;
+               struct share_mode_data *d;
+               uint32_t j;
+
+               if (file_id_equal(&state.ids[i], &state.id)) {
+                       /* Don't need to break our own file. */
+                       continue;
+               }
+
+               lck = get_existing_share_mode_lock(talloc_tos(), state.ids[i]);
+               if (lck == NULL) {
+                       /* Race condition - file already closed. */
+                       continue;
+               }
+               d = lck->data;
+               for (j=0; j<d->num_share_modes; j++) {
+                       struct share_mode_entry *e = &d->share_modes[j];
+                       uint32_t e_lease_type = get_lease_type(d, e);
+                       struct share_mode_lease *l = NULL;
+
+                       if (share_mode_stale_pid(d, j)) {
+                               continue;
+                       }
+
+                       if (e->op_type == LEASE_OPLOCK) {
+                               l = &lck->data->leases[e->lease_idx];
+                               if (!smb2_lease_key_equal(&l->lease_key,
+                                                         lease_key)) {
+                                       continue;
+                               }
+                               *p_epoch = l->epoch;
+                               *p_version = l->lease_version;
+                       }
+
+                       if (e_lease_type == SMB2_LEASE_NONE) {
+                               continue;
+                       }
+
+                       send_break_message(conn->sconn->msg_ctx, e,
+                                          SMB2_LEASE_NONE);
+
+                       /*
+                        * Windows 7 and 8 lease clients
+                        * are broken in that they will not
+                        * respond to lease break requests
+                        * whilst waiting for an outstanding
+                        * open request on that lease handle
+                        * on the same TCP connection, due
+                        * to holding an internal inode lock.
+                        *
+                        * This means we can't reschedule
+                        * ourselves here, but must return
+                        * from the create.
+                        *
+                        * Work around:
+                        *
+                        * Send the breaks and then return
+                        * SMB2_LEASE_NONE in the lease handle
+                        * to cause them to acknowledge the
+                        * lease break. Consultation with
+                        * Microsoft engineering confirmed
+                        * this approach is safe.
+                        */
+
+               }
+               TALLOC_FREE(lck);
+       }
+       /*
+        * Ensure we don't grant anything more so we
+        * never upgrade.
+        */
+       return NT_STATUS_OPLOCK_NOT_GRANTED;
+}
+
 /*
  * Wrapper around open_file_ntcreate and open_directory
  */
@@ -3780,6 +4961,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                                     uint32_t create_options,
                                     uint32_t file_attributes,
                                     uint32_t oplock_request,
+                                    struct smb2_lease *lease,
                                     uint64_t allocation_size,
                                     uint32_t private_flags,
                                     struct security_descriptor *sd,
@@ -3822,6 +5004,26 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                oplock_request |= INTERNAL_OPEN_ONLY;
        }
 
+       if (lease != NULL) {
+               uint16_t epoch = lease->lease_epoch;
+               uint16_t version = lease->lease_version;
+               status = lease_match(conn,
+                               req,
+                               &lease->lease_key,
+                               conn->connectpath,
+                               smb_fname,
+                               &version,
+                               &epoch);
+               if (NT_STATUS_EQUAL(status, NT_STATUS_OPLOCK_NOT_GRANTED)) {
+                       /* Dynamic share file. No leases and update epoch... */
+                       lease->lease_state = SMB2_LEASE_NONE;
+                       lease->lease_epoch = epoch;
+                       lease->lease_version = version;
+               } else if (!NT_STATUS_IS_OK(status)) {
+                       goto fail;
+               }
+       }
+
        if ((conn->fs_capabilities & FILE_NAMED_STREAMS)
            && (access_mask & DELETE_ACCESS)
            && !is_ntfs_stream_smb_fname(smb_fname)) {
@@ -3829,7 +5031,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                 * We can't open a file with DELETE access if any of the
                 * streams is open without FILE_SHARE_DELETE
                 */
-               status = open_streams_for_delete(conn, smb_fname->base_name);
+               status = open_streams_for_delete(conn, smb_fname);
 
                if (!NT_STATUS_IS_OK(status)) {
                        goto fail;
@@ -3849,7 +5051,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
        if ((conn->fs_capabilities & FILE_NAMED_STREAMS)
            && is_ntfs_stream_smb_fname(smb_fname)
            && (!(private_flags & NTCREATEX_OPTIONS_PRIVATE_STREAM_DELETE))) {
-               uint32 base_create_disposition;
+               uint32_t base_create_disposition;
                struct smb_filename *smb_fname_base = NULL;
 
                if (create_options & FILE_DIRECTORY_FILE) {
@@ -3868,8 +5070,10 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
 
                /* Create an smb_filename with stream_name == NULL. */
                smb_fname_base = synthetic_smb_fname(talloc_tos(),
-                                                    smb_fname->base_name,
-                                                    NULL, NULL);
+                                               smb_fname->base_name,
+                                               NULL,
+                                               NULL,
+                                               smb_fname->flags);
                if (smb_fname_base == NULL) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
@@ -3905,7 +5109,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                                              | FILE_SHARE_WRITE
                                              | FILE_SHARE_DELETE,
                                              base_create_disposition,
-                                             0, 0, 0, 0, 0, NULL, NULL,
+                                             0, 0, 0, NULL, 0, 0, NULL, NULL,
                                              &base_fsp, NULL);
                TALLOC_FREE(smb_fname_base);
 
@@ -3915,7 +5119,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                                   nt_errstr(status)));
                        goto fail;
                }
-               /* we don't need to low level fd */
+               /* we don't need the low level fd */
                fd_close(base_fsp);
        }
 
@@ -3986,6 +5190,7 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                                            create_options,
                                            file_attributes,
                                            oplock_request,
+                                           lease,
                                            private_flags,
                                            &info,
                                            fsp);
@@ -4044,15 +5249,11 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
 
        /* Save the requested allocation size. */
        if ((info == FILE_WAS_CREATED) || (info == FILE_WAS_OVERWRITTEN)) {
-               if (allocation_size
-                   && (allocation_size > fsp->fsp_name->st.st_ex_size)) {
+               if ((allocation_size > fsp->fsp_name->st.st_ex_size)
+                   && !(fsp->is_directory))
+               {
                        fsp->initial_allocation_size = smb_roundup(
                                fsp->conn, allocation_size);
-                       if (fsp->is_directory) {
-                               /* Can't set allocation size on a directory. */
-                               status = NT_STATUS_ACCESS_DENIED;
-                               goto fail;
-                       }
                        if (vfs_allocate_file_space(
                                    fsp, fsp->initial_allocation_size) == -1) {
                                status = NT_STATUS_DISK_FULL;
@@ -4165,6 +5366,7 @@ NTSTATUS get_relative_fid_filename(connection_struct *conn,
        files_struct *dir_fsp;
        char *parent_fname = NULL;
        char *new_base_name = NULL;
+       uint32_t ucf_flags = ucf_flags_from_smb_request(req);
        NTSTATUS status;
 
        if (root_dir_fid == 0 || !smb_fname) {
@@ -4257,9 +5459,8 @@ NTSTATUS get_relative_fid_filename(connection_struct *conn,
 
        status = filename_convert(req,
                                conn,
-                               req->flags2 & FLAGS2_DFS_PATHNAMES,
                                new_base_name,
-                               0,
+                               ucf_flags,
                                NULL,
                                smb_fname_out);
        if (!NT_STATUS_IS_OK(status)) {
@@ -4282,12 +5483,15 @@ NTSTATUS create_file_default(connection_struct *conn,
                             uint32_t create_options,
                             uint32_t file_attributes,
                             uint32_t oplock_request,
+                            struct smb2_lease *lease,
                             uint64_t allocation_size,
                             uint32_t private_flags,
                             struct security_descriptor *sd,
                             struct ea_list *ea_list,
                             files_struct **result,
-                            int *pinfo)
+                            int *pinfo,
+                            const struct smb2_create_blobs *in_context_blobs,
+                            struct smb2_create_blobs *out_context_blobs)
 {
        int info = FILE_WAS_OPENED;
        files_struct *fsp = NULL;
@@ -4373,7 +5577,7 @@ NTSTATUS create_file_default(connection_struct *conn,
                        status = NT_STATUS_NOT_A_DIRECTORY;
                        goto fail;
                }
-               if (lp_posix_pathnames()) {
+               if (req != NULL && req->posix_pathnames) {
                        ret = SMB_VFS_LSTAT(conn, smb_fname);
                } else {
                        ret = SMB_VFS_STAT(conn, smb_fname);
@@ -4388,7 +5592,7 @@ NTSTATUS create_file_default(connection_struct *conn,
        status = create_file_unixpath(
                conn, req, smb_fname, access_mask, share_access,
                create_disposition, create_options, file_attributes,
-               oplock_request, allocation_size, private_flags,
+               oplock_request, lease, allocation_size, private_flags,
                sd, ea_list,
                &fsp, &info);