smbd: Remove separate oplock_type parameter from set_file_oplock
[kai/samba-autobuild/.git] / source3 / smbd / open.c
index 26b48c1cf14acb8d67387fdf6bb06a10f3f4d91a..4db673acd33f42d86fa38fe38db6c71ac5d36482 100644 (file)
 #include "../libcli/security/security.h"
 #include "../librpc/gen_ndr/ndr_security.h"
 #include "../librpc/gen_ndr/open_files.h"
+#include "../librpc/gen_ndr/idmap.h"
+#include "passdb/lookup_sid.h"
 #include "auth.h"
+#include "serverid.h"
 #include "messages.h"
+#include "source3/lib/dbwrap/dbwrap_watch.h"
 
 extern const struct generic_mapping file_generic_mapping;
 
@@ -64,6 +68,7 @@ static bool parent_override_delete(connection_struct *conn,
 
 NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
                                const struct smb_filename *smb_fname,
+                               bool use_privs,
                                uint32_t access_mask)
 {
        /* Check if we have rights to open. */
@@ -71,6 +76,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        struct security_descriptor *sd = NULL;
        uint32_t rejected_share_access;
        uint32_t rejected_mask = access_mask;
+       uint32_t do_not_check_mask = 0;
 
        rejected_share_access = access_mask & ~(conn->share_access);
 
@@ -83,7 +89,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
                return NT_STATUS_ACCESS_DENIED;
        }
 
-       if (get_current_uid(conn) == (uid_t)0) {
+       if (!use_privs && get_current_uid(conn) == (uid_t)0) {
                /* I'm sorry sir, I didn't know you were root... */
                DEBUG(10,("smbd_check_access_rights: root override "
                        "on %s. Granting 0x%x\n",
@@ -113,7 +119,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
                        (SECINFO_OWNER |
                        SECINFO_GROUP |
-                       SECINFO_DACL),&sd);
+                        SECINFO_DACL), talloc_tos(), &sd);
 
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(10, ("smbd_check_access_rights: Could not get acl "
@@ -129,12 +135,32 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_access_check() also takes care of
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check() also takes care of
         * owner WRITE_DAC and READ_CONTROL.
         */
-       status = se_access_check(sd,
+       do_not_check_mask = FILE_READ_ATTRIBUTES;
+
+       /*
+        * Samba 3.6 and earlier granted execute access even
+        * if the ACL did not contain execute rights.
+        * Samba 4.0 is more correct and checks it.
+        * The compatibility mode allows skipping this check
+        * to smooth upgrades.
+        */
+       if (lp_acl_allow_execute_always(SNUM(conn))) {
+               do_not_check_mask |= FILE_EXECUTE;
+       }
+
+       status = se_file_access_check(sd,
                                get_current_nttok(conn),
-                               (access_mask & ~FILE_READ_ATTRIBUTES),
+                               use_privs,
+                               (access_mask & ~do_not_check_mask),
                                &rejected_mask);
 
        DEBUG(10,("smbd_check_access_rights: file %s requesting "
@@ -234,6 +260,7 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        status = SMB_VFS_GET_NT_ACL(conn,
                                parent_dir,
                                SECINFO_DACL,
+                                   talloc_tos(),
                                &parent_sd);
 
        if (!NT_STATUS_IS_OK(status)) {
@@ -245,11 +272,18 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_access_check() also takes care of
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check() also takes care of
         * owner WRITE_DAC and READ_CONTROL.
         */
-       status = se_access_check(parent_sd,
+       status = se_file_access_check(parent_sd,
                                get_current_nttok(conn),
+                               false,
                                (access_mask & ~FILE_READ_ATTRIBUTES),
                                &access_granted);
        if(!NT_STATUS_IS_OK(status)) {
@@ -271,10 +305,10 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
  fd support routines - attempt to do a dos_open.
 ****************************************************************************/
 
-static NTSTATUS fd_open(struct connection_struct *conn,
-                   files_struct *fsp,
-                   int flags,
-                   mode_t mode)
+NTSTATUS fd_open(struct connection_struct *conn,
+                files_struct *fsp,
+                int flags,
+                mode_t mode)
 {
        struct smb_filename *smb_fname = fsp->fsp_name;
        NTSTATUS status = NT_STATUS_OK;
@@ -368,13 +402,12 @@ void change_file_owner_to_parent(connection_struct *conn,
                                        const char *inherit_from_dir,
                                        files_struct *fsp)
 {
-       struct smb_filename *smb_fname_parent = NULL;
-       NTSTATUS status;
+       struct smb_filename *smb_fname_parent;
        int ret;
 
-       status = create_synthetic_smb_fname(talloc_tos(), inherit_from_dir,
-                                           NULL, NULL, &smb_fname_parent);
-       if (!NT_STATUS_IS_OK(status)) {
+       smb_fname_parent = synthetic_smb_fname(talloc_tos(), inherit_from_dir,
+                                              NULL, NULL);
+       if (smb_fname_parent == NULL) {
                return;
        }
 
@@ -423,17 +456,17 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
                                       const char *fname,
                                       SMB_STRUCT_STAT *psbuf)
 {
-       struct smb_filename *smb_fname_parent = NULL;
+       struct smb_filename *smb_fname_parent;
        struct smb_filename *smb_fname_cwd = NULL;
        char *saved_dir = NULL;
        TALLOC_CTX *ctx = talloc_tos();
        NTSTATUS status = NT_STATUS_OK;
        int ret;
 
-       status = create_synthetic_smb_fname(ctx, inherit_from_dir, NULL, NULL,
-                                           &smb_fname_parent);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       smb_fname_parent = synthetic_smb_fname(ctx, inherit_from_dir,
+                                              NULL, NULL);
+       if (smb_fname_parent == NULL) {
+               return NT_STATUS_NO_MEMORY;
        }
 
        ret = SMB_VFS_STAT(conn, smb_fname_parent);
@@ -471,10 +504,10 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
                goto chdir;
        }
 
-       status = create_synthetic_smb_fname(ctx, ".", NULL, NULL,
-                                           &smb_fname_cwd);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       smb_fname_cwd = synthetic_smb_fname(ctx, ".", NULL, NULL);
+       if (smb_fname_cwd == NULL) {
+               status = NT_STATUS_NO_MEMORY;
+               goto chdir;
        }
 
        ret = SMB_VFS_STAT(conn, smb_fname_cwd);
@@ -533,6 +566,106 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
        return status;
 }
 
+/****************************************************************************
+ Open a file - returning a guaranteed ATOMIC indication of whether
+ this call created the file, reported through *file_created.
+****************************************************************************/
+
+static NTSTATUS fd_open_atomic(struct connection_struct *conn,
+                       files_struct *fsp,
+                       int flags,
+                       mode_t mode,
+                       bool *file_created)
+{
+       NTSTATUS status = NT_STATUS_UNSUCCESSFUL;
+       bool file_existed = VALID_STAT(fsp->fsp_name->st); /* initial guess */
+
+       *file_created = false;
+
+       if (!(flags & O_CREAT)) {
+               /*
+                * We're not creating the file, just pass through.
+                */
+               return fd_open(conn, fsp, flags, mode);
+       }
+
+       if (flags & O_EXCL) {
+               /*
+                * Fail if already exists, just pass through.
+                */
+               status = fd_open(conn, fsp, flags, mode);
+
+               /*
+                * Here we've opened with O_CREAT|O_EXCL. If that went
+                * NT_STATUS_OK, we *know* we created this file.
+                */
+               *file_created = NT_STATUS_IS_OK(status);
+
+               return status;
+       }
+
+       /*
+        * Now it gets tricky. We have O_CREAT, but not O_EXCL.
+        * To know absolutely if we created the file or not,
+        * we can never call O_CREAT without O_EXCL. So if
+        * we think the file existed, try without O_CREAT|O_EXCL.
+        * If we think the file didn't exist, try with
+        * O_CREAT|O_EXCL. Keep bouncing between these two
+        * requests until either the file is created, or
+        * opened. Either way, we keep going until we get
+        * a returnable result (error, or open/create).
+        */
+
+       while(1) {
+               int curr_flags = flags;
+
+               if (file_existed) {
+                       /* Just try open, do not create. */
+                       curr_flags &= ~(O_CREAT);
+                       status = fd_open(conn, fsp, curr_flags, mode);
+                       if (NT_STATUS_EQUAL(status,
+                                       NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
+                               /*
+                                * Someone deleted it in the meantime.
+                                * Retry with O_CREAT|O_EXCL.
+                                */
+                               file_existed = false;
+                               DEBUG(10,("fd_open_atomic: file %s existed. "
+                                       "Retry.\n",
+                                       smb_fname_str_dbg(fsp->fsp_name)));
+                                       continue;
+                       }
+               } else {
+                       /* Try create exclusively, fail if it exists. */
+                       curr_flags |= O_EXCL;
+                       status = fd_open(conn, fsp, curr_flags, mode);
+                       if (NT_STATUS_EQUAL(status,
+                                       NT_STATUS_OBJECT_NAME_COLLISION)) {
+                               /*
+                                * Someone created it in the meantime.
+                                * Retry without O_CREAT.
+                                */
+                               file_existed = true;
+                               DEBUG(10,("fd_open_atomic: file %s "
+                                       "did not exist. Retry.\n",
+                                       smb_fname_str_dbg(fsp->fsp_name)));
+                               continue;
+                       }
+                       if (NT_STATUS_IS_OK(status)) {
+                               /*
+                                * Here we've opened with O_CREAT|O_EXCL
+                                * and got success. We *know* we created
+                                * this file.
+                                */
+                               *file_created = true;
+                       }
+               }
+               /* Opened, created, or failed for good: stop retrying. */
+               break;
+       }
+       return status;
+}
+
 /****************************************************************************
  Open a file.
 ****************************************************************************/
@@ -544,14 +677,14 @@ static NTSTATUS open_file(files_struct *fsp,
                          int flags,
                          mode_t unx_mode,
                          uint32 access_mask, /* client requested access mask. */
-                         uint32 open_access_mask) /* what we're actually using in the open. */
+                         uint32 open_access_mask, /* what we're actually using in the open. */
+                         bool *p_file_created)
 {
        struct smb_filename *smb_fname = fsp->fsp_name;
        NTSTATUS status = NT_STATUS_OK;
        int accmode = (flags & O_ACCMODE);
        int local_flags = flags;
        bool file_existed = VALID_STAT(fsp->fsp_name->st);
-       bool file_created = false;
 
        fsp->fh->fd = -1;
        errno = EPERM;
@@ -574,7 +707,8 @@ static NTSTATUS open_file(files_struct *fsp,
                        DEBUG(3,("Permission denied opening %s\n",
                                 smb_fname_str_dbg(smb_fname)));
                        return NT_STATUS_ACCESS_DENIED;
-               } else if(flags & O_CREAT) {
+               }
+               if (flags & O_CREAT) {
                        /* We don't want to write - but we must make sure that
                           O_CREAT doesn't create the file if we have write
                           access into the directory.
@@ -608,13 +742,6 @@ static NTSTATUS open_file(files_struct *fsp,
                const char *wild;
                int ret;
 
-               /*
-                * We can't actually truncate here as the file may be locked.
-                * open_file_ntcreate will take care of the truncate later. JRA.
-                */
-
-               local_flags &= ~O_TRUNC;
-
 #if defined(O_NONBLOCK) && defined(S_ISFIFO)
                /*
                 * We would block on opening a FIFO with no one else on the
@@ -623,6 +750,7 @@ static NTSTATUS open_file(files_struct *fsp,
                 */
 
                if (file_existed && S_ISFIFO(smb_fname->st.st_ex_mode)) {
+                       local_flags &= ~O_TRUNC; /* Can't truncate a FIFO. */
                        local_flags |= O_NONBLOCK;
                }
 #endif
@@ -648,6 +776,7 @@ static NTSTATUS open_file(files_struct *fsp,
                        if (file_existed) {
                                status = smbd_check_access_rights(conn,
                                                smb_fname,
+                                               false,
                                                access_mask);
                        } else if (local_flags & O_CREAT){
                                status = check_parent_access(conn,
@@ -671,7 +800,8 @@ static NTSTATUS open_file(files_struct *fsp,
                }
 
                /* Actually do the open */
-               status = fd_open(conn, fsp, local_flags, unx_mode);
+               status = fd_open_atomic(conn, fsp, local_flags,
+                               unx_mode, p_file_created);
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(3,("Error opening file %s (%s) (local_flags=%d) "
                                 "(flags=%d)\n", smb_fname_str_dbg(smb_fname),
@@ -691,10 +821,44 @@ static NTSTATUS open_file(files_struct *fsp,
                        return status;
                }
 
-               if ((local_flags & O_CREAT) && !file_existed) {
-                       file_created = true;
-               }
+               if (*p_file_created) {
+                       /* We created this file. */
+
+                       bool need_re_stat = false;
+                       /* Do all inheritance work after we've
+                          done a successful fstat call and filled
+                          in the stat struct in fsp->fsp_name. */
+
+                       /* Inherit the ACL if required */
+                       if (lp_inherit_perms(SNUM(conn))) {
+                               inherit_access_posix_acl(conn, parent_dir,
+                                                        smb_fname->base_name,
+                                                        unx_mode);
+                               need_re_stat = true;
+                       }
+
+                       /* Change the owner if required. */
+                       if (lp_inherit_owner(SNUM(conn))) {
+                               change_file_owner_to_parent(conn, parent_dir,
+                                                           fsp);
+                               need_re_stat = true;
+                       }
+
+                       if (need_re_stat) {
+                               ret = SMB_VFS_FSTAT(fsp, &smb_fname->st);
+                               /* If we have an fd, this stat should succeed. */
+                               if (ret == -1) {
+                                       DEBUG(0,("Error doing fstat on open file %s "
+                                                "(%s)\n",
+                                                smb_fname_str_dbg(smb_fname),
+                                                strerror(errno) ));
+                               }
+                       }
 
+                       notify_fname(conn, NOTIFY_ACTION_ADDED,
+                                    FILE_NOTIFY_CHANGE_FILE_NAME,
+                                    smb_fname->base_name);
+               }
        } else {
                fsp->fh->fd = -1; /* What we used to call a stat open. */
                if (!file_existed) {
@@ -704,6 +868,7 @@ static NTSTATUS open_file(files_struct *fsp,
 
                status = smbd_check_access_rights(conn,
                                smb_fname,
+                               false,
                                access_mask);
 
                if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND) &&
@@ -728,51 +893,6 @@ static NTSTATUS open_file(files_struct *fsp,
                }
        }
 
-       if (!file_existed) {
-               if (file_created) {
-                       bool need_re_stat = false;
-                       /* Do all inheritance work after we've
-                          done a successful stat call and filled
-                          in the stat struct in fsp->fsp_name. */
-
-                       /* Inherit the ACL if required */
-                       if (lp_inherit_perms(SNUM(conn))) {
-                               inherit_access_posix_acl(conn, parent_dir,
-                                                        smb_fname->base_name,
-                                                        unx_mode);
-                               need_re_stat = true;
-                       }
-
-                       /* Change the owner if required. */
-                       if (lp_inherit_owner(SNUM(conn))) {
-                               change_file_owner_to_parent(conn, parent_dir,
-                                                           fsp);
-                               need_re_stat = true;
-                       }
-
-                       if (need_re_stat) {
-                               int ret;
-
-                               if (fsp->fh->fd == -1) {
-                                       ret = SMB_VFS_STAT(conn, smb_fname);
-                               } else {
-                                       ret = SMB_VFS_FSTAT(fsp, &smb_fname->st);
-                                       /* If we have an fd, this stat should succeed. */
-                                       if (ret == -1) {
-                                               DEBUG(0,("Error doing fstat on open file %s "
-                                                        "(%s)\n",
-                                                        smb_fname_str_dbg(smb_fname),
-                                                        strerror(errno) ));
-                                       }
-                               }
-                       }
-
-                       notify_fname(conn, NOTIFY_ACTION_ADDED,
-                                    FILE_NOTIFY_CHANGE_FILE_NAME,
-                                    smb_fname->base_name);
-               }
-       }
-
        /*
         * POSIX allows read-only opens of directories. We don't
         * want to do this (we use a different code path for this)
@@ -831,6 +951,14 @@ static bool share_conflict(struct share_mode_entry *entry,
                  (unsigned int)entry->share_access,
                  (unsigned int)entry->private_options));
 
+       if (server_id_is_disconnected(&entry->pid)) {
+               /*
+                * note: cleanup should have been done by
+                * delay_for_batch_oplocks()
+                */
+               return false;
+       }
+
        DEBUG(10,("share_conflict: access_mask = 0x%x, share_access = 0x%x\n",
                  (unsigned int)access_mask, (unsigned int)share_access));
 
@@ -911,15 +1039,6 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                return;
        }
 
-       if (is_deferred_open_entry(share_entry) &&
-           !open_was_deferred(sconn, share_entry->op_mid)) {
-               char *str = talloc_asprintf(talloc_tos(),
-                       "Got a deferred entry without a request: "
-                       "PANIC: %s\n",
-                       share_mode_str(talloc_tos(), num, share_entry));
-               smb_panic(str);
-       }
-
        if (!is_valid_share_mode_entry(share_entry)) {
                return;
        }
@@ -933,17 +1052,6 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                          "share entry with an open file\n");
        }
 
-       if (is_deferred_open_entry(share_entry)) {
-               goto panic;
-       }
-
-       if ((share_entry->op_type == NO_OPLOCK) &&
-           (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK)) {
-               /* Someone has already written to it, but I haven't yet
-                * noticed */
-               return;
-       }
-
        if (((uint16)fsp->oplock_type) != share_entry->op_type) {
                goto panic;
        }
@@ -968,11 +1076,33 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
 
 bool is_stat_open(uint32 access_mask)
 {
-       return (access_mask &&
-               ((access_mask & ~(SYNCHRONIZE_ACCESS| FILE_READ_ATTRIBUTES|
-                                 FILE_WRITE_ATTRIBUTES))==0) &&
-               ((access_mask & (SYNCHRONIZE_ACCESS|FILE_READ_ATTRIBUTES|
-                                FILE_WRITE_ATTRIBUTES)) != 0));
+       const uint32_t stat_open_bits = /* only bits a stat open may ask for */
+               (SYNCHRONIZE_ACCESS|
+                FILE_READ_ATTRIBUTES|
+                FILE_WRITE_ATTRIBUTES);
+
+       return (((access_mask &  stat_open_bits) != 0) && /* >= 1 stat bit */
+               ((access_mask & ~stat_open_bits) == 0)); /* no other bits */
+}
+
+static bool has_delete_on_close(struct share_mode_lock *lck,
+                               uint32_t name_hash)
+{
+       struct share_mode_data *d = lck->data;
+       uint32_t i;
+
+       if (d->num_share_modes == 0) {
+               return false; /* no share mode entries at all */
+       }
+       if (!is_delete_on_close_set(lck, name_hash)) {
+               return false; /* flag not set for this name_hash */
+       }
+       for (i=0; i<d->num_share_modes; i++) {
+               if (!share_mode_stale_pid(d, i)) {
+                       return true; /* a non-stale entry backs the flag */
+               }
+       }
+       return false; /* every entry is stale: treat flag as leftover */
 }
 
 /****************************************************************************
@@ -983,11 +1113,8 @@ bool is_stat_open(uint32 access_mask)
 
 static NTSTATUS open_mode_check(connection_struct *conn,
                                struct share_mode_lock *lck,
-                               uint32_t name_hash,
                                uint32 access_mask,
-                               uint32 share_access,
-                               uint32 create_options,
-                               bool *file_existed)
+                               uint32 share_access)
 {
        int i;
 
@@ -995,25 +1122,6 @@ static NTSTATUS open_mode_check(connection_struct *conn,
                return NT_STATUS_OK;
        }
 
-       /* A delete on close prohibits everything */
-
-       if (is_delete_on_close_set(lck, name_hash)) {
-               /*
-                * Check the delete on close token
-                * is valid. It could have been left
-                * after a server crash.
-                */
-               for(i = 0; i < lck->data->num_share_modes; i++) {
-                       if (!share_mode_stale_pid(lck->data, i)) {
-
-                               *file_existed = true;
-
-                               return NT_STATUS_DELETE_PENDING;
-                       }
-               }
-               return NT_STATUS_OK;
-       }
-
        if (is_stat_open(access_mask)) {
                /* Stat open that doesn't trigger oplock breaks or share mode
                 * checks... ! JRA. */
@@ -1047,56 +1155,37 @@ static NTSTATUS open_mode_check(connection_struct *conn,
                                continue;
                        }
 
-                       *file_existed = true;
-
                        return NT_STATUS_SHARING_VIOLATION;
                }
        }
 
-       if (lck->data->num_share_modes != 0) {
-               *file_existed = true;
-       }
-
        return NT_STATUS_OK;
 }
 
-static bool is_delete_request(files_struct *fsp) {
-       return ((fsp->access_mask == DELETE_ACCESS) &&
-               (fsp->oplock_type == NO_OPLOCK));
-}
-
 /*
  * Send a break message to the oplock holder and delay the open for
  * our client.
  */
 
-static NTSTATUS send_break_message(files_struct *fsp,
-                                       struct share_mode_entry *exclusive,
-                                       uint64_t mid,
-                                       int oplock_request)
+static NTSTATUS send_break_message(struct messaging_context *msg_ctx,
+                                  const struct share_mode_entry *exclusive,
+                                  uint16_t break_to)
 {
        NTSTATUS status;
        char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
 
        DEBUG(10, ("Sending break request to PID %s\n",
                   procid_str_static(&exclusive->pid)));
-       exclusive->op_mid = mid;
 
        /* Create the message. */
        share_mode_entry_to_message(msg, exclusive);
 
-       /* Add in the FORCE_OPLOCK_BREAK_TO_NONE bit in the message if set. We
-          don't want this set in the share mode struct pointed to by lck. */
-
-       if (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE) {
-               SSVAL(msg,OP_BREAK_MSG_OP_TYPE_OFFSET,
-                       exclusive->op_type | FORCE_OPLOCK_BREAK_TO_NONE);
-       }
+       /* Overload entry->op_type */
+       SSVAL(msg,OP_BREAK_MSG_OP_TYPE_OFFSET, break_to);
 
-       status = messaging_send_buf(fsp->conn->sconn->msg_ctx, exclusive->pid,
+       status = messaging_send_buf(msg_ctx, exclusive->pid,
                                    MSG_SMB_BREAK_REQUEST,
-                                   (uint8 *)msg,
-                                   MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+                                   (uint8 *)msg, sizeof(msg));
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(3, ("Could not send oplock break message: %s\n",
                          nt_errstr(status)));
@@ -1106,141 +1195,205 @@ static NTSTATUS send_break_message(files_struct *fsp,
 }
 
 /*
- * Return share_mode_entry pointers for :
- * 1). Batch oplock entry.
- * 2). Batch or exclusive oplock entry (may be identical to #1).
- * bool have_level2_oplock
- * bool have_no_oplock.
  * Do internal consistency checks on the share mode for a file.
  */
 
-static void find_oplock_types(files_struct *fsp,
-                               int oplock_request,
-                               const struct share_mode_lock *lck,
-                               struct share_mode_entry **pp_batch,
-                               struct share_mode_entry **pp_ex_or_batch,
-                               bool *got_level2,
-                               bool *got_no_oplock)
+static bool validate_oplock_types(struct share_mode_lock *lck)
 {
-       int i;
-
-       *pp_batch = NULL;
-       *pp_ex_or_batch = NULL;
-       *got_level2 = false;
-       *got_no_oplock = false;
-
-       /* Ignore stat or internal opens, as is done in
-               delay_for_batch_oplocks() and
-               delay_for_exclusive_oplocks().
-        */
-       if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
-               return;
-       }
-
-       for (i=0; i<lck->data->num_share_modes; i++) {
-               if (!is_valid_share_mode_entry(&lck->data->share_modes[i])) {
+       struct share_mode_data *d = lck->data;
+       bool batch = false;
+       bool ex_or_batch = false;
+       bool level2 = false;
+       bool no_oplock = false;
+       uint32_t num_non_stat_opens = 0;
+       uint32_t i;
+
+       for (i=0; i<d->num_share_modes; i++) {
+               struct share_mode_entry *e = &d->share_modes[i];
+
+               if (!is_valid_share_mode_entry(e)) {
                        continue;
                }
 
-               if (lck->data->share_modes[i].op_type == NO_OPLOCK &&
-                               is_stat_open(lck->data->share_modes[i].access_mask)) {
+               if (e->op_type == NO_OPLOCK && is_stat_open(e->access_mask)) {
                        /* We ignore stat opens in the table - they
                           always have NO_OPLOCK and never get or
                           cause breaks. JRA. */
                        continue;
                }
 
-               if (BATCH_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
+               num_non_stat_opens += 1;
+
+               if (BATCH_OPLOCK_TYPE(e->op_type)) {
                        /* batch - can only be one. */
-                       if (share_mode_stale_pid(lck->data, i)) {
+                       if (share_mode_stale_pid(d, i)) {
                                DEBUG(10, ("Found stale batch oplock\n"));
                                continue;
                        }
-                       if (*pp_ex_or_batch || *pp_batch || *got_level2 || *got_no_oplock) {
-                               smb_panic("Bad batch oplock entry.");
+                       if (ex_or_batch || batch || level2 || no_oplock) {
+                               DEBUG(0, ("Bad batch oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *pp_batch = &lck->data->share_modes[i];
+                       batch = true;
                }
 
-               if (EXCLUSIVE_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
-                       if (share_mode_stale_pid(lck->data, i)) {
+               if (EXCLUSIVE_OPLOCK_TYPE(e->op_type)) {
+                       if (share_mode_stale_pid(d, i)) {
                                DEBUG(10, ("Found stale duplicate oplock\n"));
                                continue;
                        }
                        /* Exclusive or batch - can only be one. */
-                       if (*pp_ex_or_batch || *got_level2 || *got_no_oplock) {
-                               smb_panic("Bad exclusive or batch oplock entry.");
+                       if (ex_or_batch || level2 || no_oplock) {
+                               DEBUG(0, ("Bad exclusive or batch oplock "
+                                         "entry %u.", (unsigned)i));
+                               return false;
                        }
-                       *pp_ex_or_batch = &lck->data->share_modes[i];
+                       ex_or_batch = true;
                }
 
-               if (LEVEL_II_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
-                       if (*pp_batch || *pp_ex_or_batch) {
-                               if (share_mode_stale_pid(lck->data, i)) {
+               if (LEVEL_II_OPLOCK_TYPE(e->op_type)) {
+                       if (batch || ex_or_batch) {
+                               if (share_mode_stale_pid(d, i)) {
                                        DEBUG(10, ("Found stale LevelII "
                                                   "oplock\n"));
                                        continue;
                                }
-                               smb_panic("Bad levelII oplock entry.");
+                               DEBUG(0, ("Bad levelII oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *got_level2 = true;
+                       level2 = true;
                }
 
-               if (lck->data->share_modes[i].op_type == NO_OPLOCK) {
-                       if (*pp_batch || *pp_ex_or_batch) {
-                               if (share_mode_stale_pid(lck->data, i)) {
+               if (e->op_type == NO_OPLOCK) {
+                       if (batch || ex_or_batch) {
+                               if (share_mode_stale_pid(d, i)) {
                                        DEBUG(10, ("Found stale NO_OPLOCK "
                                                   "entry\n"));
                                        continue;
                                }
-                               smb_panic("Bad no oplock entry.");
+                               DEBUG(0, ("Bad no oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *got_no_oplock = true;
+                       no_oplock = true;
                }
        }
-}
 
-static bool delay_for_batch_oplocks(files_struct *fsp,
-                                       uint64_t mid,
-                                       int oplock_request,
-                                       struct share_mode_entry *batch_entry)
-{
-       if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
-               return false;
-       }
-       if (batch_entry == NULL) {
+       remove_stale_share_mode_entries(d);
+
+       if ((batch || ex_or_batch) && (num_non_stat_opens != 1)) {
+               DEBUG(1, ("got batch (%d) or ex (%d) non-exclusively (%d)\n",
+                         (int)batch, (int)ex_or_batch,
+                         (int)d->num_share_modes));
                return false;
        }
 
-       /* Found a batch oplock */
-       send_break_message(fsp, batch_entry, mid, oplock_request);
        return true;
 }
 
-static bool delay_for_exclusive_oplocks(files_struct *fsp,
-                                       uint64_t mid,
-                                       int oplock_request,
-                                       struct share_mode_entry *ex_entry)
+static bool delay_for_oplock(files_struct *fsp,
+                            int oplock_request,
+                            struct share_mode_lock *lck,
+                            bool have_sharing_violation,
+                            uint32_t create_disposition)
 {
-       bool delay_it;
+       struct share_mode_data *d = lck->data;
+       struct share_mode_entry *entry;
+       uint32_t num_non_stat_opens = 0;
+       uint32_t i;
+       uint16_t break_to;
 
        if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
                return false;
        }
-       if (ex_entry == NULL) {
+       for (i=0; i<d->num_share_modes; i++) {
+               struct share_mode_entry *e = &d->share_modes[i];
+               if (e->op_type == NO_OPLOCK && is_stat_open(e->access_mask)) {
+                       continue;
+               }
+               num_non_stat_opens += 1;
+
+               /*
+                * We found a non-stat open, which in the exclusive/batch
+                * case will be inspected further down.
+                */
+               entry = e;
+       }
+       if (num_non_stat_opens == 0) {
+               /*
+                * Nothing around to wait for
+                */
+               return false;
+       }
+       if (num_non_stat_opens != 1) {
+               /*
+                * More than one open around. There can't be any exclusive or
+                * batch left, this is all level2.
+                */
                return false;
        }
 
-       /* Found an exclusive or batch oplock */
+       if (server_id_is_disconnected(&entry->pid)) {
+               /*
+                * TODO: clean up.
+                * This could be achieved by sending a break message
+                * to ourselves. Special considerations for files
+                * with delete_on_close flag set!
+                *
+                * For now we keep it simple and do not
+                * allow delete on close for durable handles.
+                */
+               return false;
+       }
 
-       delay_it = is_delete_request(fsp) ?
-               BATCH_OPLOCK_TYPE(ex_entry->op_type) : true;
+       switch (create_disposition) {
+       case FILE_SUPERSEDE:
+       case FILE_OVERWRITE_IF:
+               break_to = NO_OPLOCK;
+               break;
+       default:
+               break_to = LEVEL_II_OPLOCK;
+               break;
+       }
 
-       if (!delay_it) {
+       if (have_sharing_violation && (entry->op_type & BATCH_OPLOCK)) {
+               if (share_mode_stale_pid(d, 0)) {
+                       return false;
+               }
+               send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
+               return true;
+       }
+       if (have_sharing_violation) {
+               /*
+                * Non-batch exclusive is not broken if we have a sharing
+                * violation
+                */
+               return false;
+       }
+       if (LEVEL_II_OPLOCK_TYPE(entry->op_type) &&
+           (break_to == NO_OPLOCK)) {
+               if (share_mode_stale_pid(d, 0)) {
+                       return false;
+               }
+               DEBUG(10, ("Asynchronously breaking level2 oplock for "
+                          "create_disposition=%u\n",
+                          (unsigned)create_disposition));
+               send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
+               return false;
+       }
+       if (!EXCLUSIVE_OPLOCK_TYPE(entry->op_type)) {
+               /*
+                * No break for NO_OPLOCK or LEVEL2_OPLOCK oplocks
+                */
+               return false;
+       }
+       if (share_mode_stale_pid(d, 0)) {
                return false;
        }
 
-       send_break_message(fsp, ex_entry, mid, oplock_request);
+       send_break_message(fsp->conn->sconn->msg_ctx, entry, break_to);
        return true;
 }
 
@@ -1252,16 +1405,17 @@ static bool file_has_brlocks(files_struct *fsp)
        if (!br_lck)
                return false;
 
-       return br_lck->num_locks > 0 ? true : false;
+       return (brl_num_locks(br_lck) > 0);
 }
 
 static void grant_fsp_oplock_type(files_struct *fsp,
-                               int oplock_request,
-                               bool got_level2_oplock,
-                               bool got_a_none_oplock)
+                                 struct share_mode_lock *lck,
+                                 int oplock_request)
 {
        bool allow_level2 = (global_client_caps & CAP_LEVEL_II_OPLOCKS) &&
                            lp_level2_oplocks(SNUM(fsp->conn));
+       bool got_level2_oplock, got_a_none_oplock;
+       uint32_t i;
 
        /* Start by granting what the client asked for,
           but ensure no SAMBA_PRIVATE bits can be set. */
@@ -1288,29 +1442,29 @@ static void grant_fsp_oplock_type(files_struct *fsp,
                return;
        }
 
+       got_level2_oplock = false;
+       got_a_none_oplock = false;
+
+       for (i=0; i<lck->data->num_share_modes; i++) {
+               int op_type = lck->data->share_modes[i].op_type;
+
+               if (LEVEL_II_OPLOCK_TYPE(op_type)) {
+                       got_level2_oplock = true;
+               }
+               if (op_type == NO_OPLOCK) {
+                       got_a_none_oplock = true;
+               }
+       }
+
        /*
         * Match what was requested (fsp->oplock_type) with
         * what was found in the existing share modes.
         */
 
-       if (got_a_none_oplock) {
-               fsp->oplock_type = NO_OPLOCK;
-       } else if (got_level2_oplock) {
-               if (fsp->oplock_type == NO_OPLOCK ||
-                               fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
-                       /* Store a level2 oplock, but don't tell the client */
-                       fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-               } else {
+       if (got_level2_oplock || got_a_none_oplock) {
+               if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
                        fsp->oplock_type = LEVEL_II_OPLOCK;
                }
-       } else {
-               /* All share_mode_entries are placeholders or deferred.
-                * Silently upgrade to fake levelII if the client didn't
-                * ask for an oplock. */
-               if (fsp->oplock_type == NO_OPLOCK) {
-                       /* Store a level2 oplock, but don't tell the client */
-                       fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-               }
        }
 
        /*
@@ -1318,15 +1472,28 @@ static void grant_fsp_oplock_type(files_struct *fsp,
         * or if we've turned them off.
         */
        if (fsp->oplock_type == LEVEL_II_OPLOCK && !allow_level2) {
-               fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
+               fsp->oplock_type = NO_OPLOCK;
+       }
+
+       if (fsp->oplock_type == LEVEL_II_OPLOCK && !got_level2_oplock) {
+               /*
+                * We're the first level2 oplock. Indicate that in brlock.tdb.
+                */
+               struct byte_range_lock *brl;
+
+               brl = brl_get_locks(talloc_tos(), fsp);
+               if (brl != NULL) {
+                       brl_set_have_read_oplocks(brl, true);
+                       TALLOC_FREE(brl);
+               }
        }
 
        DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
                  fsp->oplock_type, fsp_str_dbg(fsp)));
 }
 
-bool request_timed_out(struct timeval request_time,
-                      struct timeval timeout)
+static bool request_timed_out(struct timeval request_time,
+                             struct timeval timeout)
 {
        struct timeval now, end_time;
        GetTimeOfDay(&now);
@@ -1334,6 +1501,13 @@ bool request_timed_out(struct timeval request_time,
        return (timeval_compare(&end_time, &now) < 0);
 }
 
+struct defer_open_state {
+       struct smbd_server_connection *sconn;
+       uint64_t mid;
+};
+
+static void defer_open_done(struct tevent_req *req);
+
 /****************************************************************************
  Handle the 1 second delay in returning a SHARING_VIOLATION error.
 ****************************************************************************/
@@ -1344,29 +1518,6 @@ static void defer_open(struct share_mode_lock *lck,
                       struct smb_request *req,
                       struct deferred_open_record *state)
 {
-       struct server_id self = messaging_server_id(req->sconn->msg_ctx);
-
-       /* Paranoia check */
-
-       if (lck) {
-               int i;
-
-               for (i=0; i<lck->data->num_share_modes; i++) {
-                       struct share_mode_entry *e = &lck->data->share_modes[i];
-
-                       if (is_deferred_open_entry(e) &&
-                           serverid_equal(&self, &e->pid) &&
-                           (e->op_mid == req->mid)) {
-                               DEBUG(0, ("Trying to defer an already deferred "
-                                       "request: mid=%llu, exiting\n",
-                                       (unsigned long long)req->mid));
-                               exit_server("attempt to defer a deferred request");
-                       }
-               }
-       }
-
-       /* End paranoia check */
-
        DEBUG(10,("defer_open_sharing_error: time [%u.%06u] adding deferred "
                  "open entry for mid %llu\n",
                  (unsigned int)request_time.tv_sec,
@@ -1375,11 +1526,63 @@ static void defer_open(struct share_mode_lock *lck,
 
        if (!push_deferred_open_message_smb(req, request_time, timeout,
                                       state->id, (char *)state, sizeof(*state))) {
+               TALLOC_FREE(lck);
                exit_server("push_deferred_open_message_smb failed");
        }
        if (lck) {
-               add_deferred_open(lck, req->mid, request_time, self, state->id);
+               struct defer_open_state *watch_state;
+               struct tevent_req *watch_req;
+               bool ret;
+
+               watch_state = talloc(req->sconn, struct defer_open_state);
+               if (watch_state == NULL) {
+                       exit_server("talloc failed");
+               }
+               watch_state->sconn = req->sconn;
+               watch_state->mid = req->mid;
+
+               DEBUG(10, ("defering mid %llu\n",
+                          (unsigned long long)req->mid));
+
+               watch_req = dbwrap_record_watch_send(
+                       watch_state, req->sconn->ev_ctx, lck->data->record,
+                       req->sconn->msg_ctx);
+               if (watch_req == NULL) {
+                       exit_server("Could not watch share mode record");
+               }
+               tevent_req_set_callback(watch_req, defer_open_done,
+                                       watch_state);
+
+               ret = tevent_req_set_endtime(
+                       watch_req, req->sconn->ev_ctx,
+                       timeval_sum(&request_time, &timeout));
+               SMB_ASSERT(ret);
+       }
+}
+
+static void defer_open_done(struct tevent_req *req)
+{
+       struct defer_open_state *state = tevent_req_callback_data(
+               req, struct defer_open_state);
+       NTSTATUS status;
+       bool ret;
+
+       status = dbwrap_record_watch_recv(req, talloc_tos(), NULL);
+       TALLOC_FREE(req);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(5, ("dbwrap_record_watch_recv returned %s\n",
+                         nt_errstr(status)));
+               /*
+                * Even if it failed, retry anyway. TODO: We need a way to
+                * tell a re-scheduled open about that error.
+                */
        }
+
+       DEBUG(10, ("scheduling mid %llu\n", (unsigned long long)state->mid));
+
+       ret = schedule_deferred_open_message_smb(state->sconn, state->mid);
+       SMB_ASSERT(ret);
+       TALLOC_FREE(state);
 }
 
 
@@ -1387,12 +1590,12 @@ static void defer_open(struct share_mode_lock *lck,
  On overwrite open ensure that the attributes match.
 ****************************************************************************/
 
-bool open_match_attributes(connection_struct *conn,
-                          uint32 old_dos_attr,
-                          uint32 new_dos_attr,
-                          mode_t existing_unx_mode,
-                          mode_t new_unx_mode,
-                          mode_t *returned_unx_mode)
+static bool open_match_attributes(connection_struct *conn,
+                                 uint32 old_dos_attr,
+                                 uint32 new_dos_attr,
+                                 mode_t existing_unx_mode,
+                                 mode_t new_unx_mode,
+                                 mode_t *returned_unx_mode)
 {
        uint32 noarch_old_dos_attr, noarch_new_dos_attr;
 
@@ -1462,7 +1665,8 @@ static NTSTATUS fcb_or_dos_open(struct smb_request *req,
                          (unsigned int)fsp->fh->private_options,
                          (unsigned int)fsp->access_mask ));
 
-               if (fsp->fh->fd != -1 &&
+               if (fsp != fsp_to_dup_into &&
+                   fsp->fh->fd != -1 &&
                    fsp->vuid == vuid &&
                    fsp->file_pid == file_pid &&
                    (fsp->fh->private_options & (NTCREATEX_OPTIONS_PRIVATE_DENY_DOS |
@@ -1558,13 +1762,14 @@ static void schedule_async_open(struct timeval request_time,
 static NTSTATUS smbd_calculate_maximum_allowed_access(
        connection_struct *conn,
        const struct smb_filename *smb_fname,
+       bool use_privs,
        uint32_t *p_access_mask)
 {
        struct security_descriptor *sd;
        uint32_t access_granted;
        NTSTATUS status;
 
-       if (get_current_uid(conn) == (uid_t)0) {
+       if (!use_privs && (get_current_uid(conn) == (uid_t)0)) {
                *p_access_mask |= FILE_GENERIC_ALL;
                return NT_STATUS_OK;
        }
@@ -1572,7 +1777,8 @@ static NTSTATUS smbd_calculate_maximum_allowed_access(
        status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
                                    (SECINFO_OWNER |
                                     SECINFO_GROUP |
-                                    SECINFO_DACL),&sd);
+                                    SECINFO_DACL),
+                                   talloc_tos(), &sd);
 
        if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
                /*
@@ -1582,37 +1788,50 @@ static NTSTATUS smbd_calculate_maximum_allowed_access(
                return NT_STATUS_OK;
        }
        if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(10,("smbd_calculate_access_mask: "
-                         "Could not get acl on file %s: %s\n",
+               DEBUG(10,("Could not get acl on file %s: %s\n",
                          smb_fname_str_dbg(smb_fname),
                          nt_errstr(status)));
                return NT_STATUS_ACCESS_DENIED;
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_access_check()
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check()
         * also takes care of owner WRITE_DAC and READ_CONTROL.
         */
-       status = se_access_check(sd,
+       status = se_file_access_check(sd,
                                 get_current_nttok(conn),
+                                use_privs,
                                 (*p_access_mask & ~FILE_READ_ATTRIBUTES),
                                 &access_granted);
 
        TALLOC_FREE(sd);
 
        if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(10, ("smbd_calculate_access_mask: "
-                          "Access denied on file %s: "
+               DEBUG(10, ("Access denied on file %s: "
                           "when calculating maximum access\n",
                           smb_fname_str_dbg(smb_fname)));
                return NT_STATUS_ACCESS_DENIED;
        }
        *p_access_mask = (access_granted | FILE_READ_ATTRIBUTES);
+
+       if (!(access_granted & DELETE_ACCESS)) {
+               if (can_delete_file_in_directory(conn, smb_fname)) {
+                       *p_access_mask |= DELETE_ACCESS;
+               }
+       }
+
        return NT_STATUS_OK;
 }
 
 NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
                                    const struct smb_filename *smb_fname,
+                                   bool use_privs,
                                    uint32_t access_mask,
                                    uint32_t *access_mask_out)
 {
@@ -1630,7 +1849,7 @@ NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
        if (access_mask & MAXIMUM_ALLOWED_ACCESS) {
 
                status = smbd_calculate_maximum_allowed_access(
-                       conn, smb_fname, &access_mask);
+                       conn, smb_fname, use_privs, &access_mask);
 
                if (!NT_STATUS_IS_OK(status)) {
                        return status;
@@ -1660,19 +1879,6 @@ NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
  Remove the deferred open entry under lock.
 ****************************************************************************/
 
-void remove_deferred_open_entry(struct file_id id, uint64_t mid,
-                               struct server_id pid)
-{
-       struct share_mode_lock *lck = get_existing_share_mode_lock(
-               talloc_tos(), id);
-       if (lck == NULL) {
-               DEBUG(0, ("could not get share mode lock\n"));
-               return;
-       }
-       del_deferred_open_entry(lck, mid, pid);
-       TALLOC_FREE(lck);
-}
-
 /****************************************************************************
  Return true if this is a state pointer to an asynchronous create.
 ****************************************************************************/
@@ -1684,6 +1890,106 @@ bool is_deferred_open_async(const void *ptr)
        return state->async_open;
 }
 
+static bool clear_ads(uint32_t create_disposition)
+{
+       bool ret = false;
+
+       switch (create_disposition) {
+       case FILE_SUPERSEDE:
+       case FILE_OVERWRITE_IF:
+       case FILE_OVERWRITE:
+               ret = true;
+               break;
+       default:
+               break;
+       }
+       return ret;
+}
+
+static int disposition_to_open_flags(uint32_t create_disposition)
+{
+       int ret = 0;
+
+       /*
+        * Currently we treat FILE_SUPERSEDE the same as
+        * FILE_OVERWRITE_IF, but they really are
+        * different: FILE_SUPERSEDE deletes an existing file
+        * (requiring delete access) and then recreates it.
+        */
+
+       switch (create_disposition) {
+       case FILE_SUPERSEDE:
+       case FILE_OVERWRITE_IF:
+               /*
+                * If file exists replace/overwrite. If file doesn't
+                * exist create.
+                */
+               ret = O_CREAT|O_TRUNC;
+               break;
+
+       case FILE_OPEN:
+               /*
+                * If file exists open. If file doesn't exist error.
+                */
+               ret = 0;
+               break;
+
+       case FILE_OVERWRITE:
+               /*
+                * If file exists overwrite. If file doesn't exist
+                * error.
+                */
+               ret = O_TRUNC;
+               break;
+
+       case FILE_CREATE:
+               /*
+                * If file exists error. If file doesn't exist create.
+                */
+               ret = O_CREAT|O_EXCL;
+               break;
+
+       case FILE_OPEN_IF:
+               /*
+                * If file exists open. If file doesn't exist create.
+                */
+               ret = O_CREAT;
+               break;
+       }
+       return ret;
+}
+
+static int calculate_open_access_flags(uint32_t access_mask,
+                                      int oplock_request,
+                                      uint32_t private_flags)
+{
+       bool need_write, need_read;
+
+       /*
+        * We never set O_APPEND (FILE_APPEND_DATA only requests write
+        * access): append does not mean the same thing under DOS and Unix.
+        */
+
+       need_write = (access_mask & (FILE_WRITE_DATA | FILE_APPEND_DATA));
+       if (!need_write) {
+               return O_RDONLY;
+       }
+
+       /* DENY_DOS opens are always underlying read-write on the
+          file handle, no matter what the requested access mask
+          says. */
+
+       need_read =
+               ((private_flags & NTCREATEX_OPTIONS_PRIVATE_DENY_DOS) ||
+                access_mask & (FILE_READ_ATTRIBUTES|FILE_READ_DATA|
+                               FILE_READ_EA|FILE_EXECUTE));
+
+       if (!need_read) {
+               return O_WRONLY;
+       }
+       return O_RDWR;
+}
+
 /****************************************************************************
  Open a file with a share mode. Passed in an already created files_struct *.
 ****************************************************************************/
@@ -1708,7 +2014,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        bool def_acl = False;
        bool posix_open = False;
        bool new_file_created = False;
-       bool clear_ads = false;
+       bool first_open_attempt = true;
        NTSTATUS fsp_open = NT_STATUS_ACCESS_DENIED;
        mode_t new_unx_mode = (mode_t)0;
        mode_t unx_mode = (mode_t)0;
@@ -1719,6 +2025,9 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        uint32 open_access_mask = access_mask;
        NTSTATUS status;
        char *parent_dir;
+       SMB_STRUCT_STAT saved_stat = smb_fname->st;
+       struct timespec old_write_time;
+       struct file_id id;
 
        if (conn->printer) {
                /*
@@ -1798,16 +2107,12 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                        if (is_deferred_open_async(ptr)) {
                                SET_STAT_INVALID(smb_fname->st);
                                file_existed = false;
-                       } else {
-                               struct deferred_open_record *state = (struct deferred_open_record *)ptr;
-                               /* Remove the deferred open entry under lock. */
-                               remove_deferred_open_entry(
-                                       state->id, req->mid,
-                                       messaging_server_id(req->sconn->msg_ctx));
                        }
 
                        /* Ensure we don't reprocess this message. */
                        remove_deferred_open_message_smb(req->sconn, req->mid);
+
+                       first_open_attempt = false;
                }
        }
 
@@ -1838,26 +2143,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        switch( create_disposition ) {
-               /*
-                * Currently we're using FILE_SUPERSEDE as the same as
-                * FILE_OVERWRITE_IF but they really are
-                * different. FILE_SUPERSEDE deletes an existing file
-                * (requiring delete access) then recreates it.
-                */
-               case FILE_SUPERSEDE:
-                       /* If file exists replace/overwrite. If file doesn't
-                        * exist create. */
-                       flags2 |= (O_CREAT | O_TRUNC);
-                       clear_ads = true;
-                       break;
-
-               case FILE_OVERWRITE_IF:
-                       /* If file exists replace/overwrite. If file doesn't
-                        * exist create. */
-                       flags2 |= (O_CREAT | O_TRUNC);
-                       clear_ads = true;
-                       break;
-
                case FILE_OPEN:
                        /* If file exists open. If file doesn't exist error. */
                        if (!file_existed) {
@@ -1881,8 +2166,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                                errno = ENOENT;
                                return NT_STATUS_OBJECT_NAME_NOT_FOUND;
                        }
-                       flags2 |= O_TRUNC;
-                       clear_ads = true;
                        break;
 
                case FILE_CREATE:
@@ -1900,24 +2183,24 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                                }
                                return map_nt_error_from_unix(errno);
                        }
-                       flags2 |= (O_CREAT|O_EXCL);
                        break;
 
+               case FILE_SUPERSEDE:
+               case FILE_OVERWRITE_IF:
                case FILE_OPEN_IF:
-                       /* If file exists open. If file doesn't exist
-                        * create. */
-                       flags2 |= O_CREAT;
                        break;
-
                default:
                        return NT_STATUS_INVALID_PARAMETER;
        }
 
+       flags2 = disposition_to_open_flags(create_disposition);
+
        /* We only care about matching attributes on file exists and
         * overwrite. */
 
-       if (!posix_open && file_existed && ((create_disposition == FILE_OVERWRITE) ||
-                            (create_disposition == FILE_OVERWRITE_IF))) {
+       if (!posix_open && file_existed &&
+           ((create_disposition == FILE_OVERWRITE) ||
+            (create_disposition == FILE_OVERWRITE_IF))) {
                if (!open_match_attributes(conn, existing_dos_attributes,
                                           new_dos_attributes,
                                           smb_fname->st.st_ex_mode,
@@ -1935,6 +2218,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        status = smbd_calculate_access_mask(conn, smb_fname,
+                                       false,
                                        access_mask,
                                        &access_mask); 
        if (!NT_STATUS_IS_OK(status)) {
@@ -1946,7 +2230,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
        open_access_mask = access_mask;
 
-       if ((flags2 & O_TRUNC) || (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE)) {
+       if (flags2 & O_TRUNC) {
                open_access_mask |= FILE_WRITE_DATA; /* This will cause oplock breaks. */
        }
 
@@ -1959,20 +2243,8 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         * mean the same thing under DOS and Unix.
         */
 
-       if ((access_mask & (FILE_WRITE_DATA | FILE_APPEND_DATA)) ||
-                       (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE)) {
-               /* DENY_DOS opens are always underlying read-write on the
-                  file handle, no matter what the requested access mask
-                   says. */
-               if ((private_flags & NTCREATEX_OPTIONS_PRIVATE_DENY_DOS) ||
-                       access_mask & (FILE_READ_ATTRIBUTES|FILE_READ_DATA|FILE_READ_EA|FILE_EXECUTE)) {
-                       flags = O_RDWR;
-               } else {
-                       flags = O_WRONLY;
-               }
-       } else {
-               flags = O_RDONLY;
-       }
+       flags = calculate_open_access_flags(access_mask, oplock_request,
+                                           private_flags);
 
        /*
         * Currently we only look at FILE_WRITE_THROUGH for create options.
@@ -1994,7 +2266,25 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                 * O_CREAT or O_TRUNC are set, but for compatibility with
                 * older versions of Samba we just AND them out.
                 */
-               flags2 &= ~(O_CREAT|O_TRUNC);
+               flags2 &= ~(O_CREAT|O_TRUNC);
+       }
+
+       if (first_open_attempt && lp_kernel_oplocks(SNUM(conn))) {
+               /*
+                * With kernel oplocks the open breaking an oplock
+                * blocks until the oplock holder has given up the
+                * oplock or closed the file. We prevent this by first
+                * trying to open the file with O_NONBLOCK (see "man
+                * fcntl" on Linux). For the second try, triggered by
+                * an oplock break response, we do not need this
+                * anymore.
+                *
+                * This is true under the assumption that only Samba
+                * requests kernel oplocks. Once someone else like
+                * NFSv4 starts to use that API, we will have to
+                * modify this by communicating with the NFSv4 server.
+                */
+               flags2 |= O_NONBLOCK;
        }
 
        /*
@@ -2026,207 +2316,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                request_time = fsp->open_time;
        }
 
-       if (file_existed) {
-               struct share_mode_entry *batch_entry = NULL;
-               struct share_mode_entry *exclusive_entry = NULL;
-               bool got_level2_oplock = false;
-               bool got_a_none_oplock = false;
-               struct file_id id;
-
-               struct timespec old_write_time = smb_fname->st.st_ex_mtime;
-               id = vfs_file_id_from_sbuf(conn, &smb_fname->st);
-
-               lck = get_share_mode_lock(talloc_tos(), id,
-                                         conn->connectpath,
-                                         smb_fname, &old_write_time);
-               if (lck == NULL) {
-                       DEBUG(0, ("Could not get share mode lock\n"));
-                       return NT_STATUS_SHARING_VIOLATION;
-               }
-
-               /* Get the types we need to examine. */
-               find_oplock_types(fsp,
-                               oplock_request,
-                               lck,
-                               &batch_entry,
-                               &exclusive_entry,
-                               &got_level2_oplock,
-                               &got_a_none_oplock);
-
-               /* First pass - send break only on batch oplocks. */
-               if ((req != NULL) &&
-                               delay_for_batch_oplocks(fsp,
-                                       req->mid,
-                                       oplock_request,
-                                       batch_entry)) {
-                       schedule_defer_open(lck, request_time, req);
-                       TALLOC_FREE(lck);
-                       return NT_STATUS_SHARING_VIOLATION;
-               }
-
-               /* Use the client requested access mask here, not the one we
-                * open with. */
-               status = open_mode_check(conn, lck, fsp->name_hash,
-                                       access_mask, share_access,
-                                        create_options, &file_existed);
-
-               if (NT_STATUS_IS_OK(status)) {
-                       /* We might be going to allow this open. Check oplock
-                        * status again. */
-                       /* Second pass - send break for both batch or
-                        * exclusive oplocks. */
-                       if ((req != NULL) &&
-                                       delay_for_exclusive_oplocks(
-                                               fsp,
-                                               req->mid,
-                                               oplock_request,
-                                               exclusive_entry)) {
-                               schedule_defer_open(lck, request_time, req);
-                               TALLOC_FREE(lck);
-                               return NT_STATUS_SHARING_VIOLATION;
-                       }
-               }
-
-               if (NT_STATUS_EQUAL(status, NT_STATUS_DELETE_PENDING)) {
-                       /* DELETE_PENDING is not deferred for a second */
-                       TALLOC_FREE(lck);
-                       return status;
-               }
-
-               grant_fsp_oplock_type(fsp,
-                                oplock_request,
-                                got_level2_oplock,
-                                got_a_none_oplock);
-
-               if (!NT_STATUS_IS_OK(status)) {
-                       uint32 can_access_mask;
-                       bool can_access = True;
-
-                       SMB_ASSERT(NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION));
-
-                       /* Check if this can be done with the deny_dos and fcb
-                        * calls. */
-                       if (private_flags &
-                           (NTCREATEX_OPTIONS_PRIVATE_DENY_DOS|
-                            NTCREATEX_OPTIONS_PRIVATE_DENY_FCB)) {
-                               if (req == NULL) {
-                                       DEBUG(0, ("DOS open without an SMB "
-                                                 "request!\n"));
-                                       TALLOC_FREE(lck);
-                                       return NT_STATUS_INTERNAL_ERROR;
-                               }
-
-                               /* Use the client requested access mask here,
-                                * not the one we open with. */
-                               status = fcb_or_dos_open(req,
-                                                       conn,
-                                                       fsp,
-                                                       smb_fname,
-                                                       id,
-                                                       req->smbpid,
-                                                       req->vuid,
-                                                       access_mask,
-                                                       share_access,
-                                                       create_options);
-
-                               if (NT_STATUS_IS_OK(status)) {
-                                       TALLOC_FREE(lck);
-                                       if (pinfo) {
-                                               *pinfo = FILE_WAS_OPENED;
-                                       }
-                                       return NT_STATUS_OK;
-                               }
-                       }
-
-                       /*
-                        * This next line is a subtlety we need for
-                        * MS-Access. If a file open will fail due to share
-                        * permissions and also for security (access) reasons,
-                        * we need to return the access failed error, not the
-                        * share error. We can't open the file due to kernel
-                        * oplock deadlock (it's possible we failed above on
-                        * the open_mode_check()) so use a userspace check.
-                        */
-
-                       if (flags & O_RDWR) {
-                               can_access_mask = FILE_READ_DATA|FILE_WRITE_DATA;
-                       } else if (flags & O_WRONLY) {
-                               can_access_mask = FILE_WRITE_DATA;
-                       } else {
-                               can_access_mask = FILE_READ_DATA;
-                       }
-
-                       if (((can_access_mask & FILE_WRITE_DATA) &&
-                               !CAN_WRITE(conn)) ||
-                               !NT_STATUS_IS_OK(smbd_check_access_rights(conn,
-                                               smb_fname, can_access_mask))) {
-                               can_access = False;
-                       }
-
-                       /*
-                        * If we're returning a share violation, ensure we
-                        * cope with the braindead 1 second delay.
-                        */
-
-                       if (!(oplock_request & INTERNAL_OPEN_ONLY) &&
-                           lp_defer_sharing_violations()) {
-                               struct timeval timeout;
-                               struct deferred_open_record state;
-                               int timeout_usecs;
-
-                               /* this is a hack to speed up torture tests
-                                  in 'make test' */
-                               timeout_usecs = lp_parm_int(SNUM(conn),
-                                                           "smbd","sharedelay",
-                                                           SHARING_VIOLATION_USEC_WAIT);
-
-                               /* This is a relative time, added to the absolute
-                                  request_time value to get the absolute timeout time.
-                                  Note that if this is the second or greater time we enter
-                                  this codepath for this particular request mid then
-                                  request_time is left as the absolute time of the *first*
-                                  time this request mid was processed. This is what allows
-                                  the request to eventually time out. */
-
-                               timeout = timeval_set(0, timeout_usecs);
-
-                               /* Nothing actually uses state.delayed_for_oplocks
-                                  but it's handy to differentiate in debug messages
-                                  between a 30 second delay due to oplock break, and
-                                  a 1 second delay for share mode conflicts. */
-
-                               state.delayed_for_oplocks = False;
-                               state.async_open = false;
-                               state.id = id;
-
-                               if ((req != NULL)
-                                   && !request_timed_out(request_time,
-                                                         timeout)) {
-                                       defer_open(lck, request_time, timeout,
-                                                  req, &state);
-                               }
-                       }
-
-                       TALLOC_FREE(lck);
-                       if (can_access) {
-                               /*
-                                * We have detected a sharing violation here
-                                * so return the correct error code
-                                */
-                               status = NT_STATUS_SHARING_VIOLATION;
-                       } else {
-                               status = NT_STATUS_ACCESS_DENIED;
-                       }
-                       return status;
-               }
-
-               /*
-                * We exit this block with the share entry *locked*.....
-                */
-       }
-
-       SMB_ASSERT(!file_existed || (lck != NULL));
-
        /*
         * Ensure we pay attention to default ACLs on directories if required.
         */
@@ -2242,139 +2331,300 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                 (unsigned int)unx_mode, (unsigned int)access_mask,
                 (unsigned int)open_access_mask));
 
-       /*
-        * open_file strips any O_TRUNC flags itself.
-        */
-
        fsp_open = open_file(fsp, conn, req, parent_dir,
                             flags|flags2, unx_mode, access_mask,
-                            open_access_mask);
+                            open_access_mask, &new_file_created);
 
-       if (!NT_STATUS_IS_OK(fsp_open)) {
-               if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_RETRY)) {
-                       schedule_async_open(request_time, req);
-               }
-               TALLOC_FREE(lck);
-               return fsp_open;
-       }
+       if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_NETWORK_BUSY)) {
+               struct deferred_open_record state;
 
-       if (!file_existed) {
-               struct share_mode_entry *batch_entry = NULL;
-               struct share_mode_entry *exclusive_entry = NULL;
-               bool got_level2_oplock = false;
-               bool got_a_none_oplock = false;
-               struct timespec old_write_time = smb_fname->st.st_ex_mtime;
-               struct file_id id;
                /*
-                * Deal with the race condition where two smbd's detect the
-                * file doesn't exist and do the create at the same time. One
-                * of them will win and set a share mode, the other (ie. this
-                * one) should check if the requested share mode for this
-                * create is allowed.
+                * EWOULDBLOCK/EAGAIN maps to NETWORK_BUSY.
                 */
+               if (file_existed && S_ISFIFO(fsp->fsp_name->st.st_ex_mode)) {
+                       DEBUG(10, ("FIFO busy\n"));
+                       return NT_STATUS_NETWORK_BUSY;
+               }
+               if (req == NULL) {
+                       DEBUG(10, ("Internal open busy\n"));
+                       return NT_STATUS_NETWORK_BUSY;
+               }
 
                /*
-                * Now the file exists and fsp is successfully opened,
-                * fsp->dev and fsp->inode are valid and should replace the
-                * dev=0,inode=0 from a non existent file. Spotted by
-                * Nadav Danieli <nadavd@exanet.com>. JRA.
+                * From here on we assume this was triggered by an oplock break.
                 */
 
-               id = fsp->file_id;
-
-               lck = get_share_mode_lock(talloc_tos(), id,
-                                         conn->connectpath,
-                                         smb_fname, &old_write_time);
-
+               lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id);
                if (lck == NULL) {
-                       DEBUG(0, ("open_file_ntcreate: Could not get share "
-                                 "mode lock for %s\n",
-                                 smb_fname_str_dbg(smb_fname)));
-                       fd_close(fsp);
+                       state.delayed_for_oplocks = false;
+                       state.async_open = false;
+                       state.id = fsp->file_id;
+                       defer_open(NULL, request_time, timeval_set(0, 0),
+                                  req, &state);
+                       DEBUG(10, ("No share mode lock found after "
+                                  "EWOULDBLOCK, retrying sync\n"));
                        return NT_STATUS_SHARING_VIOLATION;
                }
 
-               /* Get the types we need to examine. */
-               find_oplock_types(fsp,
-                               oplock_request,
-                               lck,
-                               &batch_entry,
-                               &exclusive_entry,
-                               &got_level2_oplock,
-                               &got_a_none_oplock);
-
-               /* First pass - send break only on batch oplocks. */
-               if ((req != NULL) &&
-                               delay_for_batch_oplocks(fsp,
-                                       req->mid,
-                                       oplock_request,
-                                       batch_entry)) {
+               if (!validate_oplock_types(lck)) {
+                       smb_panic("validate_oplock_types failed");
+               }
+
+               if (delay_for_oplock(fsp, 0, lck, false, create_disposition)) {
                        schedule_defer_open(lck, request_time, req);
                        TALLOC_FREE(lck);
-                       fd_close(fsp);
+                       DEBUG(10, ("Sent oplock break request to kernel "
+                                  "oplock holder\n"));
                        return NT_STATUS_SHARING_VIOLATION;
                }
 
-               status = open_mode_check(conn, lck, fsp->name_hash,
-                                       access_mask, share_access,
-                                        create_options, &file_existed);
+               /*
+                * No oplock from Samba around. Immediately retry with
+                * a blocking open.
+                */
+               state.delayed_for_oplocks = false;
+               state.async_open = false;
+               state.id = lck->data->id;
+               defer_open(lck, request_time, timeval_set(0, 0), req, &state);
+               TALLOC_FREE(lck);
+               DEBUG(10, ("No Samba oplock around after EWOULDBLOCK. "
+                          "Retrying sync\n"));
+               return NT_STATUS_SHARING_VIOLATION;
+       }
+
+       if (!NT_STATUS_IS_OK(fsp_open)) {
+               if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_RETRY)) {
+                       schedule_async_open(request_time, req);
+               }
+               return fsp_open;
+       }
 
-               if (NT_STATUS_IS_OK(status)) {
-                       /* We might be going to allow this open. Check oplock
-                        * status again. */
-                       /* Second pass - send break for both batch or
-                        * exclusive oplocks. */
-                       if ((req != NULL) &&
-                                       delay_for_exclusive_oplocks(
-                                               fsp,
-                                               req->mid,
-                                               oplock_request,
-                                               exclusive_entry)) {
-                               schedule_defer_open(lck, request_time, req);
+       if (file_existed && !check_same_dev_ino(&saved_stat, &smb_fname->st)) {
+               /*
+                * The file did exist, but some other (local or NFS)
+                * process either renamed/unlinked and re-created the
+                * file with different dev/ino after we walked the path,
+                * but before we did the open. We could retry the
+                * open but it's a rare enough case it's easier to
+                * just fail the open to prevent creating any problems
+                * in the open file db having the wrong dev/ino key.
+                */
+               fd_close(fsp);
+               DEBUG(1,("open_file_ntcreate: file %s - dev/ino mismatch. "
+                       "Old (dev=0x%llu, ino =0x%llu). "
+                       "New (dev=0x%llu, ino=0x%llu). Failing open "
+                       " with NT_STATUS_ACCESS_DENIED.\n",
+                        smb_fname_str_dbg(smb_fname),
+                        (unsigned long long)saved_stat.st_ex_dev,
+                        (unsigned long long)saved_stat.st_ex_ino,
+                        (unsigned long long)smb_fname->st.st_ex_dev,
+                        (unsigned long long)smb_fname->st.st_ex_ino));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       old_write_time = smb_fname->st.st_ex_mtime;
+
+       /*
+        * Deal with the race condition where two smbd's detect the
+        * file doesn't exist and do the create at the same time. One
+        * of them will win and set a share mode, the other (ie. this
+        * one) should check if the requested share mode for this
+        * create is allowed.
+        */
+
+       /*
+        * Now the file exists and fsp is successfully opened,
+        * fsp->dev and fsp->inode are valid and should replace the
+        * dev=0,inode=0 from a non existent file. Spotted by
+        * Nadav Danieli <nadavd@exanet.com>. JRA.
+        */
+
+       id = fsp->file_id;
+
+       lck = get_share_mode_lock(talloc_tos(), id,
+                                 conn->connectpath,
+                                 smb_fname, &old_write_time);
+
+       if (lck == NULL) {
+               DEBUG(0, ("open_file_ntcreate: Could not get share "
+                         "mode lock for %s\n",
+                         smb_fname_str_dbg(smb_fname)));
+               fd_close(fsp);
+               return NT_STATUS_SHARING_VIOLATION;
+       }
+
+       /* Get the types we need to examine. */
+       if (!validate_oplock_types(lck)) {
+               smb_panic("validate_oplock_types failed");
+       }
+
+       if (has_delete_on_close(lck, fsp->name_hash)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               return NT_STATUS_DELETE_PENDING;
+       }
+
+       status = open_mode_check(conn, lck,
+                                access_mask, share_access);
+
+       if (NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION) ||
+           (lck->data->num_share_modes > 0)) {
+               /*
+                * This comes from ancient times out of open_mode_check. I
+                * have no clue whether this is still necessary. I can't think
+                * of a case where this would actually matter further down in
+                * this function. I leave it here for further investigation
+                * :-)
+                */
+               file_existed = true;
+       }
+
+       if ((req != NULL) &&
+           delay_for_oplock(
+                   fsp, oplock_request, lck,
+                   NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION),
+                   create_disposition)) {
+               schedule_defer_open(lck, request_time, req);
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               return NT_STATUS_SHARING_VIOLATION;
+       }
+
+       if (!NT_STATUS_IS_OK(status)) {
+               uint32 can_access_mask;
+               bool can_access = True;
+
+               SMB_ASSERT(NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION));
+
+               /* Check if this can be done with the deny_dos and fcb
+                * calls. */
+               if (private_flags &
+                   (NTCREATEX_OPTIONS_PRIVATE_DENY_DOS|
+                    NTCREATEX_OPTIONS_PRIVATE_DENY_FCB)) {
+                       if (req == NULL) {
+                               DEBUG(0, ("DOS open without an SMB "
+                                         "request!\n"));
                                TALLOC_FREE(lck);
                                fd_close(fsp);
-                               return NT_STATUS_SHARING_VIOLATION;
+                               return NT_STATUS_INTERNAL_ERROR;
+                       }
+
+                       /* Use the client requested access mask here,
+                        * not the one we open with. */
+                       status = fcb_or_dos_open(req,
+                                                conn,
+                                                fsp,
+                                                smb_fname,
+                                                id,
+                                                req->smbpid,
+                                                req->vuid,
+                                                access_mask,
+                                                share_access,
+                                                create_options);
+
+                       if (NT_STATUS_IS_OK(status)) {
+                               TALLOC_FREE(lck);
+                               if (pinfo) {
+                                       *pinfo = FILE_WAS_OPENED;
+                               }
+                               return NT_STATUS_OK;
                        }
                }
 
-               if (!NT_STATUS_IS_OK(status)) {
+               /*
+                * This next line is a subtlety we need for
+                * MS-Access. If a file open will fail due to share
+                * permissions and also for security (access) reasons,
+                * we need to return the access failed error, not the
+                * share error. We can't open the file due to kernel
+                * oplock deadlock (it's possible we failed above on
+                * the open_mode_check()) so use a userspace check.
+                */
+
+               if (flags & O_RDWR) {
+                       can_access_mask = FILE_READ_DATA|FILE_WRITE_DATA;
+               } else if (flags & O_WRONLY) {
+                       can_access_mask = FILE_WRITE_DATA;
+               } else {
+                       can_access_mask = FILE_READ_DATA;
+               }
+
+               if (((can_access_mask & FILE_WRITE_DATA) &&
+                    !CAN_WRITE(conn)) ||
+                   !NT_STATUS_IS_OK(smbd_check_access_rights(conn,
+                                                             smb_fname,
+                                                             false,
+                                                             can_access_mask))) {
+                       can_access = False;
+               }
+
+               /*
+                * If we're returning a share violation, ensure we
+                * cope with the braindead 1 second delay (SMB1 only).
+                */
+
+               if (!(oplock_request & INTERNAL_OPEN_ONLY) &&
+                   !conn->sconn->using_smb2 &&
+                   lp_defer_sharing_violations()) {
+                       struct timeval timeout;
                        struct deferred_open_record state;
+                       int timeout_usecs;
+
+                       /* this is a hack to speed up torture tests
+                          in 'make test' */
+                       timeout_usecs = lp_parm_int(SNUM(conn),
+                                                   "smbd","sharedelay",
+                                                   SHARING_VIOLATION_USEC_WAIT);
+
+                       /* This is a relative time, added to the absolute
+                          request_time value to get the absolute timeout time.
+                          Note that if this is the second or greater time we enter
+                          this codepath for this particular request mid then
+                          request_time is left as the absolute time of the *first*
+                          time this request mid was processed. This is what allows
+                          the request to eventually time out. */
+
+                       timeout = timeval_set(0, timeout_usecs);
+
+                       /* Nothing actually uses state.delayed_for_oplocks
+                          but it's handy to differentiate in debug messages
+                          between a 30 second delay due to oplock break, and
+                          a 1 second delay for share mode conflicts. */
 
                        state.delayed_for_oplocks = False;
                        state.async_open = false;
                        state.id = id;
 
-                       /* Do it all over again immediately. In the second
-                        * round we will find that the file existed and handle
-                        * the DELETE_PENDING and FCB cases correctly. No need
-                        * to duplicate the code here. Essentially this is a
-                        * "goto top of this function", but don't tell
-                        * anybody... */
-
-                       if (req != NULL) {
-                               defer_open(lck, request_time, timeval_zero(),
+                       if ((req != NULL)
+                           && !request_timed_out(request_time,
+                                                 timeout)) {
+                               defer_open(lck, request_time, timeout,
                                           req, &state);
                        }
-                       TALLOC_FREE(lck);
-                       fd_close(fsp);
-                       return status;
                }
 
-               grant_fsp_oplock_type(fsp,
-                                oplock_request,
-                                got_level2_oplock,
-                                got_a_none_oplock);
-
-               /*
-                * We exit this block with the share entry *locked*.....
-                */
-
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               if (can_access) {
+                       /*
+                        * We have detected a sharing violation here
+                        * so return the correct error code
+                        */
+                       status = NT_STATUS_SHARING_VIOLATION;
+               } else {
+                       status = NT_STATUS_ACCESS_DENIED;
+               }
+               return status;
        }
 
-       SMB_ASSERT(lck != NULL);
+       grant_fsp_oplock_type(fsp, lck, oplock_request);
+
+       /*
+        * We have the share entry *locked*.....
+        */
 
        /* Delete streams if create_disposition requires it */
-       if (file_existed && clear_ads &&
+       if (!new_file_created && clear_ads(create_disposition) &&
            !is_ntfs_stream_smb_fname(smb_fname)) {
                status = delete_all_streams(conn, smb_fname->base_name);
                if (!NT_STATUS_IS_OK(status)) {
@@ -2392,7 +2642,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
            the kernel refuses the operations then the kernel is wrong.
           note that GPFS supports it as well - jmcd */
 
-       if (fsp->fh->fd != -1) {
+       if (fsp->fh->fd != -1 && lp_kernel_share_modes(SNUM(conn))) {
                int ret_flock;
                ret_flock = SMB_VFS_KERNEL_FLOCK(fsp, share_access, access_mask);
                if(ret_flock == -1 ){
@@ -2405,29 +2655,11 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        /*
-        * At this point onwards, we can guarentee that the share entry
+        * At this point onwards, we can guarantee that the share entry
         * is locked, whether we created the file or not, and that the
         * deny mode is compatible with all current opens.
         */
 
-       /*
-        * If requested, truncate the file.
-        */
-
-       if (file_existed && (flags2&O_TRUNC)) {
-               /*
-                * We are modifying the file after open - update the stat
-                * struct..
-                */
-               if ((SMB_VFS_FTRUNCATE(fsp, 0) == -1) ||
-                   (SMB_VFS_FSTAT(fsp, &smb_fname->st)==-1)) {
-                       status = map_nt_error_from_unix(errno);
-                       TALLOC_FREE(lck);
-                       fd_close(fsp);
-                       return status;
-               }
-       }
-
        /*
         * According to Samba4, SEC_FILE_READ_ATTRIBUTE is always granted,
         * but we don't have to store this - just ignore it on access check.
@@ -2450,14 +2682,16 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                if (is_stat_open(open_access_mask)) {
                        fsp->oplock_type = NO_OPLOCK;
                }
+       }
 
+       if (new_file_created) {
+               info = FILE_WAS_CREATED;
+       } else {
                if (flags2 & O_TRUNC) {
                        info = FILE_WAS_OVERWRITTEN;
                } else {
                        info = FILE_WAS_OPENED;
                }
-       } else {
-               info = FILE_WAS_CREATED;
        }
 
        if (pinfo) {
@@ -2469,18 +2703,21 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         * file structs.
         */
 
-       status = set_file_oplock(fsp, fsp->oplock_type);
+       status = set_file_oplock(fsp);
        if (!NT_STATUS_IS_OK(status)) {
                /*
-                * Could not get the kernel oplock or there are byte-range
-                * locks on the file.
+                * Could not get the kernel oplock
                 */
                fsp->oplock_type = NO_OPLOCK;
        }
 
-       set_share_mode(lck, fsp, get_current_uid(conn),
-                       req ? req->mid : 0,
-                      fsp->oplock_type);
+       if (!set_share_mode(lck, fsp, get_current_uid(conn),
+                           req ? req->mid : 0,
+                           fsp->oplock_type)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               return NT_STATUS_NO_MEMORY;
+       }
 
        /* Handle strange delete on close create semantics. */
        if (create_options & FILE_DELETE_ON_CLOSE) {
@@ -2499,13 +2736,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                fsp->initial_delete_on_close = True;
        }
 
-       if (info == FILE_WAS_OVERWRITTEN
-           || info == FILE_WAS_CREATED
-           || info == FILE_WAS_SUPERSEDED) {
-               new_file_created = True;
-       }
-
-       if (new_file_created) {
+       if (info != FILE_WAS_OPENED) {
                /* Files should be initially set as archive */
                if (lp_map_archive(SNUM(conn)) ||
                    lp_store_dos_attributes(SNUM(conn))) {
@@ -2533,7 +2764,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
         * selected.
         */
 
-       if (!posix_open && !file_existed && !def_acl) {
+       if (!posix_open && new_file_created && !def_acl) {
 
                int saved_errno = errno; /* We might get ENOSYS in the next
                                          * call.. */
@@ -2573,12 +2804,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                                  (unsigned int)new_unx_mode));
        }
 
-       /* If this is a successful open, we must remove any deferred open
-        * records. */
-       if (req != NULL) {
-               del_deferred_open_entry(lck, req->mid,
-                                       messaging_server_id(req->sconn->msg_ctx));
-       }
        TALLOC_FREE(lck);
 
        return NT_STATUS_OK;
@@ -2628,9 +2853,9 @@ static NTSTATUS mkdir_internal(connection_struct *conn,
        bool need_re_stat = false;
        uint32_t access_mask = SEC_DIR_ADD_SUBDIR;
 
-       if(access_mask & ~(conn->share_access)) {
+       if (!CAN_WRITE(conn) || (access_mask & ~(conn->share_access))) {
                DEBUG(5,("mkdir_internal: failing share access "
-                        "%s\n", lp_servicename(SNUM(conn))));
+                        "%s\n", lp_servicename(talloc_tos(), SNUM(conn))));
                return NT_STATUS_ACCESS_DENIED;
        }
 
@@ -2772,7 +2997,7 @@ static NTSTATUS open_directory(connection_struct *conn,
                 (unsigned int)create_disposition,
                 (unsigned int)file_attributes));
 
-       status = smbd_calculate_access_mask(conn, smb_dname,
+       status = smbd_calculate_access_mask(conn, smb_dname, false,
                                            access_mask, &access_mask);
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(10, ("open_directory: smbd_calculate_access_mask "
@@ -2878,7 +3103,10 @@ static NTSTATUS open_directory(connection_struct *conn,
        }
 
        if (info == FILE_WAS_OPENED) {
-               status = smbd_check_access_rights(conn, smb_dname, access_mask);
+               status = smbd_check_access_rights(conn,
+                                               smb_dname,
+                                               false,
+                                               access_mask);
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(10, ("open_directory: smbd_check_access_rights on "
                                "file %s failed with %s\n",
@@ -2968,9 +3196,15 @@ static NTSTATUS open_directory(connection_struct *conn,
                return NT_STATUS_SHARING_VIOLATION;
        }
 
-       status = open_mode_check(conn, lck, fsp->name_hash,
-                               access_mask, share_access,
-                                create_options, &dir_existed);
+       if (has_delete_on_close(lck, fsp->name_hash)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               file_free(req, fsp);
+               return NT_STATUS_DELETE_PENDING;
+       }
+
+       status = open_mode_check(conn, lck,
+                                access_mask, share_access);
 
        if (!NT_STATUS_IS_OK(status)) {
                TALLOC_FREE(lck);
@@ -2979,14 +3213,20 @@ static NTSTATUS open_directory(connection_struct *conn,
                return status;
        }
 
-       set_share_mode(lck, fsp, get_current_uid(conn),
-                       req ? req->mid : 0, NO_OPLOCK);
+       if (!set_share_mode(lck, fsp, get_current_uid(conn),
+                           req ? req->mid : 0, NO_OPLOCK)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               file_free(req, fsp);
+               return NT_STATUS_NO_MEMORY;
+       }
 
        /* For directories the delete on close bit at open time seems
           always to be honored on close... See test 19 in Samba4 BASE-DELETE. */
        if (create_options & FILE_DELETE_ON_CLOSE) {
                status = can_set_delete_on_close(fsp, 0);
                if (!NT_STATUS_IS_OK(status) && !NT_STATUS_EQUAL(status, NT_STATUS_DIRECTORY_NOT_EMPTY)) {
+                       del_share_mode(lck, fsp);
                        TALLOC_FREE(lck);
                        fd_close(fsp);
                        file_free(req, fsp);
@@ -3085,9 +3325,9 @@ void msg_file_was_renamed(struct messaging_context *msg,
                stream_name = NULL;
        }
 
-       status = create_synthetic_smb_fname(talloc_tos(), base_name,
-                                           stream_name, NULL, &smb_fname);
-       if (!NT_STATUS_IS_OK(status)) {
+       smb_fname = synthetic_smb_fname(talloc_tos(), base_name,
+                                       stream_name, NULL);
+       if (smb_fname == NULL) {
                return;
        }
 
@@ -3174,17 +3414,17 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
        }
 
        for (i=0; i<num_streams; i++) {
-               struct smb_filename *smb_fname = NULL;
+               struct smb_filename *smb_fname;
 
                if (strequal(stream_info[i].name, "::$DATA")) {
                        streams[i] = NULL;
                        continue;
                }
 
-               status = create_synthetic_smb_fname(talloc_tos(), fname,
-                                                   stream_info[i].name,
-                                                   NULL, &smb_fname);
-               if (!NT_STATUS_IS_OK(status)) {
+               smb_fname = synthetic_smb_fname(
+                       talloc_tos(), fname, stream_info[i].name, NULL);
+               if (smb_fname == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
 
@@ -3251,27 +3491,37 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
 
 static NTSTATUS inherit_new_acl(files_struct *fsp)
 {
-       TALLOC_CTX *ctx = talloc_tos();
+       TALLOC_CTX *frame = talloc_stackframe();
        char *parent_name = NULL;
        struct security_descriptor *parent_desc = NULL;
        NTSTATUS status = NT_STATUS_OK;
        struct security_descriptor *psd = NULL;
-       struct dom_sid *owner_sid = NULL;
-       struct dom_sid *group_sid = NULL;
+       const struct dom_sid *owner_sid = NULL;
+       const struct dom_sid *group_sid = NULL;
        uint32_t security_info_sent = (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL);
+       struct security_token *token = fsp->conn->session_info->security_token;
        bool inherit_owner = lp_inherit_owner(SNUM(fsp->conn));
        bool inheritable_components = false;
+       bool try_builtin_administrators = false;
+       const struct dom_sid *BA_U_sid = NULL;
+       const struct dom_sid *BA_G_sid = NULL;
+       bool try_system = false;
+       const struct dom_sid *SY_U_sid = NULL;
+       const struct dom_sid *SY_G_sid = NULL;
        size_t size = 0;
 
-       if (!parent_dirname(ctx, fsp->fsp_name->base_name, &parent_name, NULL)) {
+       if (!parent_dirname(frame, fsp->fsp_name->base_name, &parent_name, NULL)) {
+               TALLOC_FREE(frame);
                return NT_STATUS_NO_MEMORY;
        }
 
        status = SMB_VFS_GET_NT_ACL(fsp->conn,
-                               parent_name,
-                               (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL),
-                               &parent_desc);
+                                   parent_name,
+                                   (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL),
+                                   frame,
+                                   &parent_desc);
        if (!NT_STATUS_IS_OK(status)) {
+               TALLOC_FREE(frame);
                return status;
        }
 
@@ -3279,6 +3529,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
                                        fsp->is_directory);
 
        if (!inheritable_components && !inherit_owner) {
+               TALLOC_FREE(frame);
                /* Nothing to inherit and not setting owner. */
                return NT_STATUS_OK;
        }
@@ -3298,13 +3549,99 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        }
 
        if (owner_sid == NULL) {
-               owner_sid = &fsp->conn->session_info->security_token->sids[PRIMARY_USER_SID_INDEX];
+               if (security_token_has_builtin_administrators(token)) {
+                       try_builtin_administrators = true;
+               } else if (security_token_is_system(token)) {
+                       try_builtin_administrators = true;
+                       try_system = true;
+               }
+       }
+
+       if (group_sid == NULL &&
+           token->num_sids == PRIMARY_GROUP_SID_INDEX)
+       {
+               if (security_token_is_system(token)) {
+                       try_builtin_administrators = true;
+                       try_system = true;
+               }
+       }
+
+       if (try_builtin_administrators) {
+               struct unixid ids;
+               bool ok;
+
+               ZERO_STRUCT(ids);
+               ok = sids_to_unixids(&global_sid_Builtin_Administrators, 1, &ids);
+               if (ok) {
+                       switch (ids.type) {
+                       case ID_TYPE_BOTH:
+                               BA_U_sid = &global_sid_Builtin_Administrators;
+                               BA_G_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       case ID_TYPE_UID:
+                               BA_U_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       case ID_TYPE_GID:
+                               BA_G_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
+       if (try_system) {
+               struct unixid ids;
+               bool ok;
+
+               ZERO_STRUCT(ids);
+               ok = sids_to_unixids(&global_sid_System, 1, &ids);
+               if (ok) {
+                       switch (ids.type) {
+                       case ID_TYPE_BOTH:
+                               SY_U_sid = &global_sid_System;
+                               SY_G_sid = &global_sid_System;
+                               break;
+                       case ID_TYPE_UID:
+                               SY_U_sid = &global_sid_System;
+                               break;
+                       case ID_TYPE_GID:
+                               SY_G_sid = &global_sid_System;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = BA_U_sid;
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = SY_U_sid;
+       }
+
+       if (group_sid == NULL) {
+               group_sid = SY_G_sid;
+       }
+
+       if (try_system && group_sid == NULL) {
+               group_sid = BA_G_sid;
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = &token->sids[PRIMARY_USER_SID_INDEX];
        }
        if (group_sid == NULL) {
-               group_sid = &fsp->conn->session_info->security_token->sids[PRIMARY_GROUP_SID_INDEX];
+               if (token->num_sids == PRIMARY_GROUP_SID_INDEX) {
+                       group_sid = &token->sids[PRIMARY_USER_SID_INDEX];
+               } else {
+                       group_sid = &token->sids[PRIMARY_GROUP_SID_INDEX];
+               }
        }
 
-       status = se_create_child_secdesc(ctx,
+       status = se_create_child_secdesc(frame,
                        &psd,
                        &size,
                        parent_desc,
@@ -3312,6 +3649,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
                        group_sid,
                        fsp->is_directory);
        if (!NT_STATUS_IS_OK(status)) {
+               TALLOC_FREE(frame);
                return status;
        }
 
@@ -3342,6 +3680,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        if (inherit_owner) {
                unbecome_root();
        }
+       TALLOC_FREE(frame);
        return status;
 }
 
@@ -3445,11 +3784,11 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                }
 
                /* Create an smb_filename with stream_name == NULL. */
-               status = create_synthetic_smb_fname(talloc_tos(),
-                                                   smb_fname->base_name,
-                                                   NULL, NULL,
-                                                   &smb_fname_base);
-               if (!NT_STATUS_IS_OK(status)) {
+               smb_fname_base = synthetic_smb_fname(talloc_tos(),
+                                                    smb_fname->base_name,
+                                                    NULL, NULL);
+               if (smb_fname_base == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
 
@@ -3523,14 +3862,20 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                        goto fail;
                }
 
-               /*
-                * We're opening the stream element of a base_fsp
-                * we already opened. Set up the base_fsp pointer.
-                */
                if (base_fsp) {
+                       /*
+                        * We're opening the stream element of a
+                        * base_fsp we already opened. Set up the
+                        * base_fsp pointer.
+                        */
                        fsp->base_fsp = base_fsp;
                }
 
+               if (allocation_size) {
+                       fsp->initial_allocation_size = smb_roundup(fsp->conn,
+                                                       allocation_size);
+               }
+
                status = open_file_ntcreate(conn,
                                            req,
                                            access_mask,
@@ -3615,6 +3960,8 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                        fsp->initial_allocation_size = smb_roundup(
                                fsp->conn, (uint64_t)fsp->fsp_name->st.st_ex_size);
                }
+       } else {
+               fsp->initial_allocation_size = 0;
        }
 
        if ((info == FILE_WAS_CREATED) && lp_nt_acl_support(SNUM(conn)) &&
@@ -3638,15 +3985,11 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
 
                        fsp->access_mask = FILE_GENERIC_ALL;
 
-                       /* Convert all the generic bits. */
-                       security_acl_map_generic(sd->dacl, &file_generic_mapping);
-                       security_acl_map_generic(sd->sacl, &file_generic_mapping);
-
                        if (sec_info_sent & (SECINFO_OWNER|
                                                SECINFO_GROUP|
                                                SECINFO_DACL|
                                                SECINFO_SACL)) {
-                               status = SMB_VFS_FSET_NT_ACL(fsp, sec_info_sent, sd);
+                               status = set_sd(fsp, sd, sec_info_sent);
                        }
 
                        fsp->access_mask = saved_access_mask;