r15018: Merge Volker's ipc/trans2/nttrans changes over
[samba.git] / source3 / locking / locking.c
index d42d041b79049c5ef42881e55490a1947269230c..0b3f625d03e98c1cedf5cd0216bce4dd41c3bcf6 100644 (file)
@@ -2,7 +2,8 @@
    Unix SMB/CIFS implementation.
    Locking functions
    Copyright (C) Andrew Tridgell 1992-2000
-   Copyright (C) Jeremy Allison 1992-2000
+   Copyright (C) Jeremy Allison 1992-2006
+   Copyright (C) Volker Lendecke 2005
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    rewrtten completely to use new tdb code. Tridge, Dec '99
 
    Added POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
+   Added Unix Extensions POSIX locking support. Jeremy Allison Mar 2006.
 */
 
 #include "includes.h"
 uint16 global_smbpid;
 
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
+
 /* the locking database handle */
 static TDB_CONTEXT *tdb;
 
 /****************************************************************************
- Debugging aid :-).
+ Debugging aids :-).
 ****************************************************************************/
 
-static const char *lock_type_name(enum brl_type lock_type)
+const char *lock_type_name(enum brl_type lock_type)
+{
+       switch (lock_type) {
+               case READ_LOCK:
+                       return "READ";
+               case WRITE_LOCK:
+                       return "WRITE";
+               case PENDING_LOCK:
+                       return "PENDING";
+               default:
+                       return "other";
+       }
+}
+
+const char *lock_flav_name(enum brl_flavour lock_flav)
 {
-       return (lock_type == READ_LOCK) ? "READ" : "WRITE";
+       return (lock_flav == WINDOWS_LOCK) ? "WINDOWS_LOCK" : "POSIX_LOCK";
 }
 
 /****************************************************************************
  Utility function called to see if a file region is locked.
- If check_self is True, then checks on our own fd with the same locking context
- are still made. If check_self is False, then checks are not made on our own fd
- with the same locking context are not made.
+ Called in the read/write codepath.
 ****************************************************************************/
 
-BOOL is_locked(files_struct *fsp,connection_struct *conn,
-              SMB_BIG_UINT count,SMB_BIG_UINT offset, 
-              enum brl_type lock_type, BOOL check_self)
+BOOL is_locked(files_struct *fsp,
+               SMB_BIG_UINT count,
+               SMB_BIG_UINT offset, 
+               enum brl_type lock_type)
 {
-       int snum = SNUM(conn);
-       BOOL ret;
+       int snum = SNUM(fsp->conn);
+       int strict_locking = lp_strict_locking(snum);
+       enum brl_flavour lock_flav = lp_posix_cifsu_locktype();
+       BOOL ret = True;
        
-       if (count == 0)
-               return(False);
+       if (count == 0) {
+               return False;
+       }
 
-       if (!lp_locking(snum) || !lp_strict_locking(snum))
-               return(False);
+       if (!lp_locking(snum) || !strict_locking) {
+               return False;
+       }
 
-       ret = !brl_locktest(fsp->dev, fsp->inode, fsp->fnum,
-                            global_smbpid, sys_getpid(), conn->cnum, 
-                            offset, count, lock_type, check_self);
+       if (strict_locking == Auto) {
+               if  (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && (lock_type == READ_LOCK || lock_type == WRITE_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - exclusive oplock on file %s\n", fsp->fsp_name ));
+                       ret = False;
+               } else if ((fsp->oplock_type == LEVEL_II_OPLOCK) &&
+                          (lock_type == READ_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - level II oplock on file %s\n", fsp->fsp_name ));
+                       ret = False;
+               } else {
+                       struct byte_range_lock *br_lck = brl_get_locks(NULL, fsp);
+                       if (!br_lck) {
+                               return False;
+                       }
+                       ret = !brl_locktest(br_lck,
+                                       global_smbpid,
+                                       procid_self(),
+                                       offset,
+                                       count,
+                                       lock_type,
+                                       lock_flav);
+                       TALLOC_FREE(br_lck);
+               }
+       } else {
+               struct byte_range_lock *br_lck = brl_get_locks(NULL, fsp);
+               if (!br_lck) {
+                       return False;
+               }
+               ret = !brl_locktest(br_lck,
+                               global_smbpid,
+                               procid_self(),
+                               offset,
+                               count,
+                               lock_type,
+                               lock_flav);
+               TALLOC_FREE(br_lck);
+       }
 
-       DEBUG(10,("is_locked: brl start=%.0f len=%.0f %s for file %s\n",
+       DEBUG(10,("is_locked: flavour = %s brl start=%.0f len=%.0f %s for fnum %d file %s\n",
+                       lock_flav_name(lock_flav),
                        (double)offset, (double)count, ret ? "locked" : "unlocked",
-                       fsp->fsp_name ));
+                       fsp->fnum, fsp->fsp_name ));
 
-       /*
-        * There is no lock held by an SMB daemon, check to
-        * see if there is a POSIX lock from a UNIX or NFS process.
-        */
+       return ret;
+}
 
-       if(!ret && lp_posix_locking(snum)) {
-               ret = is_posix_locked(fsp, offset, count, lock_type);
+/****************************************************************************
+ Find out if a lock could be granted - return who is blocking us if we can't.
+****************************************************************************/
+
+NTSTATUS query_lock(files_struct *fsp,
+                       uint16 *psmbpid,
+                       SMB_BIG_UINT *pcount,
+                       SMB_BIG_UINT *poffset,
+                       enum brl_type *plock_type,
+                       enum brl_flavour lock_flav)
+{
+       struct byte_range_lock *br_lck = NULL;
+       NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED;
 
-               DEBUG(10,("is_locked: posix start=%.0f len=%.0f %s for file %s\n",
-                               (double)offset, (double)count, ret ? "locked" : "unlocked",
-                               fsp->fsp_name ));
+       if (!OPEN_FSP(fsp) || !fsp->can_lock) {
+               return NT_STATUS_INVALID_HANDLE;
        }
 
-       return ret;
+       if (!lp_locking(SNUM(fsp->conn))) {
+               return NT_STATUS_OK;
+       }
+
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       status = brl_lockquery(br_lck,
+                       psmbpid,
+                       procid_self(),
+                       poffset,
+                       pcount,
+                       plock_type,
+                       lock_flav);
+
+       TALLOC_FREE(br_lck);
+       return status;
 }
 
 /****************************************************************************
  Utility function called by locking requests.
 ****************************************************************************/
 
-static NTSTATUS do_lock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type)
+NTSTATUS do_lock(files_struct *fsp,
+                       uint16 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_type lock_type,
+                       enum brl_flavour lock_flav,
+                       BOOL *my_lock_ctx)
 {
-       NTSTATUS status;
+       struct byte_range_lock *br_lck = NULL;
+       NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED;
+
+       if (!OPEN_FSP(fsp) || !fsp->can_lock) {
+               return NT_STATUS_INVALID_HANDLE;
+       }
 
-       if (!lp_locking(SNUM(conn)))
+       if (!lp_locking(SNUM(fsp->conn))) {
                return NT_STATUS_OK;
+       }
 
        /* NOTE! 0 byte long ranges ARE allowed and should be stored  */
 
+       DEBUG(10,("do_lock: lock flavour %s lock type %s start=%.0f len=%.0f requested for fnum %d file %s\n",
+               lock_flav_name(lock_flav), lock_type_name(lock_type),
+               (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
 
-       DEBUG(10,("do_lock: lock type %s start=%.0f len=%.0f requested for file %s\n",
-                 lock_type_name(lock_type), (double)offset, (double)count, fsp->fsp_name ));
-
-       if (OPEN_FSP(fsp) && fsp->can_lock && (fsp->conn == conn)) {
-               status = brl_lock(fsp->dev, fsp->inode, fsp->fnum,
-                                 lock_pid, sys_getpid(), conn->cnum, 
-                                 offset, count, 
-                                 lock_type);
-
-               if (NT_STATUS_IS_OK(status) && lp_posix_locking(SNUM(conn))) {
-
-                       /*
-                        * Try and get a POSIX lock on this range.
-                        * Note that this is ok if it is a read lock
-                        * overlapping on a different fd. JRA.
-                        */
-
-                       if (!set_posix_lock(fsp, offset, count, lock_type)) {
-                               status = NT_STATUS_LOCK_NOT_GRANTED;
-                               /*
-                                * We failed to map - we must now remove the brl
-                                * lock entry.
-                                */
-                               (void)brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                                                               lock_pid, sys_getpid(), conn->cnum, 
-                                                               offset, count);
-                       }
-               }
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
        }
 
+       status = brl_lock(br_lck,
+                       lock_pid,
+                       procid_self(),
+                       offset,
+                       count, 
+                       lock_type,
+                       lock_flav,
+                       my_lock_ctx);
+
+       TALLOC_FREE(br_lck);
        return status;
 }
 
 /****************************************************************************
- Utility function called by locking requests. This is *DISGISTING*. It also
+ Utility function called by locking requests. This is *DISGUSTING*. It also
  appears to be "What Windows Does" (tm). Andrew, ever wonder why Windows 2000
  is so slow on the locking tests...... ? This is the reason. Much though I hate
  it, we need this. JRA.
 ****************************************************************************/
 
-NTSTATUS do_lock_spin(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type)
+NTSTATUS do_lock_spin(files_struct *fsp,
+                       uint16 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_type lock_type,
+                       enum brl_flavour lock_flav,
+                       BOOL *my_lock_ctx)
 {
        int j, maxj = lp_lock_spin_count();
        int sleeptime = lp_lock_sleep_time();
        NTSTATUS status, ret;
 
-       if (maxj <= 0)
+       if (maxj <= 0) {
                maxj = 1;
+       }
 
        ret = NT_STATUS_OK; /* to keep dumb compilers happy */
 
        for (j = 0; j < maxj; j++) {
-               status = do_lock(fsp, conn, lock_pid, count, offset, lock_type);
+               status = do_lock(fsp,
+                               lock_pid,
+                               count,
+                               offset,
+                               lock_type,
+                               lock_flav,
+                               my_lock_ctx);
+
                if (!NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED) &&
                    !NT_STATUS_EQUAL(status, NT_STATUS_FILE_LOCK_CONFLICT)) {
                        return status;
@@ -169,9 +263,20 @@ NTSTATUS do_lock_spin(files_struct *fsp,connection_struct *conn, uint16 lock_pid
                /* if we do fail then return the first error code we got */
                if (j == 0) {
                        ret = status;
+                       /* Don't spin if we blocked ourselves. */
+                       if (*my_lock_ctx) {
+                               return ret;
+                       }
+
+                       /* Only spin for Windows locks. */
+                       if (lock_flav == POSIX_LOCK) {
+                               return ret;
+                       }
                }
-               if (sleeptime)
+
+               if (sleeptime) {
                        sys_usleep(sleeptime);
+               }
        }
        return ret;
 }
@@ -180,40 +285,45 @@ NTSTATUS do_lock_spin(files_struct *fsp,connection_struct *conn, uint16 lock_pid
  Utility function called by unlocking requests.
 ****************************************************************************/
 
-NTSTATUS do_unlock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                  SMB_BIG_UINT count,SMB_BIG_UINT offset)
+NTSTATUS do_unlock(files_struct *fsp,
+                       uint16 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_flavour lock_flav)
 {
        BOOL ok = False;
+       struct byte_range_lock *br_lck = NULL;
        
-       if (!lp_locking(SNUM(conn)))
+       if (!lp_locking(SNUM(fsp->conn))) {
                return NT_STATUS_OK;
+       }
        
-       if (!OPEN_FSP(fsp) || !fsp->can_lock || (fsp->conn != conn)) {
+       if (!OPEN_FSP(fsp) || !fsp->can_lock) {
                return NT_STATUS_INVALID_HANDLE;
        }
        
-       DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for file %s\n",
-                 (double)offset, (double)count, fsp->fsp_name ));
+       DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for fnum %d file %s\n",
+                 (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
 
-       /*
-        * Remove the existing lock record from the tdb lockdb
-        * before looking at POSIX locks. If this record doesn't
-        * match then don't bother looking to remove POSIX locks.
-        */
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
+       }
 
-       ok = brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                       lock_pid, sys_getpid(), conn->cnum, offset, count);
+       ok = brl_unlock(br_lck,
+                       lock_pid,
+                       procid_self(),
+                       offset,
+                       count,
+                       lock_flav);
    
+       TALLOC_FREE(br_lck);
+
        if (!ok) {
                DEBUG(10,("do_unlock: returning ERRlock.\n" ));
                return NT_STATUS_RANGE_NOT_LOCKED;
        }
 
-       if (!lp_posix_locking(SNUM(conn)))
-               return NT_STATUS_OK;
-
-       (void)release_posix_lock(fsp, offset, count);
-
        return NT_STATUS_OK;
 }
 
@@ -223,7 +333,8 @@ NTSTATUS do_unlock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
 
 void locking_close_file(files_struct *fsp)
 {
-       pid_t pid = sys_getpid();
+       struct byte_range_lock *br_lck;
+       struct process_id pid = procid_self();
 
        if (!lp_locking(SNUM(fsp->conn)))
                return;
@@ -232,13 +343,14 @@ void locking_close_file(files_struct *fsp)
         * Just release all the brl locks, no need to release individually.
         */
 
-       brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
+       br_lck = brl_get_locks(NULL,fsp);
+       if (br_lck) {
+               brl_close_fnum(br_lck, pid);
+               TALLOC_FREE(br_lck);
+       }
 
        if(lp_posix_locking(SNUM(fsp->conn))) {
-
-               /* 
-                * Release all the POSIX locks.
-                */
+               /* Release all the POSIX locks.*/
                posix_locking_close_file(fsp);
 
        }
@@ -258,15 +370,16 @@ BOOL locking_init(int read_only)
                return True;
 
        tdb = tdb_open_log(lock_path("locking.tdb"), 
-                      0, TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST), 
-                      read_only?O_RDONLY:O_RDWR|O_CREAT,
-                      0644);
+                       lp_open_files_db_hash_size(),
+                       TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST), 
+                       read_only?O_RDONLY:O_RDWR|O_CREAT,
+                       0644);
 
        if (!tdb) {
                DEBUG(0,("ERROR: Failed to initialise locking database\n"));
                return False;
        }
-       
+
        if (!posix_locking_init(read_only))
                return False;
 
@@ -281,21 +394,31 @@ BOOL locking_init(int read_only)
 
 BOOL locking_end(void)
 {
+       BOOL ret = True;
 
        brl_shutdown(open_read_only);
        if (tdb) {
-
                if (tdb_close(tdb) != 0)
-                       return False;
+                       ret = False;
        }
 
-       return True;
+       return ret;
 }
 
 /*******************************************************************
  Form a static locking key for a dev/inode pair.
 ******************************************************************/
 
+/* key and data records in the tdb locking database */
+struct locking_key {
+       SMB_DEV_T dev;
+       SMB_INO_T ino;
+};
+
+/*******************************************************************
+ Form a static locking key for a dev/inode pair.
+******************************************************************/
+
 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
 {
        static struct locking_key key;
@@ -303,67 +426,30 @@ static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
 
        memset(&key, '\0', sizeof(key));
        key.dev = dev;
-       key.inode = inode;
+       key.ino = inode;
        kbuf.dptr = (char *)&key;
        kbuf.dsize = sizeof(key);
        return kbuf;
 }
 
-static TDB_DATA locking_key_fsp(files_struct *fsp)
-{
-       return locking_key(fsp->dev, fsp->inode);
-}
-
-/*******************************************************************
- Lock a hash bucket entry.
-******************************************************************/
-
-BOOL lock_share_entry(connection_struct *conn,
-                     SMB_DEV_T dev, SMB_INO_T inode)
-{
-       return tdb_chainlock(tdb, locking_key(dev, inode)) == 0;
-}
-
-/*******************************************************************
- Unlock a hash bucket entry.
-******************************************************************/
-
-void unlock_share_entry(connection_struct *conn,
-                       SMB_DEV_T dev, SMB_INO_T inode)
-{
-       tdb_chainunlock(tdb, locking_key(dev, inode));
-}
-
-/*******************************************************************
- Lock a hash bucket entry. use a fsp for convenience
-******************************************************************/
-
-BOOL lock_share_entry_fsp(files_struct *fsp)
-{
-       return tdb_chainlock(tdb, locking_key(fsp->dev, fsp->inode)) == 0;
-}
-
-/*******************************************************************
- Unlock a hash bucket entry.
-******************************************************************/
-
-void unlock_share_entry_fsp(files_struct *fsp)
-{
-       tdb_chainunlock(tdb, locking_key(fsp->dev, fsp->inode));
-}
-
 /*******************************************************************
  Print out a share mode.
 ********************************************************************/
 
-static char *share_mode_str(int num, share_mode_entry *e)
+char *share_mode_str(int num, struct share_mode_entry *e)
 {
        static pstring share_str;
 
-       slprintf(share_str, sizeof(share_str)-1, "share_mode_entry[%d]: \
-pid = %u, share_mode = 0x%x, desired_access = 0x%x, port = 0x%x, type= 0x%x, file_id = %lu, dev = 0x%x, inode = %.0f",
-       num, e->pid, e->share_mode, (unsigned int)e->desired_access, e->op_port, e->op_type, e->share_file_id,
-       (unsigned int)e->dev, (double)e->inode );
+       slprintf(share_str, sizeof(share_str)-1, "share_mode_entry[%d]: %s "
+                "pid = %s, share_access = 0x%x, private_options = 0x%x, "
+                "access_mask = 0x%x, mid = 0x%x, type= 0x%x, file_id = %lu, "
+                "dev = 0x%x, inode = %.0f",
+                num,
+                e->op_type == UNUSED_SHARE_MODE_ENTRY ? "UNUSED" : "",
+                procid_str_static(&e->pid),
+                e->share_access, e->private_options,
+                e->access_mask, e->op_mid, e->op_type, e->share_file_id,
+                (unsigned int)e->dev, (double)e->inode );
 
        return share_str;
 }
@@ -374,13 +460,17 @@ pid = %u, share_mode = 0x%x, desired_access = 0x%x, port = 0x%x, type= 0x%x, fil
 
 static void print_share_mode_table(struct locking_data *data)
 {
-       int num_share_modes = data->u.num_share_mode_entries;
-       share_mode_entry *shares = (share_mode_entry *)(data + 1);
+       int num_share_modes = data->u.s.num_share_mode_entries;
+       struct share_mode_entry *shares =
+               (struct share_mode_entry *)(data + 1);
        int i;
 
        for (i = 0; i < num_share_modes; i++) {
-               share_mode_entry *entry_p = &shares[i];
-               DEBUG(10,("print_share_mode_table: %s\n", share_mode_str(i, entry_p) ));
+               struct share_mode_entry entry;
+
+               memcpy(&entry, &shares[i], sizeof(struct share_mode_entry));
+               DEBUG(10,("print_share_mode_table: %s\n",
+                         share_mode_str(i, &entry)));
        }
 }
 
@@ -388,448 +478,823 @@ static void print_share_mode_table(struct locking_data *data)
  Get all share mode entries for a dev/inode pair.
 ********************************************************************/
 
-int get_share_modes(connection_struct *conn, 
-                   SMB_DEV_T dev, SMB_INO_T inode, 
-                   share_mode_entry **pp_shares)
+static BOOL parse_share_modes(TDB_DATA dbuf, struct share_mode_lock *lck)
 {
-       TDB_DATA dbuf;
        struct locking_data *data;
-       int num_share_modes;
-       share_mode_entry *shares = NULL;
-
-       *pp_shares = NULL;
+       int i;
 
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return 0;
+       if (dbuf.dsize < sizeof(struct locking_data)) {
+               smb_panic("PANIC: parse_share_modes: buffer too short.\n");
+       }
 
        data = (struct locking_data *)dbuf.dptr;
-       num_share_modes = data->u.num_share_mode_entries;
-       if(num_share_modes) {
-               int i;
-               int del_count = 0;
 
-               shares = (share_mode_entry *)memdup(dbuf.dptr + sizeof(*data),  
-                                               num_share_modes * sizeof(share_mode_entry));
+       lck->delete_on_close = data->u.s.delete_on_close;
+       lck->initial_delete_on_close = data->u.s.initial_delete_on_close;
+       lck->num_share_modes = data->u.s.num_share_mode_entries;
+
+       DEBUG(10, ("parse_share_modes: delete_on_close: %d, "
+                  "initial_delete_on_close: %d, "
+                  "num_share_modes: %d\n",
+               lck->delete_on_close,
+               lck->initial_delete_on_close,
+               lck->num_share_modes));
+
+       if ((lck->num_share_modes < 0) || (lck->num_share_modes > 1000000)) {
+               DEBUG(0, ("invalid number of share modes: %d\n",
+                         lck->num_share_modes));
+               smb_panic("PANIC: invalid number of share modes");
+       }
+
+       lck->share_modes = NULL;
+       
+       if (lck->num_share_modes != 0) {
 
-               if (!shares) {
-                       SAFE_FREE(dbuf.dptr);
-                       return 0;
+               if (dbuf.dsize < (sizeof(struct locking_data) +
+                                 (lck->num_share_modes *
+                                  sizeof(struct share_mode_entry)))) {
+                       smb_panic("PANIC: parse_share_modes: buffer too short.\n");
                }
+                                 
+               lck->share_modes = talloc_memdup(lck, dbuf.dptr+sizeof(*data),
+                                                lck->num_share_modes *
+                                                sizeof(struct share_mode_entry));
 
-               /*
-                * Ensure that each entry has a real process attached.
-                */
-
-               for (i = 0; i < num_share_modes; ) {
-                       share_mode_entry *entry_p = &shares[i];
-                       if (process_exists(entry_p->pid)) {
-                               DEBUG(10,("get_share_modes: %s\n", share_mode_str(i, entry_p) ));
-                               i++;
-                       } else {
-                               DEBUG(10,("get_share_modes: deleted %s\n", share_mode_str(i, entry_p) ));
-                               memcpy( &shares[i], &shares[i+1],
-                                       sizeof(share_mode_entry) * (num_share_modes - i - 1));
-                               num_share_modes--;
-                               del_count++;
-                       }
+               if (lck->share_modes == NULL) {
+                       smb_panic("talloc failed\n");
+               }
+       }
+
+       /* Get any delete token. */
+       if (data->u.s.delete_token_size) {
+               char *p = dbuf.dptr + sizeof(*data) +
+                               (lck->num_share_modes *
+                               sizeof(struct share_mode_entry));
+
+               if ((data->u.s.delete_token_size < sizeof(uid_t) + sizeof(gid_t)) ||
+                               ((data->u.s.delete_token_size - sizeof(uid_t)) % sizeof(gid_t)) != 0) {
+                       DEBUG(0, ("parse_share_modes: invalid token size %u\n",
+                               (unsigned int)data->u.s.delete_token_size));
+                       smb_panic("parse_share_modes: invalid token size\n");
+               }
+
+               lck->delete_token = TALLOC_P(lck, UNIX_USER_TOKEN);
+               if (!lck->delete_token) {
+                       smb_panic("talloc failed\n");
                }
 
-               /* Did we delete any ? If so, re-store in tdb. */
-               if (del_count) {
-                       data->u.num_share_mode_entries = num_share_modes;
-                       
-                       if (num_share_modes)
-                               memcpy(dbuf.dptr + sizeof(*data), shares,
-                                               num_share_modes * sizeof(share_mode_entry));
-
-                       /* The record has shrunk a bit */
-                       dbuf.dsize -= del_count * sizeof(share_mode_entry);
-
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1) {
-                               SAFE_FREE(shares);
-                               SAFE_FREE(dbuf.dptr);
-                               return 0;
+               /* Copy out the uid and gid. */
+               memcpy(&lck->delete_token->uid, p, sizeof(uid_t));
+               p += sizeof(uid_t);
+               memcpy(&lck->delete_token->gid, p, sizeof(gid_t));
+               p += sizeof(gid_t);
+
+               /* Any supplementary groups ? */
+               lck->delete_token->ngroups = (data->u.s.delete_token_size > (sizeof(uid_t) + sizeof(gid_t))) ?
+                                       ((data->u.s.delete_token_size -
+                                               (sizeof(uid_t) + sizeof(gid_t)))/sizeof(gid_t)) : 0;
+
+               if (lck->delete_token->ngroups) {
+                       /* Make this a talloc child of lck->delete_token. */
+                       lck->delete_token->groups = TALLOC_ARRAY(lck->delete_token, gid_t,
+                                                       lck->delete_token->ngroups);
+                       if (!lck->delete_token->groups) {
+                               smb_panic("talloc failed\n");
+                       }
+
+                       for (i = 0; i < lck->delete_token->ngroups; i++) {
+                               memcpy(&lck->delete_token->groups[i], p, sizeof(gid_t));
+                               p += sizeof(gid_t);
                        }
                }
+
+       } else {
+               lck->delete_token = NULL;
        }
 
-       SAFE_FREE(dbuf.dptr);
-       *pp_shares = shares;
-       return num_share_modes;
-}
+       /* Save off the associated service path and filename. */
+       lck->servicepath = talloc_strdup(lck, dbuf.dptr + sizeof(*data) +
+                                       (lck->num_share_modes *
+                                       sizeof(struct share_mode_entry)) +
+                                       data->u.s.delete_token_size );
 
-/*******************************************************************
- Fill a share mode entry.
-********************************************************************/
+       lck->filename = talloc_strdup(lck, dbuf.dptr + sizeof(*data) +
+                                       (lck->num_share_modes *
+                                       sizeof(struct share_mode_entry)) +
+                                       data->u.s.delete_token_size +
+                                       strlen(lck->servicepath) + 1 );
+
+       /*
+        * Ensure that each entry has a real process attached.
+        */
+
+       for (i = 0; i < lck->num_share_modes; i++) {
+               struct share_mode_entry *entry_p = &lck->share_modes[i];
+               DEBUG(10,("parse_share_modes: %s\n",
+                         share_mode_str(i, entry_p) ));
+               if (!process_exists(entry_p->pid)) {
+                       DEBUG(10,("parse_share_modes: deleted %s\n",
+                                 share_mode_str(i, entry_p) ));
+                       entry_p->op_type = UNUSED_SHARE_MODE_ENTRY;
+                       lck->modified = True;
+               }
+       }
+
+       return True;
+}
 
-static void fill_share_mode(char *p, files_struct *fsp, uint16 port, uint16 op_type)
+/*******************************************************************
+ Serialise an in-memory share_mode_lock into the flat record that is
+ stored in the locking tdb.
+
+ Record layout, in order: struct locking_data header, the array of
+ share_mode_entry structs (unused slots included), the optional
+ delete-on-close token (uid, gid, then ngroups supplementary gids),
+ the NUL-terminated servicepath and the NUL-terminated filename.
+
+ Returns a TDB_DATA whose dptr is talloc'ed off lck, or one with
+ dptr == NULL when no entry is in use. Panics if talloc fails.
+********************************************************************/
+
+static TDB_DATA unparse_share_modes(struct share_mode_lock *lck)
 {
-       share_mode_entry *e = (share_mode_entry *)p;
-       void *x = &e->time; /* Needed to force alignment. p may not be aligned.... */
+       TDB_DATA result;
+       int num_valid = 0;
+       int i;
+       struct locking_data *data;
+       ssize_t offset;
+       ssize_t sp_len;
+       uint32 delete_token_size;
 
-       memset(e, '\0', sizeof(share_mode_entry));
-       e->pid = sys_getpid();
-       e->share_mode = fsp->share_mode;
-       e->desired_access = fsp->desired_access;
-       e->op_port = port;
-       e->op_type = op_type;
-       memcpy(x, &fsp->open_time, sizeof(struct timeval));
-       e->share_file_id = fsp->file_id;
-       e->dev = fsp->dev;
-       e->inode = fsp->inode;
+       result.dptr = NULL;
+       result.dsize = 0;
+
+       /* Count the slots still in use; an all-unused table is reported
+          to the caller as an empty result. */
+       for (i=0; i<lck->num_share_modes; i++) {
+               if (!is_unused_share_mode_entry(&lck->share_modes[i])) {
+                       num_valid += 1;
+               }
+       }
+
+       if (num_valid == 0) {
+               return result;
+       }
+
+       sp_len = strlen(lck->servicepath);
+       delete_token_size = (lck->delete_token ?
+                       (sizeof(uid_t) + sizeof(gid_t) + (lck->delete_token->ngroups*sizeof(gid_t))) : 0);
+
+       result.dsize = sizeof(*data) +
+               lck->num_share_modes * sizeof(struct share_mode_entry) +
+               delete_token_size +
+               sp_len + 1 +
+               strlen(lck->filename) + 1;
+       result.dptr = talloc_size(lck, result.dsize);
+
+       if (result.dptr == NULL) {
+               smb_panic("talloc failed\n");
+       }
+
+       data = (struct locking_data *)result.dptr;
+       ZERO_STRUCTP(data);
+       data->u.s.num_share_mode_entries = lck->num_share_modes;
+       data->u.s.delete_on_close = lck->delete_on_close;
+       data->u.s.initial_delete_on_close = lck->initial_delete_on_close;
+       data->u.s.delete_token_size = delete_token_size;
+       DEBUG(10, ("unparse_share_modes: del: %d, initial del %d, tok = %u, num: %d\n",
+               data->u.s.delete_on_close,
+               data->u.s.initial_delete_on_close,
+               (unsigned int)data->u.s.delete_token_size,
+               data->u.s.num_share_mode_entries));
+       memcpy(result.dptr + sizeof(*data), lck->share_modes,
+              sizeof(struct share_mode_entry)*lck->num_share_modes);
+       offset = sizeof(*data) +
+               sizeof(struct share_mode_entry)*lck->num_share_modes;
+
+       /* Store any delete on close token. */
+       if (lck->delete_token) {
+               char *p = result.dptr + offset;
+
+               memcpy(p, &lck->delete_token->uid, sizeof(uid_t));
+               p += sizeof(uid_t);
+
+               memcpy(p, &lck->delete_token->gid, sizeof(gid_t));
+               /* Step past the primary gid. Without this the first
+                  supplementary group overwrites it and the token is
+                  written sizeof(gid_t) shorter than delete_token_size,
+                  misplacing the strings that follow. */
+               p += sizeof(gid_t);
+
+               for (i = 0; i < lck->delete_token->ngroups; i++) {
+                       memcpy(p, &lck->delete_token->groups[i], sizeof(gid_t));
+                       p += sizeof(gid_t);
+               }
+               offset = p - result.dptr;
+       }
+
+       /* The two path strings go last, each with its terminating NUL. */
+       safe_strcpy(result.dptr + offset, lck->servicepath,
+                   result.dsize - offset - 1);
+       offset += sp_len + 1;
+       safe_strcpy(result.dptr + offset, lck->filename,
+                   result.dsize - offset - 1);
+
+       if (DEBUGLEVEL >= 10) {
+               print_share_mode_table(data);
+       }
+
+       return result;
 }
 
-/*******************************************************************
- Check if two share mode entries are identical, ignoring oplock 
- and port info and desired_access.
-********************************************************************/
+/*******************************************************************
+ talloc destructor for a share_mode_lock. If the lock was marked
+ modified, the record is written back to the tdb, or deleted when
+ unparse_share_modes() yields no data and a record existed before.
+ The tdb chainlock for the dev/inode key is released on every path.
+ Always returns 0. Panics if the tdb delete or store fails.
+********************************************************************/
+
+static int share_mode_lock_destructor(void *p)
+{
+       struct share_mode_lock *lck =
+               talloc_get_type_abort(p, struct share_mode_lock);
+       TDB_DATA key = locking_key(lck->dev, lck->ino);
+       TDB_DATA data;
+
+       /* Nothing changed: skip the write-back, just drop the lock. */
+       if (!lck->modified) {
+               goto done;
+       }
 
-BOOL share_modes_identical( share_mode_entry *e1, share_mode_entry *e2)
+       data = unparse_share_modes(lck);
+
+       /* A NULL dptr means no share mode entry is in use any more. */
+       if (data.dptr == NULL) {
+               if (!lck->fresh) {
+                       /* There has been an entry before, delete it */
+                       if (tdb_delete(tdb, key) == -1) {
+                               smb_panic("Could not delete share entry\n");
+                       }
+               }
+               goto done;
+       }
+
+       if (tdb_store(tdb, key, data, TDB_REPLACE) == -1) {
+               smb_panic("Could not store share mode entry\n");
+       }
+
+ done:
+       tdb_chainunlock(tdb, key);
+
+       return 0;
+}
+
+/*******************************************************************
+ Take the tdb chainlock for dev/ino and return a share_mode_lock
+ describing the stored record, allocated off mem_ctx.
+
+ If no record exists yet the lock is marked fresh and fname and
+ servicepath are copied into it; both must then be non-NULL.
+ Otherwise the stored record is parsed into the lock.
+
+ Returns NULL on allocation failure, chainlock failure, missing
+ names for a fresh record, or a parse failure. The chainlock is
+ dropped by the destructor when the returned lock is freed.
+********************************************************************/
+
+struct share_mode_lock *get_share_mode_lock(TALLOC_CTX *mem_ctx,
+                                               SMB_DEV_T dev, SMB_INO_T ino,
+                                               const char *servicepath,
+                                               const char *fname)
 {
-#if 1 /* JRA PARANOIA TEST - REMOVE LATER */
-       if (e1->pid == e2->pid &&
-               e1->share_file_id == e2->share_file_id &&
-               e1->dev == e2->dev &&
-               e1->inode == e2->inode &&
-               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) != (e2->share_mode & ~DELETE_ON_CLOSE_FLAG)) {
-                       DEBUG(0,("PANIC: share_modes_identical: share_mode missmatch (e1 = %u, e2 = %u). Logic error.\n",
-                               (unsigned int)(e1->share_mode & ~DELETE_ON_CLOSE_FLAG),
-                               (unsigned int)(e2->share_mode & ~DELETE_ON_CLOSE_FLAG) ));
-               smb_panic("PANIC: share_modes_identical logic error.\n");
+       struct share_mode_lock *lck;
+       TDB_DATA key = locking_key(dev, ino);
+       TDB_DATA data;
+
+       lck = TALLOC_P(mem_ctx, struct share_mode_lock);
+       if (lck == NULL) {
+               DEBUG(0, ("talloc failed\n"));
+               return NULL;
        }
-#endif
 
-       return (e1->pid == e2->pid &&
-               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) == (e2->share_mode & ~DELETE_ON_CLOSE_FLAG) &&
-               e1->dev == e2->dev &&
-               e1->inode == e2->inode &&
-               e1->share_file_id == e2->share_file_id );
+       /* Ensure we set every field here as the destructor must be
+          valid even if parse_share_modes fails. */
+
+       lck->servicepath = NULL;
+       lck->filename = NULL;
+       lck->dev = dev;
+       lck->ino = ino;
+       lck->num_share_modes = 0;
+       lck->share_modes = NULL;
+       lck->delete_token = NULL;
+       lck->delete_on_close = False;
+       lck->initial_delete_on_close = False;
+       lck->fresh = False;
+       lck->modified = False;
+
+       /* No destructor is installed yet, so this free does not unlock. */
+       if (tdb_chainlock(tdb, key) != 0) {
+               DEBUG(3, ("Could not lock share entry\n"));
+               TALLOC_FREE(lck);
+               return NULL;
+       }
+
+       /* We must set the destructor immediately after the chainlock
+          to ensure the lock is cleaned up on any of the error return
+          paths below. */
+
+       talloc_set_destructor(lck, share_mode_lock_destructor);
+
+       data = tdb_fetch(tdb, key);
+       lck->fresh = (data.dptr == NULL);
+
+       if (lck->fresh) {
+
+               /* A new record cannot be created without both names. */
+               if (fname == NULL || servicepath == NULL) {
+                       TALLOC_FREE(lck);
+                       return NULL;
+               }
+               lck->filename = talloc_strdup(lck, fname);
+               lck->servicepath = talloc_strdup(lck, servicepath);
+               if (lck->filename == NULL || lck->servicepath == NULL) {
+                       DEBUG(0, ("talloc failed\n"));
+                       TALLOC_FREE(lck);
+                       return NULL;
+               }
+       } else {
+               if (!parse_share_modes(data, lck)) {
+                       DEBUG(0, ("Could not parse share modes\n"));
+                       TALLOC_FREE(lck);
+                       SAFE_FREE(data.dptr);
+                       return NULL;
+               }
+       }
+
+       /* The fetched buffer is released here; lck does not keep data.dptr. */
+       SAFE_FREE(data.dptr);
+
+       return lck;
 }
 
 /*******************************************************************
- Delete a specific share mode. Return the number
- of entries left, and a memdup'ed copy of the entry deleted (if required).
- Ignore if no entry deleted.
+ Sets the service name and filename for rename.
+ At this point we emit "file renamed" messages to all
+ process id's that have this file open.
+ Based on an initial code idea from SATOH Fumiyasu <fumiya@samba.gr.jp>
 ********************************************************************/
 
-ssize_t del_share_entry( SMB_DEV_T dev, SMB_INO_T inode,
-                       share_mode_entry *entry, share_mode_entry **ppse)
+BOOL rename_share_filename(struct share_mode_lock *lck,
+                       const char *servicepath,
+                       const char *newname)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i, del_count=0;
-       share_mode_entry *shares;
-       ssize_t count = 0;
-
-       if (ppse)
-               *ppse = NULL;
+       /*
+        * lck:         held share mode lock of the renamed file.
+        * servicepath: new service path to record in the lock.
+        * newname:     new file name; leading "./" prefixes are dropped.
+        * Returns False if lck is NULL or an allocation fails, else True.
+        */
+       size_t sp_len;
+       size_t fn_len;
+       size_t msg_len;
+       char *frm = NULL;
+       int i;
 
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return -1;
+       if (!lck) {
+               return False;
+       }
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       DEBUG(10, ("rename_share_filename: servicepath %s newname %s\n",
+               servicepath, newname));
 
        /*
-        * Find any with this pid and delete it
-        * by overwriting with the rest of the data 
-        * from the record.
+        * rename_internal_fsp() and rename_internals() add './' to
+        * head of newname if newname does not contain a '/'.
         */
+       while (newname[0] && newname[1] && newname[0] == '.' && newname[1] == '/') {
+               newname += 2;
+       }
 
-       DEBUG(10,("del_share_entry: num_share_modes = %d\n", data->u.num_share_mode_entries ));
+       lck->servicepath = talloc_strdup(lck, servicepath);
+       lck->filename = talloc_strdup(lck, newname);
+       if (lck->filename == NULL || lck->servicepath == NULL) {
+               DEBUG(0, ("rename_share_filename: talloc failed\n"));
+               return False;
+       }
+       lck->modified = True;
 
-       for (i=0;i<data->u.num_share_mode_entries;) {
-               if (share_modes_identical(&shares[i], entry)) {
-                       DEBUG(10,("del_share_entry: deleted %s\n",
-                               share_mode_str(i, &shares[i]) ));
-                       if (ppse)
-                               *ppse = memdup(&shares[i], sizeof(*shares));
-                       data->u.num_share_mode_entries--;
-                       memmove(&shares[i], &shares[i+1], 
-                               dbuf.dsize - (sizeof(*data) + (i+1)*sizeof(*shares)));
-                       del_count++;
+       sp_len = strlen(lck->servicepath);
+       fn_len = strlen(lck->filename);
 
-                       DEBUG(10,("del_share_entry: deleting entry %d\n", i ));
+       msg_len = MSG_FILE_RENAMED_MIN_SIZE + sp_len + 1 + fn_len + 1;
 
-               } else {
-                       i++;
-               }
+       /* Set up the name changed message. */
+       frm = TALLOC(lck, msg_len);
+       if (!frm) {
+               return False;
        }
 
-       if (del_count) {
-               /* the record may have shrunk a bit */
-               dbuf.dsize -= del_count * sizeof(*shares);
+       /* Message layout: dev at offset 0, inode at offset 8, then the
+          two NUL-terminated strings starting at offset 16. */
+       SDEV_T_VAL(frm,0,lck->dev);
+       SINO_T_VAL(frm,8,lck->ino);
 
-               count = (ssize_t)data->u.num_share_mode_entries;
+       /* msg_len is a size_t: cast it to match the conversion specifier
+          instead of passing it to %d. */
+       DEBUG(10,("rename_share_filename: msg_len = %u\n", (unsigned int)msg_len ));
 
-               /* store it back in the database */
-               if (data->u.num_share_mode_entries == 0) {
-                       if (tdb_delete(tdb, locking_key(dev, inode)) == -1)
-                               count = -1;
-               } else {
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1)
-                               count = -1;
+       safe_strcpy(&frm[16], lck->servicepath, sp_len);
+       safe_strcpy(&frm[16 + sp_len + 1], lck->filename, fn_len);
+
+       /* Send the messages. */
+       for (i=0; i<lck->num_share_modes; i++) {
+               struct share_mode_entry *se = &lck->share_modes[i];
+               if (!is_valid_share_mode_entry(se)) {
+                       continue;
                }
+               /* But not to ourselves... */
+               if (procid_is_me(&se->pid)) {
+                       continue;
+               }
+
+               DEBUG(10,("rename_share_filename: sending rename message to pid %u "
+                       "dev %x, inode  %.0f sharepath %s newname %s\n",
+                       (unsigned int)procid_to_pid(&se->pid),
+                       (unsigned int)lck->dev, (double)lck->ino,
+                       lck->servicepath, lck->filename ));
+
+               become_root();
+               message_send_pid(se->pid, MSG_SMB_FILE_RENAME,
+                               frm, msg_len, True);
+               unbecome_root();
        }
-       DEBUG(10,("del_share_entry: Remaining table.\n"));
-       print_share_mode_table((struct locking_data *)dbuf.dptr);
-       SAFE_FREE(dbuf.dptr);
-       return count;
+
+       return True;
 }
 
-/*******************************************************************
- Del the share mode of a file for this process. Return the number
- of entries left, and a memdup'ed copy of the entry deleted.
-********************************************************************/
+/* Read the delete-on-close flag recorded for dev/inode. Yields False
+   when the share mode lock cannot be obtained. */
+BOOL get_delete_on_close_flag(SMB_DEV_T dev, SMB_INO_T inode)
+{
+       struct share_mode_lock *lck;
+       BOOL doc_flag = False;
+
+       lck = get_share_mode_lock(NULL, dev, inode, NULL, NULL);
+       if (lck != NULL) {
+               doc_flag = lck->delete_on_close;
+               TALLOC_FREE(lck);
+       }
+       return doc_flag;
+}
 
-ssize_t del_share_mode(files_struct *fsp, share_mode_entry **ppse)
+/* Return True when e->op_type is exactly one of: NO_OPLOCK, an
+   exclusive oplock type, or a level II oplock type. Asserts that at
+   most one of those classifications matches. */
+BOOL is_valid_share_mode_entry(const struct share_mode_entry *e)
 {
-       share_mode_entry entry;
+       int num_props = 0;
 
-       /*
-        * Fake up a share_mode_entry for comparisons.
-        */
+       /* Each test contributes 1 when it matches, 0 otherwise. */
+       num_props += ((e->op_type == NO_OPLOCK) ? 1 : 0);
+       num_props += (EXCLUSIVE_OPLOCK_TYPE(e->op_type) ? 1 : 0);
+       num_props += (LEVEL_II_OPLOCK_TYPE(e->op_type) ? 1 : 0);
+
+       /* The three classifications are expected to be mutually exclusive. */
+       SMB_ASSERT(num_props <= 1);
+       return (num_props != 0);
+}
 
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return del_share_entry(fsp->dev, fsp->inode, &entry, ppse);
+/* True when the entry's op_type marks it as a deferred open. */
+BOOL is_deferred_open_entry(const struct share_mode_entry *e)
+{
+       const BOOL deferred = (e->op_type == DEFERRED_OPEN_ENTRY);
+
+       return deferred;
+}
+
+/* True when the entry's op_type marks the slot as unused. */
+BOOL is_unused_share_mode_entry(const struct share_mode_entry *e)
+{
+       const BOOL unused = (e->op_type == UNUSED_SHARE_MODE_ENTRY);
+
+       return unused;
 }
 
 /*******************************************************************
- Set the share mode of a file. Return False on fail, True on success.
+ Fill a share mode entry.
 ********************************************************************/
 
-BOOL set_share_mode(files_struct *fsp, uint16 port, uint16 op_type)
+static void fill_share_mode_entry(struct share_mode_entry *e,
+                                 files_struct *fsp,
+                                 uint16 mid, uint16 op_type)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       char *p=NULL;
-       int size;
-       BOOL ret = True;
-               
-       /* read in the existing share modes if any */
-       dbuf = tdb_fetch(tdb, locking_key_fsp(fsp));
-       if (!dbuf.dptr) {
-               /* we'll need to create a new record */
-               pstring fname;
-
-               pstrcpy(fname, fsp->conn->connectpath);
-               pstrcat(fname, "/");
-               pstrcat(fname, fsp->fsp_name);
-
-               size = sizeof(*data) + sizeof(share_mode_entry) + strlen(fname) + 1;
-               p = (char *)malloc(size);
-               if (!p)
-                       return False;
-               data = (struct locking_data *)p;
-               data->u.num_share_mode_entries = 1;
-       
-               DEBUG(10,("set_share_mode: creating entry for file %s. num_share_modes = 1\n",
-                       fsp->fsp_name ));
-
-               pstrcpy(p + sizeof(*data) + sizeof(share_mode_entry), fname);
-               fill_share_mode(p + sizeof(*data), fsp, port, op_type);
-               dbuf.dptr = p;
-               dbuf.dsize = size;
-               if (tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE) == -1)
-                       ret = False;
+       /* Zero the whole entry first, then populate it from the open
+          file (fsp), the calling process and the supplied mid and
+          oplock type. */
+       ZERO_STRUCTP(e);
+       e->pid = procid_self();
+       e->share_access = fsp->share_access;
+       e->private_options = fsp->fh->private_options;
+       e->access_mask = fsp->access_mask;
+       e->op_mid = mid;
+       e->op_type = op_type;
+       /* The open time is copied field by field. */
+       e->time.tv_sec = fsp->open_time.tv_sec;
+       e->time.tv_usec = fsp->open_time.tv_usec;
+       e->share_file_id = fsp->file_id;
+       e->dev = fsp->dev;
+       e->inode = fsp->inode;
+}
 
-               print_share_mode_table((struct locking_data *)p);
+/* Build a DEFERRED_OPEN_ENTRY record for the calling process. */
+static void fill_deferred_open_entry(struct share_mode_entry *e,
+                                    const struct timeval request_time,
+                                    SMB_DEV_T dev, SMB_INO_T ino, uint16 mid)
+{
+       /* Start from an all-zero entry; the remaining fields stay zero. */
+       ZERO_STRUCTP(e);
+
+       /* Who deferred, on which file, for which mid. */
+       e->pid = procid_self();
+       e->dev = dev;
+       e->inode = ino;
+       e->op_mid = mid;
+
+       /* Tag the slot as a deferred open and stamp the request time. */
+       e->op_type = DEFERRED_OPEN_ENTRY;
+       e->time.tv_sec = request_time.tv_sec;
+       e->time.tv_usec = request_time.tv_usec;
+}
 
-               SAFE_FREE(p);
-               return ret;
+/* Store a copy of *entry in lck: reuse the first unused slot if one
+   exists, otherwise append to the array. Marks lck as modified. */
+static void add_share_mode_entry(struct share_mode_lock *lck,
+                                const struct share_mode_entry *entry)
+{
+       int i;
+
+       /* Look for a slot previously marked unused. */
+       for (i=0; i<lck->num_share_modes; i++) {
+               struct share_mode_entry *e = &lck->share_modes[i];
+               if (is_unused_share_mode_entry(e)) {
+                       *e = *entry;
+                       break;
+               }
        }
 
-       /* we're adding to an existing entry - this is a bit fiddly */
-       data = (struct locking_data *)dbuf.dptr;
+       /* The loop ran to completion without a break. */
+       if (i == lck->num_share_modes) {
+               /* No unused entry found */
+               ADD_TO_ARRAY(lck, struct share_mode_entry, *entry,
+                            &lck->share_modes, &lck->num_share_modes);
+       }
+       lck->modified = True;
+}
 
-       data->u.num_share_mode_entries++;
-       
-       DEBUG(10,("set_share_mode: adding entry for file %s. new num_share_modes = %d\n",
-               fsp->fsp_name, data->u.num_share_mode_entries ));
+/* Record a share mode entry for fsp, with the given mid and oplock
+   type, in the held lock. */
+void set_share_mode(struct share_mode_lock *lck, files_struct *fsp,
+                   uint16 mid, uint16 op_type)
+{
+       struct share_mode_entry new_entry;
+
+       fill_share_mode_entry(&new_entry, fsp, mid, op_type);
+       add_share_mode_entry(lck, &new_entry);
+}
 
-       size = dbuf.dsize + sizeof(share_mode_entry);
-       p = malloc(size);
-       if (!p)
-               return False;
-       memcpy(p, dbuf.dptr, sizeof(*data));
-       fill_share_mode(p + sizeof(*data), fsp, port, op_type);
-       memcpy(p + sizeof(*data) + sizeof(share_mode_entry), dbuf.dptr + sizeof(*data),
-              dbuf.dsize - sizeof(*data));
-       SAFE_FREE(dbuf.dptr);
-       dbuf.dptr = p;
-       dbuf.dsize = size;
-       if (tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE) == -1)
-               ret = False;
-       print_share_mode_table((struct locking_data *)p);
-       SAFE_FREE(p);
-       return ret;
+/* Record a deferred open for mid on dev/ino in the held lock. */
+void add_deferred_open(struct share_mode_lock *lck, uint16 mid,
+                      struct timeval request_time,
+                      SMB_DEV_T dev, SMB_INO_T ino)
+{
+       struct share_mode_entry deferred;
+
+       fill_deferred_open_entry(&deferred, request_time, dev, ino, mid);
+       add_share_mode_entry(lck, &deferred);
 }
 
 /*******************************************************************
- A generic in-place modification call for share mode entries.
+ Check if two share mode entries are identical, ignoring oplock 
+ and mid info and desired_access.
 ********************************************************************/
 
-static BOOL mod_share_mode( SMB_DEV_T dev, SMB_INO_T inode, share_mode_entry *entry,
-                          void (*mod_fn)(share_mode_entry *, SMB_DEV_T, SMB_INO_T, void *),
-                          void *param)
+static BOOL share_modes_identical(struct share_mode_entry *e1,
+                                 struct share_mode_entry *e2)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i;
-       share_mode_entry *shares;
-       BOOL need_store=False;
-       BOOL ret = True;
+       /* Sanity check: two entries agreeing on pid, file id, dev and
+          inode but differing in share_access are treated as a logic
+          error and trigger a panic. */
+#if 1 /* JRA PARANOIA TEST - REMOVE LATER */
+       if (procid_equal(&e1->pid, &e2->pid) &&
+           e1->share_file_id == e2->share_file_id &&
+           e1->dev == e2->dev &&
+           e1->inode == e2->inode &&
+           (e1->share_access) != (e2->share_access)) {
+               DEBUG(0,("PANIC: share_modes_identical: share_mode "
+                        "mismatch (e1 = 0x%x, e2 = 0x%x). Logic error.\n",
+                        (unsigned int)e1->share_access,
+                        (unsigned int)e2->share_access ));
+               smb_panic("PANIC: share_modes_identical logic error.\n");
+       }
+#endif
 
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return False;
+       /* Identity is pid + share_access + dev + inode + share_file_id;
+          op_type, op_mid and access_mask are not compared. */
+       return (procid_equal(&e1->pid, &e2->pid) &&
+               (e1->share_access) == (e2->share_access) &&
+               e1->dev == e2->dev &&
+               e1->inode == e2->inode &&
+               e1->share_file_id == e2->share_file_id );
+}
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+/* Two deferred open entries match when pid, mid, dev and inode agree. */
+static BOOL deferred_open_identical(struct share_mode_entry *e1,
+                                   struct share_mode_entry *e2)
+{
+       /* Different owning process: cannot be the same request. */
+       if (!procid_equal(&e1->pid, &e2->pid)) {
+               return False;
+       }
+
+       return ((e1->op_mid == e2->op_mid) &&
+               (e1->dev == e2->dev) &&
+               (e1->inode == e2->inode));
+}
 
-       /* find any with our pid and call the supplied function */
-       for (i=0;i<data->u.num_share_mode_entries;i++) {
-               if (share_modes_identical(entry, &shares[i])) {
-                       mod_fn(&shares[i], dev, inode, param);
-                       need_store=True;
-               }
-       }
+/* Return the first entry in lck that matches the template `entry`,
+   or NULL if none does. Valid share mode entries are compared with
+   share_modes_identical(), deferred open entries with
+   deferred_open_identical(); the two kinds never match each other. */
+static struct share_mode_entry *find_share_mode_entry(struct share_mode_lock *lck,
+                                                     struct share_mode_entry *entry)
+{
+       int i;
 
-       /* if the mod fn was called then store it back */
-       if (need_store) {
-               if (data->u.num_share_mode_entries == 0) {
-                       if (tdb_delete(tdb, locking_key(dev, inode)) == -1)
-                               ret = False;
-               } else {
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1)
-                               ret = False;
+       for (i=0; i<lck->num_share_modes; i++) {
+               struct share_mode_entry *e = &lck->share_modes[i];
+               /* Both template and candidate must be valid share modes. */
+               if (is_valid_share_mode_entry(entry) &&
+                   is_valid_share_mode_entry(e) &&
+                   share_modes_identical(e, entry)) {
+                       return e;
+               }
+               /* Both template and candidate must be deferred opens. */
+               if (is_deferred_open_entry(entry) &&
+                   is_deferred_open_entry(e) &&
+                   deferred_open_identical(e, entry)) {
+                       return e;
                }
        }
-
-       SAFE_FREE(dbuf.dptr);
-       return ret;
+       return NULL;
 }
 
 /*******************************************************************
- Static function that actually does the work for the generic function
- below.
+ Del the share mode of a file for this process. Return True if a
+ matching entry was found and marked unused, False otherwise.
 ********************************************************************/
 
-static void remove_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
-                                   void *param)
+BOOL del_share_mode(struct share_mode_lock *lck, files_struct *fsp)
 {
-       DEBUG(10,("remove_share_oplock_fn: removing oplock info for entry dev=%x ino=%.0f\n",
-                 (unsigned int)dev, (double)inode ));
-       /* Delete the oplock info. */
-       entry->op_port = 0;
-       entry->op_type = NO_OPLOCK;
-}
+       struct share_mode_entry entry, *e;
 
-/*******************************************************************
- Remove an oplock port and mode entry from a share mode.
-********************************************************************/
+       /* Build a template for the lookup; NO_OPLOCK keeps it a valid
+          share mode entry, and op_type itself is not compared. */
+       fill_share_mode_entry(&entry, fsp, 0, NO_OPLOCK);
+
+       e = find_share_mode_entry(lck, &entry);
+       if (e == NULL) {
+               return False;
+       }
+
+       /* The slot is only marked unused; the array is not compacted. */
+       e->op_type = UNUSED_SHARE_MODE_ENTRY;
+       lck->modified = True;
+       return True;
+}
 
-BOOL remove_share_oplock(files_struct *fsp)
+/* Mark the calling process's deferred open entry for `mid` on this
+   lock's dev/ino as unused. Does nothing if no such entry exists. */
+void del_deferred_open_entry(struct share_mode_lock *lck, uint16 mid)
 {
-       share_mode_entry entry;
-       /*
-        * Fake up an entry for comparisons...
-        */
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return mod_share_mode(fsp->dev, fsp->inode, &entry, remove_share_oplock_fn, NULL);
+       struct share_mode_entry entry, *e;
+
+       /* The request time is not part of the match, so zero is used. */
+       fill_deferred_open_entry(&entry, timeval_zero(),
+                                lck->dev, lck->ino, mid);
+
+       e = find_share_mode_entry(lck, &entry);
+       if (e == NULL) {
+               return;
+       }
+
+       e->op_type = UNUSED_SHARE_MODE_ENTRY;
+       lck->modified = True;
 }
 
 /*******************************************************************
- Static function that actually does the work for the generic function
- below.
+ Remove an oplock mid and mode entry from a share mode.
 ********************************************************************/
 
-static void downgrade_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
-                                   void *param)
+BOOL remove_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
 {
-       DEBUG(10,("downgrade_share_oplock_fn: downgrading oplock info for entry dev=%x ino=%.0f\n",
-                 (unsigned int)dev, (double)inode ));
-       entry->op_type = LEVEL_II_OPLOCK;
+       struct share_mode_entry entry, *e;
+
+       /* Template used only to locate this fsp's entry in the lock. */
+       fill_share_mode_entry(&entry, fsp, 0, NO_OPLOCK);
+
+       e = find_share_mode_entry(lck, &entry);
+       if (e == NULL) {
+               return False;
+       }
+
+       /* Clear the oplock state on the stored entry. */
+       e->op_mid = 0;
+       e->op_type = NO_OPLOCK;
+       lck->modified = True;
+       return True;
 }
 
 /*******************************************************************
  Downgrade a oplock type from exclusive to level II.
 ********************************************************************/
 
-BOOL downgrade_share_oplock(files_struct *fsp)
+BOOL downgrade_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
+{
+       struct share_mode_entry probe;
+       struct share_mode_entry *match;
+
+       /* Describe this fsp's entry so it can be located in the lock. */
+       fill_share_mode_entry(&probe, fsp, 0, NO_OPLOCK);
+       match = find_share_mode_entry(lck, &probe);
+
+       if (match != NULL) {
+               match->op_type = LEVEL_II_OPLOCK;
+               lck->modified = True;
+               return True;
+       }
+
+       /* No entry for this fsp: nothing to downgrade. */
+       return False;
+}
+
+/****************************************************************************
+ Deal with the internal needs of setting the delete on close flag. Note that
+ as the tdb locking is recursive, it is safe to call this from within 
+ open_file_shared. JRA.
+****************************************************************************/
+
+NTSTATUS can_set_delete_on_close(files_struct *fsp, BOOL delete_on_close,
+                                uint32 dosmode)
 {
-       share_mode_entry entry;
+       /* Clearing the flag is always permitted. */
+       if (!delete_on_close) {
+               return NT_STATUS_OK;
+       }
+
        /*
-        * Fake up an entry for comparisons...
+        * Only allow delete on close for writable files.
         */
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return mod_share_mode(fsp->dev, fsp->inode, &entry, downgrade_share_oplock_fn, NULL);
+
+       /* A read-only attribute is tolerated when lp_delete_readonly()
+          is true for this share. */
+       if ((dosmode & aRONLY) &&
+           !lp_delete_readonly(SNUM(fsp->conn))) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on close "
+                         "flag set but file attribute is readonly.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_CANNOT_DELETE;
+       }
+
+       /*
+        * Only allow delete on close for writable shares.
+        */
+
+       if (!CAN_WRITE(fsp->conn)) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on "
+                         "close flag set but write access denied on share.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       /*
+        * Only allow delete on close for files/directories opened with delete
+        * intent.
+        */
+
+       if (!(fsp->access_mask & DELETE_ACCESS)) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on "
+                         "close flag set but delete access denied.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       return NT_STATUS_OK;
 }
 
-/*******************************************************************
- Get/Set the delete on close flag in a set of share modes.
- Return False on fail, True on success.
-********************************************************************/
+/*************************************************************************
+ Return a talloced copy of a UNIX_USER_TOKEN. NULL on fail.
+ (Should this be in locking.c.... ?).
+*************************************************************************/
 
-BOOL modify_delete_flag( SMB_DEV_T dev, SMB_INO_T inode, BOOL delete_on_close)
+static UNIX_USER_TOKEN *copy_unix_token(TALLOC_CTX *ctx, UNIX_USER_TOKEN *tok)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i;
-       share_mode_entry *shares;
-
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return False;
+       /*
+        * ctx: talloc parent for the copy.
+        * tok: token to duplicate; NULL yields NULL.
+        * Returns the copy (its groups array is a talloc child of it),
+        * or NULL when tok is NULL or an allocation fails.
+        */
+       UNIX_USER_TOKEN *cpy;
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       if (tok == NULL) {
+               return NULL;
+       }
 
-       /* Set/Unset the delete on close element. */
-       for (i=0;i<data->u.num_share_mode_entries;i++,shares++) {
-               shares->share_mode = (delete_on_close ?
-                            (shares->share_mode | DELETE_ON_CLOSE_FLAG) :
-                            (shares->share_mode & ~DELETE_ON_CLOSE_FLAG) );
+       cpy = TALLOC_P(ctx, UNIX_USER_TOKEN);
+       if (!cpy) {
+               return NULL;
        }
 
-       /* store it back */
-       if (data->u.num_share_mode_entries) {
-               if (tdb_store(tdb, locking_key(dev,inode), dbuf, TDB_REPLACE)==-1) {
-                       SAFE_FREE(dbuf.dptr);
-                       return False;
+       cpy->uid = tok->uid;
+       cpy->gid = tok->gid;
+       cpy->ngroups = tok->ngroups;
+       /* Never leave the pointer uninitialised when there are no groups. */
+       cpy->groups = NULL;
+       if (tok->ngroups) {
+               /* Make this a talloc child of cpy. */
+               cpy->groups = TALLOC_ARRAY(cpy, gid_t, tok->ngroups);
+               if (!cpy->groups) {
+                       /* Do not leave a half-built copy hanging off ctx. */
+                       TALLOC_FREE(cpy);
+                       return NULL;
                }
+               memcpy(cpy->groups, tok->groups, tok->ngroups * sizeof(gid_t));
        }
+       return cpy;
+}
 
-       SAFE_FREE(dbuf.dptr);
-       return True;
+/****************************************************************************
+ Replace the delete on close token.
+****************************************************************************/
+
+/* Drop any token currently held by lck and replace it with a copy of
+   tok allocated off lck. Passing tok == NULL leaves the lock without
+   a token. Always marks lck as modified. */
+void set_delete_on_close_token(struct share_mode_lock *lck, UNIX_USER_TOKEN *tok)
+{
+       /* Ensure there's no token. */
+       if (lck->delete_token) {
+               TALLOC_FREE(lck->delete_token); /* Also deletes groups... */
+               lck->delete_token = NULL;
+       }
+
+       /* Copy the new token (can be NULL). */
+       /* copy_unix_token() also returns NULL if allocation fails. */
+       lck->delete_token = copy_unix_token(lck, tok);
+       lck->modified = True;
 }
 
 /****************************************************************************
- Traverse the whole database with this function, calling traverse_callback
- on each share mode
+ Sets the delete on close flag over all share modes on this file.
+ Modify the share mode entry for all files open
+ on this device and inode to tell other smbds we have
+ changed the delete on close flag. This will be noticed
+ in the close code, the last closer will delete the file
+ if flag is set.
+ Note that setting this to any value clears the initial_delete_on_close flag.
+ If delete_on_close is True this makes a copy of any UNIX_USER_TOKEN into the
+ lck entry.
 ****************************************************************************/
 
+BOOL set_delete_on_close(files_struct *fsp, BOOL delete_on_close, UNIX_USER_TOKEN *tok)
+{
+       struct share_mode_lock *lck = NULL;
+       const char *action = delete_on_close ? "Adding" : "Removing";
+
+       DEBUG(10,("set_delete_on_close: %s delete on close flag for "
+                 "fnum = %d, file %s\n",
+                 action, fsp->fnum, fsp->fsp_name ));
+
+       /* Nothing is recorded when fsp->is_stat is set. */
+       if (fsp->is_stat) {
+               return True;
+       }
+
+       lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL, NULL);
+       if (lck == NULL) {
+               return False;
+       }
+
+       /* Only replace the flag and token when the requested state
+          differs from the stored one. */
+       if (lck->delete_on_close != delete_on_close) {
+               set_delete_on_close_token(lck, tok);
+               lck->delete_on_close = delete_on_close;
+               if (delete_on_close) {
+                       /* Setting the flag requires a token to be present. */
+                       SMB_ASSERT(lck->delete_token != NULL);
+               }
+               lck->modified = True;
+       }
+
+       /* Any call, set or clear, cancels the initial delete-on-close. */
+       if (lck->initial_delete_on_close) {
+               lck->initial_delete_on_close = False;
+               lck->modified = True;
+       }
+
+       /* Freeing the lock runs its destructor. */
+       TALLOC_FREE(lck);
+       return True;
+}
+
+/* tdb_traverse() callback: for a record whose key is a locking_key,
+   invoke the callback passed in `state` once per stored share mode
+   entry, handing it the entry, the share path and the file name.
+   Records with any other key size are skipped. Always returns 0. */
 static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf, 
-                       void* state)
+                       void *state)
 {
        struct locking_data *data;
-       share_mode_entry *shares;
-       char *name;
+       struct share_mode_entry *shares;
+       const char *sharepath;
+       const char *fname;
        int i;
+       /* Must have the same type as the fn argument of share_mode_forall(),
+          which takes a const entry pointer; calling through a pointer of
+          a different function type is undefined. */
+       void (*traverse_callback)(const struct share_mode_entry *, const char *, const char *) = state;
 
-       SHAREMODE_FN(traverse_callback) = (SHAREMODE_FN_CAST())state;
+       /* Ensure this is a locking_key record. */
+       if (kbuf.dsize != sizeof(struct locking_key))
+               return 0;
 
        data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
-       name = dbuf.dptr + sizeof(*data) + data->u.num_share_mode_entries*sizeof(*shares);
-
-       for (i=0;i<data->u.num_share_mode_entries;i++) {
-               traverse_callback(&shares[i], name);
+       shares = (struct share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       /* The share path follows the entries and the delete token;
+          the file name follows the share path's terminating NUL. */
+       sharepath = dbuf.dptr + sizeof(*data) +
+               data->u.s.num_share_mode_entries*sizeof(*shares) +
+               data->u.s.delete_token_size;
+       fname = dbuf.dptr + sizeof(*data) +
+               data->u.s.num_share_mode_entries*sizeof(*shares) +
+               data->u.s.delete_token_size +
+               strlen(sharepath) + 1;
+
+       for (i=0;i<data->u.s.num_share_mode_entries;i++) {
+               traverse_callback(&shares[i], sharepath, fname);
        }
        return 0;
 }
@@ -839,9 +1304,9 @@ static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf,
  share mode system.
 ********************************************************************/
 
-int share_mode_forall(SHAREMODE_FN(fn))
+int share_mode_forall(void (*fn)(const struct share_mode_entry *, const char *, const char *))
 {
-       if (!tdb)
+       if (tdb == NULL)
                return 0;
-       return tdb_traverse(tdb, traverse_fn, (void*)fn);
+       return tdb_traverse(tdb, traverse_fn, fn);
 }