r22846: Chunk one to replace message_send_pid with messaging_send: Deep inside
[sfrench/samba-autobuild/.git] / source / locking / locking.c
index 1b40187ac4621c6ea7eb89f32c0bf7e42958785e..28a7fab97d3fd2c0a30fc783adba92df404b77c2 100644 (file)
@@ -2,7 +2,8 @@
    Unix SMB/CIFS implementation.
    Locking functions
    Copyright (C) Andrew Tridgell 1992-2000
-   Copyright (C) Jeremy Allison 1992-2000
+   Copyright (C) Jeremy Allison 1992-2006
+   Copyright (C) Volker Lendecke 2005
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    rewritten completely to use new tdb code. Tridge, Dec '99
 
    Added POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
+   Added Unix Extensions POSIX locking support. Jeremy Allison Mar 2006.
 */
 
 #include "includes.h"
-uint16 global_smbpid;
+
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
 
 /* the locking database handle */
 static TDB_CONTEXT *tdb;
 
 /****************************************************************************
- Debugging aid :-).
+ Debugging aids :-).
 ****************************************************************************/
 
-static const char *lock_type_name(enum brl_type lock_type)
+const char *lock_type_name(enum brl_type lock_type)
 {
-       return (lock_type == READ_LOCK) ? "READ" : "WRITE";
+       switch (lock_type) {
+               case READ_LOCK:
+                       return "READ";
+               case WRITE_LOCK:
+                       return "WRITE";
+               case PENDING_READ_LOCK:
+                       return "PENDING_READ";
+               case PENDING_WRITE_LOCK:
+                       return "PENDING_WRITE";
+               default:
+                       return "other";
+       }
+}
+
+const char *lock_flav_name(enum brl_flavour lock_flav)
+{
+       return (lock_flav == WINDOWS_LOCK) ? "WINDOWS_LOCK" : "POSIX_LOCK";
 }
 
 /****************************************************************************
  Utility function called to see if a file region is locked.
- If check_self is True, then checks on our own fd with the same locking context
- are still made. If check_self is False, then checks are not made on our own fd
- with the same locking context are not made.
+ Called in the read/write codepath.
 ****************************************************************************/
 
-BOOL is_locked(files_struct *fsp,connection_struct *conn,
-              SMB_BIG_UINT count,SMB_BIG_UINT offset, 
-              enum brl_type lock_type, BOOL check_self)
+BOOL is_locked(files_struct *fsp,
+               uint32 smbpid,
+               SMB_BIG_UINT count,
+               SMB_BIG_UINT offset, 
+               enum brl_type lock_type)
 {
-       int snum = SNUM(conn);
-       BOOL ret;
+       int strict_locking = lp_strict_locking(fsp->conn->params);
+       enum brl_flavour lock_flav = lp_posix_cifsu_locktype(fsp);
+       BOOL ret = True;
        
-       if (count == 0)
-               return(False);
+       if (count == 0) {
+               return False;
+       }
 
-       if (!lp_locking(snum) || !lp_strict_locking(snum))
-               return(False);
+       if (!lp_locking(fsp->conn->params) || !strict_locking) {
+               return False;
+       }
 
-       ret = !brl_locktest(fsp->dev, fsp->inode, fsp->fnum,
-                            global_smbpid, sys_getpid(), conn->cnum, 
-                            offset, count, lock_type, check_self);
+       if (strict_locking == Auto) {
+               if  (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && (lock_type == READ_LOCK || lock_type == WRITE_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - exclusive oplock on file %s\n", fsp->fsp_name ));
+                       ret = False;
+               } else if ((fsp->oplock_type == LEVEL_II_OPLOCK) &&
+                          (lock_type == READ_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - level II oplock on file %s\n", fsp->fsp_name ));
+                       ret = False;
+               } else {
+                       struct byte_range_lock *br_lck = brl_get_locks_readonly(NULL, fsp);
+                       if (!br_lck) {
+                               return False;
+                       }
+                       ret = !brl_locktest(br_lck,
+                                       smbpid,
+                                       procid_self(),
+                                       offset,
+                                       count,
+                                       lock_type,
+                                       lock_flav);
+                       TALLOC_FREE(br_lck);
+               }
+       } else {
+               struct byte_range_lock *br_lck = brl_get_locks_readonly(NULL, fsp);
+               if (!br_lck) {
+                       return False;
+               }
+               ret = !brl_locktest(br_lck,
+                               smbpid,
+                               procid_self(),
+                               offset,
+                               count,
+                               lock_type,
+                               lock_flav);
+               TALLOC_FREE(br_lck);
+       }
 
-       DEBUG(10,("is_locked: brl start=%.0f len=%.0f %s for file %s\n",
+       DEBUG(10,("is_locked: flavour = %s brl start=%.0f len=%.0f %s for fnum %d file %s\n",
+                       lock_flav_name(lock_flav),
                        (double)offset, (double)count, ret ? "locked" : "unlocked",
-                       fsp->fsp_name ));
-
-       /*
-        * There is no lock held by an SMB daemon, check to
-        * see if there is a POSIX lock from a UNIX or NFS process.
-        */
-
-       if(!ret && lp_posix_locking(snum)) {
-               ret = is_posix_locked(fsp, offset, count, lock_type);
-
-               DEBUG(10,("is_locked: posix start=%.0f len=%.0f %s for file %s\n",
-                               (double)offset, (double)count, ret ? "locked" : "unlocked",
-                               fsp->fsp_name ));
-       }
+                       fsp->fnum, fsp->fsp_name ));
 
        return ret;
 }
 
 /****************************************************************************
Utility function called by locking requests.
Find out if a lock could be granted - return who is blocking us if we can't.
 ****************************************************************************/
 
-static NTSTATUS do_lock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type)
+NTSTATUS query_lock(files_struct *fsp,
+                       uint32 *psmbpid,
+                       SMB_BIG_UINT *pcount,
+                       SMB_BIG_UINT *poffset,
+                       enum brl_type *plock_type,
+                       enum brl_flavour lock_flav)
 {
+       struct byte_range_lock *br_lck = NULL;
        NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED;
 
-       if (!lp_locking(SNUM(conn)))
-               return NT_STATUS_OK;
+       if (!fsp->can_lock) {
+               return fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
+       }
 
-       /* NOTE! 0 byte long ranges ARE allowed and should be stored  */
+       if (!lp_locking(fsp->conn->params)) {
+               return NT_STATUS_OK;
+       }
 
-       DEBUG(10,("do_lock: lock type %s start=%.0f len=%.0f requested for file %s\n",
-                 lock_type_name(lock_type), (double)offset, (double)count, fsp->fsp_name ));
-
-       if (OPEN_FSP(fsp) && fsp->can_lock && (fsp->conn == conn)) {
-               status = brl_lock(fsp->dev, fsp->inode, fsp->fnum,
-                                 lock_pid, sys_getpid(), conn->cnum, 
-                                 offset, count, 
-                                 lock_type);
-
-               if (NT_STATUS_IS_OK(status) && lp_posix_locking(SNUM(conn))) {
-
-                       /*
-                        * Try and get a POSIX lock on this range.
-                        * Note that this is ok if it is a read lock
-                        * overlapping on a different fd. JRA.
-                        */
-
-                       if (!set_posix_lock(fsp, offset, count, lock_type)) {
-                               status = NT_STATUS_LOCK_NOT_GRANTED;
-                               /*
-                                * We failed to map - we must now remove the brl
-                                * lock entry.
-                                */
-                               (void)brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                                                               lock_pid, sys_getpid(), conn->cnum, 
-                                                               offset, count, False,
-                                                               NULL, NULL);
-                       }
-               }
+       br_lck = brl_get_locks_readonly(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
        }
 
+       status = brl_lockquery(br_lck,
+                       psmbpid,
+                       procid_self(),
+                       poffset,
+                       pcount,
+                       plock_type,
+                       lock_flav);
+
+       TALLOC_FREE(br_lck);
        return status;
 }
 
 /****************************************************************************
- Utility function called by locking requests. This is *DISGUSTING*. It also
- appears to be "What Windows Does" (tm). Andrew, ever wonder why Windows 2000
- is so slow on the locking tests...... ? This is the reason. Much though I hate
- it, we need this. JRA.
+ Utility function called by locking requests.
 ****************************************************************************/
 
-NTSTATUS do_lock_spin(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type)
+struct byte_range_lock *do_lock(struct messaging_context *msg_ctx,
+                       files_struct *fsp,
+                       uint32 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_type lock_type,
+                       enum brl_flavour lock_flav,
+                       BOOL blocking_lock,
+                       NTSTATUS *perr)
 {
-       int j, maxj = lp_lock_spin_count();
-       int sleeptime = lp_lock_sleep_time();
-       NTSTATUS status, ret;
+       struct byte_range_lock *br_lck = NULL;
 
-       if (maxj <= 0)
-               maxj = 1;
+       if (!fsp->can_lock) {
+               *perr = fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
+               return NULL;
+       }
 
-       ret = NT_STATUS_OK; /* to keep dumb compilers happy */
+       if (!lp_locking(fsp->conn->params)) {
+               *perr = NT_STATUS_OK;
+               return NULL;
+       }
 
-       for (j = 0; j < maxj; j++) {
-               status = do_lock(fsp, conn, lock_pid, count, offset, lock_type);
-               if (!NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED) &&
-                   !NT_STATUS_EQUAL(status, NT_STATUS_FILE_LOCK_CONFLICT)) {
-                       return status;
-               }
-               /* if we do fail then return the first error code we got */
-               if (j == 0) {
-                       ret = status;
-               }
-               if (sleeptime)
-                       sys_usleep(sleeptime);
+       /* NOTE! 0 byte long ranges ARE allowed and should be stored  */
+
+       DEBUG(10,("do_lock: lock flavour %s lock type %s start=%.0f len=%.0f requested for fnum %d file %s\n",
+               lock_flav_name(lock_flav), lock_type_name(lock_type),
+               (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
+
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               *perr = NT_STATUS_NO_MEMORY;
+               return NULL;
        }
-       return ret;
-}
 
-/* Struct passed to brl_unlock. */
-struct posix_unlock_data_struct {
-       files_struct *fsp;
-       SMB_BIG_UINT offset;
-       SMB_BIG_UINT count;
-};
+       *perr = brl_lock(msg_ctx,
+                       br_lck,
+                       lock_pid,
+                       procid_self(),
+                       offset,
+                       count, 
+                       lock_type,
+                       lock_flav,
+                       blocking_lock);
 
-/****************************************************************************
- Function passed to brl_unlock to allow POSIX unlock to be done first.
-****************************************************************************/
+       /* Blocking (i.e. pending) locks also count here,
+        * as this is an efficiency counter to avoid checking
+        * the lock db on close. JRA. */
 
-static void posix_unlock(void *pre_data)
-{
-       struct posix_unlock_data_struct *pdata = (struct posix_unlock_data_struct *)pre_data;
+       fsp->current_lock_count++;
 
-       if (lp_posix_locking(SNUM(pdata->fsp->conn)))
-               release_posix_lock(pdata->fsp, pdata->offset, pdata->count);
+       return br_lck;
 }
 
 /****************************************************************************
  Utility function called by unlocking requests.
 ****************************************************************************/
 
-NTSTATUS do_unlock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
-                  SMB_BIG_UINT count,SMB_BIG_UINT offset)
+NTSTATUS do_unlock(struct messaging_context *msg_ctx,
+                       files_struct *fsp,
+                       uint32 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_flavour lock_flav)
 {
        BOOL ok = False;
-       struct posix_unlock_data_struct posix_data;
+       struct byte_range_lock *br_lck = NULL;
        
-       if (!lp_locking(SNUM(conn)))
-               return NT_STATUS_OK;
+       if (!fsp->can_lock) {
+               return fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
+       }
        
-       if (!OPEN_FSP(fsp) || !fsp->can_lock || (fsp->conn != conn)) {
-               return NT_STATUS_INVALID_HANDLE;
+       if (!lp_locking(fsp->conn->params)) {
+               return NT_STATUS_OK;
        }
        
-       DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for file %s\n",
-                 (double)offset, (double)count, fsp->fsp_name ));
+       DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for fnum %d file %s\n",
+                 (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
 
-       /*
-        * Remove the existing lock record from the tdb lockdb
-        * before looking at POSIX locks. If this record doesn't
-        * match then don't bother looking to remove POSIX locks.
-        */
-
-       posix_data.fsp = fsp;
-       posix_data.offset = offset;
-       posix_data.count = count;
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
+       }
 
-       ok = brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                       lock_pid, sys_getpid(), conn->cnum, offset, count,
-                       False, posix_unlock, (void *)&posix_data);
+       ok = brl_unlock(msg_ctx,
+                       br_lck,
+                       lock_pid,
+                       procid_self(),
+                       offset,
+                       count,
+                       lock_flav);
    
+       TALLOC_FREE(br_lck);
+
        if (!ok) {
                DEBUG(10,("do_unlock: returning ERRlock.\n" ));
                return NT_STATUS_RANGE_NOT_LOCKED;
        }
+
+       SMB_ASSERT(fsp->current_lock_count > 0);
+       fsp->current_lock_count--;
+
+       return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Cancel any pending blocked locks.
+****************************************************************************/
+
+NTSTATUS do_lock_cancel(files_struct *fsp,
+                       uint32 lock_pid,
+                       SMB_BIG_UINT count,
+                       SMB_BIG_UINT offset,
+                       enum brl_flavour lock_flav)
+{
+       BOOL ok = False;
+       struct byte_range_lock *br_lck = NULL;
+       
+       if (!fsp->can_lock) {
+               return fsp->is_directory ?
+                       NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
+       }
+       
+       if (!lp_locking(fsp->conn->params)) {
+               return NT_STATUS_DOS(ERRDOS, ERRcancelviolation);
+       }
+
+       DEBUG(10,("do_lock_cancel: cancel start=%.0f len=%.0f requested for fnum %d file %s\n",
+                 (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
+
+       br_lck = brl_get_locks(NULL, fsp);
+       if (!br_lck) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       ok = brl_lock_cancel(br_lck,
+                       lock_pid,
+                       procid_self(),
+                       offset,
+                       count,
+                       lock_flav);
+   
+       TALLOC_FREE(br_lck);
+
+       if (!ok) {
+               DEBUG(10,("do_lock_cancel: returning ERRcancelviolation.\n" ));
+               return NT_STATUS_DOS(ERRDOS, ERRcancelviolation);
+       }
+
+       SMB_ASSERT(fsp->current_lock_count > 0);
+       fsp->current_lock_count--;
+
        return NT_STATUS_OK;
 }
 
@@ -240,26 +338,29 @@ NTSTATUS do_unlock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
  Remove any locks on this fd. Called from file_close().
 ****************************************************************************/
 
-void locking_close_file(files_struct *fsp)
+void locking_close_file(struct messaging_context *msg_ctx,
+                       files_struct *fsp)
 {
-       pid_t pid = sys_getpid();
+       struct byte_range_lock *br_lck;
 
-       if (!lp_locking(SNUM(fsp->conn)))
+       if (!lp_locking(fsp->conn->params)) {
                return;
+       }
 
-       /*
-        * Just release all the brl locks, no need to release individually.
+       /* If we have no outstanding locks or pending
+        * locks then we don't need to look in the lock db.
         */
 
-       brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
-
-       if(lp_posix_locking(SNUM(fsp->conn))) {
+       if (fsp->current_lock_count == 0) {
+               return;
+       }
 
-               /* 
-                * Release all the POSIX locks.
-                */
-               posix_locking_close_file(fsp);
+       br_lck = brl_get_locks(NULL,fsp);
 
+       if (br_lck) {
+               cancel_pending_lock_requests_by_fid(fsp, br_lck);
+               brl_close_fnum(msg_ctx, br_lck);
+               TALLOC_FREE(br_lck);
        }
 }
 
@@ -277,15 +378,19 @@ BOOL locking_init(int read_only)
                return True;
 
        tdb = tdb_open_log(lock_path("locking.tdb"), 
-                      0, TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST), 
-                      read_only?O_RDONLY:O_RDWR|O_CREAT,
-                      0644);
+                       lp_open_files_db_hash_size(),
+                       TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST), 
+                       read_only?O_RDONLY:O_RDWR|O_CREAT,
+                       0644);
 
        if (!tdb) {
                DEBUG(0,("ERROR: Failed to initialise locking database\n"));
                return False;
        }
-       
+
+       /* Activate the per-hashchain freelist */
+       tdb_set_max_dead(tdb, 5);
+
        if (!posix_locking_init(read_only))
                return False;
 
@@ -300,21 +405,31 @@ BOOL locking_init(int read_only)
 
 BOOL locking_end(void)
 {
+       BOOL ret = True;
 
        brl_shutdown(open_read_only);
        if (tdb) {
-
                if (tdb_close(tdb) != 0)
-                       return False;
+                       ret = False;
        }
 
-       return True;
+       return ret;
 }
 
 /*******************************************************************
  Form a static locking key for a dev/inode pair.
 ******************************************************************/
 
+/* key and data records in the tdb locking database */
+struct locking_key {
+       SMB_DEV_T dev;
+       SMB_INO_T ino;
+};
+
+/*******************************************************************
+ Form a static locking key for a dev/inode pair.
+******************************************************************/
+
 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
 {
        static struct locking_key key;
@@ -322,67 +437,31 @@ static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
 
        memset(&key, '\0', sizeof(key));
        key.dev = dev;
-       key.inode = inode;
-       kbuf.dptr = (char *)&key;
+       key.ino = inode;
+       kbuf.dptr = (uint8 *)&key;
        kbuf.dsize = sizeof(key);
        return kbuf;
 }
 
-static TDB_DATA locking_key_fsp(files_struct *fsp)
-{
-       return locking_key(fsp->dev, fsp->inode);
-}
-
-/*******************************************************************
- Lock a hash bucket entry.
-******************************************************************/
-
-BOOL lock_share_entry(connection_struct *conn,
-                     SMB_DEV_T dev, SMB_INO_T inode)
-{
-       return tdb_chainlock(tdb, locking_key(dev, inode)) == 0;
-}
-
-/*******************************************************************
- Unlock a hash bucket entry.
-******************************************************************/
-
-void unlock_share_entry(connection_struct *conn,
-                       SMB_DEV_T dev, SMB_INO_T inode)
-{
-       tdb_chainunlock(tdb, locking_key(dev, inode));
-}
-
-/*******************************************************************
- Lock a hash bucket entry. use a fsp for convenience
-******************************************************************/
-
-BOOL lock_share_entry_fsp(files_struct *fsp)
-{
-       return tdb_chainlock(tdb, locking_key(fsp->dev, fsp->inode)) == 0;
-}
-
-/*******************************************************************
- Unlock a hash bucket entry.
-******************************************************************/
-
-void unlock_share_entry_fsp(files_struct *fsp)
-{
-       tdb_chainunlock(tdb, locking_key(fsp->dev, fsp->inode));
-}
-
 /*******************************************************************
  Print out a share mode.
 ********************************************************************/
 
-static char *share_mode_str(int num, share_mode_entry *e)
+char *share_mode_str(int num, struct share_mode_entry *e)
 {
        static pstring share_str;
 
-       slprintf(share_str, sizeof(share_str)-1, "share_mode_entry[%d]: \
-pid = %u, share_mode = 0x%x, desired_access = 0x%x, port = 0x%x, type= 0x%x, file_id = %lu, dev = 0x%x, inode = %.0f",
-       num, e->pid, e->share_mode, (unsigned int)e->desired_access, e->op_port, e->op_type, e->share_file_id,
-       (unsigned int)e->dev, (double)e->inode );
+       slprintf(share_str, sizeof(share_str)-1, "share_mode_entry[%d]: %s "
+                "pid = %s, share_access = 0x%x, private_options = 0x%x, "
+                "access_mask = 0x%x, mid = 0x%x, type= 0x%x, file_id = %lu, "
+                "uid = %u, flags = %u, dev = 0x%x, inode = %.0f",
+                num,
+                e->op_type == UNUSED_SHARE_MODE_ENTRY ? "UNUSED" : "",
+                procid_str_static(&e->pid),
+                e->share_access, e->private_options,
+                e->access_mask, e->op_mid, e->op_type, e->share_file_id,
+                (unsigned int)e->uid, (unsigned int)e->flags,
+                (unsigned int)e->dev, (double)e->inode );
 
        return share_str;
 }
@@ -393,13 +472,17 @@ pid = %u, share_mode = 0x%x, desired_access = 0x%x, port = 0x%x, type= 0x%x, fil
 
 static void print_share_mode_table(struct locking_data *data)
 {
-       int num_share_modes = data->u.num_share_mode_entries;
-       share_mode_entry *shares = (share_mode_entry *)(data + 1);
+       int num_share_modes = data->u.s.num_share_mode_entries;
+       struct share_mode_entry *shares =
+               (struct share_mode_entry *)(data + 1);
        int i;
 
        for (i = 0; i < num_share_modes; i++) {
-               share_mode_entry *entry_p = &shares[i];
-               DEBUG(10,("print_share_mode_table: %s\n", share_mode_str(i, entry_p) ));
+               struct share_mode_entry entry;
+
+               memcpy(&entry, &shares[i], sizeof(struct share_mode_entry));
+               DEBUG(10,("print_share_mode_table: %s\n",
+                         share_mode_str(i, &entry)));
        }
 }
 
@@ -407,452 +490,853 @@ static void print_share_mode_table(struct locking_data *data)
  Get all share mode entries for a dev/inode pair.
 ********************************************************************/
 
-int get_share_modes(connection_struct *conn, 
-                   SMB_DEV_T dev, SMB_INO_T inode, 
-                   share_mode_entry **pp_shares)
+static BOOL parse_share_modes(TDB_DATA dbuf, struct share_mode_lock *lck)
 {
-       TDB_DATA dbuf;
        struct locking_data *data;
-       int num_share_modes;
-       share_mode_entry *shares = NULL;
-
-       *pp_shares = NULL;
+       int i;
 
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return 0;
+       if (dbuf.dsize < sizeof(struct locking_data)) {
+               smb_panic("PANIC: parse_share_modes: buffer too short.\n");
+       }
 
        data = (struct locking_data *)dbuf.dptr;
-       num_share_modes = data->u.num_share_mode_entries;
-       if(num_share_modes) {
-               int i;
-               int del_count = 0;
 
-               shares = (share_mode_entry *)memdup(dbuf.dptr + sizeof(*data),  
-                                               num_share_modes * sizeof(share_mode_entry));
+       lck->delete_on_close = data->u.s.delete_on_close;
+       lck->num_share_modes = data->u.s.num_share_mode_entries;
+
+       DEBUG(10, ("parse_share_modes: delete_on_close: %d, "
+                  "num_share_modes: %d\n",
+               lck->delete_on_close,
+               lck->num_share_modes));
 
-               if (!shares) {
-                       SAFE_FREE(dbuf.dptr);
-                       return 0;
+       if ((lck->num_share_modes < 0) || (lck->num_share_modes > 1000000)) {
+               DEBUG(0, ("invalid number of share modes: %d\n",
+                         lck->num_share_modes));
+               smb_panic("PANIC: invalid number of share modes");
+       }
+
+       lck->share_modes = NULL;
+       
+       if (lck->num_share_modes != 0) {
+
+               if (dbuf.dsize < (sizeof(struct locking_data) +
+                                 (lck->num_share_modes *
+                                  sizeof(struct share_mode_entry)))) {
+                       smb_panic("PANIC: parse_share_modes: buffer too short.\n");
+               }
+                                 
+               lck->share_modes = (struct share_mode_entry *)
+                       TALLOC_MEMDUP(lck, dbuf.dptr+sizeof(*data),
+                                     lck->num_share_modes *
+                                     sizeof(struct share_mode_entry));
+
+               if (lck->share_modes == NULL) {
+                       smb_panic("talloc failed\n");
                }
+       }
 
-               /*
-                * Ensure that each entry has a real process attached.
-                */
-
-               for (i = 0; i < num_share_modes; ) {
-                       share_mode_entry *entry_p = &shares[i];
-                       if (process_exists(entry_p->pid)) {
-                               DEBUG(10,("get_share_modes: %s\n", share_mode_str(i, entry_p) ));
-                               i++;
-                       } else {
-                               DEBUG(10,("get_share_modes: deleted %s\n", share_mode_str(i, entry_p) ));
-                               memcpy( &shares[i], &shares[i+1],
-                                       sizeof(share_mode_entry) * (num_share_modes - i - 1));
-                               num_share_modes--;
-                               del_count++;
-                       }
+       /* Get any delete token. */
+       if (data->u.s.delete_token_size) {
+               uint8 *p = dbuf.dptr + sizeof(*data) +
+                               (lck->num_share_modes *
+                               sizeof(struct share_mode_entry));
+
+               if ((data->u.s.delete_token_size < sizeof(uid_t) + sizeof(gid_t)) ||
+                               ((data->u.s.delete_token_size - sizeof(uid_t)) % sizeof(gid_t)) != 0) {
+                       DEBUG(0, ("parse_share_modes: invalid token size %d\n",
+                               data->u.s.delete_token_size));
+                       smb_panic("parse_share_modes: invalid token size\n");
+               }
+
+               lck->delete_token = TALLOC_P(lck, UNIX_USER_TOKEN);
+               if (!lck->delete_token) {
+                       smb_panic("talloc failed\n");
                }
 
-               /* Did we delete any ? If so, re-store in tdb. */
-               if (del_count) {
-                       data->u.num_share_mode_entries = num_share_modes;
-                       
-                       if (num_share_modes)
-                               memcpy(dbuf.dptr + sizeof(*data), shares,
-                                               num_share_modes * sizeof(share_mode_entry));
-
-                       /* The record has shrunk a bit */
-                       dbuf.dsize -= del_count * sizeof(share_mode_entry);
-
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1) {
-                               SAFE_FREE(shares);
-                               SAFE_FREE(dbuf.dptr);
-                               return 0;
+               /* Copy out the uid and gid. */
+               memcpy(&lck->delete_token->uid, p, sizeof(uid_t));
+               p += sizeof(uid_t);
+               memcpy(&lck->delete_token->gid, p, sizeof(gid_t));
+               p += sizeof(gid_t);
+
+               /* Any supplementary groups ? */
+               lck->delete_token->ngroups = (data->u.s.delete_token_size > (sizeof(uid_t) + sizeof(gid_t))) ?
+                                       ((data->u.s.delete_token_size -
+                                               (sizeof(uid_t) + sizeof(gid_t)))/sizeof(gid_t)) : 0;
+
+               if (lck->delete_token->ngroups) {
+                       /* Make this a talloc child of lck->delete_token. */
+                       lck->delete_token->groups = TALLOC_ARRAY(lck->delete_token, gid_t,
+                                                       lck->delete_token->ngroups);
+                       /* Check the array just allocated, not the (already
+                          verified) token, before copying gids into it. */
+                       if (!lck->delete_token->groups) {
+                               smb_panic("talloc failed\n");
+                       }
+
+                       /* p points at the first supplementary gid in the record. */
+                       for (i = 0; i < lck->delete_token->ngroups; i++) {
+                               memcpy(&lck->delete_token->groups[i], p, sizeof(gid_t));
+                               p += sizeof(gid_t);
                         }
                 }
+
+       } else {
+               lck->delete_token = NULL;
        }
 
-       SAFE_FREE(dbuf.dptr);
-       *pp_shares = shares;
-       return num_share_modes;
-}
+       /* Save off the associated service path and filename. */
+       lck->servicepath = talloc_strdup(lck, (const char *)dbuf.dptr + sizeof(*data) +
+                                       (lck->num_share_modes *
+                                       sizeof(struct share_mode_entry)) +
+                                       data->u.s.delete_token_size );
+       if (lck->servicepath == NULL) {
+               smb_panic("talloc_strdup failed\n");
+       }
 
-/*******************************************************************
- Fill a share mode entry.
-********************************************************************/
+       lck->filename = talloc_strdup(lck, (const char *)dbuf.dptr + sizeof(*data) +
+                                       (lck->num_share_modes *
+                                       sizeof(struct share_mode_entry)) +
+                                       data->u.s.delete_token_size +
+                                       strlen(lck->servicepath) + 1 );
+       if (lck->filename == NULL) {
+               smb_panic("talloc_strdup failed\n");
+       }
 
-static void fill_share_mode(char *p, files_struct *fsp, uint16 port, uint16 op_type)
-{
-       share_mode_entry *e = (share_mode_entry *)p;
-       void *x = &e->time; /* Needed to force alignment. p may not be aligned.... */
+       /*
+        * Ensure that each entry has a real process attached.
+        */
 
-       memset(e, '\0', sizeof(share_mode_entry));
-       e->pid = sys_getpid();
-       e->share_mode = fsp->share_mode;
-       e->desired_access = fsp->desired_access;
-       e->op_port = port;
-       e->op_type = op_type;
-       memcpy(x, &fsp->open_time, sizeof(struct timeval));
-       e->share_file_id = fsp->file_id;
-       e->dev = fsp->dev;
-       e->inode = fsp->inode;
-}
+       for (i = 0; i < lck->num_share_modes; i++) {
+               struct share_mode_entry *entry_p = &lck->share_modes[i];
+               DEBUG(10,("parse_share_modes: %s\n",
+                         share_mode_str(i, entry_p) ));
+               if (!process_exists(entry_p->pid)) {
+                       DEBUG(10,("parse_share_modes: deleted %s\n",
+                                 share_mode_str(i, entry_p) ));
+                       entry_p->op_type = UNUSED_SHARE_MODE_ENTRY;
+                       lck->modified = True;
+               }
+       }
 
-/*******************************************************************
- Check if two share mode entries are identical, ignoring oplock 
- and port info and desired_access.
-********************************************************************/
+       return True;
+}
 
-BOOL share_modes_identical( share_mode_entry *e1, share_mode_entry *e2)
+static TDB_DATA unparse_share_modes(struct share_mode_lock *lck)
 {
-#if 1 /* JRA PARANOIA TEST - REMOVE LATER */
-       if (e1->pid == e2->pid &&
-               e1->share_file_id == e2->share_file_id &&
-               e1->dev == e2->dev &&
-               e1->inode == e2->inode &&
-               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) != (e2->share_mode & ~DELETE_ON_CLOSE_FLAG)) {
-                       DEBUG(0,("PANIC: share_modes_identical: share_mode missmatch (e1 = %u, e2 = %u). Logic error.\n",
-                               (unsigned int)(e1->share_mode & ~DELETE_ON_CLOSE_FLAG),
-                               (unsigned int)(e2->share_mode & ~DELETE_ON_CLOSE_FLAG) ));
-               smb_panic("PANIC: share_modes_identical logic error.\n");
+       TDB_DATA result;
+       int num_valid = 0;
+       int i;
+       struct locking_data *data;
+       ssize_t offset;
+       ssize_t sp_len;
+       uint32 delete_token_size;
+
+       result.dptr = NULL;
+       result.dsize = 0;
+
+       for (i=0; i<lck->num_share_modes; i++) {
+               if (!is_unused_share_mode_entry(&lck->share_modes[i])) {
+                       num_valid += 1;
+               }
        }
-#endif
 
-       return (e1->pid == e2->pid &&
-               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) == (e2->share_mode & ~DELETE_ON_CLOSE_FLAG) &&
-               e1->dev == e2->dev &&
-               e1->inode == e2->inode &&
-               e1->share_file_id == e2->share_file_id );
-}
+       if (num_valid == 0) {
+               return result;
+       }
 
-/*******************************************************************
- Delete a specific share mode. Return the number
- of entries left, and a memdup'ed copy of the entry deleted (if required).
- Ignore if no entry deleted.
-********************************************************************/
+       sp_len = strlen(lck->servicepath);
+       delete_token_size = (lck->delete_token ?
+                       (sizeof(uid_t) + sizeof(gid_t) + (lck->delete_token->ngroups*sizeof(gid_t))) : 0);
 
-ssize_t del_share_entry( SMB_DEV_T dev, SMB_INO_T inode,
-                       share_mode_entry *entry, share_mode_entry **ppse)
-{
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i, del_count=0;
-       share_mode_entry *shares;
-       ssize_t count = 0;
+       result.dsize = sizeof(*data) +
+               lck->num_share_modes * sizeof(struct share_mode_entry) +
+               delete_token_size +
+               sp_len + 1 +
+               strlen(lck->filename) + 1;
+       result.dptr = TALLOC_ARRAY(lck, uint8, result.dsize);
 
-       if (ppse)
-               *ppse = NULL;
+       if (result.dptr == NULL) {
+               smb_panic("talloc failed\n");
+       }
 
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return -1;
+       data = (struct locking_data *)result.dptr;
+       ZERO_STRUCTP(data);
+       data->u.s.num_share_mode_entries = lck->num_share_modes;
+       data->u.s.delete_on_close = lck->delete_on_close;
+       data->u.s.delete_token_size = delete_token_size;
+       DEBUG(10, ("unparse_share_modes: del: %d, tok = %u, num: %d\n",
+               data->u.s.delete_on_close,
+               (unsigned int)data->u.s.delete_token_size,
+               data->u.s.num_share_mode_entries));
+       memcpy(result.dptr + sizeof(*data), lck->share_modes,
+              sizeof(struct share_mode_entry)*lck->num_share_modes);
+       offset = sizeof(*data) +
+               sizeof(struct share_mode_entry)*lck->num_share_modes;
+
+       /* Store any delete on close token. */
+       if (lck->delete_token) {
+               uint8 *p = result.dptr + offset;
+
+               memcpy(p, &lck->delete_token->uid, sizeof(uid_t));
+               p += sizeof(uid_t);
+
+               memcpy(p, &lck->delete_token->gid, sizeof(gid_t));
+               p += sizeof(gid_t);
+
+               for (i = 0; i < lck->delete_token->ngroups; i++) {
+                       memcpy(p, &lck->delete_token->groups[i], sizeof(gid_t));
+                       p += sizeof(gid_t);
+               }
+               offset = p - result.dptr;
+       }
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       safe_strcpy((char *)result.dptr + offset, lck->servicepath,
+                   result.dsize - offset - 1);
+       offset += sp_len + 1;
+       safe_strcpy((char *)result.dptr + offset, lck->filename,
+                   result.dsize - offset - 1);
 
-       /*
-        * Find any with this pid and delete it
-        * by overwriting with the rest of the data 
-        * from the record.
-        */
+       if (DEBUGLEVEL >= 10) {
+               print_share_mode_table(data);
+       }
 
-       DEBUG(10,("del_share_entry: num_share_modes = %d\n", data->u.num_share_mode_entries ));
+       return result;
+}
+
+static int share_mode_lock_destructor(struct share_mode_lock *lck)
+{
+       TDB_DATA key = locking_key(lck->dev, lck->ino);
+       TDB_DATA data;
 
-       for (i=0;i<data->u.num_share_mode_entries;) {
-               if (share_modes_identical(&shares[i], entry)) {
-                       DEBUG(10,("del_share_entry: deleted %s\n",
-                               share_mode_str(i, &shares[i]) ));
-                       if (ppse)
-                               *ppse = memdup(&shares[i], sizeof(*shares));
-                       data->u.num_share_mode_entries--;
-                       memmove(&shares[i], &shares[i+1], 
-                               dbuf.dsize - (sizeof(*data) + (i+1)*sizeof(*shares)));
-                       del_count++;
+       if (!lck->modified) {
+               goto done;
+       }
 
-                       DEBUG(10,("del_share_entry: deleting entry %d\n", i ));
+       data = unparse_share_modes(lck);
 
-               } else {
-                       i++;
+       if (data.dptr == NULL) {
+               if (!lck->fresh) {
+                       /* There has been an entry before, delete it */
+                       if (tdb_delete(tdb, key) == -1) {
+                               smb_panic("Could not delete share entry\n");
+                       }
                }
+               goto done;
        }
 
-       if (del_count) {
-               /* the record may have shrunk a bit */
-               dbuf.dsize -= del_count * sizeof(*shares);
+       if (tdb_store(tdb, key, data, TDB_REPLACE) == -1) {
+               smb_panic("Could not store share mode entry\n");
+       }
 
-               count = (ssize_t)data->u.num_share_mode_entries;
+ done:
+       tdb_chainunlock(tdb, key);
 
-               /* store it back in the database */
-               if (data->u.num_share_mode_entries == 0) {
-                       if (tdb_delete(tdb, locking_key(dev, inode)) == -1)
-                               count = -1;
-               } else {
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1)
-                               count = -1;
+       return 0;
+}
+
+struct share_mode_lock *get_share_mode_lock(TALLOC_CTX *mem_ctx,
+                                               SMB_DEV_T dev, SMB_INO_T ino,
+                                               const char *servicepath,
+                                               const char *fname)
+{
+       struct share_mode_lock *lck;
+       TDB_DATA key = locking_key(dev, ino);
+       TDB_DATA data;
+
+       lck = TALLOC_P(mem_ctx, struct share_mode_lock);
+       if (lck == NULL) {
+               DEBUG(0, ("talloc failed\n"));
+               return NULL;
+       }
+
+       /* Ensure we set every field here as the destructor must be
+          valid even if parse_share_modes fails. */
+
+       lck->servicepath = NULL;
+       lck->filename = NULL;
+       lck->dev = dev;
+       lck->ino = ino;
+       lck->num_share_modes = 0;
+       lck->share_modes = NULL;
+       lck->delete_token = NULL;
+       lck->delete_on_close = False;
+       lck->fresh = False;
+       lck->modified = False;
+
+       if (tdb_chainlock(tdb, key) != 0) {
+               DEBUG(3, ("Could not lock share entry\n"));
+               TALLOC_FREE(lck);
+               return NULL;
+       }
+
+       /* We must set the destructor immediately after the chainlock
+          to ensure the lock is cleaned up on any of the error return
+          paths below. */
+
+       talloc_set_destructor(lck, share_mode_lock_destructor);
+
+       data = tdb_fetch(tdb, key);
+       lck->fresh = (data.dptr == NULL);
+
+       if (lck->fresh) {
+
+               if (fname == NULL || servicepath == NULL) {
+                       TALLOC_FREE(lck);
+                       return NULL;
+               }
+               lck->filename = talloc_strdup(lck, fname);
+               lck->servicepath = talloc_strdup(lck, servicepath);
+               if (lck->filename == NULL || lck->servicepath == NULL) {
+                       DEBUG(0, ("talloc failed\n"));
+                       TALLOC_FREE(lck);
+                       return NULL;
+               }
+       } else {
+               if (!parse_share_modes(data, lck)) {
+                       DEBUG(0, ("Could not parse share modes\n"));
+                       TALLOC_FREE(lck);
+                       SAFE_FREE(data.dptr);
+                       return NULL;
                }
        }
-       DEBUG(10,("del_share_entry: Remaining table.\n"));
-       print_share_mode_table((struct locking_data *)dbuf.dptr);
-       SAFE_FREE(dbuf.dptr);
-       return count;
+
+       SAFE_FREE(data.dptr);
+
+       return lck;
 }
 
 /*******************************************************************
- Del the share mode of a file for this process. Return the number
- of entries left, and a memdup'ed copy of the entry deleted.
+ Sets the service name and filename for rename.
+ At this point we emit "file renamed" messages to all
+ process id's that have this file open.
+ Based on an initial code idea from SATOH Fumiyasu <fumiya@samba.gr.jp>
 ********************************************************************/
 
-ssize_t del_share_mode(files_struct *fsp, share_mode_entry **ppse)
+BOOL rename_share_filename(struct messaging_context *msg_ctx,
+                       struct share_mode_lock *lck,
+                       const char *servicepath,
+                       const char *newname)
 {
-       share_mode_entry entry;
+       size_t sp_len;
+       size_t fn_len;
+       size_t msg_len;
+       char *frm = NULL;
+       int i;
+       DATA_BLOB msg;
+
+       if (!lck) {
+               return False;
+       }
+
+       DEBUG(10, ("rename_share_filename: servicepath %s newname %s\n",
+               servicepath, newname));
 
        /*
-        * Fake up a share_mode_entry for comparisons.
+        * rename_internal_fsp() and rename_internals() add './' to
+        * head of newname if newname does not contain a '/'.
         */
+       while (newname[0] && newname[1] && newname[0] == '.' && newname[1] == '/') {
+               newname += 2;
+       }
 
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return del_share_entry(fsp->dev, fsp->inode, &entry, ppse);
-}
+       lck->servicepath = talloc_strdup(lck, servicepath);
+       lck->filename = talloc_strdup(lck, newname);
+       if (lck->filename == NULL || lck->servicepath == NULL) {
+               DEBUG(0, ("rename_share_filename: talloc failed\n"));
+               return False;
+       }
+       lck->modified = True;
 
-/*******************************************************************
- Set the share mode of a file. Return False on fail, True on success.
-********************************************************************/
+       sp_len = strlen(lck->servicepath);
+       fn_len = strlen(lck->filename);
+
+       msg_len = MSG_FILE_RENAMED_MIN_SIZE + sp_len + 1 + fn_len + 1;
+
+       /* Set up the name changed message. */
+       frm = TALLOC_ARRAY(lck, char, msg_len);
+       if (!frm) {
+               return False;
+       }
+
+       SDEV_T_VAL(frm,0,lck->dev);
+       SINO_T_VAL(frm,8,lck->ino);
+
+       DEBUG(10,("rename_share_filename: msg_len = %u\n", (unsigned int)msg_len ));
 
-BOOL set_share_mode(files_struct *fsp, uint16 port, uint16 op_type)
+       safe_strcpy(&frm[16], lck->servicepath, sp_len);
+       safe_strcpy(&frm[16 + sp_len + 1], lck->filename, fn_len);
+
+       msg = data_blob_const(frm, msg_len);
+
+       /* Send the messages. */
+       for (i=0; i<lck->num_share_modes; i++) {
+               struct share_mode_entry *se = &lck->share_modes[i];
+               if (!is_valid_share_mode_entry(se)) {
+                       continue;
+               }
+               /* But not to ourselves... */
+               if (procid_is_me(&se->pid)) {
+                       continue;
+               }
+
+               DEBUG(10,("rename_share_filename: sending rename message to pid %s "
+                       "dev %x, inode  %.0f sharepath %s newname %s\n",
+                       procid_str_static(&se->pid),
+                       (unsigned int)lck->dev, (double)lck->ino,
+                       lck->servicepath, lck->filename ));
+
+               messaging_send(msg_ctx, se->pid, MSG_SMB_FILE_RENAME, &msg);
+       }
+
+       return True;
+}
+
+static int pull_delete_on_close_flag(TDB_DATA key, TDB_DATA dbuf,
+                                    void *private_data)
 {
-       TDB_DATA dbuf;
+       BOOL *result = (BOOL *)private_data;
        struct locking_data *data;
-       char *p=NULL;
-       int size;
-       BOOL ret = True;
-               
-       /* read in the existing share modes if any */
-       dbuf = tdb_fetch(tdb, locking_key_fsp(fsp));
-       if (!dbuf.dptr) {
-               size_t offset;
-               /* we'll need to create a new record */
-               pstring fname;
-
-               pstrcpy(fname, fsp->conn->connectpath);
-               pstrcat(fname, "/");
-               pstrcat(fname, fsp->fsp_name);
-
-               size = sizeof(*data) + sizeof(share_mode_entry) + strlen(fname) + 1;
-               p = (char *)malloc(size);
-               if (!p)
-                       return False;
-               data = (struct locking_data *)p;
-               data->u.num_share_mode_entries = 1;
-       
-               DEBUG(10,("set_share_mode: creating entry for file %s. num_share_modes = 1\n",
-                       fsp->fsp_name ));
-
-               offset = sizeof(*data) + sizeof(share_mode_entry);
-               safe_strcpy(p + offset, fname, size - offset - 1);
-               fill_share_mode(p + sizeof(*data), fsp, port, op_type);
-               dbuf.dptr = p;
-               dbuf.dsize = size;
-               if (tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE) == -1)
-                       ret = False;
 
-               print_share_mode_table((struct locking_data *)p);
-
-               SAFE_FREE(p);
-               return ret;
+       if (dbuf.dsize < sizeof(struct locking_data)) {
+               smb_panic("PANIC: parse_share_modes: buffer too short.\n");
        }
 
-       /* we're adding to an existing entry - this is a bit fiddly */
        data = (struct locking_data *)dbuf.dptr;
 
-       data->u.num_share_mode_entries++;
-       
-       DEBUG(10,("set_share_mode: adding entry for file %s. new num_share_modes = %d\n",
-               fsp->fsp_name, data->u.num_share_mode_entries ));
+       *result = data->u.s.delete_on_close;
+       return 0;
+}
 
-       size = dbuf.dsize + sizeof(share_mode_entry);
-       p = malloc(size);
-       if (!p) {
-               SAFE_FREE(dbuf.dptr);
-               return False;
-       }
-       memcpy(p, dbuf.dptr, sizeof(*data));
-       fill_share_mode(p + sizeof(*data), fsp, port, op_type);
-       memcpy(p + sizeof(*data) + sizeof(share_mode_entry), dbuf.dptr + sizeof(*data),
-              dbuf.dsize - sizeof(*data));
-       SAFE_FREE(dbuf.dptr);
-       dbuf.dptr = p;
-       dbuf.dsize = size;
-       if (tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE) == -1)
-               ret = False;
-       print_share_mode_table((struct locking_data *)p);
-       SAFE_FREE(p);
-       return ret;
+BOOL get_delete_on_close_flag(SMB_DEV_T dev, SMB_INO_T inode)
+{
+       /* Record key for this dev/inode pair. */
+       TDB_DATA rec_key = locking_key(dev, inode);
+       /* Stays False unless pull_delete_on_close_flag overwrites it. */
+       BOOL doc_flag = False;
+
+       tdb_parse_record(tdb, rec_key,
+                        pull_delete_on_close_flag, (void *)&doc_flag);
+
+       return doc_flag;
+}
+
+BOOL is_valid_share_mode_entry(const struct share_mode_entry *e)
+{
+       /* Count how many of the three recognised categories op_type falls
+          into: no oplock, an exclusive oplock type, a level II oplock type. */
+       int matches = 0;
+
+       if (e->op_type == NO_OPLOCK) {
+               matches += 1;
+       }
+       if (EXCLUSIVE_OPLOCK_TYPE(e->op_type)) {
+               matches += 1;
+       }
+       if (LEVEL_II_OPLOCK_TYPE(e->op_type)) {
+               matches += 1;
+       }
+
+       /* The categories must be mutually exclusive. */
+       SMB_ASSERT(matches <= 1);
+
+       /* Valid only if one category matched. */
+       return (matches != 0);
+}
+
+BOOL is_deferred_open_entry(const struct share_mode_entry *e)
+{
+       /* A deferred open is marked purely by its op_type. */
+       return e->op_type == DEFERRED_OPEN_ENTRY;
+}
+
+BOOL is_unused_share_mode_entry(const struct share_mode_entry *e)
+{
+       /* A slot is unused when its op_type is UNUSED_SHARE_MODE_ENTRY. */
+       return e->op_type == UNUSED_SHARE_MODE_ENTRY;
 }
 
 /*******************************************************************
- A generic in-place modification call for share mode entries.
+ Fill a share mode entry.
 ********************************************************************/
 
-static BOOL mod_share_mode( SMB_DEV_T dev, SMB_INO_T inode, share_mode_entry *entry,
-                          void (*mod_fn)(share_mode_entry *, SMB_DEV_T, SMB_INO_T, void *),
-                          void *param)
+static void fill_share_mode_entry(struct share_mode_entry *e,
+                                 files_struct *fsp,
+                                 uid_t uid, uint16 mid, uint16 op_type)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i;
-       share_mode_entry *shares;
-       BOOL need_store=False;
-       BOOL ret = True;
+       ZERO_STRUCTP(e);
+       e->pid = procid_self();
+       e->share_access = fsp->share_access;
+       e->private_options = fsp->fh->private_options;
+       e->access_mask = fsp->access_mask;
+       e->op_mid = mid;
+       e->op_type = op_type;
+       e->time.tv_sec = fsp->open_time.tv_sec;
+       e->time.tv_usec = fsp->open_time.tv_usec;
+       e->dev = fsp->dev;
+       e->inode = fsp->inode;
+       e->share_file_id = fsp->fh->file_id;
+       e->uid = (uint32)uid;
+       e->flags = fsp->posix_open ? SHARE_MODE_FLAG_POSIX_OPEN : 0;
+}
 
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return False;
+static void fill_deferred_open_entry(struct share_mode_entry *e,
+                                    const struct timeval request_time,
+                                    SMB_DEV_T dev, SMB_INO_T ino, uint16 mid)
+{
+       /* Start from an all-zero entry so unset fields are zero. */
+       ZERO_STRUCTP(e);
+
+       /* Mark the slot as a deferred open rather than a real open. */
+       e->op_type = DEFERRED_OPEN_ENTRY;
+       e->op_mid = mid;
+
+       /* Which file, and which process is waiting on it. */
+       e->dev = dev;
+       e->inode = ino;
+       e->pid = procid_self();
+
+       /* Time of the original request, copied field by field. */
+       e->time.tv_usec = request_time.tv_usec;
+       e->time.tv_sec = request_time.tv_sec;
+
+       /* Deferred opens carry no uid and no flags. */
+       e->flags = 0;
+       e->uid = (uint32)-1;
+}
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+static void add_share_mode_entry(struct share_mode_lock *lck,
+                                const struct share_mode_entry *entry)
+{
+       int i;
 
-       /* find any with our pid and call the supplied function */
-       for (i=0;i<data->u.num_share_mode_entries;i++) {
-               if (share_modes_identical(entry, &shares[i])) {
-                       mod_fn(&shares[i], dev, inode, param);
-                       need_store=True;
+       for (i=0; i<lck->num_share_modes; i++) {
+               struct share_mode_entry *e = &lck->share_modes[i];
+               if (is_unused_share_mode_entry(e)) {
+                       *e = *entry;
+                       break;
                }
        }
 
-       /* if the mod fn was called then store it back */
-       if (need_store) {
-               if (data->u.num_share_mode_entries == 0) {
-                       if (tdb_delete(tdb, locking_key(dev, inode)) == -1)
-                               ret = False;
-               } else {
-                       if (tdb_store(tdb, locking_key(dev, inode), dbuf, TDB_REPLACE) == -1)
-                               ret = False;
-               }
+       if (i == lck->num_share_modes) {
+               /* No unused entry found */
+               ADD_TO_ARRAY(lck, struct share_mode_entry, *entry,
+                            &lck->share_modes, &lck->num_share_modes);
        }
+       lck->modified = True;
+}
 
-       SAFE_FREE(dbuf.dptr);
-       return ret;
+void set_share_mode(struct share_mode_lock *lck, files_struct *fsp,
+                       uid_t uid, uint16 mid, uint16 op_type)
+{
+       struct share_mode_entry new_entry;
+
+       /* Build the entry for this open on the stack, then let
+          add_share_mode_entry copy it into lck. */
+       fill_share_mode_entry(&new_entry, fsp, uid, mid, op_type);
+       add_share_mode_entry(lck, &new_entry);
+}
+
+void add_deferred_open(struct share_mode_lock *lck, uint16 mid,
+                      struct timeval request_time,
+                      SMB_DEV_T dev, SMB_INO_T ino)
+{
+       struct share_mode_entry deferred;
+
+       /* Build a deferred-open entry on the stack, then let
+          add_share_mode_entry copy it into lck. */
+       fill_deferred_open_entry(&deferred, request_time, dev, ino, mid);
+       add_share_mode_entry(lck, &deferred);
 }
 
 /*******************************************************************
- Static function that actually does the work for the generic function
- below.
+ Check if two share mode entries are identical, ignoring oplock 
+ and mid info and desired_access. (Removed paranoia test - it's
+ not automatically a logic error if they are identical. JRA.)
 ********************************************************************/
 
-static void remove_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
-                                   void *param)
+static BOOL share_modes_identical(struct share_mode_entry *e1,
+                                 struct share_mode_entry *e2)
 {
-       DEBUG(10,("remove_share_oplock_fn: removing oplock info for entry dev=%x ino=%.0f\n",
-                 (unsigned int)dev, (double)inode ));
-       /* Delete the oplock info. */
-       entry->op_port = 0;
-       entry->op_type = NO_OPLOCK;
+       /* share_access is deliberately left out of the comparison:
+          two different DOS or FCB opens sharing the same share mode
+          entry may validly differ in the fsp->share_access field. */
+
+       if (!procid_equal(&e1->pid, &e2->pid)) {
+               return False;
+       }
+       if (e1->dev != e2->dev) {
+               return False;
+       }
+       if (e1->inode != e2->inode) {
+               return False;
+       }
+
+       /* Owner and file agree; the per-open file id decides. */
+       return (e1->share_file_id == e2->share_file_id);
+}
+
+/* Two deferred open records match when pid, op_mid, dev and inode
+   all agree. */
+static BOOL deferred_open_identical(struct share_mode_entry *e1,
+                                   struct share_mode_entry *e2)
+{
+       if (!procid_equal(&e1->pid, &e2->pid)) {
+               return False;
+       }
+       if (e1->op_mid != e2->op_mid) {
+               return False;
+       }
+       if (e1->dev != e2->dev) {
+               return False;
+       }
+       return (e1->inode == e2->inode);
+}
+
+/* Return the first element of lck->share_modes that matches entry, or
+   NULL if none does. Valid share mode entries are compared with
+   share_modes_identical(), deferred open entries with
+   deferred_open_identical(). */
+static struct share_mode_entry *find_share_mode_entry(struct share_mode_lock *lck,
+                                                     struct share_mode_entry *entry)
+{
+       int idx;
+
+       for (idx = 0; idx < lck->num_share_modes; idx++) {
+               struct share_mode_entry *cand = &lck->share_modes[idx];
+               BOOL hit;
+
+               hit = (is_valid_share_mode_entry(entry) &&
+                      is_valid_share_mode_entry(cand) &&
+                      share_modes_identical(cand, entry));
+
+               if (!hit) {
+                       /* Not a share mode match - try the deferred
+                          open comparison instead. */
+                       hit = (is_deferred_open_entry(entry) &&
+                              is_deferred_open_entry(cand) &&
+                              deferred_open_identical(cand, entry));
+               }
+
+               if (hit) {
+                       return cand;
+               }
+       }
+       return NULL;
 }
 
 /*******************************************************************
- Remove an oplock port and mode entry from a share mode.
+ Del the share mode of a file for this process. Return True if a
+ matching entry was found and marked unused, False otherwise.
 ********************************************************************/
 
-BOOL remove_share_oplock(files_struct *fsp)
+BOOL del_share_mode(struct share_mode_lock *lck, files_struct *fsp)
 {
-       share_mode_entry entry;
-       /*
-        * Fake up an entry for comparisons...
-        */
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return mod_share_mode(fsp->dev, fsp->inode, &entry, remove_share_oplock_fn, NULL);
+       struct share_mode_entry probe;
+       struct share_mode_entry *found;
+
+       /* The probe is only a search key, so the pid owner does not
+          need to be correct here. */
+       fill_share_mode_entry(&probe, fsp, (uid_t)-1, 0, NO_OPLOCK);
+       found = find_share_mode_entry(lck, &probe);
+
+       if (found != NULL) {
+               /* Mark the slot unused and flag the lock data as changed. */
+               found->op_type = UNUSED_SHARE_MODE_ENTRY;
+               lck->modified = True;
+               return True;
+       }
+
+       /* fsp has no matching entry in lck. */
+       return False;
+}
+
+/* Mark the deferred open record for mid (on the dev/ino held in lck)
+   as unused. Does nothing if no such record is found. */
+void del_deferred_open_entry(struct share_mode_lock *lck, uint16 mid)
+{
+       struct share_mode_entry probe;
+       struct share_mode_entry *found;
+
+       /* deferred_open_identical() does not look at the request time,
+          so a zero timeval is enough for the search key. */
+       fill_deferred_open_entry(&probe, timeval_zero(),
+                                lck->dev, lck->ino, mid);
+       found = find_share_mode_entry(lck, &probe);
+
+       if (found != NULL) {
+               found->op_type = UNUSED_SHARE_MODE_ENTRY;
+               lck->modified = True;
+       }
 }
 
 /*******************************************************************
- Static function that actually does the work for the generic function
- below.
+ Remove an oplock mid and mode entry from a share mode.
 ********************************************************************/
 
-static void downgrade_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
-                                   void *param)
+BOOL remove_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
 {
-       DEBUG(10,("downgrade_share_oplock_fn: downgrading oplock info for entry dev=%x ino=%.0f\n",
-                 (unsigned int)dev, (double)inode ));
-       entry->op_type = LEVEL_II_OPLOCK;
+       struct share_mode_entry probe;
+       struct share_mode_entry *found;
+
+       /* The probe is only a search key, so the pid owner does not
+          need to be correct here. */
+       fill_share_mode_entry(&probe, fsp, (uid_t)-1, 0, NO_OPLOCK);
+       found = find_share_mode_entry(lck, &probe);
+
+       if (found != NULL) {
+               /* Clear both the oplock type and the mid stored with it. */
+               found->op_type = NO_OPLOCK;
+               found->op_mid = 0;
+               lck->modified = True;
+               return True;
+       }
+
+       /* fsp has no matching entry in lck. */
+       return False;
 }
 
 /*******************************************************************
  Downgrade a oplock type from exclusive to level II.
 ********************************************************************/
 
-BOOL downgrade_share_oplock(files_struct *fsp)
+BOOL downgrade_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
+{
+       struct share_mode_entry probe;
+       struct share_mode_entry *found;
+
+       /* The probe is only a search key, so the pid owner does not
+          need to be correct here. */
+       fill_share_mode_entry(&probe, fsp, (uid_t)-1, 0, NO_OPLOCK);
+       found = find_share_mode_entry(lck, &probe);
+
+       if (found == NULL) {
+               /* fsp has no matching entry in lck - nothing to downgrade. */
+               return False;
+       }
+
+       found->op_type = LEVEL_II_OPLOCK;
+       lck->modified = True;
+       return True;
+}
+
+/****************************************************************************
+ Deal with the internal needs of setting the delete on close flag. Note that
+ as the tdb locking is recursive, it is safe to call this from within 
+ open_file_ntcreate. JRA.
+
+ Checks, in this order, whether delete on close may be set on fsp:
+  - delete_on_close is False: nothing to check, NT_STATUS_OK.
+  - dosmode has aRONLY set and lp_delete_readonly() is off for the
+    share: NT_STATUS_CANNOT_DELETE.
+  - CAN_WRITE() fails for the connection: NT_STATUS_ACCESS_DENIED.
+  - fsp->access_mask lacks DELETE_ACCESS: NT_STATUS_ACCESS_DENIED.
+  - fsp is a directory: whatever can_delete_directory() returns.
+ Anything else yields NT_STATUS_OK. No state is modified here.
+****************************************************************************/
+
+NTSTATUS can_set_delete_on_close(files_struct *fsp, BOOL delete_on_close,
+                                uint32 dosmode)
 {
-       share_mode_entry entry;
+       if (!delete_on_close) {
+               return NT_STATUS_OK;
+       }
+
+       /*
+        * Only allow delete on close for writable files.
+        */
+
+       if ((dosmode & aRONLY) &&
+           !lp_delete_readonly(SNUM(fsp->conn))) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on close "
+                         "flag set but file attribute is readonly.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_CANNOT_DELETE;
+       }
+
        /*
-        * Fake up an entry for comparisons...
+        * Only allow delete on close for writable shares.
         */
-       fill_share_mode((char *)&entry, fsp, 0, 0);
-       return mod_share_mode(fsp->dev, fsp->inode, &entry, downgrade_share_oplock_fn, NULL);
+
+       if (!CAN_WRITE(fsp->conn)) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on "
+                         "close flag set but write access denied on share.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       /*
+        * Only allow delete on close for files/directories opened with delete
+        * intent.
+        */
+
+       if (!(fsp->access_mask & DELETE_ACCESS)) {
+               DEBUG(10,("can_set_delete_on_close: file %s delete on "
+                         "close flag set but delete access denied.\n",
+                         fsp->fsp_name ));
+               return NT_STATUS_ACCESS_DENIED;
+       }
+
+       /* Don't allow delete on close for non-empty directories. */
+       if (fsp->is_directory) {
+               return can_delete_directory(fsp->conn, fsp->fsp_name);
+       }
+
+       return NT_STATUS_OK;
 }
 
-/*******************************************************************
- Get/Set the delete on close flag in a set of share modes.
- Return False on fail, True on success.
-********************************************************************/
+/*************************************************************************
+ Return a talloced copy of a UNIX_USER_TOKEN. NULL on fail.
+ (Should this be in locking.c.... ?).
+*************************************************************************/
 
-BOOL modify_delete_flag( SMB_DEV_T dev, SMB_INO_T inode, BOOL delete_on_close)
+/* ctx is the talloc parent of the returned copy; tok is the token to
+   duplicate (a NULL tok yields NULL). The groups array, when present,
+   is allocated as a talloc child of the copy. Returns NULL if either
+   allocation fails; in that case nothing is left allocated on ctx. */
+static UNIX_USER_TOKEN *copy_unix_token(TALLOC_CTX *ctx, UNIX_USER_TOKEN *tok)
 {
-       TDB_DATA dbuf;
-       struct locking_data *data;
-       int i;
-       share_mode_entry *shares;
-
-       /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr)
-               return False;
+       UNIX_USER_TOKEN *cpy;
 
-       data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       if (tok == NULL) {
+               return NULL;
+       }
 
-       /* Set/Unset the delete on close element. */
-       for (i=0;i<data->u.num_share_mode_entries;i++,shares++) {
-               shares->share_mode = (delete_on_close ?
-                            (shares->share_mode | DELETE_ON_CLOSE_FLAG) :
-                            (shares->share_mode & ~DELETE_ON_CLOSE_FLAG) );
+       cpy = TALLOC_P(ctx, UNIX_USER_TOKEN);
+       if (!cpy) {
+               return NULL;
        }
 
-       /* store it back */
-       if (data->u.num_share_mode_entries) {
-               if (tdb_store(tdb, locking_key(dev,inode), dbuf, TDB_REPLACE)==-1) {
-                       SAFE_FREE(dbuf.dptr);
-                       return False;
+       cpy->uid = tok->uid;
+       cpy->gid = tok->gid;
+       cpy->ngroups = tok->ngroups;
+       /* Never hand back an uninitialised groups pointer when the
+          source token has no supplementary groups. */
+       cpy->groups = NULL;
+       if (tok->ngroups) {
+               /* Make this a talloc child of cpy. */
+               cpy->groups = TALLOC_ARRAY(cpy, gid_t, tok->ngroups);
+               if (!cpy->groups) {
+                       /* Don't leave the half-built copy hanging
+                          off ctx. */
+                       TALLOC_FREE(cpy);
+                       return NULL;
                }
+               memcpy(cpy->groups, tok->groups, tok->ngroups * sizeof(gid_t));
        }
+       return cpy;
+}
 
-       SAFE_FREE(dbuf.dptr);
-       return True;
+/****************************************************************************
+ Replace the delete on close token.
+****************************************************************************/
+
+void set_delete_on_close_token(struct share_mode_lock *lck, UNIX_USER_TOKEN *tok)
+{
+       UNIX_USER_TOKEN *replacement;
+
+       /* Drop any token already attached to lck; its groups array
+          goes with it. */
+       if (lck->delete_token != NULL) {
+               TALLOC_FREE(lck->delete_token);
+               lck->delete_token = NULL;
+       }
+
+       /* tok may be NULL, in which case lck ends up with no token. */
+       replacement = copy_unix_token(lck, tok);
+       lck->delete_token = replacement;
+       lck->modified = True;
 }
 
 /****************************************************************************
- Traverse the whole database with this function, calling traverse_callback
- on each share mode
+ Sets the delete on close flag over all share modes on this file.
+ Modify the share mode entry for all files open
+ on this device and inode to tell other smbds we have
+ changed the delete on close flag. This will be noticed
+ in the close code, the last closer will delete the file
+ if flag is set.
+ This makes a copy of any UNIX_USER_TOKEN into the
+ lck entry. This function is used when the lock is already granted.
 ****************************************************************************/
 
+void set_delete_on_close_lck(struct share_mode_lock *lck, BOOL delete_on_close, UNIX_USER_TOKEN *tok)
+{
+       if (lck->delete_on_close == delete_on_close) {
+               /* Flag already has the requested value - neither the
+                  flag nor the stored token is touched. */
+               return;
+       }
+
+       /* Replace the stored token first, then flip the flag. */
+       set_delete_on_close_token(lck, tok);
+       lck->delete_on_close = delete_on_close;
+
+       if (delete_on_close) {
+               /* Setting the flag requires a token to have been copied. */
+               SMB_ASSERT(lck->delete_token != NULL);
+       }
+
+       lck->modified = True;
+}
+
+/* Fetch the share mode lock for fsp's dev/inode and set or clear the
+   delete on close flag in it, storing a copy of tok. Returns False
+   only when the lock cannot be obtained. */
+BOOL set_delete_on_close(files_struct *fsp, BOOL delete_on_close, UNIX_USER_TOKEN *tok)
+{
+       struct share_mode_lock *lck = NULL;
+
+       DEBUG(10,("set_delete_on_close: %s delete on close flag for "
+                 "fnum = %d, file %s\n",
+                 delete_on_close ? "Adding" : "Removing", fsp->fnum,
+                 fsp->fsp_name ));
+
+       /* Stat opens are reported as success without touching any
+          share mode data. */
+       if (fsp->is_stat) {
+               return True;
+       }
+
+       lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL, NULL);
+       if (lck != NULL) {
+               set_delete_on_close_lck(lck, delete_on_close, tok);
+
+               if (fsp->is_directory) {
+                       send_stat_cache_delete_message(fsp->fsp_name);
+               }
+
+               TALLOC_FREE(lck);
+               return True;
+       }
+
+       /* Could not obtain the share mode lock for this dev/inode. */
+       return False;
+}
+
+struct forall_state {  /* filled in by share_mode_forall(), read back in traverse_fn() */
+       void (*fn)(const struct share_mode_entry *entry,
+                  const char *sharepath,
+                  const char *fname,
+                  void *private_data);  /* callback run once per share mode entry */
+       void *private_data;              /* caller's pointer, handed to fn unchanged */
+};
+
 static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf, 
-                       voidstate)
+                       void *_state)
 {
+       struct forall_state *state = (struct forall_state *)_state;
        struct locking_data *data;
-       share_mode_entry *shares;
-       char *name;
+       struct share_mode_entry *shares;
+       const char *sharepath;
+       const char *fname;
        int i;
 
-       SHAREMODE_FN(traverse_callback) = (SHAREMODE_FN_CAST())state;
+       /* Ensure this is a locking_key record. */
+       if (kbuf.dsize != sizeof(struct locking_key))
+               return 0;
 
        data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
-       name = dbuf.dptr + sizeof(*data) + data->u.num_share_mode_entries*sizeof(*shares);
-
-       for (i=0;i<data->u.num_share_mode_entries;i++) {
-               traverse_callback(&shares[i], name);
+       shares = (struct share_mode_entry *)(dbuf.dptr + sizeof(*data));
+       sharepath = (const char *)dbuf.dptr + sizeof(*data) +
+               data->u.s.num_share_mode_entries*sizeof(*shares) +
+               data->u.s.delete_token_size;
+       fname = (const char *)dbuf.dptr + sizeof(*data) +
+               data->u.s.num_share_mode_entries*sizeof(*shares) +
+               data->u.s.delete_token_size +
+               strlen(sharepath) + 1;
+
+       for (i=0;i<data->u.s.num_share_mode_entries;i++) {
+               state->fn(&shares[i], sharepath, fname,
+                         state->private_data);
        }
        return 0;
 }
@@ -862,9 +1346,17 @@ static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf,
  share mode system.
 ********************************************************************/
 
-int share_mode_forall(SHAREMODE_FN(fn))
+int share_mode_forall(void (*fn)(const struct share_mode_entry *, const char *,
+                                const char *, void *),
+                     void *private_data)  /* fn and private_data reach traverse_fn() via struct forall_state */
 {
-       if (!tdb)
+       struct forall_state state;  /* stack object; only needs to live for the tdb_traverse() call below */
+
+       if (tdb == NULL)  /* no locking database handle: nothing to walk, report 0 */
                return 0;
-       return tdb_traverse(tdb, traverse_fn, (void*)fn);
+
+       state.fn = fn;
+       state.private_data = private_data;
+
+       return tdb_traverse(tdb, traverse_fn, (void *)&state);  /* tdb_traverse()'s result is returned as-is */
 }