r6543: Fix EDEADLCK problem with deferred open calls. Problem found by
[samba.git] / source3 / locking / locking.c
index b61e8acedc66a6c847d668ed1ac5bda83dcb0e1b..f0a45c2bcbbf4063097442bcdfe531b7d580ba35 100644 (file)
@@ -1,8 +1,8 @@
 /* 
-   Unix SMB/Netbios implementation.
-   Version 3.0
+   Unix SMB/CIFS implementation.
    Locking functions
-   Copyright (C) Andrew Tridgell 1992-1999
+   Copyright (C) Andrew Tridgell 1992-2000
+   Copyright (C) Jeremy Allison 1992-2000
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    support.
 
    rewrtten completely to use new tdb code. Tridge, Dec '99
+
+   Added POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
 */
 
 #include "includes.h"
-extern int DEBUGLEVEL;
+uint16 global_smbpid;
+
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
 
 /* the locking database handle */
 static TDB_CONTEXT *tdb;
-
-int global_smbpid;
+static TDB_CONTEXT *deferred_open_tdb;
+
+struct locking_data {
+        union {
+                int num_share_mode_entries;
+                share_mode_entry dummy; /* Needed for alignment. */
+        } u;
+        /* the following two entries are implicit
+           share_mode_entry modes[num_share_mode_entries];
+           char file_name[];
+        */
+};
 
 /****************************************************************************
  Debugging aid :-).
@@ -49,332 +64,6 @@ static const char *lock_type_name(enum brl_type lock_type)
        return (lock_type == READ_LOCK) ? "READ" : "WRITE";
 }
 
-/****************************************************************************
- Utility function to map a lock type correctly depending on the open
- mode of a file.
-****************************************************************************/
-
-static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
-{
-       if((lock_type == WRITE_LOCK) && !fsp->can_write) {
-               /*
-                * Many UNIX's cannot get a write lock on a file opened read-only.
-                * Win32 locking semantics allow this.
-                * Do the best we can and attempt a read-only lock.
-                */
-               DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
-               return F_RDLCK;
-       } else if((lock_type == READ_LOCK) && !fsp->can_read) {
-               /*
-                * Ditto for read locks on write only files.
-                */
-               DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
-               return F_WRLCK;
-       }
-
-  /*
-   * This return should be the most normal, as we attempt
-   * to always open files read/write.
-   */
-
-  return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
-}
-
-/****************************************************************************
- Check to see if the given unsigned lock range is within the possible POSIX
- range. Modifies the given args to be in range if possible, just returns
- False if not.
-****************************************************************************/
-
-static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
-                                                               SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
-{
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-
-#if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
-
-    SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
-    SMB_OFF_T mask = (mask2<<1);
-    SMB_OFF_T neg_mask = ~mask;
-
-       /*
-        * In this case SMB_OFF_T is 64 bits,
-        * and the underlying system can handle 64 bit signed locks.
-        * Cast to signed type.
-        */
-
-       offset = (SMB_OFF_T)u_offset;
-       count = (SMB_OFF_T)u_count;
-
-       /*
-        * Deal with a very common case of count of all ones.
-        * (lock entire file).
-        */
-
-       if(count == (SMB_OFF_T)-1)
-               count &= ~mask;
-
-       /*
-        * POSIX lock ranges cannot be negative.
-        * Fail if any combination becomes negative.
-        */
-
-       if(offset < 0 || count < 0 || (offset + count < 0)) {
-               DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
-                               (double)offset, (double)count ));
-               return False;
-       }
-
-       /*
-        * In this case SMB_OFF_T is 64 bits, the offset and count
-        * fit within the positive range, and the underlying
-        * system can handle 64 bit locks. Just return as the
-        * cast values are ok.
-        */
-
-#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
-
-       /*
-        * In this case either SMB_OFF_T is 32 bits,
-        * or the underlying system cannot handle 64 bit signed locks.
-        * Either way we have to try and mangle to fit within 31 bits.
-        * This is difficult.
-        */
-
-#if defined(HAVE_BROKEN_FCNTL64_LOCKS)
-
-       /*
-        * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
-        * broken large locking.
-        */
-
-       /*
-        * Deal with a very common case of count of all ones.
-        * (lock entire file).
-        */
-
-       if(u_count == (SMB_BIG_UINT)-1)
-               count = 0x7FFFFFFF;
-
-       if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
-               DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
-                               (double)u_offset, (double)u_count ));
-               /* Top 32 bits of offset or count were not zero. */
-               return False;
-       }
-
-       /* Cast from 64 bits unsigned to 64 bits signed. */
-       offset = (SMB_OFF_T)u_offset;
-       count = (SMB_OFF_T)u_count;
-
-       /*
-        * Check if we are within the 2^31 range.
-        */
-
-       {
-               int32 low_offset = (int32)offset;
-               int32 low_count = (int32)count;
-
-               if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
-                       DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
-                                       low_offset, low_count ));
-                       return False;
-               }
-       }
-
-       /*
-        * Ok - we can map from a 64 bit number to a 31 bit lock.
-        */
-
-#else /* HAVE_BROKEN_FCNTL64_LOCKS */
-
-       /*
-        * SMB_OFF_T is 32 bits.
-        */
-
-#if defined(HAVE_LONGLONG)
-
-       /*
-        * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
-        */
-
-       /*
-        * Deal with a very common case of count of all ones.
-        * (lock entire file).
-        */
-
-       if(u_count == (SMB_BIG_UINT)-1)
-               count = 0x7FFFFFFF;
-
-       if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
-               DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
-                               (double)u_offset, (double)u_count ));
-               return False;
-       }
-
-       /* Cast from 64 bits unsigned to 32 bits signed. */
-       offset = (SMB_OFF_T)u_offset;
-       count = (SMB_OFF_T)u_count;
-
-       /*
-        * Check if we are within the 2^31 range.
-        */
-
-       if(offset < 0 || count < 0 || (offset + count < 0)) {
-               DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
-                               (int)offset, (int)count ));
-               return False;
-       }
-
-#else /* HAVE_LONGLONG */
-
-       /*
-        * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
-        * just cast.
-        */
-
-       /*
-        * Deal with a very common case of count of all ones.
-        * (lock entire file).
-        */
-
-       if(u_count == (SMB_BIG_UINT)-1)
-               count = 0x7FFFFFFF;
-
-       /* Cast from 32 bits unsigned to 32 bits signed. */
-       offset = (SMB_OFF_T)u_offset;
-       count = (SMB_OFF_T)u_count;
-
-       /*
-        * Check if we are within the 2^31 range.
-        */
-
-       if(offset < 0 || count < 0 || (offset + count < 0)) {
-               DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
-                               (int)offset, (int)count ));
-               return False;
-       }
-
-#endif /* HAVE_LONGLONG */
-#endif /* LARGE_SMB_OFF_T */
-#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
-
-       /*
-        * The mapping was successful.
-        */
-
-       DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
-                       (double)offset, (double)count ));
-
-       *offset_out = offset;
-       *count_out = count;
-       
-       return True;
-}
-
-/****************************************************************************
- POSIX function to see if a file region is locked. Returns True if the
- region is locked, False otherwise.
-****************************************************************************/
-
-static BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
-{
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-
-       DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
-                       fsp->fsp_name, (double)u_offset, (double)u_count, lock_type_name(lock_type) ));
-
-       /*
-        * If the requested lock won't fit in the POSIX range, we will
-        * never set it, so presume it is not locked.
-        */
-
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
-               return False;
-
-       /*
-        * Note that most UNIX's can *test* for a write lock on
-        * a read-only fd, just not *set* a write lock on a read-only
-        * fd. So we don't need to use map_lock_type here.
-        */ 
-
-       return fcntl_lock(fsp->fd,SMB_F_GETLK,offset,count,lock_type);
-}
-
-/****************************************************************************
- POSIX function to acquire a lock. Returns True if the
- lock could be granted, False if not.
-****************************************************************************/
-
-static BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
-{
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-       BOOL ret = True;
-
-       DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
-                       fsp->fsp_name, (double)u_offset, (double)u_count, lock_type_name(lock_type) ));
-
-       /*
-        * If the requested lock won't fit in the POSIX range, we will
-        * pretend it was successful.
-        */
-
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
-               return True;
-
-       /*
-        * Note that setting multiple overlapping read locks on different
-        * file descriptors will not be held separately by the kernel (POSIX
-        * braindamage), but will be merged into one continuous read lock
-        * range. We cope with this case in the release_posix_lock code
-        * below. JRA.
-        */
-
-    ret = fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,map_posix_lock_type(fsp,lock_type)); 
-
-       return ret;
-}
-
-/****************************************************************************
- POSIX function to release a lock. Returns True if the
- lock could be released, False if not.
-****************************************************************************/
-
-static BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
-{
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-       BOOL ret = True;
-
-       DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
-                       fsp->fsp_name, (double)u_offset, (double)u_count ));
-
-       if(u_count == 0) {
-
-               /*
-                * This lock must overlap with an existing read-only lock
-                * help by another fd. Don't do any POSIX call.
-                */
-
-               return True;
-       }
-
-       /*
-        * If the requested lock won't fit in the POSIX range, we will
-        * pretend it was successful.
-        */
-
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
-               return True;
-
-       ret = fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,F_UNLCK);
-
-       return ret;
-}
-
 /****************************************************************************
  Utility function called to see if a file region is locked.
 ****************************************************************************/
@@ -384,26 +73,50 @@ BOOL is_locked(files_struct *fsp,connection_struct *conn,
               enum brl_type lock_type)
 {
        int snum = SNUM(conn);
+       int strict_locking = lp_strict_locking(snum);
        BOOL ret;
        
        if (count == 0)
                return(False);
 
-       if (!lp_locking(snum) || !lp_strict_locking(snum))
+       if (!lp_locking(snum) || !strict_locking)
                return(False);
 
-       ret = !brl_locktest(fsp->dev, fsp->inode, 
-                            global_smbpid, getpid(), conn->cnum, 
-                            offset, count, lock_type);
+       if (strict_locking == Auto) {
+               if  (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && (lock_type == READ_LOCK || lock_type == WRITE_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - exclusive oplock on file %s\n", fsp->fsp_name ));
+                       ret = 0;
+               } else if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type) && (lock_type == READ_LOCK)) {
+                       DEBUG(10,("is_locked: optimisation - level II oplock on file %s\n", fsp->fsp_name ));
+                       ret = 0;
+               } else {
+                       ret = !brl_locktest(fsp->dev, fsp->inode, fsp->fnum,
+                                    global_smbpid, sys_getpid(), conn->cnum, 
+                                    offset, count, lock_type);
+               }
+       } else {
+               ret = !brl_locktest(fsp->dev, fsp->inode, fsp->fnum,
+                               global_smbpid, sys_getpid(), conn->cnum,
+                               offset, count, lock_type);
+       }
+
+       DEBUG(10,("is_locked: brl start=%.0f len=%.0f %s for file %s\n",
+                       (double)offset, (double)count, ret ? "locked" : "unlocked",
+                       fsp->fsp_name ));
 
        /*
         * There is no lock held by an SMB daemon, check to
         * see if there is a POSIX lock from a UNIX or NFS process.
         */
 
-       if(!ret && lp_posix_locking(snum))
+       if(!ret && lp_posix_locking(snum)) {
                ret = is_posix_locked(fsp, offset, count, lock_type);
 
+               DEBUG(10,("is_locked: posix start=%.0f len=%.0f %s for file %s\n",
+                               (double)offset, (double)count, ret ? "locked" : "unlocked",
+                               fsp->fsp_name ));
+       }
+
        return ret;
 }
 
@@ -411,31 +124,26 @@ BOOL is_locked(files_struct *fsp,connection_struct *conn,
  Utility function called by locking requests.
 ****************************************************************************/
 
-BOOL do_lock(files_struct *fsp,connection_struct *conn,
-             SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type,
-             int *eclass,uint32 *ecode)
+static NTSTATUS do_lock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
+                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type, BOOL *my_lock_ctx)
 {
-       BOOL ok = False;
+       NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED;
 
        if (!lp_locking(SNUM(conn)))
-               return(True);
+               return NT_STATUS_OK;
+
+       /* NOTE! 0 byte long ranges ARE allowed and should be stored  */
 
-       if (count == 0) {
-               *eclass = ERRDOS;
-               *ecode = ERRnoaccess;
-               return False;
-       }
-       
        DEBUG(10,("do_lock: lock type %s start=%.0f len=%.0f requested for file %s\n",
                  lock_type_name(lock_type), (double)offset, (double)count, fsp->fsp_name ));
 
        if (OPEN_FSP(fsp) && fsp->can_lock && (fsp->conn == conn)) {
-               ok = brl_lock(fsp->dev, fsp->inode, fsp->fnum,
-                             global_smbpid, getpid(), conn->cnum, 
-                             offset, count, 
-                             lock_type);
+               status = brl_lock(fsp->dev, fsp->inode, fsp->fnum,
+                                 lock_pid, sys_getpid(), conn->cnum, 
+                                 offset, count, 
+                                 lock_type, my_lock_ctx);
 
-               if(ok && lp_posix_locking(SNUM(conn))) {
+               if (NT_STATUS_IS_OK(status) && lp_posix_locking(SNUM(conn))) {
 
                        /*
                         * Try and get a POSIX lock on this range.
@@ -443,49 +151,99 @@ BOOL do_lock(files_struct *fsp,connection_struct *conn,
                         * overlapping on a different fd. JRA.
                         */
 
-                       if((ok = set_posix_lock(fsp, offset, count, lock_type)) == True)
-                               fsp->num_posix_locks++;
-                       else {
+                       if (!set_posix_lock(fsp, offset, count, lock_type)) {
+                               if (errno == EACCES || errno == EAGAIN)
+                                       status = NT_STATUS_FILE_LOCK_CONFLICT;
+                               else
+                                       status = map_nt_error_from_unix(errno);
+
                                /*
                                 * We failed to map - we must now remove the brl
                                 * lock entry.
                                 */
                                (void)brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                                                               global_smbpid, getpid(), conn->cnum, 
-                                                               offset, count);
+                                                               lock_pid, sys_getpid(), conn->cnum, 
+                                                               offset, count, False,
+                                                               NULL, NULL);
                        }
                }
        }
 
-       if (!ok) {
-               *eclass = ERRDOS;
-               *ecode = ERRlock;
-               return False;
+       return status;
+}
+
+/****************************************************************************
+ Utility function called by locking requests. This is *DISGUSTING*. It also
+ appears to be "What Windows Does" (tm). Andrew, ever wonder why Windows 2000
+ is so slow on the locking tests...... ? This is the reason. Much though I hate
+ it, we need this. JRA.
+****************************************************************************/
+
+NTSTATUS do_lock_spin(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
+                SMB_BIG_UINT count,SMB_BIG_UINT offset,enum brl_type lock_type, BOOL *my_lock_ctx)
+{
+       int j, maxj = lp_lock_spin_count();
+       int sleeptime = lp_lock_sleep_time();
+       NTSTATUS status, ret;
+
+       if (maxj <= 0)
+               maxj = 1;
+
+       ret = NT_STATUS_OK; /* to keep dumb compilers happy */
+
+       for (j = 0; j < maxj; j++) {
+               status = do_lock(fsp, conn, lock_pid, count, offset, lock_type, my_lock_ctx);
+               if (!NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED) &&
+                   !NT_STATUS_EQUAL(status, NT_STATUS_FILE_LOCK_CONFLICT)) {
+                       return status;
+               }
+               /* if we do fail then return the first error code we got */
+               if (j == 0) {
+                       ret = status;
+                       /* Don't spin if we blocked ourselves. */
+                       if (*my_lock_ctx)
+                               return ret;
+               }
+               if (sleeptime)
+                       sys_usleep(sleeptime);
        }
-       return True; /* Got lock */
+       return ret;
+}
+
+/* Struct passed to brl_unlock. */
+struct posix_unlock_data_struct {
+       files_struct *fsp;
+       SMB_BIG_UINT offset;
+       SMB_BIG_UINT count;
+};
+
+/****************************************************************************
+ Function passed to brl_unlock to allow POSIX unlock to be done first.
+****************************************************************************/
+
+static void posix_unlock(void *pre_data)
+{
+       struct posix_unlock_data_struct *pdata = (struct posix_unlock_data_struct *)pre_data;
+
+       if (lp_posix_locking(SNUM(pdata->fsp->conn)))
+               release_posix_lock(pdata->fsp, pdata->offset, pdata->count);
 }
 
 /****************************************************************************
  Utility function called by unlocking requests.
 ****************************************************************************/
 
-BOOL do_unlock(files_struct *fsp,connection_struct *conn,
-               SMB_BIG_UINT count,SMB_BIG_UINT offset, 
-              int *eclass,uint32 *ecode)
+NTSTATUS do_unlock(files_struct *fsp,connection_struct *conn, uint16 lock_pid,
+                  SMB_BIG_UINT count,SMB_BIG_UINT offset)
 {
        BOOL ok = False;
-       TALLOC_CTX *ul_ctx = NULL;
-       struct unlock_list *ulist = NULL;
-       struct unlock_list *ul = NULL;
-       pid_t pid;
+       struct posix_unlock_data_struct posix_data;
        
        if (!lp_locking(SNUM(conn)))
-               return(True);
+               return NT_STATUS_OK;
        
        if (!OPEN_FSP(fsp) || !fsp->can_lock || (fsp->conn != conn)) {
-               *eclass = ERRDOS;
-               *ecode = ERRlock;
-               return False;
+               return NT_STATUS_INVALID_HANDLE;
        }
        
        DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for file %s\n",
@@ -497,72 +255,19 @@ BOOL do_unlock(files_struct *fsp,connection_struct *conn,
         * match then don't bother looking to remove POSIX locks.
         */
 
-       pid = getpid();
+       posix_data.fsp = fsp;
+       posix_data.offset = offset;
+       posix_data.count = count;
 
        ok = brl_unlock(fsp->dev, fsp->inode, fsp->fnum,
-                       global_smbpid, pid, conn->cnum, offset, count);
+                       lock_pid, sys_getpid(), conn->cnum, offset, count,
+                       False, posix_unlock, (void *)&posix_data);
    
        if (!ok) {
-               *eclass = ERRDOS;
-               *ecode = ERRlock;
-               return False;
-       }
-
-       if (!lp_posix_locking(SNUM(conn)))
-               return True;
-
-       if ((ul_ctx = talloc_init()) == NULL) {
-               DEBUG(0,("do_unlock: unable to init talloc context.\n"));
-               return True; /* Not a fatal error. */
+               DEBUG(10,("do_unlock: returning ERRlock.\n" ));
+               return NT_STATUS_RANGE_NOT_LOCKED;
        }
-
-       if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
-               DEBUG(0,("do_unlock: unable to talloc unlock list.\n"));
-               talloc_destroy(ul_ctx);
-               return True; /* Not a fatal error. */
-       }
-
-       /*
-        * Create the initial list entry containing the
-        * lock we want to remove.
-        */
-
-       ZERO_STRUCTP(ul);
-       ul->start = offset;
-       ul->size = count;
-
-       DLIST_ADD(ulist, ul);
-
-       /*
-        * The following call calculates if there are any
-        * overlapping read locks held by this process on
-        * other fd's open on the same file and creates a
-        * list of unlock ranges that will allow other
-        * POSIX lock ranges to remain on the file whilst the
-        * unlocks are performed.
-        */
-
-       ulist = brl_unlock_list(ul_ctx, ulist, pid, fsp->dev, fsp->inode);
-
-       /*
-        * Release the POSIX locks on the list of ranges returned.
-        */
-
-       for(; ulist; ulist = ulist->next)
-               (void)release_posix_lock(fsp, ulist->start, ulist->size);
-
-       talloc_destroy(ul_ctx);
-
-       /*
-        * We treat this as one unlock request for POSIX accounting purposes even
-        * if it may have been split into multiple smaller POSIX unlock ranges.
-        */
-
-       fsp->num_posix_locks--;
-
-       SMB_ASSERT(fsp->num_posix_locks >= 0);
-
-       return True; /* Did unlock */
+       return NT_STATUS_OK;
 }
 
 /****************************************************************************
@@ -571,94 +276,111 @@ BOOL do_unlock(files_struct *fsp,connection_struct *conn,
 
 void locking_close_file(files_struct *fsp)
 {
-       pid_t pid = getpid();
+       pid_t pid = sys_getpid();
 
        if (!lp_locking(SNUM(fsp->conn)))
                return;
 
-       if(lp_posix_locking(SNUM(fsp->conn))) {
-
-               TALLOC_CTX *ul_ctx = NULL;
-               struct unlock_list *ul = NULL;
-               int eclass;
-               uint32 ecode;
-
-               if ((ul_ctx = talloc_init()) == NULL) {
-                       DEBUG(0,("locking_close_file: unable to init talloc context.\n"));
-                       return;
-               }
-
-               /*
-                * We need to release all POSIX locks we have on this
-                * fd. Get all our existing locks from the tdb locking database.
-                */
-
-               ul = brl_getlocklist(ul_ctx, fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
-
-               /*
-                * Now unlock all of them. This will remove the brl entry also
-                * for each lock.
-                */
+       /*
+        * Just release all the brl locks, no need to release individually.
+        */
 
-               for(; ul; ul = ul->next)
-                       do_unlock(fsp,fsp->conn,ul->size,ul->start,&eclass,&ecode);
-               
-               talloc_destroy(ul_ctx);
+       brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
 
-       } else {
+       if(lp_posix_locking(SNUM(fsp->conn))) {
 
-               /*
-                * Just release all the tdb locks, no need to release individually.
+               /* 
+                * Release all the POSIX locks.
                 */
+               posix_locking_close_file(fsp);
 
-               brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
        }
 }
 
 /****************************************************************************
  Initialise the locking functions.
 ****************************************************************************/
+
+static int open_read_only;
+
 BOOL locking_init(int read_only)
 {
        brl_init(read_only);
 
-       if (tdb) return True;
+       if (tdb)
+               return True;
 
-       tdb = tdb_open(lock_path("locking.tdb"), 
-                      0, TDB_CLEAR_IF_FIRST
+       tdb = tdb_open_log(lock_path("locking.tdb"), 
+                      0, TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST)
                       read_only?O_RDONLY:O_RDWR|O_CREAT,
                       0644);
 
        if (!tdb) {
-               DEBUG(0,("ERROR: Failed to initialise share modes\n"));
+               DEBUG(0,("ERROR: Failed to initialise locking database\n"));
                return False;
        }
-       
+
+       if (!read_only && !deferred_open_tdb) {
+               deferred_open_tdb = tdb_open_log(lock_path("deferred_open.tdb"), 
+                      0, TDB_DEFAULT|TDB_CLEAR_IF_FIRST, 
+                      O_RDWR|O_CREAT,
+                      0644);
+
+               if (!deferred_open_tdb) {
+                       DEBUG(0,("ERROR: Failed to initialise deferred open database\n"));
+                       tdb_close(tdb);
+                       tdb = NULL;
+                       return False;
+               }
+       }
+
+       if (!posix_locking_init(read_only))
+               return False;
+
+       open_read_only = read_only;
+
        return True;
 }
 
 /*******************************************************************
  Deinitialize the share_mode management.
 ******************************************************************/
+
 BOOL locking_end(void)
 {
-       if (tdb && tdb_close(tdb) != 0) return False;
-       return True;
+       BOOL ret = True;
+
+       brl_shutdown(open_read_only);
+       if (tdb) {
+               if (tdb_close(tdb) != 0)
+                       ret = False;
+       }
+
+       if (deferred_open_tdb) {
+               if (tdb_close(tdb) != 0)
+                       ret = False;
+       }
+               
+       return ret;
 }
 
 /*******************************************************************
- form a static locking key for a dev/inode pair 
+ Form a static locking key for a dev/inode pair.
 ******************************************************************/
+
 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
 {
        static struct locking_key key;
        TDB_DATA kbuf;
+
+       memset(&key, '\0', sizeof(key));
        key.dev = dev;
        key.inode = inode;
        kbuf.dptr = (char *)&key;
        kbuf.dsize = sizeof(key);
        return kbuf;
 }
+
 static TDB_DATA locking_key_fsp(files_struct *fsp)
 {
        return locking_key(fsp->dev, fsp->inode);
@@ -667,138 +389,323 @@ static TDB_DATA locking_key_fsp(files_struct *fsp)
 /*******************************************************************
  Lock a hash bucket entry.
 ******************************************************************/
+
 BOOL lock_share_entry(connection_struct *conn,
                      SMB_DEV_T dev, SMB_INO_T inode)
 {
-       return tdb_lockchain(tdb, locking_key(dev, inode)) == 0;
+       return tdb_chainlock(tdb, locking_key(dev, inode)) == 0; /* True iff tdb_chainlock() returns 0; conn is not used */
 }
 
 /*******************************************************************
  Unlock a hash bucket entry.
 ******************************************************************/
-BOOL unlock_share_entry(connection_struct *conn,
+
+void unlock_share_entry(connection_struct *conn,
                         SMB_DEV_T dev, SMB_INO_T inode)
 {
-       return tdb_unlockchain(tdb, locking_key(dev, inode)) == 0;
+       tdb_chainunlock(tdb, locking_key(dev, inode)); /* result of tdb_chainunlock() is not checked; conn is not used */
 }
 
-
 /*******************************************************************
  Lock a hash bucket entry. use a fsp for convenience
 ******************************************************************/
+
 BOOL lock_share_entry_fsp(files_struct *fsp)
 {
-       return tdb_lockchain(tdb, locking_key(fsp->dev, fsp->inode)) == 0;
+       return tdb_chainlock(tdb, locking_key(fsp->dev, fsp->inode)) == 0; /* key is built from fsp->dev/fsp->inode; True iff tdb_chainlock() returns 0 */
 }
 
 /*******************************************************************
  Unlock a hash bucket entry.
 ******************************************************************/
-BOOL unlock_share_entry_fsp(files_struct *fsp)
+
+void unlock_share_entry_fsp(files_struct *fsp)
+{
+       tdb_chainunlock(tdb, locking_key(fsp->dev, fsp->inode)); /* key is built from fsp->dev/fsp->inode; result is not checked */
+}
+
+/*******************************************************************
+ Print out a share mode.
+********************************************************************/
+
+char *share_mode_str(int num, share_mode_entry *e)
+{
+       static pstring share_str; /* static buffer: rewritten on every call and returned to the caller */
+
+       slprintf(share_str, sizeof(share_str)-1, "share_mode_entry[%d]: \
+pid = %lu, share_mode = 0x%x, desired_access = 0x%x, port = 0x%x, type= 0x%x, file_id = %lu, dev = 0x%x, inode = %.0f",
+       num, (unsigned long)e->pid, e->share_mode, (unsigned int)e->desired_access, e->op_port, e->op_type, e->share_file_id,
+       (unsigned int)e->dev, (double)e->inode );
+
+       return share_str; /* points at the static buffer above; callers must not free it */
+}
+
+/*******************************************************************
+ Print out a share mode table.
+********************************************************************/
+
+static void print_share_mode_table(struct locking_data *data)
 {
-       return tdb_unlockchain(tdb, locking_key(fsp->dev, fsp->inode)) == 0;
+       int num_share_modes = data->u.num_share_mode_entries;
+       share_mode_entry *shares = (share_mode_entry *)(data + 1); /* entries are read from directly after the header */
+       int i;
+
+       for (i = 0; i < num_share_modes; i++) { /* one DEBUG level 10 line per entry */
+               share_mode_entry *entry_p = &shares[i];
+               DEBUG(10,("print_share_mode_table: %s\n", share_mode_str(i, entry_p) ));
+       }
 }
 
 /*******************************************************************
  Get all share mode entries for a dev/inode pair.
 ********************************************************************/
+
 int get_share_modes(connection_struct *conn, 
                    SMB_DEV_T dev, SMB_INO_T inode, 
-                   share_mode_entry **shares)
+                   share_mode_entry **pp_shares)
 {
        TDB_DATA dbuf;
        struct locking_data *data;
-       int ret;
+       int num_share_modes;
+       share_mode_entry *shares = NULL;
+       TDB_DATA key = locking_key(dev, inode);
+       *pp_shares = NULL;
 
-       *shares = NULL;
-
-       dbuf = tdb_fetch(tdb, locking_key(dev, inode));
-       if (!dbuf.dptr) return 0;
+       dbuf = tdb_fetch(tdb, key);
+       if (!dbuf.dptr)
+               return 0;
 
        data = (struct locking_data *)dbuf.dptr;
-       ret = data->num_share_mode_entries;
-       if(ret)
-               *shares = (share_mode_entry *)memdup(dbuf.dptr + sizeof(*data), ret * sizeof(**shares));
-       free(dbuf.dptr);
+       num_share_modes = data->u.num_share_mode_entries;
+       if(num_share_modes) {
+               pstring fname;
+               int i;
+               int del_count = 0;
 
-       if (! *shares) return 0;
+               shares = (share_mode_entry *)memdup(dbuf.dptr + sizeof(*data),  
+                                               num_share_modes * sizeof(share_mode_entry));
 
-       return ret;
+               if (!shares) {
+                       SAFE_FREE(dbuf.dptr);
+                       return 0;
+               }
+
+               /* Save off the associated filename. */
+               pstrcpy(fname, dbuf.dptr + sizeof(*data) + num_share_modes * sizeof(share_mode_entry));
+
+               /*
+                * Ensure that each entry has a real process attached.
+                */
+
+               for (i = 0; i < num_share_modes; ) {
+                       share_mode_entry *entry_p = &shares[i];
+                       if (process_exists(entry_p->pid)) {
+                               DEBUG(10,("get_share_modes: %s\n", share_mode_str(i, entry_p) ));
+                               i++;
+                       } else {
+                               DEBUG(10,("get_share_modes: deleted %s\n", share_mode_str(i, entry_p) ));
+                               if (num_share_modes - i - 1 > 0) {
+                                       memcpy( &shares[i], &shares[i+1],
+                                               sizeof(share_mode_entry) * (num_share_modes - i - 1));
+                               }
+                               num_share_modes--;
+                               del_count++;
+                       }
+               }
+
+               /* Did we delete any ? If so, re-store in tdb. */
+               if (del_count) {
+                       data->u.num_share_mode_entries = num_share_modes;
+                       
+                       if (num_share_modes) {
+                               memcpy(dbuf.dptr + sizeof(*data), shares,
+                                               num_share_modes * sizeof(share_mode_entry));
+                               /* Append the filename. */
+                               pstrcpy(dbuf.dptr + sizeof(*data) + num_share_modes * sizeof(share_mode_entry), fname);
+                       }
+
+                       /* The record has shrunk a bit */
+                       dbuf.dsize -= del_count * sizeof(share_mode_entry);
+
+                       if (data->u.num_share_mode_entries == 0) {
+                               if (tdb_delete(tdb, key) == -1) {
+                                       SAFE_FREE(shares);
+                                       SAFE_FREE(dbuf.dptr);
+                                       return 0;
+                               }
+                       } else {
+                               if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1) {
+                                       SAFE_FREE(shares);
+                                       SAFE_FREE(dbuf.dptr);
+                                       return 0;
+                               }
+                       }
+               }
+       }
+
+       SAFE_FREE(dbuf.dptr);
+       *pp_shares = shares;
+       return num_share_modes;
+}
+
+/*******************************************************************
+ Fill a share mode entry.
+********************************************************************/
+
+static void fill_share_mode(char *p, files_struct *fsp, uint16 port, uint16 op_type)
+{
+       share_mode_entry *e = (share_mode_entry *)p;
+       void *x = &e->time; /* Needed to force alignment. p may not be aligned.... */
+
+       memset(e, '\0', sizeof(share_mode_entry)); /* start from an all-zero entry before setting fields */
+       e->pid = sys_getpid(); /* entry is always stamped with the calling process */
+       e->share_mode = fsp->share_mode;
+       e->desired_access = fsp->desired_access;
+       e->op_port = port;
+       e->op_type = op_type;
+       memcpy(x, &fsp->open_time, sizeof(struct timeval)); /* time is copied bytewise through x rather than assigned */
+       e->share_file_id = fsp->file_id;
+       e->dev = fsp->dev;
+       e->inode = fsp->inode;
+}
+
+/*******************************************************************
+ Check if two share mode entries are identical, ignoring oplock 
+ and port info and desired_access.
+********************************************************************/
+
+BOOL share_modes_identical( share_mode_entry *e1, share_mode_entry *e2)
+{
+#if 1 /* JRA PARANOIA TEST - REMOVE LATER */
+       if (e1->pid == e2->pid &&
+               e1->share_file_id == e2->share_file_id &&
+               e1->dev == e2->dev &&
+               e1->inode == e2->inode &&
+               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) != (e2->share_mode & ~DELETE_ON_CLOSE_FLAG)) {
+                       DEBUG(0,("PANIC: share_modes_identical: share_mode missmatch (e1 = %u, e2 = %u). Logic error.\n",
+                               (unsigned int)(e1->share_mode & ~DELETE_ON_CLOSE_FLAG),
+                               (unsigned int)(e2->share_mode & ~DELETE_ON_CLOSE_FLAG) ));
+               smb_panic("PANIC: share_modes_identical logic error.\n"); /* same pid/file_id/dev/inode but share modes differ beyond DELETE_ON_CLOSE_FLAG */
+       }
+#endif
+
+       return (e1->pid == e2->pid && /* op_port, op_type, desired_access and time are not compared */
+               (e1->share_mode & ~DELETE_ON_CLOSE_FLAG) == (e2->share_mode & ~DELETE_ON_CLOSE_FLAG) &&
+               e1->dev == e2->dev &&
+               e1->inode == e2->inode &&
+               e1->share_file_id == e2->share_file_id );
 }
 
 /*******************************************************************
- Del the share mode of a file for this process
+ Delete a specific share mode. Return the number
+ of entries left, and a memdup'ed copy of the entry deleted (if required).
+ Ignore if no entry deleted.
 ********************************************************************/
-void del_share_mode(files_struct *fsp)
+
+ssize_t del_share_entry( SMB_DEV_T dev, SMB_INO_T inode,
+                       share_mode_entry *entry, share_mode_entry **ppse)
 {
        TDB_DATA dbuf;
        struct locking_data *data;
        int i, del_count=0;
        share_mode_entry *shares;
-       pid_t pid = getpid();
+       ssize_t count = 0; /* stays 0 when no entry matched */
+       TDB_DATA key = locking_key(dev, inode);
+
+       if (ppse) /* ppse is optional; when given it starts out NULL */
+               *ppse = NULL;
 
        /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key_fsp(fsp));
-       if (!dbuf.dptr) return;
+       dbuf = tdb_fetch(tdb, key);
+       if (!dbuf.dptr)
+               return -1; /* nothing was fetched for this dev/inode key */
 
        data = (struct locking_data *)dbuf.dptr;
        shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
 
-       /* find any with our pid and delete it by overwriting with the rest of the data 
-          from the record */
-       for (i=0;i<data->num_share_mode_entries;) {
-               if (shares[i].pid == pid &&
-                   memcmp(&shares[i].time, 
-                          &fsp->open_time,sizeof(struct timeval)) == 0) {
-                       data->num_share_mode_entries--;
-                       memmove(&shares[i], &shares[i+1], 
-                               dbuf.dsize - (sizeof(*data) + (i+1)*sizeof(*shares)));
+       /*
+        * Find every entry that share_modes_identical() matches
+        * against the caller's entry and delete it by overwriting
+        * it with the rest of the data from the record.
+        */
+
+       DEBUG(10,("del_share_entry: num_share_modes = %d\n", data->u.num_share_mode_entries ));
+
+       for (i=0;i<data->u.num_share_mode_entries;) {
+               if (share_modes_identical(&shares[i], entry)) {
+                       DEBUG(10,("del_share_entry: deleted %s\n",
+                               share_mode_str(i, &shares[i]) ));
+                       if (ppse)
+                               *ppse = memdup(&shares[i], sizeof(*shares)); /* NOTE(review): a second match would overwrite *ppse without freeing the first copy */
+                       data->u.num_share_mode_entries--;
+                       if ((dbuf.dsize - (sizeof(*data) + (i+1)*sizeof(*shares))) > 0) { /* NOTE(review): sizeof makes this unsigned, so "> 0" acts as "!= 0" - confirm */
+                               memmove(&shares[i], &shares[i+1], 
+                                       dbuf.dsize - (sizeof(*data) + (i+1)*sizeof(*shares)));
+                       }
                        del_count++;
+
+                       DEBUG(10,("del_share_entry: deleting entry %d\n", i ));
+
                } else {
                        i++;
                }
        }
 
-       /* the record has shrunk a bit */
-       dbuf.dsize -= del_count * sizeof(*shares);
+       if (del_count) { /* the database is only touched when something was removed */
+               /* the record may have shrunk a bit */
+               dbuf.dsize -= del_count * sizeof(*shares);
 
-       /* store it back in the database */
-       if (data->num_share_mode_entries == 0) {
-               tdb_delete(tdb, locking_key_fsp(fsp));
-       } else {
-               tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE);
-       }
+               count = (ssize_t)data->u.num_share_mode_entries;
 
-       free(dbuf.dptr);
+               /* store it back in the database */
+               if (data->u.num_share_mode_entries == 0) {
+                       if (tdb_delete(tdb, key) == -1)
+                               count = -1;
+               } else {
+                       if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1)
+                               count = -1;
+               }
+       }
+       DEBUG(10,("del_share_entry: Remaining table.\n"));
+       print_share_mode_table((struct locking_data *)dbuf.dptr); /* prints the in-memory copy, even if the record was just deleted */
+       SAFE_FREE(dbuf.dptr);
+       return count; /* entries left, 0 if nothing matched, -1 on fetch/delete/store failure */
 }
 
 /*******************************************************************
-fill a share mode entry
+ Del the share mode of a file for this process. Return the number
+ of entries left, and a memdup'ed copy of the entry deleted.
 ********************************************************************/
-static void fill_share_mode(char *p, files_struct *fsp, uint16 port, uint16 op_type)
+
+ssize_t del_share_mode(files_struct *fsp, share_mode_entry **ppse)
 {
-       share_mode_entry *e = (share_mode_entry *)p;
-       e->pid = getpid();
-       e->share_mode = fsp->share_mode;
-       e->op_port = port;
-       e->op_type = op_type;
-       memcpy((char *)&e->time, (char *)&fsp->open_time, sizeof(struct timeval));
+       share_mode_entry entry;
+
+       /*
+        * Fake up a share_mode_entry for comparisons (port and op_type passed as 0).
+        */
+
+       fill_share_mode((char *)&entry, fsp, 0, 0);
+       return del_share_entry(fsp->dev, fsp->inode, &entry, ppse); /* result and *ppse come straight from del_share_entry() */
 }
 
 /*******************************************************************
  Set the share mode of a file. Return False on fail, True on success.
 ********************************************************************/
+
 BOOL set_share_mode(files_struct *fsp, uint16 port, uint16 op_type)
 {
        TDB_DATA dbuf;
        struct locking_data *data;
-       share_mode_entry *shares;
        char *p=NULL;
        int size;
+       TDB_DATA key = locking_key_fsp(fsp);
+       BOOL ret = True;
                
        /* read in the existing share modes if any */
-       dbuf = tdb_fetch(tdb, locking_key_fsp(fsp));
+       dbuf = tdb_fetch(tdb, key);
        if (!dbuf.dptr) {
+               size_t offset;
                /* we'll need to create a new record */
                pstring fname;
 
@@ -806,44 +713,63 @@ BOOL set_share_mode(files_struct *fsp, uint16 port, uint16 op_type)
                pstrcat(fname, "/");
                pstrcat(fname, fsp->fsp_name);
 
-               size = sizeof(*data) + sizeof(*shares) + strlen(fname) + 1;
-               p = (char *)malloc(size);
+               size = sizeof(*data) + sizeof(share_mode_entry) + strlen(fname) + 1;
+               p = (char *)SMB_MALLOC(size);
+               if (!p)
+                       return False;
                data = (struct locking_data *)p;
-               shares = (share_mode_entry *)(p + sizeof(*data));
-               data->num_share_mode_entries = 1;
-               pstrcpy(p + sizeof(*data) + sizeof(*shares), fname);
+               data->u.num_share_mode_entries = 1;
+       
+               DEBUG(10,("set_share_mode: creating entry for file %s. num_share_modes = 1\n",
+                       fsp->fsp_name ));
+
+               offset = sizeof(*data) + sizeof(share_mode_entry);
+               safe_strcpy(p + offset, fname, size - offset - 1);
                fill_share_mode(p + sizeof(*data), fsp, port, op_type);
                dbuf.dptr = p;
                dbuf.dsize = size;
-               tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE);
-               free(p);
-               return True;
+               if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1)
+                       ret = False;
+
+               print_share_mode_table((struct locking_data *)p);
+
+               SAFE_FREE(p);
+               return ret;
        }
 
        /* we're adding to an existing entry - this is a bit fiddly */
        data = (struct locking_data *)dbuf.dptr;
-       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
 
-       data->num_share_mode_entries++;
-       size = dbuf.dsize + sizeof(*shares);
-       p = malloc(size);
+       data->u.num_share_mode_entries++;
+       
+       DEBUG(10,("set_share_mode: adding entry for file %s. new num_share_modes = %d\n",
+               fsp->fsp_name, data->u.num_share_mode_entries ));
+
+       size = dbuf.dsize + sizeof(share_mode_entry);
+       p = SMB_MALLOC(size);
+       if (!p) {
+               SAFE_FREE(dbuf.dptr);
+               return False;
+       }
        memcpy(p, dbuf.dptr, sizeof(*data));
        fill_share_mode(p + sizeof(*data), fsp, port, op_type);
-       memcpy(p + sizeof(*data) + sizeof(*shares), dbuf.dptr + sizeof(*data),
+       memcpy(p + sizeof(*data) + sizeof(share_mode_entry), dbuf.dptr + sizeof(*data),
               dbuf.dsize - sizeof(*data));
-       free(dbuf.dptr);
+       SAFE_FREE(dbuf.dptr);
        dbuf.dptr = p;
        dbuf.dsize = size;
-       tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE);
-       free(p);
-       return True;
+       if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1)
+               ret = False;
+       print_share_mode_table((struct locking_data *)p);
+       SAFE_FREE(p);
+       return ret;
 }
 
-
 /*******************************************************************
-a generic in-place modification call for share mode entries
+ A generic in-place modification call for share mode entries.
 ********************************************************************/
-static BOOL mod_share_mode(files_struct *fsp,
+
+static BOOL mod_share_mode( SMB_DEV_T dev, SMB_INO_T inode, share_mode_entry *entry,
                           void (*mod_fn)(share_mode_entry *, SMB_DEV_T, SMB_INO_T, void *),
                           void *param)
 {
@@ -851,45 +777,46 @@ static BOOL mod_share_mode(files_struct *fsp,
        struct locking_data *data;
        int i;
        share_mode_entry *shares;
-       pid_t pid = getpid();
-       int need_store=0;
+       BOOL need_store=False;
+       BOOL ret = True;
+       TDB_DATA key = locking_key(dev, inode);
 
        /* read in the existing share modes */
-       dbuf = tdb_fetch(tdb, locking_key_fsp(fsp));
-       if (!dbuf.dptr) return False;
+       dbuf = tdb_fetch(tdb, key);
+       if (!dbuf.dptr)
+               return False;
 
        data = (struct locking_data *)dbuf.dptr;
        shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
 
        /* find any with our pid and call the supplied function */
-       for (i=0;i<data->num_share_mode_entries;i++) {
-               if (pid == shares[i].pid && 
-                   shares[i].share_mode == fsp->share_mode &&
-                   memcmp(&shares[i].time, 
-                          &fsp->open_time,sizeof(struct timeval)) == 0) {
-                       mod_fn(&shares[i], fsp->dev, fsp->inode, param);
-                       need_store=1;
+       for (i=0;i<data->u.num_share_mode_entries;i++) {
+               if (share_modes_identical(entry, &shares[i])) {
+                       mod_fn(&shares[i], dev, inode, param);
+                       need_store=True;
                }
        }
 
        /* if the mod fn was called then store it back */
        if (need_store) {
-               if (data->num_share_mode_entries == 0) {
-                       tdb_delete(tdb, locking_key_fsp(fsp));
+               if (data->u.num_share_mode_entries == 0) {
+                       if (tdb_delete(tdb, key) == -1)
+                               ret = False;
                } else {
-                       tdb_store(tdb, locking_key_fsp(fsp), dbuf, TDB_REPLACE);
+                       if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1)
+                               ret = False;
                }
        }
 
-       free(dbuf.dptr);
-       return need_store;
+       SAFE_FREE(dbuf.dptr);
+       return ret;
 }
 
-
 /*******************************************************************
  Static function that actually does the work for the generic function
  below.
 ********************************************************************/
+
 static void remove_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
                                    void *param)
 {
@@ -903,15 +830,22 @@ static void remove_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_I
 /*******************************************************************
  Remove an oplock port and mode entry from a share mode.
 ********************************************************************/
+
 BOOL remove_share_oplock(files_struct *fsp)
 {
-       return mod_share_mode(fsp, remove_share_oplock_fn, NULL);
+       share_mode_entry entry;
+       /*
+        * Fake up an entry for comparisons (port and op_type passed as 0).
+        */
+       fill_share_mode((char *)&entry, fsp, 0, 0);
+       return mod_share_mode(fsp->dev, fsp->inode, &entry, remove_share_oplock_fn, NULL); /* no extra parameter is passed to the callback */
 }
 
 /*******************************************************************
  Static function that actually does the work for the generic function
  below.
 ********************************************************************/
+
 static void downgrade_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
                                    void *param)
 {
@@ -923,54 +857,401 @@ static void downgrade_share_oplock_fn(share_mode_entry *entry, SMB_DEV_T dev, SM
 /*******************************************************************
  Downgrade a oplock type from exclusive to level II.
 ********************************************************************/
+
 BOOL downgrade_share_oplock(files_struct *fsp)
 {
-       return mod_share_mode(fsp, downgrade_share_oplock_fn, NULL);
+       share_mode_entry entry;
+       /*
+        * Fake up an entry for comparisons (port and op_type passed as 0).
+        */
+       fill_share_mode((char *)&entry, fsp, 0, 0);
+       return mod_share_mode(fsp->dev, fsp->inode, &entry, downgrade_share_oplock_fn, NULL); /* no extra parameter is passed to the callback */
 }
 
+/*******************************************************************
+ Get/Set the delete on close flag in a set of share modes.
+ Return False on fail, True on success.
+********************************************************************/
+
+BOOL modify_delete_flag( SMB_DEV_T dev, SMB_INO_T inode, BOOL delete_on_close)
+{
+       TDB_DATA dbuf;
+       struct locking_data *data;
+       int i;
+       share_mode_entry *shares;
+       TDB_DATA key = locking_key(dev, inode);
+
+       /* read in the existing share modes */
+       dbuf = tdb_fetch(tdb, key);
+       if (!dbuf.dptr)
+               return False; /* nothing was fetched for this dev/inode key */
+
+       data = (struct locking_data *)dbuf.dptr;
+       shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
+
+       /* Set/Unset the delete on close element on every entry in the record. */
+       for (i=0;i<data->u.num_share_mode_entries;i++,shares++) {
+               shares->share_mode = (delete_on_close ?
+                            (shares->share_mode | DELETE_ON_CLOSE_FLAG) :
+                            (shares->share_mode & ~DELETE_ON_CLOSE_FLAG) );
+       }
+
+       /* store it back; skipped (still returning True) when the record holds no entries */
+       if (data->u.num_share_mode_entries) {
+               if (tdb_store(tdb, key, dbuf, TDB_REPLACE)==-1) {
+                       SAFE_FREE(dbuf.dptr);
+                       return False;
+               }
+       }
+
+       SAFE_FREE(dbuf.dptr);
+       return True;
+}
 
 /*******************************************************************
- Static function that actually does the work for the generic function
- below.
+ Print out a deferred open entry.
 ********************************************************************/
-struct mod_val {
-       int new_share_mode;
-       uint16 new_oplock;
+
+char *deferred_open_str(int num, deferred_open_entry *e)
+{
+       static pstring de_str; /* static buffer: rewritten on every call and returned to the caller */
+
+       slprintf(de_str, sizeof(de_str)-1, "deferred_open_entry[%d]: \
+pid = %lu, mid = %u, dev = 0x%x, inode = %.0f, port = %u, time = [%u.%06u]",
+               num, (unsigned long)e->pid, (unsigned int)e->mid, (unsigned int)e->dev, (double)e->inode,
+               (unsigned int)e->port,
+               (unsigned int)e->time.tv_sec, (unsigned int)e->time.tv_usec );
+
+       return de_str; /* points at the static buffer above; callers must not free it */
+}
+
+/* Internal data structures for deferred opens... */
+
+struct de_locking_key {
+       char name[4]; /* filled with "DOE" and its NUL by deferred_open_locking_key() */
+       SMB_DEV_T dev;
+       SMB_INO_T inode;
 };
 
-static void modify_share_mode_fn(share_mode_entry *entry, SMB_DEV_T dev, SMB_INO_T inode, 
-                                   void *param)
+struct deferred_open_data {
+        union {
+                int num_deferred_open_entries; /* number of entries stored after this header */
+                deferred_open_entry dummy; /* Needed for alignment. */
+        } u;
+        /* the following two entries are implicit
+           deferred_open_entry de_entries[num_deferred_open_entries];
+           char file_name[];
+        */
 };
+
+/*******************************************************************
+ Print out a deferred open table.
+********************************************************************/
+
+static void print_deferred_open_table(struct deferred_open_data *data)
 {
-       struct mod_val *mvp = (struct mod_val *)param;
+       int num_de_entries = data->u.num_deferred_open_entries;
+       deferred_open_entry *de_entries = (deferred_open_entry *)(data + 1); /* entries are read from directly after the header */
+       int i;
 
-       DEBUG(10,("modify_share_mode_fn: changing share mode info from %x to %x for entry dev=%x ino=%.0f\n",
-        entry->share_mode, mvp->new_share_mode, (unsigned int)dev, (double)inode ));
-       DEBUG(10,("modify_share_mode_fn: changing oplock state from %x to %x for entry dev=%x ino=%.0f\n",
-        entry->op_type, (int)mvp->new_oplock, (unsigned int)dev, (double)inode ));
-       /* Change the share mode info. */
-       entry->share_mode = mvp->new_share_mode;
-       entry->op_type = mvp->new_oplock;
+       for (i = 0; i < num_de_entries; i++) { /* one DEBUG level 10 line per entry */
+               deferred_open_entry *entry_p = &de_entries[i];
+               DEBUG(10,("print_deferred_open_table: %s\n", deferred_open_str(i, entry_p) ));
+       }
 }
 
+
 /*******************************************************************
- Modify a share mode on a file. Used by the delete open file code.
- Return False on fail, True on success.
+ Form a static deferred open locking key for a dev/inode pair.
+******************************************************************/
+
+static TDB_DATA deferred_open_locking_key(SMB_DEV_T dev, SMB_INO_T inode)
+{
+       static struct de_locking_key key; /* static: the returned kbuf.dptr points at this storage */
+       TDB_DATA kbuf;
+
+       memset(&key, '\0', sizeof(key)); /* zero the whole struct first: all sizeof(key) bytes are used as the key */
+       memcpy(&key.name[0], "DOE", 4); /* 4 bytes: the three letters plus the terminating NUL */
+       key.dev = dev;
+       key.inode = inode;
+       kbuf.dptr = (char *)&key;
+       kbuf.dsize = sizeof(key);
+       return kbuf;
+}
+
+/*******************************************************************
+ Get all deferred open entries for a dev/inode pair.
+********************************************************************/
+
+int get_deferred_opens(connection_struct *conn, 
+                   SMB_DEV_T dev, SMB_INO_T inode, 
+                   deferred_open_entry **pp_de_entries)
+{
+       TDB_DATA dbuf;
+       struct deferred_open_data *data;
+       int num_de_entries;
+       deferred_open_entry *de_entries = NULL; /* memdup'ed copy handed back through *pp_de_entries */
+       TDB_DATA key = deferred_open_locking_key(dev, inode);
+
+       *pp_de_entries = NULL; /* every early return below leaves the caller with NULL */
+
+       dbuf = tdb_fetch(deferred_open_tdb, key);
+       if (!dbuf.dptr)
+               return 0;
+
+       data = (struct deferred_open_data *)dbuf.dptr;
+       num_de_entries = data->u.num_deferred_open_entries;
+       if(num_de_entries) {
+               pstring fname;
+               int i;
+               int del_count = 0; /* entries dropped because their process is gone */
+
+               de_entries = (deferred_open_entry *)memdup(dbuf.dptr + sizeof(*data),   
+                                               num_de_entries * sizeof(deferred_open_entry));
+
+               if (!de_entries) {
+                       SAFE_FREE(dbuf.dptr);
+                       return 0;
+               }
+
+               /* Save off the associated filename. */
+               pstrcpy(fname, dbuf.dptr + sizeof(*data) + num_de_entries * sizeof(deferred_open_entry));
+
+               /*
+                * Ensure that each entry has a real process attached.
+                */
+
+               for (i = 0; i < num_de_entries; ) { /* i only advances when entry i is kept */
+                       deferred_open_entry *entry_p = &de_entries[i];
+                       if (process_exists(entry_p->pid)) {
+                               DEBUG(10,("get_deferred_opens: %s\n", deferred_open_str(i, entry_p) ));
+                               i++;
+                       } else {
+                               DEBUG(10,("get_deferred_opens: deleted %s\n", deferred_open_str(i, entry_p) ));
+                               if (num_de_entries - i - 1 > 0) {
+                                       memmove( &de_entries[i], &de_entries[i+1], /* source and destination overlap: memmove, not memcpy */
+                                               sizeof(deferred_open_entry) * (num_de_entries - i - 1));
+                               }
+                               num_de_entries--;
+                               del_count++;
+                       }
+               }
+
+               /* Did we delete any ? If so, re-store in tdb. */
+               if (del_count) {
+                       data->u.num_deferred_open_entries = num_de_entries;
+                       
+                       if (num_de_entries) {
+                               memcpy(dbuf.dptr + sizeof(*data), de_entries,
+                                               num_de_entries * sizeof(deferred_open_entry));
+                               /* Append the filename. */
+                               pstrcpy(dbuf.dptr + sizeof(*data) + num_de_entries * sizeof(deferred_open_entry), fname);
+                       }
+
+                       /* The record has shrunk a bit */
+                       dbuf.dsize -= del_count * sizeof(deferred_open_entry);
+
+                       if (data->u.num_deferred_open_entries == 0) {
+                               if (tdb_delete(deferred_open_tdb, key) == -1) {
+                                       SAFE_FREE(de_entries);
+                                       SAFE_FREE(dbuf.dptr);
+                                       return 0;
+                               }
+                       } else {
+                               if (tdb_store(deferred_open_tdb, key, dbuf, TDB_REPLACE) == -1) {
+                                       SAFE_FREE(de_entries);
+                                       SAFE_FREE(dbuf.dptr);
+                                       return 0;
+                               }
+                       }
+               }
+       }
+
+       SAFE_FREE(dbuf.dptr);
+       *pp_de_entries = de_entries; /* NULL when every entry was pruned or none existed */
+       return num_de_entries;
+}
+
+/*******************************************************************
+ Check if two deferred open entries are identical.
+********************************************************************/
+
+static BOOL deferred_open_entries_identical( deferred_open_entry *e1, deferred_open_entry *e2)
+{
+       return (e1->pid == e2->pid && /* all seven compared fields must be equal */
+               e1->mid == e2->mid &&
+               e1->port == e2->port &&
+               e1->dev == e2->dev &&
+               e1->inode == e2->inode &&
+               e1->time.tv_sec == e2->time.tv_sec && /* time is compared field by field, not with memcmp */
+               e1->time.tv_usec == e2->time.tv_usec);
+}
+
+/*******************************************************************
+ Delete a specific deferred open entry.
+ Ignore if no entry deleted.
+
+ Looks up the record keyed by entry->dev / entry->inode and removes
+ the entry identical to *entry (at most one match is expected). The
+ shrunken record is stored back, or the whole record is deleted once
+ no entries remain.
+
+ Returns True when there was nothing to delete or the database was
+ updated, False when the tdb delete/store failed.
 ********************************************************************/
-BOOL modify_share_mode(files_struct *fsp, int new_mode, uint16 new_oplock)
+
+BOOL delete_deferred_open_entry(deferred_open_entry *entry)
 {
-       struct mod_val mv;
+       TDB_DATA dbuf;
+       struct deferred_open_data *data;
+       int i, del_count=0;
+       deferred_open_entry *de_entries;
+       BOOL ret = True;
+       TDB_DATA key = deferred_open_locking_key(entry->dev, entry->inode);
 
-       mv.new_share_mode = new_mode;
-       mv.new_oplock = new_oplock;
+       /* read in the existing deferred open entries */
+       dbuf = tdb_fetch(deferred_open_tdb, key);
+       if (!dbuf.dptr) {
+               /*
+                * No record means nothing to delete, which is not an error.
+                * This used to be "return -1": a non-BOOL value that callers
+                * nevertheless saw as true, so True keeps their behaviour.
+                */
+               return True;
+       }
+
+       data = (struct deferred_open_data *)dbuf.dptr;
+       de_entries = (deferred_open_entry *)(dbuf.dptr + sizeof(*data));
+
+       /*
+        * Find the entry identical to the one passed in and
+        * delete it by overwriting it with the rest of the
+        * data from the record.
+        */
+
+       DEBUG(10,("delete_deferred_open_entry: num_deferred_open_entries = %d\n",
+               data->u.num_deferred_open_entries ));
+
+       for (i=0;i<data->u.num_deferred_open_entries;) {
+               if (deferred_open_entries_identical(&de_entries[i], entry)) {
+                       /* Offset of the first byte following entry i in the record. */
+                       size_t tail_off = sizeof(*data) + (i+1)*sizeof(*de_entries);
+
+                       DEBUG(10,("delete_deferred_open_entry: deleted %s\n",
+                               deferred_open_str(i, &de_entries[i]) ));
+
+                       data->u.num_deferred_open_entries--;
+                       /*
+                        * Compare instead of subtracting: these sizes are
+                        * unsigned, so "dsize - tail_off > 0" would wrap to a
+                        * huge length on a record shorter than expected.
+                        */
+                       if (dbuf.dsize > tail_off) {
+                               memmove(&de_entries[i], &de_entries[i+1],
+                                       dbuf.dsize - tail_off);
+                       }
+                       del_count++;
+
+                       DEBUG(10,("delete_deferred_open_entry: deleting entry %d\n", i ));
+
+               } else {
+                       i++;
+               }
+       }
 
-       return mod_share_mode(fsp, modify_share_mode_fn, (void *)&mv);
+       SMB_ASSERT(del_count == 0 || del_count == 1);
+
+       if (del_count) {
+               /* the record may have shrunk a bit */
+               dbuf.dsize -= del_count * sizeof(*de_entries);
+
+               /* store it back in the database */
+               if (data->u.num_deferred_open_entries == 0) {
+                       if (tdb_delete(deferred_open_tdb, key) == -1)
+                               ret = False;
+               } else {
+                       if (tdb_store(deferred_open_tdb, key, dbuf, TDB_REPLACE) == -1)
+                               ret = False;
+               }
+       }
+       DEBUG(10,("delete_deferred_open_entry: Remaining table.\n"));
+       print_deferred_open_table((struct deferred_open_data*)dbuf.dptr);
+       SAFE_FREE(dbuf.dptr);
+       return ret;
+}
+
+/*******************************************************************
+ Fill a deferred open entry.
+
+ Writes sizeof(deferred_open_entry) bytes at p: a zeroed entry with
+ mid, the calling process id (sys_getpid()), *ptv, dev, inode and
+ port filled in. p does not need to be aligned for a
+ deferred_open_entry.
+********************************************************************/
+
+static void fill_deferred_open(char *p, uint16 mid, struct timeval *ptv, SMB_DEV_T dev, SMB_INO_T inode, uint16 port)
+{
+       deferred_open_entry e;
+
+       /*
+        * p may not be aligned, so never store through it as a
+        * deferred_open_entry pointer. Build the entry in an aligned
+        * local instead and copy the finished bytes out in one go.
+        */
+       memset(&e, '\0', sizeof(e));
+       e.mid = mid;
+       e.pid = sys_getpid();
+       memcpy(&e.time, ptv, sizeof(struct timeval));
+       e.dev = dev;
+       e.inode = inode;
+       e.port = port;
+
+       memcpy(p, &e, sizeof(e));
 }
 
+/*******************************************************************
+ Record a deferred open for the file identified by dev/inode.
+ A fresh record (header, one entry, file name) is created when none
+ exists yet; otherwise the new entry is placed in front of the entries
+ already stored. Returns True on success, False if memory could not be
+ allocated or the tdb store failed.
+********************************************************************/
+
+BOOL add_deferred_open(uint16 mid, struct timeval *ptv, SMB_DEV_T dev, SMB_INO_T inode, uint16 port, const char *fname)
+{
+       TDB_DATA key = deferred_open_locking_key(dev, inode);
+       TDB_DATA rec;
+       struct deferred_open_data *hdr;
+       char *buf = NULL;
+       int buf_len;
+       BOOL ok = True;
+
+       /* Fetch whatever is already stored under this key, if anything. */
+       rec = tdb_fetch(deferred_open_tdb, key);
+
+       if (rec.dptr == NULL) {
+               /* Nothing stored yet: header + one entry + NUL terminated name. */
+               const size_t name_off = sizeof(*hdr) + sizeof(deferred_open_entry);
+
+               buf_len = sizeof(*hdr) + sizeof(deferred_open_entry) + strlen(fname) + 1;
+               buf = (char *)SMB_MALLOC(buf_len);
+               if (buf == NULL) {
+                       return False;
+               }
+
+               hdr = (struct deferred_open_data *)buf;
+               hdr->u.num_deferred_open_entries = 1;
+
+               DEBUG(10,("add_deferred_open: creating entry for file %s. num_deferred_open_entries = 1\n",
+                       fname ));
+
+               safe_strcpy(buf + name_off, fname, buf_len - name_off - 1);
+               fill_deferred_open(buf + sizeof(*hdr), mid, ptv, dev, inode, port);
+       } else {
+               /* A record exists: bump its count and grow it by one entry. */
+               hdr = (struct deferred_open_data *)rec.dptr;
+               hdr->u.num_deferred_open_entries += 1;
+
+               DEBUG(10,("add_deferred_open: adding entry for file %s. new num_deferred_open_entries = %d\n",
+                       fname, hdr->u.num_deferred_open_entries ));
+
+               buf_len = rec.dsize + sizeof(deferred_open_entry);
+               buf = SMB_MALLOC(buf_len);
+               if (buf == NULL) {
+                       SAFE_FREE(rec.dptr);
+                       return False;
+               }
+
+               /*
+                * New layout: the header (already carrying the new count),
+                * the new entry, then everything that used to follow the
+                * header in the old record.
+                */
+               memcpy(buf, rec.dptr, sizeof(*hdr));
+               fill_deferred_open(buf + sizeof(*hdr), mid, ptv, dev, inode, port);
+               memcpy(buf + sizeof(*hdr) + sizeof(deferred_open_entry),
+                      rec.dptr + sizeof(*hdr),
+                      rec.dsize - sizeof(*hdr));
+               SAFE_FREE(rec.dptr);
+       }
+
+       /* Common tail: store the assembled record, dump it, release it. */
+       rec.dptr = buf;
+       rec.dsize = buf_len;
+       if (tdb_store(deferred_open_tdb, key, rec, TDB_REPLACE) == -1) {
+               ok = False;
+       }
+
+       print_deferred_open_table((struct deferred_open_data *)buf);
+
+       SAFE_FREE(buf);
+       return ok;
+}
 
 /****************************************************************************
-traverse the whole database with this function, calling traverse_callback
-on each share mode
+ Traverse the whole database with this function, calling traverse_callback
+ on each share mode
+
+ state is cast to the callback to invoke. A record is read as a
+ struct locking_data header, followed by u.num_share_mode_entries
+ share_mode_entry structures, followed by the file name; the callback
+ receives each entry together with that name.
+ Returns 0 both when a record is skipped and after the callbacks ran.
 ****************************************************************************/
+
 static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf, 
                        void* state)
 {
@@ -981,11 +1262,15 @@ static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf,
 
        SHAREMODE_FN(traverse_callback) = (SHAREMODE_FN_CAST())state;
 
+       /*
+        * Ensure this is a locking_key record. A key of any other size
+        * is skipped without calling the callback.
+        */
+       if (kbuf.dsize != sizeof(struct locking_key))
+               return 0;
+
        data = (struct locking_data *)dbuf.dptr;
        shares = (share_mode_entry *)(dbuf.dptr + sizeof(*data));
-       name = dbuf.dptr + sizeof(*data) + data->num_share_mode_entries*sizeof(*shares);
+       name = dbuf.dptr + sizeof(*data) + data->u.num_share_mode_entries*sizeof(*shares);
 
-       for (i=0;i<data->num_share_mode_entries;i++) {
+       for (i=0;i<data->u.num_share_mode_entries;i++) {
                traverse_callback(&shares[i], name);
        }
        return 0;
@@ -995,8 +1280,10 @@ static int traverse_fn(TDB_CONTEXT *the_tdb, TDB_DATA kbuf, TDB_DATA dbuf,
  Call the specified function on each entry under management by the
  share mode system.
 ********************************************************************/
+
 int share_mode_forall(SHAREMODE_FN(fn))
 {
-       if (!tdb) return 0;
+       if (!tdb) /* no locking database handle: nothing to traverse, report 0 */
+               return 0;
        return tdb_traverse(tdb, traverse_fn, (void*)fn);
 }