dns: Use new DNS debugclass in DNS server

[kai/samba.git] / source3 / locking / posix.c
diff --git a/source3/locking/posix.c b/source3/locking/posix.c

index f7a8cd3d39c4b10e38def865c56d6bab0b8aab2b..2d89110b7d343e374c4bbecb9d1b3f267d4f74a5 100644 (file)
--- a/source3/locking/posix.c
+++ b/source3/locking/posix.c
@@ -1,11 +1,11 @@
  /* 
     Unix SMB/CIFS implementation.
     Locking functions
-   Copyright (C) Jeremy Allison 1992-2000
+   Copyright (C) Jeremy Allison 1992-2006
     
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
     (at your option) any later version.
     
     This program is distributed in the hope that it will be useful,
@@ -14,8 +14,7 @@
     GNU General Public License for more details.
     
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
     Revision History:
  
@@ -23,709 +22,648 @@
  */
  
  #include "includes.h"
+#include "system/filesys.h"
+#include "locking/proto.h"
+#include "dbwrap/dbwrap.h"
+#include "dbwrap/dbwrap_rbt.h"
+#include "util_tdb.h"
  
-/*
- * The POSIX locking database handle.
- */
-
-static TDB_CONTEXT *posix_lock_tdb;
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
  
  /*
   * The pending close database handle.
   */
  
-static TDB_CONTEXT *posix_pending_close_tdb;
-
-/*
- * The data in POSIX lock records is an unsorted linear array of these
- * records.  It is unnecessary to store the count as tdb provides the
- * size of the record.
- */
-
-struct posix_lock {
-       int fd;
-       SMB_OFF_T start;
-       SMB_OFF_T size;
-       int lock_type;
-};
-
-/*
- * The data in POSIX pending close records is an unsorted linear array of int
- * records.  It is unnecessary to store the count as tdb provides the
- * size of the record.
- */
-
-/* The key used in both the POSIX databases. */
-
-struct posix_lock_key {
-       SMB_DEV_T device;
-       SMB_INO_T inode;
-}; 
-
-/*******************************************************************
- Form a static locking key for a dev/inode pair.
-******************************************************************/
-
-static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
-{
-       static struct posix_lock_key key;
-       TDB_DATA kbuf;
-
-       memset(&key, '\0', sizeof(key));
-       key.device = dev;
-       key.inode = inode;
-       kbuf.dptr = (char *)&key;
-       kbuf.dsize = sizeof(key);
-       return kbuf;
-}
-
-/*******************************************************************
- Convenience function to get a key from an fsp.
-******************************************************************/
-
-static TDB_DATA locking_key_fsp(files_struct *fsp)
-{
-       return locking_key(fsp->dev, fsp->inode);
-}
+static struct db_context *posix_pending_close_db;
  
  /****************************************************************************
- Add an fd to the pending close tdb.
+ First - the functions that deal with the underlying system locks - these
+ functions are used no matter if we're mapping CIFS Windows locks or CIFS
+ POSIX locks onto POSIX.
  ****************************************************************************/
  
-static BOOL add_fd_to_close_entry(files_struct *fsp)
-{
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       char *tp;
-
-       dbuf.dptr = NULL;
-
-       dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
-
-       tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
-       if (!tp) {
-               DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
-               SAFE_FREE(dbuf.dptr);
-               return False;
-       } else
-               dbuf.dptr = tp;
-
-       memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
-       dbuf.dsize += sizeof(int);
-
-       if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
-               DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
-       }
-
-       SAFE_FREE(dbuf.dptr);
-       return True;
-}
-
  /****************************************************************************
- Remove all fd entries for a specific dev/inode pair from the tdb.
+ Utility function to map a lock type correctly depending on the open
+ mode of a file.
  ****************************************************************************/
  
-static void delete_close_entries(files_struct *fsp)
+static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
+       if((lock_type == WRITE_LOCK) && !fsp->can_write) {
+               /*
+                * Many UNIX's cannot get a write lock on a file opened read-only.
+                * Win32 locking semantics allow this.
+                * Do the best we can and attempt a read-only lock.
+                */
+               DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
+               return F_RDLCK;
+       }
+
+       /*
+        * This return should be the most normal, as we attempt
+        * to always open files read/write.
+        */
  
-       if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
-               DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
+       return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
  }
  
  /****************************************************************************
- Get the array of POSIX pending close records for an open fsp. Caller must
- free. Returns number of entries.
+ Debugging aid :-).
  ****************************************************************************/
  
-static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
+static const char *posix_lock_type_name(int lock_type)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       size_t count = 0;
-
-       *entries = NULL;
-       dbuf.dptr = NULL;
-
-       dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
-
-    if (!dbuf.dptr) {
-               return 0;
-       }
-
-       *entries = (int *)dbuf.dptr;
-       count = (size_t)(dbuf.dsize / sizeof(int));
-
-       return count;
+       return (lock_type == F_RDLCK) ? "READ" : "WRITE";
  }
  
  /****************************************************************************
- Get the array of POSIX locks for an fsp. Caller must free. Returns
- number of entries.
+ Check to see if the given unsigned lock range is within the possible POSIX
+ range. Modifies the given args to be in range if possible, just returns
+ False if not.
  ****************************************************************************/
  
-static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
+static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
+                               uint64_t u_offset, uint64_t u_count)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       size_t count = 0;
-
-       *entries = NULL;
-
-       dbuf.dptr = NULL;
+       off_t offset = (off_t)u_offset;
+       off_t count = (off_t)u_count;
  
-       dbuf = tdb_fetch(posix_lock_tdb, kbuf);
-
-    if (!dbuf.dptr) {
-               return 0;
-       }
+       /*
+        * For the type of system we are, attempt to
+        * find the maximum positive lock offset as an off_t.
+        */
  
-       *entries = (struct posix_lock *)dbuf.dptr;
-       count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
+#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
  
-       return count;
-}
+       off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
+#else
+       /*
+        * In this case off_t is 64 bits,
+        * and the underlying system can handle 64 bit signed locks.
+        */
  
-/****************************************************************************
- Deal with pending closes needed by POSIX locking support.
- Note that posix_locking_close_file() is expected to have been called
- to delete all locks on this fsp before this function is called.
-****************************************************************************/
+       off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
+       off_t mask = (mask2<<1);
+       off_t max_positive_lock_offset = ~mask;
  
-int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
-{
-       int saved_errno = 0;
-       int ret;
-       size_t count, i;
-       struct posix_lock *entries = NULL;
-       int *fd_array = NULL;
-       BOOL locks_on_other_fds = False;
+#endif
+       /*
+        * POSIX locks of length zero mean lock to end-of-file.
+        * Win32 locks of length zero are point probes. Ignore
+        * any Win32 locks of length zero. JRA.
+        */
  
-       if (!lp_posix_locking(SNUM(conn))) {
-               /*
-                * No POSIX to worry about, just close.
-                */
-               ret = conn->vfs_ops.close(fsp,fsp->fd);
-               fsp->fd = -1;
-               return ret;
+       if (count == (off_t)0) {
+               DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
+               return False;
         }
  
         /*
-        * Get the number of outstanding POSIX locks on this dev/inode pair.
+        * If the given offset is > max_positive_lock_offset then we cannot map
+        * this at all, so ignore this lock.
          */
  
-       count = get_posix_lock_entries(fsp, &entries);
+       if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
+               DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
+                               (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
+               return False;
+       }
  
         /*
-        * Check if there are any outstanding locks belonging to
-        * other fd's. This should never be the case if posix_locking_close_file()
-        * has been called first, but it never hurts to be *sure*.
+        * We must truncate the count to less than max_positive_lock_offset.
          */
  
-       for (i = 0; i < count; i++) {
-               if (entries[i].fd != fsp->fd) {
-                       locks_on_other_fds = True;
-                       break;
-               }
+       if (u_count & ~((uint64_t)max_positive_lock_offset)) {
+               count = max_positive_lock_offset;
         }
  
-       if (locks_on_other_fds) {
+       /*
+        * Truncate count to end at max lock offset.
+        */
  
-               /*
-                * There are outstanding locks on this dev/inode pair on other fds.
-                * Add our fd to the pending close tdb and set fsp->fd to -1.
-                */
+       if (offset + count < 0 || offset + count > max_positive_lock_offset) {
+               count = max_positive_lock_offset - offset;
+       }
  
-               if (!add_fd_to_close_entry(fsp)) {
-                       SAFE_FREE(entries);
-                       return False;
-               }
+       /*
+        * If we ate all the count, ignore this lock.
+        */
  
-               SAFE_FREE(entries);
-               fsp->fd = -1;
-               return 0;
+       if (count == 0) {
+               DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
+                               (double)u_offset, (double)u_count ));
+               return False;
         }
  
-       SAFE_FREE(entries);
-
         /*
-        * No outstanding POSIX locks. Get the pending close fd's
-        * from the tdb and close them all.
+        * The mapping was successful.
          */
  
-       count = get_posix_pending_close_entries(fsp, &fd_array);
+       DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
+                       (double)offset, (double)count ));
  
-       if (count) {
-               DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
+       *offset_out = offset;
+       *count_out = count;
+       
+       return True;
+}
  
-               for(i = 0; i < count; i++) {
-                       if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
-                               saved_errno = errno;
-                       }
-               }
+bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
+                      struct files_struct *fsp, int op, off_t offset,
+                      off_t count, int type)
+{
+       VFS_FIND(lock);
+       return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
+}
  
-               /*
-                * Delete all fd's stored in the tdb
-                * for this dev/inode pair.
-                */
+/****************************************************************************
+ Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
+ broken NFS implementations.
+****************************************************************************/
  
-               delete_close_entries(fsp);
-       }
+static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
+{
+       bool ret;
  
-       SAFE_FREE(fd_array);
+       DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
  
-       /*
-        * Finally close the fd associated with this fsp.
-        */
+       ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
  
-       ret = conn->vfs_ops.close(fsp,fsp->fd);
+       if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
  
-       if (saved_errno != 0) {
-        errno = saved_errno;
-               ret = -1;
-    } 
+               DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
+                                       (double)offset,(double)count));
+               DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
+               DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
  
-       fsp->fd = -1;
+               /*
+                * If the offset is > 0x7FFFFFFF then this will cause problems on
+                * 32 bit NFS mounted filesystems. Just ignore it.
+                */
  
+               if (offset & ~((off_t)0x7fffffff)) {
+                       DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
+                       return True;
+               }
+
+               if (count & ~((off_t)0x7fffffff)) {
+                       /* 32 bit NFS file system, retry with smaller count */
+                       DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
+                       errno = 0;
+                       count &= 0x7fffffff;
+                       ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
+               }
+       }
+
+       DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
         return ret;
  }
  
-/****************************************************************************
- Debugging aid :-).
-****************************************************************************/
-
-static const char *posix_lock_type_name(int lock_type)
+bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
+                         struct files_struct *fsp, off_t *poffset,
+                         off_t *pcount, int *ptype, pid_t *ppid)
  {
-       return (lock_type == F_RDLCK) ? "READ" : "WRITE";
+       VFS_FIND(getlock);
+       return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype, 
+                                      ppid);
  }
  
  /****************************************************************************
- Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
- then the POSIX fcntl lock fails.
+ Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
+ broken NFS implementations.
  ****************************************************************************/
  
-static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
+static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       struct posix_lock *locks;
-       size_t count;
+       pid_t pid;
+       bool ret;
  
-       dbuf.dptr = NULL;
-       
-       dbuf = tdb_fetch(posix_lock_tdb, kbuf);
+       DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
+               fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
  
-       if (!dbuf.dptr) {
-               DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
-               goto fail;
-       }
+       ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
  
-       count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
-       locks = (struct posix_lock *)dbuf.dptr;
+       if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
  
-       if (count == 1) {
-               tdb_delete(posix_lock_tdb, kbuf);
-       } else {
-               if (entry < count-1) {
-                       memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
-               }
-               dbuf.dsize -= sizeof(*locks);
-               tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
-       }
+               DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
+                                       (double)*poffset,(double)*pcount));
+               DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
+               DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
+
+               /*
+                * If the offset is > 0x7FFFFFFF then this will cause problems on
+                * 32 bit NFS mounted filesystems. Just ignore it.
+                */
  
-       SAFE_FREE(dbuf.dptr);
+               if (*poffset & ~((off_t)0x7fffffff)) {
+                       DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
+                       return True;
+               }
  
-       return True;
+               if (*pcount & ~((off_t)0x7fffffff)) {
+                       /* 32 bit NFS file system, retry with smaller count */
+                       DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
+                       errno = 0;
+                       *pcount &= 0x7fffffff;
+                       ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
+               }
+       }
  
- fail:
-    SAFE_FREE(dbuf.dptr);
-    return False;
+       DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
+       return ret;
  }
  
  /****************************************************************************
- Add an entry into the POSIX locking tdb. We return the index number of the
- added lock (used in case we need to delete *exactly* this entry). Returns
- False on fail, True on success.
+ POSIX function to see if a file region is locked. Returns True if the
+ region is locked, False otherwise.
  ****************************************************************************/
  
-static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
+bool is_posix_locked(files_struct *fsp,
+                       uint64_t *pu_offset,
+                       uint64_t *pu_count,
+                       enum brl_type *plock_type,
+                       enum brl_flavour lock_flav)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       struct posix_lock pl;
-       char *tp;
-
-       dbuf.dptr = NULL;
-
-       dbuf = tdb_fetch(posix_lock_tdb, kbuf);
+       off_t offset;
+       off_t count;
+       int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
  
-       *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
+       DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
+                 "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
+                 (double)*pu_count,  posix_lock_type_name(*plock_type)));
  
         /*
-        * Add new record.
+        * If the requested lock won't fit in the POSIX range, we will
+        * never set it, so presume it is not locked.
          */
  
-       pl.fd = fsp->fd;
-       pl.start = start;
-       pl.size = size;
-       pl.lock_type = lock_type;
-
-       tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
-       if (!tp) {
-               DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
-               goto fail;
-       } else
-               dbuf.dptr = tp;
-
-       memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
-       dbuf.dsize += sizeof(pl);
-
-       if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
-               DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
-               goto fail;
+       if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
+               return False;
         }
  
-    SAFE_FREE(dbuf.dptr);
-
-       DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
-                       fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
-                       (double)fsp->dev, (double)fsp->inode ));
+       if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
+               return False;
+       }
  
-    return True;
+       if (posix_lock_type == F_UNLCK) {
+               return False;
+       }
  
- fail:
-    SAFE_FREE(dbuf.dptr);
-    return False;
+       if (lock_flav == POSIX_LOCK) {
+               /* Only POSIX lock queries need to know the details. */
+               *pu_offset = (uint64_t)offset;
+               *pu_count = (uint64_t)count;
+               *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
+       }
+       return True;
  }
  
  /****************************************************************************
- Calculate if locks have any overlap at all.
+ Next - the functions that deal with in memory database storing representations
+ of either Windows CIFS locks or POSIX CIFS locks.
  ****************************************************************************/
  
-static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
+/* The key used in the in-memory POSIX databases. */
+
+struct lock_ref_count_key {
+       struct file_id id;
+       char r;
+}; 
+
+/*******************************************************************
+ Form a locking key for a dev/inode pair for the lock ref count, using the caller's buffer.
+******************************************************************/
+
+static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
+                                         struct lock_ref_count_key *tmp)
  {
-       if (start1 >= start2 && start1 <= start2 + size2)
-               return True;
+       ZERO_STRUCTP(tmp);
+       tmp->id = fsp->file_id;
+       tmp->r = 'r';
+       return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
+}
  
-       if (start1 < start2 && start1 + size1 > start2)
-               return True;
+/*******************************************************************
+ Convenience function to get an fd_array key from an fsp.
+******************************************************************/
  
-       return False;
+static TDB_DATA fd_array_key_fsp(files_struct *fsp)
+{
+       return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
  }
  
-/****************************************************************************
- Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
- deleted and the number of records that are overlapped by this one, or -1 on error.
-****************************************************************************/
+/*******************************************************************
+ Create the in-memory POSIX lock databases.
+********************************************************************/
  
-static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
+bool posix_locking_init(bool read_only)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       struct posix_lock *locks;
-       size_t i, count;
-       BOOL found = False;
-       int num_overlapping_records = 0;
+       if (posix_pending_close_db != NULL) {
+               return true;
+       }
  
-       dbuf.dptr = NULL;
-       
-       dbuf = tdb_fetch(posix_lock_tdb, kbuf);
+       posix_pending_close_db = db_open_rbt(NULL);
  
-       if (!dbuf.dptr) {
-               DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
-               goto fail;
+       if (posix_pending_close_db == NULL) {
+               DEBUG(0,("Failed to open POSIX pending close database.\n"));
+               return false;
         }
  
-       /* There are existing locks - find a match. */
-       locks = (struct posix_lock *)dbuf.dptr;
-       count = (size_t)(dbuf.dsize / sizeof(*locks));
+       return true;
+}
  
+/*******************************************************************
+ Delete the in-memory POSIX lock databases.
+********************************************************************/
+
+bool posix_locking_end(void)
+{
         /*
-        * Search for and delete the first record that matches the
-        * unlock criteria.
+        * Shouldn't we close all fd's here?
          */
+       TALLOC_FREE(posix_pending_close_db);
+       return true;
+}
  
-       for (i=0; i<count; i++) { 
-               struct posix_lock *entry = &locks[i];
+/****************************************************************************
+ Next - the functions that deal with storing fd's that have outstanding
+ POSIX locks when closed.
+****************************************************************************/
  
-               if (entry->fd == fsp->fd &&
-                       entry->start == start &&
-                       entry->size == size) {
+/****************************************************************************
+ The records in posix_pending_close_db are composed of an array of
+ ints keyed by dev/ino pair. Those ints are the fd's that were open on
+ this dev/ino pair that should have been closed, but can't as the lock
+ ref count is non zero.
+****************************************************************************/
  
-                       /* Make a copy if requested. */
-                       if (pl)
-                               *pl = *entry;
+/****************************************************************************
+ Keep a reference count of the number of Windows locks open on this dev/ino
+ pair. Creates entry if it doesn't exist.
+****************************************************************************/
  
-                       /* Found it - delete it. */
-                       if (count == 1) {
-                               tdb_delete(posix_lock_tdb, kbuf);
-                       } else {
-                               if (i < count-1) {
-                                       memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
-                               }
-                               dbuf.dsize -= sizeof(*locks);
-                               tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
-                       }
-                       count--;
-                       found = True;
-                       break;
-               }
-       }
+static void increment_windows_lock_ref_count(files_struct *fsp)
+{
+       struct lock_ref_count_key tmp;
+       int32_t lock_ref_count = 0;
+       NTSTATUS status;
  
-       if (!found)
-               goto fail;
+       status = dbwrap_change_int32_atomic(
+               posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
+               &lock_ref_count, 1);
  
-       /*
-        * Count the number of entries that are
-        * overlapped by this unlock request.
-        */
+       SMB_ASSERT(NT_STATUS_IS_OK(status));
+       SMB_ASSERT(lock_ref_count < INT32_MAX);
  
-       for (i = 0; i < count; i++) {
-               struct posix_lock *entry = &locks[i];
+       DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
+                 fsp_str_dbg(fsp), (int)lock_ref_count));
+}
  
-               if (fsp->fd == entry->fd &&
-                       does_lock_overlap( start, size, entry->start, entry->size))
-                               num_overlapping_records++;
-       }
+/****************************************************************************
+ Decrement the Windows lock ref count for this dev/ino pair by one.
+****************************************************************************/
  
-       DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
-                       posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
-                               (unsigned int)num_overlapping_records ));
+static void decrement_windows_lock_ref_count(files_struct *fsp)
+{
+       struct lock_ref_count_key tmp;
+       int32_t lock_ref_count = 0;
+       NTSTATUS status;
  
-    SAFE_FREE(dbuf.dptr);
+       status = dbwrap_change_int32_atomic(
+               posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
+               &lock_ref_count, -1);
  
-       return num_overlapping_records;
+       SMB_ASSERT(NT_STATUS_IS_OK(status));
+       SMB_ASSERT(lock_ref_count >= 0);
  
- fail:
-    SAFE_FREE(dbuf.dptr);
-    return -1;
+       DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
+                 fsp_str_dbg(fsp), (int)lock_ref_count));
  }
  
  /****************************************************************************
- Utility function to map a lock type correctly depending on the open
- mode of a file.
+ Fetch the lock ref count.
  ****************************************************************************/
  
-static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
+static int32_t get_windows_lock_ref_count(files_struct *fsp)
  {
-       if((lock_type == WRITE_LOCK) && !fsp->can_write) {
-               /*
-                * Many UNIX's cannot get a write lock on a file opened read-only.
-                * Win32 locking semantics allow this.
-                * Do the best we can and attempt a read-only lock.
-                */
-               DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
-               return F_RDLCK;
-       } else if((lock_type == READ_LOCK) && !fsp->can_read) {
-               /*
-                * Ditto for read locks on write only files.
-                */
-               DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
-               return F_WRLCK;
+       struct lock_ref_count_key tmp;
+       NTSTATUS status;
+       int32_t lock_ref_count = 0;
+
+       status = dbwrap_fetch_int32(
+               posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
+               &lock_ref_count);
+
+       if (!NT_STATUS_IS_OK(status) &&
+           !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
+               DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
+                         "lock ref count for file %s: %s\n",
+                         fsp_str_dbg(fsp), nt_errstr(status)));
         }
-
-  /*
-   * This return should be the most normal, as we attempt
-   * to always open files read/write.
-   */
-
-  return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
+       return lock_ref_count;
  }
  
  /****************************************************************************
- Check to see if the given unsigned lock range is within the possible POSIX
- range. Modifies the given args to be in range if possible, just returns
- False if not.
+ Delete a lock_ref_count entry.
  ****************************************************************************/
  
-static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
-                                                               SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
+static void delete_windows_lock_ref_count(files_struct *fsp)
  {
-       SMB_OFF_T offset = (SMB_OFF_T)u_offset;
-       SMB_OFF_T count = (SMB_OFF_T)u_count;
+       struct lock_ref_count_key tmp;
  
-       /*
-        * For the type of system we are, attempt to
-        * find the maximum positive lock offset as an SMB_OFF_T.
-        */
+       /* Not a bug if it doesn't exist - no locks were ever granted. */
  
-#if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
+       dbwrap_delete(posix_pending_close_db,
+                     locking_ref_count_key_fsp(fsp, &tmp));
  
-       /*
-        * In this case SMB_OFF_T is 64 bits,
-        * and the underlying system can handle 64 bit signed locks.
-        */
-
-    SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
-    SMB_OFF_T mask = (mask2<<1);
-    SMB_OFF_T max_positive_lock_offset = ~mask;
-
-#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
-
-       /*
-        * In this case either SMB_OFF_T is 32 bits,
-        * or the underlying system cannot handle 64 bit signed locks.
-        * All offsets & counts must be 2^31 or less.
-        */
-
-    SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
-
-#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
-
-       /*
-        * POSIX locks of length zero mean lock to end-of-file.
-        * Win32 locks of length zero are point probes. Ignore
-        * any Win32 locks of length zero. JRA.
-        */
-
-       if (count == (SMB_OFF_T)0) {
-               DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
-               return False;
-       }
-
-       /*
-        * If the given offset was > max_positive_lock_offset then we cannot map this at all
-        * ignore this lock.
-        */
-
-       if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
-               DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
-                               (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
-               return False;
-       }
+       DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
+                 fsp_str_dbg(fsp)));
+}
  
-       /*
-        * We must truncate the offset and count to less than max_positive_lock_offset.
-        */
+/****************************************************************************
+ Add an fd to the pending close tdb.
+****************************************************************************/
  
-       offset &= max_positive_lock_offset;
-       count &= max_positive_lock_offset;
+static void add_fd_to_close_entry(files_struct *fsp)
+{
+       struct db_record *rec;
+       int *fds;
+       size_t num_fds;
+       NTSTATUS status;
+       TDB_DATA value;
  
+       rec = dbwrap_fetch_locked(
+               posix_pending_close_db, talloc_tos(),
+               fd_array_key_fsp(fsp));
  
-       /*
-        * Deal with a very common case of count of all ones.
-        * (lock entire file).
-        */
+       SMB_ASSERT(rec != NULL);
  
-       if(count == (SMB_OFF_T)-1)
-               count = max_positive_lock_offset;
+       value = dbwrap_record_get_value(rec);
+       SMB_ASSERT((value.dsize % sizeof(int)) == 0);
  
-       /*
-        * Truncate count to end at max lock offset.
-        */
+       num_fds = value.dsize / sizeof(int);
+       fds = talloc_array(rec, int, num_fds+1);
  
-       if (offset + count < 0 || offset + count > max_positive_lock_offset)
-               count = max_positive_lock_offset - offset;
+       SMB_ASSERT(fds != NULL);
  
-       /*
-        * If we ate all the count, ignore this lock.
-        */
+       memcpy(fds, value.dptr, value.dsize);
+       fds[num_fds] = fsp->fh->fd;
  
-       if (count == 0) {
-               DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
-                               (double)u_offset, (double)u_count ));
-               return False;
-       }
+       status = dbwrap_record_store(
+               rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
  
-       /*
-        * The mapping was successful.
-        */
+       SMB_ASSERT(NT_STATUS_IS_OK(status));
  
-       DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
-                       (double)offset, (double)count ));
+       TALLOC_FREE(rec);
  
-       *offset_out = offset;
-       *count_out = count;
-       
-       return True;
+       DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
+                 fsp->fh->fd, fsp_str_dbg(fsp)));
  }
  
  /****************************************************************************
- Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
- broken NFS implementations.
+ Remove all fd entries for a specific dev/inode pair from the tdb.
  ****************************************************************************/
  
-static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
+static void delete_close_entries(files_struct *fsp)
  {
-       int ret;
-       struct connection_struct *conn = fsp->conn;
-
-       DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
+       struct db_record *rec;
  
-       ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
+       rec = dbwrap_fetch_locked(
+               posix_pending_close_db, talloc_tos(),
+               fd_array_key_fsp(fsp));
  
-       if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
+       SMB_ASSERT(rec != NULL);
+       dbwrap_record_delete(rec);
+       TALLOC_FREE(rec);
+}
  
-               DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
-                                       (double)offset,(double)count));
-               DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
-               DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
+/****************************************************************************
+ Get the array of POSIX pending close records for an open fsp. Returns number
+ of entries.
+****************************************************************************/
  
-               /*
-                * If the offset is > 0x7FFFFFFF then this will cause problems on
-                * 32 bit NFS mounted filesystems. Just ignore it.
-                */
+static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
+                                             files_struct *fsp, int **entries)
+{
+       TDB_DATA dbuf;
+       NTSTATUS status;
  
-               if (offset & ~((SMB_OFF_T)0x7fffffff)) {
-                       DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
-                       return True;
-               }
+       status = dbwrap_fetch(
+               posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
+               &dbuf);
  
-               if (count & ~((SMB_OFF_T)0x7fffffff)) {
-                       /* 32 bit NFS file system, retry with smaller offset */
-                       DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
-                       errno = 0;
-                       count &= 0x7fffffff;
-                       ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
-               }
+       if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
+               *entries = NULL;
+               return 0;
         }
  
-       DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
+       SMB_ASSERT(NT_STATUS_IS_OK(status));
  
-       return ret;
+       if (dbuf.dsize == 0) {
+               *entries = NULL;
+               return 0;
+       }
+
+       *entries = (int *)dbuf.dptr;
+       return (size_t)(dbuf.dsize / sizeof(int));
  }
  
  /****************************************************************************
- POSIX function to see if a file region is locked. Returns True if the
- region is locked, False otherwise.
+ Deal with pending closes needed by POSIX locking support.
+ Note that posix_locking_close_file() is expected to have been called
+ to delete all locks on this fsp before this function is called.
  ****************************************************************************/
  
-BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
+int fd_close_posix(struct files_struct *fsp)
  {
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-       int posix_lock_type = map_posix_lock_type(fsp,lock_type);
+       int saved_errno = 0;
+       int ret;
+       int *fd_array = NULL;
+       size_t count, i;
+
+       if (!lp_locking(fsp->conn->params) ||
+           !lp_posix_locking(fsp->conn->params))
+       {
+               /*
+                * No locking or POSIX to worry about or we want POSIX semantics
+                * which will lose all locks on all fd's open on this dev/inode,
+                * just close.
+                */
+               return close(fsp->fh->fd);
+       }
+
+       if (get_windows_lock_ref_count(fsp)) {
  
-       DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
-                       fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
+               /*
+                * There are outstanding locks on this dev/inode pair on
+                * other fds. Add our fd to the pending close tdb and set
+                * fsp->fh->fd to -1.
+                */
+
+               add_fd_to_close_entry(fsp);
+               return 0;
+       }
  
         /*
-        * If the requested lock won't fit in the POSIX range, we will
-        * never set it, so presume it is not locked.
+        * No outstanding locks. Get the pending close fd's
+        * from the tdb and close them all.
          */
  
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
-               return False;
+       count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
+
+       if (count) {
+               DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
+                         (unsigned int)count));
+
+               for(i = 0; i < count; i++) {
+                       if (close(fd_array[i]) == -1) {
+                               saved_errno = errno;
+                       }
+               }
+
+               /*
+                * Delete all fd's stored in the tdb
+                * for this dev/inode pair.
+                */
+
+               delete_close_entries(fsp);
+       }
+
+       TALLOC_FREE(fd_array);
+
+       /* Don't need a lock ref count on this dev/ino anymore. */
+       delete_windows_lock_ref_count(fsp);
  
         /*
-        * Note that most UNIX's can *test* for a write lock on
-        * a read-only fd, just not *set* a write lock on a read-only
-        * fd. So we don't need to use map_lock_type here.
-        */ 
+        * Finally close the fd associated with this fsp.
+        */
  
-       return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
+       ret = close(fsp->fh->fd);
+
+       if (ret == 0 && saved_errno != 0) {
+               errno = saved_errno;
+               ret = -1;
+       }
+
+       return ret;
  }
  
+/****************************************************************************
+ Next - the functions that deal with the mapping CIFS Windows locks onto
+ the underlying system POSIX locks.
+****************************************************************************/
+
  /*
   * Structure used when splitting a lock range
   * into a POSIX lock range. Doubly linked list.
   */
  
  struct lock_list {
-    struct lock_list *next;
-    struct lock_list *prev;
-    SMB_OFF_T start;
-    SMB_OFF_T size;
+       struct lock_list *next;
+       struct lock_list *prev;
+       off_t start;
+       off_t size;
  };
  
  /****************************************************************************
@@ -734,22 +672,14 @@ struct lock_list {
   understand it :-).
  ****************************************************************************/
  
-static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
+static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
+                                               struct lock_list *lhead,
+                                               const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
+                                               files_struct *fsp,
+                                               const struct lock_struct *plocks,
+                                               int num_locks)
  {
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-       TDB_DATA dbuf;
-       struct posix_lock *locks;
-       size_t num_locks, i;
-
-       dbuf.dptr = NULL;
-
-       dbuf = tdb_fetch(posix_lock_tdb, kbuf);
-
-       if (!dbuf.dptr)
-               return lhead;
-       
-       locks = (struct posix_lock *)dbuf.dptr;
-       num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
+       int i;
  
         /*
          * Check the current lock list on this dev/inode pair.
@@ -760,10 +690,19 @@ static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhea
                 (double)lhead->start, (double)lhead->size ));
  
         for (i=0; i<num_locks && lhead; i++) {
-
-               struct posix_lock *lock = &locks[i];
+               const struct lock_struct *lock = &plocks[i];
                 struct lock_list *l_curr;
  
+               /* Ignore all but read/write locks. */
+               if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
+                       continue;
+               }
+
+               /* Ignore locks not owned by this process. */
+               if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
+                       continue;
+               }
+
                 /*
                  * Walk the lock list, checking for overlaps. Note that
                  * the lock list can expand within this loop if the current
@@ -772,13 +711,14 @@ static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhea
  
                 for (l_curr = lhead; l_curr;) {
  
-                       DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
+                       DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
+                               (unsigned long long)lock->fnum,
                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
  
                         if ( (l_curr->start >= (lock->start + lock->size)) ||
                                  (lock->start >= (l_curr->start + l_curr->size))) {
  
-                               /* No overlap with this lock - leave this range alone. */
+                               /* No overlap with existing lock - leave this range alone. */
  /*********************************************
                                               +---------+
                                               | l_curr  |
@@ -792,7 +732,7 @@ OR....
               +---------+
  **********************************************/
  
-                               DEBUG(10,("no overlap case.\n" ));
+                               DEBUG(10,(" no overlap case.\n" ));
  
                                 l_curr = l_curr->next;
  
@@ -800,8 +740,8 @@ OR....
                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
  
                                 /*
-                                * This unlock is completely overlapped by this existing lock range
-                                * and thus should have no effect (not be unlocked). Delete it from the list.
+                                * This range is completely overlapped by this existing lock range
+                                * and thus should have no effect. Delete it from the list.
                                  */
  /*********************************************
                  +---------+
@@ -814,11 +754,12 @@ OR....
                                 /* Save the next pointer */
                                 struct lock_list *ul_next = l_curr->next;
  
-                               DEBUG(10,("delete case.\n" ));
+                               DEBUG(10,(" delete case.\n" ));
  
                                 DLIST_REMOVE(lhead, l_curr);
-                               if(lhead == NULL)
+                               if(lhead == NULL) {
                                         break; /* No more list... */
+                               }
  
                                 l_curr = ul_next;
                                 
@@ -827,7 +768,7 @@ OR....
                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
  
                                 /*
-                                * This unlock overlaps the existing lock range at the high end.
+                                * This range overlaps the existing lock range at the high end.
                                  * Truncate by moving start to existing range end and reducing size.
                                  */
  /*********************************************
@@ -846,7 +787,7 @@ BECOMES....
                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
                                 l_curr->start = lock->start + lock->size;
  
-                               DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
+                               DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
                                                                 (double)l_curr->start, (double)l_curr->size ));
  
                                 l_curr = l_curr->next;
@@ -856,7 +797,7 @@ BECOMES....
                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
  
                                 /*
-                                * This unlock overlaps the existing lock range at the low end.
+                                * This range overlaps the existing lock range at the low end.
                                  * Truncate by reducing size.
                                  */
  /*********************************************
@@ -874,7 +815,7 @@ BECOMES....
  
                                 l_curr->size = lock->start - l_curr->start;
  
-                               DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
+                               DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
                                                                 (double)l_curr->start, (double)l_curr->size ));
  
                                 l_curr = l_curr->next;
@@ -882,10 +823,10 @@ BECOMES....
                         } else if ( (l_curr->start < lock->start) &&
                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
                                 /*
-                                * Worst case scenario. Unlock request completely overlaps an existing
+                                * Worst case scenario. Range completely overlaps an existing
                                  * lock range. Split the request into two, push the new (upper) request
-                                * into the dlink list, and continue with the entry after ul_new (as we
-                                * know that ul_new will not overlap with this lock).
+                                * into the dlink list, and continue with the entry after l_new (as we
+                                * know that l_new will not overlap with this lock).
                                  */
  /*********************************************
          +---------------------------+
@@ -899,8 +840,7 @@ BECOMES.....
          | l_curr|         | l_new   |
          +-------+         +---------+
  **********************************************/
-                               struct lock_list *l_new = (struct lock_list *)talloc(ctx,
-                                                                                                       sizeof(struct lock_list));
+                               struct lock_list *l_new = talloc(ctx, struct lock_list);
  
                                 if(l_new == NULL) {
                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
@@ -914,18 +854,14 @@ BECOMES.....
                                 /* Truncate the l_curr. */
                                 l_curr->size = lock->start - l_curr->start;
  
-                               DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
+                               DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
  new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
                                                                 (double)l_new->start, (double)l_new->size ));
  
                                 /*
                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
-                                * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
                                  */
-
-                               l_new->prev = l_curr;
-                               l_new->next = l_curr->next;
-                               l_curr->next = l_new;
+                               DLIST_ADD_AFTER(lhead, l_new, l_curr);
  
                                 /* And move after the link we added. */
                                 l_curr = l_new->next;
@@ -936,18 +872,18 @@ new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
                                  * This logic case should never happen. Ensure this is the
                                  * case by forcing an abort.... Remove in production.
                                  */
-                               pstring msg;
+                               char *msg = NULL;
  
-                               slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
-lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
-
-                               smb_panic(msg);
+                               if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
+lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
+                                       smb_panic(msg);
+                               } else {
+                                       smb_panic("posix_lock_list");
+                               }
                         }
                 } /* end for ( l_curr = lhead; l_curr;) */
         } /* end for (i=0; i<num_locks && ul_head; i++) */
  
-       SAFE_FREE(dbuf.dptr);
-       
         return lhead;
  }
  
@@ -956,28 +892,38 @@ lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size,
   lock could be granted, False if not.
  ****************************************************************************/
  
-BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
+bool set_posix_lock_windows_flavour(files_struct *fsp,
+                       uint64_t u_offset,
+                       uint64_t u_count,
+                       enum brl_type lock_type,
+                       const struct lock_context *lock_ctx,
+                       const struct lock_struct *plocks,
+                       int num_locks,
+                       int *errno_ret)
  {
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-       BOOL ret = True;
-       size_t entry_num = 0;
+       off_t offset;
+       off_t count;
+       int posix_lock_type = map_posix_lock_type(fsp,lock_type);
+       bool ret = True;
         size_t lock_count;
         TALLOC_CTX *l_ctx = NULL;
         struct lock_list *llist = NULL;
         struct lock_list *ll = NULL;
-       int posix_lock_type = map_posix_lock_type(fsp,lock_type);
  
-       DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
-                       fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
+       DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
+                "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
+                (double)u_offset, (double)u_count,
+                posix_lock_type_name(lock_type)));
  
         /*
          * If the requested lock won't fit in the POSIX range, we will
          * pretend it was successful.
          */
  
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
+       if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+               increment_windows_lock_ref_count(fsp);
                 return True;
+       }
  
         /*
          * Windows is very strange. It allows read locks to be overlayed
@@ -994,21 +940,18 @@ BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_cou
          *                                            READ LOCK: start =0, len = 10 - OK
          *
          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
-        * would leave a single read lock over the 0-14 region. In order to
-        * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
-        * entries, one for each overlayed lock request. We are guarenteed by the brlock
-        * semantics that if a write lock is added, then it will be first in the array.
+        * would leave a single read lock over the 0-14 region.
          */
         
-       if ((l_ctx = talloc_init()) == NULL) {
-               DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
-               return True; /* Not a fatal error. */
+       if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
+               DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
+               return False;
         }
  
-       if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
-               DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
+       if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
+               DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                 talloc_destroy(l_ctx);
-               return True; /* Not a fatal error. */
+               return False;
         }
  
         /*
@@ -1030,19 +973,12 @@ BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_cou
          * POSIX locks.
          */
  
-       llist = posix_lock_list(l_ctx, llist, fsp);
-
-       /*
-        * Now we have the list of ranges to lock it is safe to add the
-        * entry into the POSIX lock tdb. We take note of the entry we
-        * added here in case we have to remove it on POSIX lock fail.
-        */
-
-       if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
-               DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
-               talloc_destroy(l_ctx);
-               return False;
-       }
+       llist = posix_lock_list(l_ctx,
+                               llist,
+                               lock_ctx, /* Lock context llist belongs to. */
+                               fsp,
+                               plocks,
+                               num_locks);
  
         /*
          * Add the POSIX locks on the list of ranges returned.
@@ -1054,11 +990,12 @@ BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_cou
                 offset = ll->start;
                 count = ll->size;
  
-               DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
+               DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
  
-               if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
-                       DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
+               if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
+                       *errno_ret = errno;
+                       DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
                         ret = False;
                         break;
@@ -1075,17 +1012,14 @@ BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_cou
                         offset = ll->start;
                         count = ll->size;
  
-                       DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
+                       DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
  
-                       posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
+                       posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
                 }
-
-               /*
-                * Remove the tdb entry for this lock.
-                */
-
-               delete_posix_lock_entry_by_index(fsp,entry_num);
+       } else {
+               /* Remember the number of Windows locks we have on this dev/ino pair. */
+               increment_windows_lock_ref_count(fsp);
         }
  
         talloc_destroy(l_ctx);
@@ -1097,64 +1031,46 @@ BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_cou
   lock could be released, False if not.
  ****************************************************************************/
  
-BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
+bool release_posix_lock_windows_flavour(files_struct *fsp,
+                               uint64_t u_offset,
+                               uint64_t u_count,
+                               enum brl_type deleted_lock_type,
+                               const struct lock_context *lock_ctx,
+                               const struct lock_struct *plocks,
+                               int num_locks)
  {
-       SMB_OFF_T offset;
-       SMB_OFF_T count;
-       BOOL ret = True;
+       off_t offset;
+       off_t count;
+       bool ret = True;
         TALLOC_CTX *ul_ctx = NULL;
         struct lock_list *ulist = NULL;
         struct lock_list *ul = NULL;
-       struct posix_lock deleted_lock;
-       int num_overlapped_entries;
  
-       DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
-               fsp->fsp_name, (double)u_offset, (double)u_count ));
+       DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
+                "count = %.0f\n", fsp_str_dbg(fsp),
+                (double)u_offset, (double)u_count));
+
+       /* Remember the number of Windows locks we have on this dev/ino pair. */
+       decrement_windows_lock_ref_count(fsp);
  
         /*
          * If the requested lock won't fit in the POSIX range, we will
          * pretend it was successful.
          */
  
-       if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
+       if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                 return True;
-
-       /*
-        * We treat this as one unlock request for POSIX accounting purposes even
-        * if it may later be split into multiple smaller POSIX unlock ranges.
-        * num_overlapped_entries is the number of existing locks that have any
-        * overlap with this unlock request.
-        */ 
-
-       num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
-
-       if (num_overlapped_entries == -1) {
-        smb_panic("release_posix_lock: unable find entry to delete !\n");
-       }
-
-       /*
-        * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
-        * a POSIX write lock, then before doing the unlock we need to downgrade
-        * the POSIX lock to a read lock. This allows any overlapping read locks
-        * to be atomically maintained.
-        */
-
-       if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
-               if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
-                       DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
-                       return False;
-               }
         }
  
-       if ((ul_ctx = talloc_init()) == NULL) {
-               DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
-               return True; /* Not a fatal error. */
+       if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
+               DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
+               return False;
         }
  
-       if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
-               DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
+       if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
+               DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                 talloc_destroy(ul_ctx);
-               return True; /* Not a fatal error. */
+               return False;
         }
  
         /*
@@ -1177,7 +1093,33 @@ BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u
          * unlocks are performed.
          */
  
-       ulist = posix_lock_list(ul_ctx, ulist, fsp);
+       ulist = posix_lock_list(ul_ctx,
+                               ulist,
+                               lock_ctx, /* Lock context ulist belongs to. */
+                               fsp,
+                               plocks,
+                               num_locks);
+
+       /*
+        * If there were any overlapped entries (list is > 1 or size or start have changed),
+        * and the lock_type we just deleted from
+        * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
+        * the POSIX lock to a read lock. This allows any overlapping read locks
+        * to be atomically maintained.
+        */
+
+       if (deleted_lock_type == WRITE_LOCK &&
+                       (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
+
+               DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
+                       (double)offset, (double)count ));
+
+               if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
+                       DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
+                       talloc_destroy(ul_ctx);
+                       return False;
+               }
+       }
  
         /*
          * Release the POSIX locks on the list of ranges returned.
@@ -1187,129 +1129,151 @@ BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u
                 offset = ulist->start;
                 count = ulist->size;
  
-               DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
+               DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
                         (double)offset, (double)count ));
  
-               if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
+               if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
                         ret = False;
+               }
         }
  
         talloc_destroy(ul_ctx);
-
         return ret;
  }
  
  /****************************************************************************
- Remove all lock entries for a specific dev/inode pair from the tdb.
+ Next - the functions that deal with mapping CIFS POSIX locks onto
+ the underlying system POSIX locks.
  ****************************************************************************/
  
-static void delete_posix_lock_entries(files_struct *fsp)
-{
-       TDB_DATA kbuf = locking_key_fsp(fsp);
-
-       if (tdb_delete(posix_lock_tdb, kbuf) == -1)
-               DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
-}
-
  /****************************************************************************
- Debug function.
+ POSIX function to acquire a lock. Returns True if the
+ lock could be granted, False if not.
+ As POSIX locks don't stack or conflict (they just overwrite)
+ we can map the requested lock directly onto a system one. We
+ know it doesn't conflict with locks on other contexts as the
+ upper layer would have refused it.
  ****************************************************************************/
  
-static void dump_entry(struct posix_lock *pl)
+bool set_posix_lock_posix_flavour(files_struct *fsp,
+                       uint64_t u_offset,
+                       uint64_t u_count,
+                       enum brl_type lock_type,
+                       int *errno_ret)
  {
-       DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
-               (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
+       off_t offset;
+       off_t count;
+       int posix_lock_type = map_posix_lock_type(fsp,lock_type);
+
+       DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
+                "= %.0f, type = %s\n", fsp_str_dbg(fsp),
+                (double)u_offset, (double)u_count,
+                posix_lock_type_name(posix_lock_type)));
+
+       /*
+        * If the requested lock won't fit in the POSIX range, we will
+        * pretend it was successful.
+        */
+
+       if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+               return True;
+       }
+
+       if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
+               *errno_ret = errno;
+               DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
+                       posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
+               return False;
+       }
+       return True;
  }
  
  /****************************************************************************
- Remove any locks on this fd. Called from file_close().
+ POSIX function to release a lock. Returns True if the
+ lock could be released, False if not.
+ We are given a complete lock state from the upper layer which is what the lock
+ state should be after the unlock has already been done, so what
+ we do is punch out holes in the unlock range where locks owned by this process
+ have a different lock context.
  ****************************************************************************/
  
-void posix_locking_close_file(files_struct *fsp)
+bool release_posix_lock_posix_flavour(files_struct *fsp,
+                               uint64_t u_offset,
+                               uint64_t u_count,
+                               const struct lock_context *lock_ctx,
+                               const struct lock_struct *plocks,
+                               int num_locks)
  {
-       struct posix_lock *entries = NULL;
-       size_t count, i;
+       bool ret = True;
+       off_t offset;
+       off_t count;
+       TALLOC_CTX *ul_ctx = NULL;
+       struct lock_list *ulist = NULL;
+       struct lock_list *ul = NULL;
+
+       DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
+                "count = %.0f\n", fsp_str_dbg(fsp),
+                (double)u_offset, (double)u_count));
  
         /*
-        * Optimization for the common case where we are the only
-        * opener of a file. If all fd entries are our own, we don't
-        * need to explicitly release all the locks via the POSIX functions,
-        * we can just remove all the entries in the tdb and allow the
-        * close to remove the real locks.
+        * If the requested lock won't fit in the POSIX range, we will
+        * pretend it was successful.
          */
  
-       count = get_posix_lock_entries(fsp, &entries);
-
-       if (count == 0) {
-               DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
-               return;
+       if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+               return True;
         }
  
-       for (i = 0; i < count; i++) {
-               if (entries[i].fd != fsp->fd )
-                       break;
-
-               dump_entry(&entries[i]);
+       if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
+               DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
+               return False;
         }
  
-       if (i == count) {
-               /* All locks are ours. */
-               DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
-                       fsp->fsp_name, (unsigned int)count ));
-               SAFE_FREE(entries);
-               delete_posix_lock_entries(fsp);
-               return;
+       if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
+               DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
+               talloc_destroy(ul_ctx);
+               return False;
         }
  
         /*
-        * Difficult case. We need to delete all our locks, whilst leaving
-        * all other POSIX locks in place.
+        * Create the initial list entry containing the
+        * lock we want to remove.
          */
  
-       for (i = 0; i < count; i++) {
-               struct posix_lock *pl = &entries[i];
-               if (pl->fd == fsp->fd)
-                       release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
-       }
-       SAFE_FREE(entries);
-}
+       ZERO_STRUCTP(ul);
+       ul->start = offset;
+       ul->size = count;
  
-/*******************************************************************
- Create the in-memory POSIX lock databases.
-********************************************************************/
+       DLIST_ADD(ulist, ul);
  
-BOOL posix_locking_init(int read_only)
-{
-       if (posix_lock_tdb && posix_pending_close_tdb)
-               return True;
-       
-       if (!posix_lock_tdb)
-               posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
-                                         read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
-       if (!posix_lock_tdb) {
-               DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
-               return False;
-       }
-       if (!posix_pending_close_tdb)
-               posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
-                                                  read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
-       if (!posix_pending_close_tdb) {
-               DEBUG(0,("Failed to open POSIX pending close database.\n"));
-               return False;
-       }
+       /*
+        * Walk the given array creating a linked list
+        * of unlock requests.
+        */
  
-       return True;
-}
+       ulist = posix_lock_list(ul_ctx,
+                               ulist,
+                               lock_ctx, /* Lock context ulist belongs to. */
+                               fsp,
+                               plocks,
+                               num_locks);
  
-/*******************************************************************
- Delete the in-memory POSIX lock databases.
-********************************************************************/
+       /*
+        * Release the POSIX locks on the list of ranges returned.
+        */
  
-BOOL posix_locking_end(void)
-{
-    if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
-               return False;
-    if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
-               return False;
-       return True;
+       for(; ulist; ulist = ulist->next) {
+               offset = ulist->start;
+               count = ulist->size;
+
+               DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
+                       (double)offset, (double)count ));
+
+               if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
+                       ret = False;
+               }
+       }
+
+       talloc_destroy(ul_ctx);
+       return ret;
  }