r23755: Fix Coverity id 384
diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 7e22b8ac7aa04ebbb99f07eb9a3592b08cb283b9..9b8dcfd15e706e2ba54b42105bfb0ced1a17cca1 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -1,6 +1,5 @@
 /* 
-   Unix SMB/Netbios implementation.
-   Version 3.0
+   Unix SMB/CIFS implementation.
    byte range locking code
    Updated to handle range splits/merges.
 
 
 #include "includes.h"
 
-extern int DEBUGLEVEL;
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
 
-/* This contains elements that differentiate locks. The smbpid is a
-   client supplied pid, and is essentially the locking context for
-   this client */
-
-struct lock_context {
-       uint16 smbpid;
-       uint16 tid;
-       pid_t pid;
-};
-
-/* The data in brlock records is an unsorted linear array of these
-   records.  It is unnecessary to store the count as tdb provides the
-   size of the record */
-
-struct lock_struct {
-       struct lock_context context;
-       br_off start;
-       br_off size;
-       int fnum;
-       enum brl_type lock_type;
-};
-
-/* The key used in the brlock database. */
-
-struct lock_key {
-       SMB_DEV_T device;
-       SMB_INO_T inode;
-};
+#define ZERO_ZERO 0
 
 /* The open brlock.tdb database. */
 
-static TDB_CONTEXT *tdb;
+static struct db_context *brlock_db;
 
 /****************************************************************************
-Create a locking key - ensuring zero filled for pad purposes.
+ Debug info at level 10 for lock struct.
 ****************************************************************************/
 
-static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
+static void print_lock_struct(unsigned int i, struct lock_struct *pls)
 {
-        static struct lock_key key;
-        TDB_DATA kbuf;
-
-        memset(&key, '\0', sizeof(key));
-        key.device = dev;
-        key.inode = inode;
-        kbuf.dptr = (char *)&key;
-        kbuf.dsize = sizeof(key);
-        return kbuf;
+       DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %u, ",
+                       i,
+                       (unsigned int)pls->context.smbpid,
+                       (unsigned int)pls->context.tid,
+                       (unsigned int)procid_to_pid(&pls->context.pid) ));
+       
+       DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
+               (double)pls->start,
+               (double)pls->size,
+               pls->fnum,
+               lock_type_name(pls->lock_type),
+               lock_flav_name(pls->lock_flav) ));
 }
 
 /****************************************************************************
  See if two locking contexts are equal.
 ****************************************************************************/
 
-static BOOL brl_same_context(struct lock_context *ctx1, 
-                            struct lock_context *ctx2)
+BOOL brl_same_context(const struct lock_context *ctx1, 
+                            const struct lock_context *ctx2)
 {
-       return (ctx1->pid == ctx2->pid) &&
+       return (procid_equal(&ctx1->pid, &ctx2->pid) &&
                (ctx1->smbpid == ctx2->smbpid) &&
-               (ctx1->tid == ctx2->tid);
+               (ctx1->tid == ctx2->tid));
+}
+
+/****************************************************************************
+ See if lck1 and lck2 overlap.
+****************************************************************************/
+
+static BOOL brl_overlap(const struct lock_struct *lck1,
+                        const struct lock_struct *lck2)
+{
+       /* this extra check is not redundant - it copes with locks
+          that go beyond the end of 64 bit file space */
+       if (lck1->size != 0 &&
+           lck1->start == lck2->start &&
+           lck1->size == lck2->size) {
+               return True;
+       }
+
+       if (lck1->start >= (lck2->start+lck2->size) ||
+           lck2->start >= (lck1->start+lck1->size)) {
+               return False;
+       }
+       return True;
 }
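
The wrap-around case that comment describes is worth seeing concretely. A
standalone sketch (not part of the patch; br_off is modelled as a plain
uint64_t) showing why the equality test must short-circuit the generic
comparison:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t start = 0xFFFFFFFFFFFFFFF0ULL; /* near the top of 64-bit space */
        uint64_t size  = 0x20;                  /* start + size wraps to 0x10   */

        /* The generic non-overlap test applied to two *identical* locks: */
        int no_overlap = (start >= start + size);

        /* Prints 1: the wrapped end makes the lock look disjoint from
           itself, so without the start/size equality check two equal
           locks would be reported as non-overlapping. */
        printf("no_overlap = %d (end wrapped to 0x%llx)\n",
               no_overlap, (unsigned long long)(start + size));
        return 0;
}
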
 
 /****************************************************************************
  See if lock2 can be added when lock1 is in place.
 ****************************************************************************/
 
-static BOOL brl_conflict(struct lock_struct *lck1, 
-                        struct lock_struct *lck2)
+static BOOL brl_conflict(const struct lock_struct *lck1, 
+                        const struct lock_struct *lck2)
 {
-       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
+       /* Ignore PENDING locks. */
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
+               return False;
+
+       /* Read locks never conflict. */
+       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
+               return False;
+       }
+
+       if (brl_same_context(&lck1->context, &lck2->context) &&
+           lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
+               return False;
+       }
+
+       return brl_overlap(lck1, lck2);
+} 
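
The rules above condense to a small matrix (a reading aid, not part of the
patch; "own" means same locking context and same fnum):

   existing \ incoming |  READ                  |  WRITE
   --------------------+------------------------+-----------------------
   READ                |  never conflicts       |  conflicts on overlap
   WRITE               |  conflicts on overlap, |  conflicts on overlap
                       |  unless own lock       |
   PENDING_*           |  never conflicts       |  never conflicts

Note the asymmetry: an incoming WRITE conflicts with an existing READ even
from the same context and fnum, which matches the LOCKTEST7 comment further
down.
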
+
+/****************************************************************************
+ See if lock2 can be added when lock1 is in place - when both locks are POSIX
+ flavour. POSIX locks ignore fnum - they only care about dev/ino which we
+ know already match.
+****************************************************************************/
+
+static BOOL brl_conflict_posix(const struct lock_struct *lck1, 
+                               const struct lock_struct *lck2)
+{
+#if defined(DEVELOPER)
+       SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
+       SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
+#endif
+
+       /* Ignore PENDING locks. */
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
+               return False;
+
+       /* Read locks never conflict. */
+       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
+               return False;
+       }
+
+       /* Locks on the same context can't conflict. Ignore fnum. */
+       if (brl_same_context(&lck1->context, &lck2->context)) {
+               return False;
+       }
+
+       /* One is a read, the other a write, or the contexts differ -
+          do they overlap ? */
+       return brl_overlap(lck1, lck2);
+} 
+
+#if ZERO_ZERO
+static BOOL brl_conflict1(const struct lock_struct *lck1, 
+                        const struct lock_struct *lck2)
+{
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
+               return False;
+
+       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
+       }
 
        if (brl_same_context(&lck1->context, &lck2->context) &&
-           lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) return False;
+           lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
+               return False;
+       }
+
+       if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
+               return True;
+       }
 
        if (lck1->start >= (lck2->start + lck2->size) ||
-           lck2->start >= (lck1->start + lck1->size)) return False;
+           lck2->start >= (lck1->start + lck1->size)) {
+               return False;
+       }
            
        return True;
 } 
-
+#endif
 
 /****************************************************************************
-delete a record if it is for a dead process
+ Check to see if this lock conflicts, but ignore our own locks on the
+ same fnum only. This is the read/write lock check code path.
+ This is never used in the POSIX lock case.
 ****************************************************************************/
-static int delete_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
+
+static BOOL brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
 {
-       struct lock_struct *locks;
-       struct lock_key *key;
-       int count, i;
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
+               return False;
 
-       tdb_lockchain(tdb, kbuf);
+       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
+               return False;
 
-       locks = (struct lock_struct *)dbuf.dptr;
-       key = (struct lock_key *)kbuf.dptr;
+       /* POSIX flavour locks never conflict here - this is only called
+          in the read/write path. */
 
-       count = dbuf.dsize / sizeof(*locks);
-       for (i=0; i<count; i++) {
-               struct lock_struct *lock = &locks[i];
+       if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
+               return False;
+
+       /*
+        * Incoming WRITE locks conflict with existing READ locks even
+        * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
+        */
+
+       if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
+               if (brl_same_context(&lck1->context, &lck2->context) &&
+                                       lck1->fnum == lck2->fnum)
+                       return False;
+       }
+
+       return brl_overlap(lck1, lck2);
+} 
+
+/****************************************************************************
+ Check if an unlock overlaps a pending lock.
+****************************************************************************/
+
+static BOOL brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
+{
+       if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
+               return True;
+       if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
+               return True;
+       return False;
+}
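
Two boundary cases make the asymmetric comparisons above concrete
(illustrative numbers, not from the patch; ranges are [start, start+size)):

   unlock start=0,  size=10 vs pending start=10, size=5:
       (0 <= 10 && 0+10 > 10) false; (0 >= 10) false        -> False
   unlock start=15, size=5  vs pending start=10, size=5:
       (15 >= 10 && 15 <= 10+5) true                        -> True

The second test's <= means an unlock that merely touches the end of a
pending lock still counts as an overlap, so that waiter is (harmlessly)
woken and re-evaluates its lock.
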
 
-               if (process_exists(lock->context.pid)) continue;
+/****************************************************************************
+ Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
+ is the same as this one and changes its error code. I wonder if any
+ app depends on this ?
+****************************************************************************/
 
-               if (count > 1 && i < count-1) {
-                       memmove(&locks[i], &locks[i+1], 
-                               sizeof(*locks)*((count-1) - i));
+static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, BOOL blocking_lock)
+{
+       if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
+               /* amazing the little things you learn with a test
+                  suite. Locks beyond this offset (as a 64 bit
+                  number!) always generate the conflict error code,
+                  unless the top bit is set */
+               if (!blocking_lock) {
+                       fsp->last_lock_failure = *lock;
                }
-               count--;
-               i--;
+               return NT_STATUS_FILE_LOCK_CONFLICT;
        }
 
-       if (count == 0) {
-               tdb_delete(tdb, kbuf);
-       } else if (count < (dbuf.dsize / sizeof(*locks))) {
-               dbuf.dsize = count * sizeof(*locks);
-               tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
+       if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
+                       lock->context.tid == fsp->last_lock_failure.context.tid &&
+                       lock->fnum == fsp->last_lock_failure.fnum &&
+                       lock->start == fsp->last_lock_failure.start) {
+               return NT_STATUS_FILE_LOCK_CONFLICT;
        }
 
-       tdb_unlockchain(tdb, kbuf);
-       return 0;
+       if (!blocking_lock) {
+               fsp->last_lock_failure = *lock;
+       }
+       return NT_STATUS_LOCK_NOT_GRANTED;
 }
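
The behaviour is easiest to see as a sequence (hypothetical offsets, with
blocking_lock False throughout):

   1st conflicting lock, start=100          -> NT_STATUS_LOCK_NOT_GRANTED
                                               (saved in fsp->last_lock_failure)
   retry, same pid/tid/fnum and start=100   -> NT_STATUS_FILE_LOCK_CONFLICT
   conflicting lock, start >= 0xEF000000,
   top bit clear                            -> NT_STATUS_FILE_LOCK_CONFLICT,
                                               always
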
 
 /****************************************************************************
  Open up the brlock.tdb database.
 ****************************************************************************/
+
 void brl_init(int read_only)
 {
-       if (tdb) return;
-       tdb = tdb_open(lock_path("brlock.tdb"), 0, TDB_CLEAR_IF_FIRST, 
-                      read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
-       if (!tdb) {
-               DEBUG(0,("Failed to open byte range locking database\n"));
+       if (brlock_db) {
                return;
        }
+       brlock_db = db_open(NULL, lock_path("brlock.tdb"),
+                           lp_open_files_db_hash_size(),
+                           TDB_DEFAULT
+                           |TDB_VOLATILE
+                           |(read_only?0x0:TDB_CLEAR_IF_FIRST),
+                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
+       if (!brlock_db) {
+               DEBUG(0,("Failed to open byte range locking database %s\n",
+                       lock_path("brlock.tdb")));
+               return;
+       }
+}
 
-       /* delete any dead locks */
-       if (!read_only) {
-               tdb_traverse(tdb, delete_fn, NULL);
+/****************************************************************************
+ Close down the brlock.tdb database.
+****************************************************************************/
+
+void brl_shutdown(int read_only)
+{
+       if (!brlock_db) {
+               return;
        }
+       TALLOC_FREE(brlock_db);
 }
 
+#if ZERO_ZERO
+/****************************************************************************
+ Compare two locks for sorting.
+****************************************************************************/
+
+static int lock_compare(const struct lock_struct *lck1, 
+                        const struct lock_struct *lck2)
+{
+       if (lck1->start != lck2->start) {
+               return (lck1->start - lck2->start);
+       }
+       if (lck2->size != lck1->size) {
+               return ((int)lck1->size - (int)lck2->size);
+       }
+       return 0;
+}
+#endif
 
 /****************************************************************************
- Lock a range of bytes.
+ Lock a range of bytes - Windows lock semantics.
+****************************************************************************/
+
+static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
+                       struct lock_struct *plock, BOOL blocking_lock)
+{
+       unsigned int i;
+       files_struct *fsp = br_lck->fsp;
+       struct lock_struct *locks = br_lck->lock_data;
+
+       for (i=0; i < br_lck->num_locks; i++) {
+               /* Do any Windows or POSIX locks conflict ? */
+               if (brl_conflict(&locks[i], plock)) {
+                       /* Remember who blocked us. */
+                       plock->context.smbpid = locks[i].context.smbpid;
+                       return brl_lock_failed(fsp,plock,blocking_lock);
+               }
+#if ZERO_ZERO
+               if (plock->start == 0 && plock->size == 0 && 
+                               locks[i].size == 0) {
+                       break;
+               }
+#endif
+       }
+
+       /* We can get the Windows lock, now see if it needs to
+          be mapped into a lower level POSIX one, and if so can
+          we get it ? */
+
+       if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
+               int errno_ret;
+               if (!set_posix_lock_windows_flavour(fsp,
+                               plock->start,
+                               plock->size,
+                               plock->lock_type,
+                               &plock->context,
+                               locks,
+                               br_lck->num_locks,
+                               &errno_ret)) {
+
+                       /* We don't know who blocked us. */
+                       plock->context.smbpid = 0xFFFFFFFF;
+
+                       if (errno_ret == EACCES || errno_ret == EAGAIN) {
+                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                       } else {
+                               return map_nt_error_from_unix(errno);
+                       }
+               }
+       }
+
+       /* no conflicts - add it to the list of locks */
+       locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
+       if (!locks) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
+       br_lck->num_locks += 1;
+       br_lck->lock_data = locks;
+       br_lck->modified = True;
+
+       return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Cope with POSIX range splits and merges.
+****************************************************************************/
+
+static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,              /* Output array. */
+                                               const struct lock_struct *ex,           /* existing lock. */
+                                               const struct lock_struct *plock,        /* proposed lock. */
+                                               BOOL *lock_was_added)
+{
+       BOOL lock_types_differ = (ex->lock_type != plock->lock_type);
+
+       /* We can't merge non-conflicting locks on different contexts - ignore fnum. */
+
+       if (!brl_same_context(&ex->context, &plock->context)) {
+               /* Just copy. */
+               memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+               return 1;
+       }
+
+       /* We now know we have the same context. */
+
+       /* Did we overlap ? */
+
+/*********************************************
+                                             +---------+
+                                             | ex      |
+                                             +---------+
+                              +-------+
+                              | plock |
+                              +-------+
+OR....
+             +---------+
+             |  ex     |
+             +---------+
+**********************************************/
+
+       if ( (ex->start > (plock->start + plock->size)) ||
+                       (plock->start > (ex->start + ex->size))) {
+               /* No overlap with this lock - copy existing. */
+               memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+               return 1;
+       }
+
+/*********************************************
+        +---------------------------+
+        |          ex               |
+        +---------------------------+
+        +---------------------------+
+        |       plock               | -> replace with plock.
+        +---------------------------+
+**********************************************/
+
+       if ( (ex->start >= plock->start) &&
+                       (ex->start + ex->size <= plock->start + plock->size) ) {
+               memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
+               *lock_was_added = True;
+               return 1;
+       }
+
+/*********************************************
+        +-----------------------+
+        |          ex           |
+        +-----------------------+
+        +---------------+
+        |   plock       |
+        +---------------+
+OR....
+                        +-------+
+                        |  ex   |
+                        +-------+
+        +---------------+
+        |   plock       |
+        +---------------+
+
+BECOMES....
+        +---------------+-------+
+        |   plock       | ex    | - different lock types.
+        +---------------+-------+
+OR.... (merge)
+        +-----------------------+
+        |   ex                  | - same lock type.
+        +-----------------------+
+**********************************************/
+
+       if ( (ex->start >= plock->start) &&
+                               (ex->start <= plock->start + plock->size) &&
+                               (ex->start + ex->size > plock->start + plock->size) ) {
+
+               *lock_was_added = True;
+
+               /* If the lock types are the same, we merge, if different, we
+                  add the new lock before the old. */
+
+               if (lock_types_differ) {
+                       /* Add new. */
+                       memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
+                       memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
+                       /* Adjust existing start and size. */
+                       lck_arr[1].start = plock->start + plock->size;
+                       lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
+                       return 2;
+               } else {
+                       /* Merge. */
+                       memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
+                       /* Set new start and size. */
+                       lck_arr[0].start = plock->start;
+                       lck_arr[0].size = (ex->start + ex->size) - plock->start;
+                       return 1;
+               }
+       }
+
+/*********************************************
+   +-----------------------+
+   |  ex                   |
+   +-----------------------+
+           +---------------+
+           |   plock       |
+           +---------------+
+OR....
+   +-------+        
+   |  ex   |
+   +-------+
+           +---------------+
+           |   plock       |
+           +---------------+
+BECOMES....
+   +-------+---------------+
+   | ex    |   plock       | - different lock types
+   +-------+---------------+
+
+OR.... (merge)
+   +-----------------------+
+   | ex                    | - same lock type.
+   +-----------------------+
+
+**********************************************/
+
+       if ( (ex->start < plock->start) &&
+                       (ex->start + ex->size >= plock->start) &&
+                       (ex->start + ex->size <= plock->start + plock->size) ) {
+
+               *lock_was_added = True;
+
+               /* If the lock types are the same, we merge, if different, we
+                  add the new lock after the old. */
+
+               if (lock_types_differ) {
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
+                       /* Adjust existing size. */
+                       lck_arr[0].size = plock->start - ex->start;
+                       return 2;
+               } else {
+                       /* Merge. */
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       /* Adjust existing size. */
+                       lck_arr[0].size = (plock->start + plock->size) - ex->start;
+                       return 1;
+               }
+       }
+
+/*********************************************
+        +---------------------------+
+        |        ex                 |
+        +---------------------------+
+                +---------+
+                |  plock  |
+                +---------+
+BECOMES.....
+        +-------+---------+---------+
+        | ex    |  plock  | ex      | - different lock types.
+        +-------+---------+---------+
+OR
+        +---------------------------+
+        |        ex                 | - same lock type.
+        +---------------------------+
+**********************************************/
+
+       if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
+               *lock_was_added = True;
+
+               if (lock_types_differ) {
+
+                       /* We have to split ex into two locks here. */
+
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
+                       memcpy(&lck_arr[2], ex, sizeof(struct lock_struct));
+
+                       /* Adjust first existing size. */
+                       lck_arr[0].size = plock->start - ex->start;
+
+                       /* Adjust second existing start and size. */
+                       lck_arr[2].start = plock->start + plock->size;
+                       lck_arr[2].size = (ex->start + ex->size) - (plock->start + plock->size);
+                       return 3;
+               } else {
+                       /* Just eat plock. */
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       return 1;
+               }
+       }
+
+       /* Never get here. */
+       smb_panic("brlock_posix_split_merge");
+       /* Notreached. */
+
+       /* Keep some compilers happy. */
+       return 0;
+}
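
A concrete walk-through of the final (split) case, using hypothetical
numbers:

   ex    = { start=100, size=100, WRITE_LOCK }    covers [100,200)
   plock = { start=140, size=20,  READ_LOCK  }    covers [140,160)

Same context, lock types differ, and ex strictly contains plock, so the
output is three entries:

   lck_arr[0] = { start=100, size=40, WRITE_LOCK }    [100,140)
   lck_arr[1] = { start=140, size=20, READ_LOCK  }    [140,160)
   lck_arr[2] = { start=160, size=40, WRITE_LOCK }    [160,200)

The function returns 3, a net growth of 2 entries - which is why
brl_lock_posix below allocates room for num_locks + 2 locks.
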
+
+/****************************************************************************
+ Lock a range of bytes - POSIX lock semantics.
+ We must cope with range splits and merges.
 ****************************************************************************/
 
-BOOL brl_lock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
-             uint16 smbpid, pid_t pid, uint16 tid,
-             br_off start, br_off size, 
-             enum brl_type lock_type)
+static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
+                              struct byte_range_lock *br_lck,
+                              struct lock_struct *plock)
 {
-       TDB_DATA kbuf, dbuf;
-       int count, i;
-       struct lock_struct lock, *locks;
+       unsigned int i, count;
+       struct lock_struct *locks = br_lck->lock_data;
+       struct lock_struct *tp;
+       BOOL lock_was_added = False;
+       BOOL signal_pending_read = False;
+
+       /* No zero-zero locks for POSIX. */
+       if (plock->start == 0 && plock->size == 0) {
+               return NT_STATUS_INVALID_PARAMETER;
+       }
+
+       /* Don't allow 64-bit lock wrap. */
+       if (plock->start + plock->size < plock->start ||
+                       plock->start + plock->size < plock->size) {
+               return NT_STATUS_INVALID_PARAMETER;
+       }
+
+       /* The worst case scenario here is we have to split an
+          existing POSIX lock range into two, and add our lock,
+          so we need at most 2 more entries. */
+
+       tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
+       if (!tp) {
+               return NT_STATUS_NO_MEMORY;
+       }
+       
+       count = 0;
+       for (i=0; i < br_lck->num_locks; i++) {
+               struct lock_struct *curr_lock = &locks[i];
+
+               /* If we have a pending read lock, a lock downgrade should
+                  trigger a lock re-evaluation. */
+               if (curr_lock->lock_type == PENDING_READ_LOCK &&
+                               brl_pending_overlap(plock, curr_lock)) {
+                       signal_pending_read = True;
+               }
+
+               if (curr_lock->lock_flav == WINDOWS_LOCK) {
+                       /* Do any Windows flavour locks conflict ? */
+                       if (brl_conflict(curr_lock, plock)) {
+                               /* No games with error messages. */
+                               SAFE_FREE(tp);
+                               /* Remember who blocked us. */
+                               plock->context.smbpid = curr_lock->context.smbpid;
+                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                       }
+                       /* Just copy the Windows lock into the new array. */
+                       memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
+                       count++;
+               } else {
+                       /* POSIX conflict semantics are different. */
+                       if (brl_conflict_posix(curr_lock, plock)) {
+                               /* Can't block ourselves with POSIX locks. */
+                               /* No games with error messages. */
+                               SAFE_FREE(tp);
+                               /* Remember who blocked us. */
+                               plock->context.smbpid = curr_lock->context.smbpid;
+                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                       }
+
+                       /* Work out overlaps. */
+                       count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
+               }
+       }
+
+       if (!lock_was_added) {
+               memcpy(&tp[count], plock, sizeof(struct lock_struct));
+               count++;
+       }
 
-       kbuf = locking_key(dev,ino);
+       /* We can get the POSIX lock, now see if it needs to
+          be mapped into a lower level system POSIX one, and if
+          so can we get it ? */
 
-       dbuf.dptr = NULL;
+       if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
+               int errno_ret;
 
-       tdb_lockchain(tdb, kbuf);
-       dbuf = tdb_fetch(tdb, kbuf);
+               /* The lower layer just needs to attempt to
+                  get the system POSIX lock. We've weeded out
+                  any conflicts above. */
+
+               if (!set_posix_lock_posix_flavour(br_lck->fsp,
+                               plock->start,
+                               plock->size,
+                               plock->lock_type,
+                               &errno_ret)) {
+
+                       /* We don't know who blocked us. */
+                       plock->context.smbpid = 0xFFFFFFFF;
+
+                       if (errno_ret == EACCES || errno_ret == EAGAIN) {
+                               SAFE_FREE(tp);
+                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                       } else {
+                               SAFE_FREE(tp);
+                               return map_nt_error_from_unix(errno);
+                       }
+               }
+       }
+
+       /* Realloc so we don't leak entries per lock call. */
+       tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
+       if (!tp) {
+               return NT_STATUS_NO_MEMORY;
+       }
+       br_lck->num_locks = count;
+       SAFE_FREE(br_lck->lock_data);
+       br_lck->lock_data = tp;
+       locks = tp;
+       br_lck->modified = True;
+
+       /* A successful downgrade from write to read lock can trigger a lock
+          re-evaluation where waiting readers can now proceed. */
+
+       if (signal_pending_read) {
+               /* Send unlock messages to any pending read waiters that overlap. */
+               for (i=0; i < br_lck->num_locks; i++) {
+                       struct lock_struct *pend_lock = &locks[i];
+
+                       /* Ignore non-pending locks. */
+                       if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
+                               continue;
+                       }
+
+                       if (pend_lock->lock_type == PENDING_READ_LOCK &&
+                                       brl_pending_overlap(plock, pend_lock)) {
+                               DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
+                                       procid_str_static(&pend_lock->context.pid )));
+
+                               messaging_send(msg_ctx, pend_lock->context.pid,
+                                              MSG_SMB_UNLOCK, &data_blob_null);
+                       }
+               }
+       }
+
+       return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Lock a range of bytes.
+****************************************************************************/
+
+NTSTATUS brl_lock(struct messaging_context *msg_ctx,
+               struct byte_range_lock *br_lck,
+               uint32 smbpid,
+               struct server_id pid,
+               br_off start,
+               br_off size, 
+               enum brl_type lock_type,
+               enum brl_flavour lock_flav,
+               BOOL blocking_lock,
+               uint32 *psmbpid)
+{
+       NTSTATUS ret;
+       struct lock_struct lock;
+
+#if !ZERO_ZERO
+       if (start == 0 && size == 0) {
+               DEBUG(0,("client sent 0/0 lock - please report this\n"));
+       }
+#endif
 
        lock.context.smbpid = smbpid;
        lock.context.pid = pid;
-       lock.context.tid = tid;
+       lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
-       lock.fnum = fnum;
+       lock.fnum = br_lck->fsp->fnum;
        lock.lock_type = lock_type;
+       lock.lock_flav = lock_flav;
 
-       if (dbuf.dptr) {
-               /* there are existing locks - make sure they don't conflict */
-               locks = (struct lock_struct *)dbuf.dptr;
-               count = dbuf.dsize / sizeof(*locks);
-               for (i=0; i<count; i++) {
-                       if (brl_conflict(&locks[i], &lock)) {
-                               goto fail;
-                       }
-               }
+       if (lock_flav == WINDOWS_LOCK) {
+               ret = brl_lock_windows(br_lck, &lock, blocking_lock);
+       } else {
+               ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }
 
-       /* no conflicts - add it to the list of locks */
-       dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
-       if (!dbuf.dptr) goto fail;
-       memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
-       dbuf.dsize += sizeof(lock);
-       tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
-
-       free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
-       return True;
+#if ZERO_ZERO
+       /* sort the lock list */
+       qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
+#endif
 
- fail:
-       if (dbuf.dptr) free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
-       return False;
+       /* If we're returning an error, return who blocked us. */
+       if (!NT_STATUS_IS_OK(ret) && psmbpid) {
+               *psmbpid = lock.context.smbpid;
+       }
+       return ret;
 }
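
For orientation, a hedged sketch of how a caller might drive this entry
point. The br_lck would come from something like brl_get_locks(), and the
surrounding variables (msg_ctx, smbpid, offset, count) are assumptions for
illustration, not part of this patch:

        NTSTATUS status;
        uint32 blocker_smbpid;

        status = brl_lock(msg_ctx,          /* for MSG_SMB_UNLOCK wakeups      */
                          br_lck,           /* lock set for this fsp's dev/ino */
                          smbpid,           /* client-supplied lock context    */
                          procid_self(),    /* our server_id                   */
                          offset, count,    /* range to lock                   */
                          WRITE_LOCK,
                          WINDOWS_LOCK,     /* Windows, not POSIX, semantics   */
                          False,            /* fail rather than queue          */
                          &blocker_smbpid); /* on failure: who blocked us      */
        if (!NT_STATUS_IS_OK(status)) {
                /* blocker_smbpid holds the smbpid of the conflicting lock,
                   or 0xFFFFFFFF if a system-level POSIX lock blocked us. */
        }
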
 
 /****************************************************************************
- Unlock a range of bytes.
+ Unlock a range of bytes - Windows semantics.
 ****************************************************************************/
 
-BOOL brl_unlock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
-               uint16 smbpid, pid_t pid, uint16 tid,
-               br_off start, br_off size)
+static BOOL brl_unlock_windows(struct messaging_context *msg_ctx,
+                              struct byte_range_lock *br_lck,
+                              const struct lock_struct *plock)
 {
-       TDB_DATA kbuf, dbuf;
-       int count, i;
-       struct lock_struct *locks;
-       struct lock_context context;
+       unsigned int i, j;
+       struct lock_struct *locks = br_lck->lock_data;
+       enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
 
-       kbuf = locking_key(dev,ino);
+#if ZERO_ZERO
+       /* Delete write locks by preference... The lock list
+          is sorted in the zero zero case. */
 
-       dbuf.dptr = NULL;
+       for (i = 0; i < br_lck->num_locks; i++) {
+               struct lock_struct *lock = &locks[i];
 
-       tdb_lockchain(tdb, kbuf);
-       dbuf = tdb_fetch(tdb, kbuf);
+               if (lock->lock_type == WRITE_LOCK &&
+                   brl_same_context(&lock->context, &plock->context) &&
+                   lock->fnum == plock->fnum &&
+                   lock->lock_flav == WINDOWS_LOCK &&
+                   lock->start == plock->start &&
+                   lock->size == plock->size) {
 
-       if (!dbuf.dptr) {
-               DEBUG(10,("brl_unlock: tdb_fetch failed !\n"));
-               goto fail;
+                       /* found it - delete it */
+                       deleted_lock_type = lock->lock_type;
+                       break;
+               }
        }
 
-       context.smbpid = smbpid;
-       context.pid = pid;
-       context.tid = tid;
-
-       /* there are existing locks - find a match */
-       locks = (struct lock_struct *)dbuf.dptr;
-       count = dbuf.dsize / sizeof(*locks);
-       for (i=0; i<count; i++) {
+       if (i != br_lck->num_locks) {
+               /* We found it - don't search again. */
+               goto unlock_continue;
+       }
+#endif
 
+       for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
 
-#if 0
-               /* JRATEST - DEBUGGING INFO */
-               if(!brl_same_context(&lock->context, &context)) {
-                       DEBUG(10,("brl_unlock: Not same context. l_smbpid = %u, l_pid = %u, l_tid = %u: \
-smbpid = %u, pid = %u, tid = %u\n",
-                               lock->context.smbpid, lock->context.pid, lock->context.tid,
-                               context.smbpid, context.pid, context.tid ));
-
+               /* Only remove our own locks that match in start, size, and flavour. */
+               if (brl_same_context(&lock->context, &plock->context) &&
+                                       lock->fnum == plock->fnum &&
+                                       lock->lock_flav == WINDOWS_LOCK &&
+                                       lock->start == plock->start &&
+                                       lock->size == plock->size ) {
+                       deleted_lock_type = lock->lock_type;
+                       break;
                }
-               /* JRATEST */
+       }
+
+       if (i == br_lck->num_locks) {
+               /* we didn't find it */
+               return False;
+       }
+
+#if ZERO_ZERO
+  unlock_continue:
 #endif
 
-               if (brl_same_context(&lock->context, &context) &&
-                   lock->fnum == fnum &&
-                   lock->start == start &&
-                   lock->size == size) {
-                       /* found it - delete it */
-                       if (count == 1) {
-                               tdb_delete(tdb, kbuf);
+       /* Actually delete the lock. */
+       if (i < br_lck->num_locks - 1) {
+               memmove(&locks[i], &locks[i+1], 
+                       sizeof(*locks)*((br_lck->num_locks-1) - i));
+       }
+
+       br_lck->num_locks -= 1;
+       br_lck->modified = True;
+
+       /* Unlock the underlying POSIX regions. */
+       if(lp_posix_locking(br_lck->fsp->conn->params)) {
+               release_posix_lock_windows_flavour(br_lck->fsp,
+                               plock->start,
+                               plock->size,
+                               deleted_lock_type,
+                               &plock->context,
+                               locks,
+                               br_lck->num_locks);
+       }
+
+       /* Send unlock messages to any pending waiters that overlap. */
+       for (j=0; j < br_lck->num_locks; j++) {
+               struct lock_struct *pend_lock = &locks[j];
+
+               /* Ignore non-pending locks. */
+               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
+                       continue;
+               }
+
+               /* We could send specific lock info here... */
+               if (brl_pending_overlap(plock, pend_lock)) {
+                       DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
+                               procid_str_static(&pend_lock->context.pid )));
+
+                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                      MSG_SMB_UNLOCK, &data_blob_null);
+               }
+       }
+
+       return True;
+}
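
The delete step above is the standard shift-down idiom; in generic form
(a sketch, not code from the patch):

        /* Remove element i from an n-element array in place,
           preserving the order of the remaining entries. */
        static void array_delete_at(struct lock_struct *arr,
                                    unsigned int *pnum, unsigned int i)
        {
                if (i < *pnum - 1) {
                        memmove(&arr[i], &arr[i+1],
                                sizeof(arr[0]) * ((*pnum - 1) - i));
                }
                *pnum -= 1;
        }
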
+
+/****************************************************************************
+ Unlock a range of bytes - POSIX semantics.
+****************************************************************************/
+
+static BOOL brl_unlock_posix(struct messaging_context *msg_ctx,
+                            struct byte_range_lock *br_lck,
+                            const struct lock_struct *plock)
+{
+       unsigned int i, j, count;
+       struct lock_struct *tp;
+       struct lock_struct *locks = br_lck->lock_data;
+       BOOL overlap_found = False;
+
+       /* No zero-zero locks for POSIX. */
+       if (plock->start == 0 && plock->size == 0) {
+               return False;
+       }
+
+       /* Don't allow 64-bit lock wrap. */
+       if (plock->start + plock->size < plock->start ||
+                       plock->start + plock->size < plock->size) {
+               DEBUG(10,("brl_unlock_posix: lock wrap\n"));
+               return False;
+       }
+
+       /* The worst case scenario here is we have to split an
+          existing POSIX lock range into two, so we need at most
+          1 more entry. */
+
+       tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
+       if (!tp) {
+               DEBUG(10,("brl_unlock_posix: malloc fail\n"));
+               return False;
+       }
+
+       count = 0;
+       for (i = 0; i < br_lck->num_locks; i++) {
+               struct lock_struct *lock = &locks[i];
+               struct lock_struct tmp_lock[3];
+               BOOL lock_was_added = False;
+               unsigned int tmp_count;
+
+               /* Only remove our own locks - ignore fnum. */
+               if (IS_PENDING_LOCK(lock->lock_type) ||
+                               !brl_same_context(&lock->context, &plock->context)) {
+                       memcpy(&tp[count], lock, sizeof(struct lock_struct));
+                       count++;
+                       continue;
+               }
+
+               /* Work out overlaps. */
+               tmp_count = brlock_posix_split_merge(&tmp_lock[0], &locks[i], plock, &lock_was_added);
+
+               if (tmp_count == 1) {
+                       /* Either the locks didn't overlap, or the unlock completely
+                          overlapped this lock. If it didn't overlap, then there's
+                          no change in the locks. */
+                       if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
+                               SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
+                               /* No change in this lock. */
+                               memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
+                               count++;
                        } else {
-                               if (i < count-1) {
-                                       memmove(&locks[i], &locks[i+1], 
-                                               sizeof(*locks)*((count-1) - i));
+                               SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
+                               overlap_found = True;
+                       }
+                       continue;
+               } else if (tmp_count == 2) {
+                       /* The unlock overlapped an existing lock. Copy the truncated
+                          lock into the lock array. */
+                       if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
+                               SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
+                               SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
+                               memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
+                               if (tmp_lock[0].size != locks[i].size) {
+                                       overlap_found = True;
                                }
-                               dbuf.dsize -= sizeof(*locks);
-                               tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
+                       } else {
+                               SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
+                               SMB_ASSERT(tmp_lock[1].lock_type == locks[i].lock_type);
+                               memcpy(&tp[count], &tmp_lock[1], sizeof(struct lock_struct));
+                               if (tmp_lock[1].start != locks[i].start) {
+                                       overlap_found = True;
+                               }
+                       }
+                       count++;
+                       continue;
+               } else {
+                       /* tmp_count == 3 - (we split a lock range in two). */
+                       SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
+                       SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
+                       SMB_ASSERT(tmp_lock[2].lock_type == locks[i].lock_type);
+
+                       memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
+                       count++;
+                       memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
+                       count++;
+                       overlap_found = True;
+                       /* Optimisation... */
+                       /* We know we're finished here as we can't overlap any
+                          more POSIX locks. Copy the rest of the lock array. */
+                       if (i < br_lck->num_locks - 1) {
+                               memcpy(&tp[count], &locks[i+1], 
+                                       sizeof(*locks)*((br_lck->num_locks-1) - i));
+                               count += ((br_lck->num_locks-1) - i);
                        }
+                       break;
+               }
+       }
+
+       if (!overlap_found) {
+               /* Just ignore - no change. */
+               SAFE_FREE(tp);
+               DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
+               return True;
+       }
+
+       /* Unlock any POSIX regions. */
+       if(lp_posix_locking(br_lck->fsp->conn->params)) {
+               release_posix_lock_posix_flavour(br_lck->fsp,
+                                               plock->start,
+                                               plock->size,
+                                               &plock->context,
+                                               tp,
+                                               count);
+       }
 
-                       free(dbuf.dptr);
-                       tdb_unlockchain(tdb, kbuf);
-                       return True;
+       /* Realloc so we don't leak entries per unlock call. */
+       if (count) {
+               tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
+               if (!tp) {
+                       DEBUG(10,("brl_unlock_posix: realloc fail\n"));
+                       return False;
                }
+       } else {
+               /* We deleted the last lock. */
+               SAFE_FREE(tp);
+               tp = NULL;
        }
 
-       /* we didn't find it */
+       br_lck->num_locks = count;
+       SAFE_FREE(br_lck->lock_data);
+       locks = tp;
+       br_lck->lock_data = tp;
+       br_lck->modified = True;
 
- fail:
-       if (dbuf.dptr) free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
-       return False;
+       /* Send unlock messages to any pending waiters that overlap. */
+
+       for (j=0; j < br_lck->num_locks; j++) {
+               struct lock_struct *pend_lock = &locks[j];
+
+               /* Ignore non-pending locks. */
+               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
+                       continue;
+               }
+
+               /* We could send specific lock info here... */
+               if (brl_pending_overlap(plock, pend_lock)) {
+                       DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
+                               procid_str_static(&pend_lock->context.pid )));
+
+                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                      MSG_SMB_UNLOCK, &data_blob_null);
+               }
+       }
+
+       return True;
 }
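
A worked example of the tmp_count == 2 path, with hypothetical numbers:

   existing = { start=100, size=50, READ_LOCK  }    covers [100,150)
   unlock   = { start=100, size=20, UNLOCK_LOCK }   covers [100,120)

brlock_posix_split_merge returns 2:

   tmp_lock[0] = { start=100, size=20, UNLOCK_LOCK }   the removed region
   tmp_lock[1] = { start=120, size=30, READ_LOCK  }    the surviving tail

Only tmp_lock[1] is copied into tp[], and since its start moved,
overlap_found is set and the shrunk lock array is stored back.
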
 
 /****************************************************************************
-Test if we could add a lock if we wanted to.
+ Unlock a range of bytes.
 ****************************************************************************/
 
-BOOL brl_locktest(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
-                 uint16 smbpid, pid_t pid, uint16 tid,
-                 br_off start, br_off size, 
-                 enum brl_type lock_type)
+BOOL brl_unlock(struct messaging_context *msg_ctx,
+               struct byte_range_lock *br_lck,
+               uint32 smbpid,
+               struct server_id pid,
+               br_off start,
+               br_off size,
+               enum brl_flavour lock_flav)
 {
-       TDB_DATA kbuf, dbuf;
-       int count, i;
-       struct lock_struct lock, *locks;
+       struct lock_struct lock;
 
-       kbuf = locking_key(dev,ino);
+       lock.context.smbpid = smbpid;
+       lock.context.pid = pid;
+       lock.context.tid = br_lck->fsp->conn->cnum;
+       lock.start = start;
+       lock.size = size;
+       lock.fnum = br_lck->fsp->fnum;
+       lock.lock_type = UNLOCK_LOCK;
+       lock.lock_flav = lock_flav;
+
+       if (lock_flav == WINDOWS_LOCK) {
+               return brl_unlock_windows(msg_ctx, br_lck, &lock);
+       } else {
+               return brl_unlock_posix(msg_ctx, br_lck, &lock);
+       }
+}
 
-       dbuf.dptr = NULL;
+/****************************************************************************
+ Test if we could add a lock if we wanted to.
+ Returns True if the region required is currently unlocked, False if locked.
+****************************************************************************/
 
-       tdb_lockchain(tdb, kbuf);
-       dbuf = tdb_fetch(tdb, kbuf);
+BOOL brl_locktest(struct byte_range_lock *br_lck,
+               uint32 smbpid,
+               struct server_id pid,
+               br_off start,
+               br_off size, 
+               enum brl_type lock_type,
+               enum brl_flavour lock_flav)
+{
+       BOOL ret = True;
+       unsigned int i;
+       struct lock_struct lock;
+       const struct lock_struct *locks = br_lck->lock_data;
+       files_struct *fsp = br_lck->fsp;
 
        lock.context.smbpid = smbpid;
        lock.context.pid = pid;
-       lock.context.tid = tid;
+       lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
-       lock.fnum = fnum;
+       lock.fnum = fsp->fnum;
        lock.lock_type = lock_type;
-
-       if (dbuf.dptr) {
-               /* there are existing locks - make sure they don't conflict */
-               locks = (struct lock_struct *)dbuf.dptr;
-               count = dbuf.dsize / sizeof(*locks);
-               for (i=0; i<count; i++) {
-                       if (brl_conflict(&locks[i], &lock)) {
-                               goto fail;
-                       }
+       lock.lock_flav = lock_flav;
+
+       /* Make sure existing locks don't conflict */
+       for (i=0; i < br_lck->num_locks; i++) {
+               /*
+                * Our own locks don't conflict.
+                */
+               if (brl_conflict_other(&locks[i], &lock)) {
+                       return False;
                }
        }
 
+       /*
+        * There is no lock held by an SMB daemon, check to
+        * see if there is a POSIX lock from a UNIX or NFS process.
+        * This only conflicts with Windows locks, not POSIX locks.
+        */
+
+       if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
+               ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
+
+               DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
+                       (double)start, (double)size, ret ? "locked" : "unlocked",
+                       fsp->fnum, fsp->fsp_name ));
+
+               /* We need to return the inverse of is_posix_locked. */
+               ret = !ret;
+        }
+
        /* no conflicts - we could have added it */
-       free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
-       return True;
+       return ret;
+}
 
- fail:
-       if (dbuf.dptr) free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
-       return False;
+/****************************************************************************
+ Query for existing locks.
+****************************************************************************/
+
+NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
+               uint32 *psmbpid,
+               struct server_id pid,
+               br_off *pstart,
+               br_off *psize, 
+               enum brl_type *plock_type,
+               enum brl_flavour lock_flav)
+{
+       unsigned int i;
+       struct lock_struct lock;
+       const struct lock_struct *locks = br_lck->lock_data;
+       files_struct *fsp = br_lck->fsp;
+
+       lock.context.smbpid = *psmbpid;
+       lock.context.pid = pid;
+       lock.context.tid = br_lck->fsp->conn->cnum;
+       lock.start = *pstart;
+       lock.size = *psize;
+       lock.fnum = fsp->fnum;
+       lock.lock_type = *plock_type;
+       lock.lock_flav = lock_flav;
+
+       /* Make sure existing locks don't conflict */
+       for (i=0; i < br_lck->num_locks; i++) {
+               const struct lock_struct *exlock = &locks[i];
+               BOOL conflict = False;
+
+               if (exlock->lock_flav == WINDOWS_LOCK) {
+                       conflict = brl_conflict(exlock, &lock);
+               } else {        
+                       conflict = brl_conflict_posix(exlock, &lock);
+               }
+
+               if (conflict) {
+                       *psmbpid = exlock->context.smbpid;
+                       *pstart = exlock->start;
+                       *psize = exlock->size;
+                       *plock_type = exlock->lock_type;
+                       return NT_STATUS_LOCK_NOT_GRANTED;
+               }
+       }
+
+       /*
+        * There is no lock held by an SMB daemon, check to
+        * see if there is a POSIX lock from a UNIX or NFS process.
+        */
+
+       if(lp_posix_locking(fsp->conn->params)) {
+               BOOL ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
+
+               DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
+                       (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
+                       fsp->fnum, fsp->fsp_name ));
+
+               if (ret) {
+                       /* Hmmm. No clue what to set smbpid to - use -1. */
+                       *psmbpid = 0xFFFF;
+                       return NT_STATUS_LOCK_NOT_GRANTED;
+               }
+        }
+
+       return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Remove a particular pending lock.
+****************************************************************************/
+
+BOOL brl_lock_cancel(struct byte_range_lock *br_lck,
+               uint32 smbpid,
+               struct server_id pid,
+               br_off start,
+               br_off size,
+               enum brl_flavour lock_flav)
+{
+       unsigned int i;
+       struct lock_struct *locks = br_lck->lock_data;
+       struct lock_context context;
+
+       context.smbpid = smbpid;
+       context.pid = pid;
+       context.tid = br_lck->fsp->conn->cnum;
+
+       for (i = 0; i < br_lck->num_locks; i++) {
+               struct lock_struct *lock = &locks[i];
+
+               /* For pending locks we *always* care about the fnum. */
+               if (brl_same_context(&lock->context, &context) &&
+                               lock->fnum == br_lck->fsp->fnum &&
+                               IS_PENDING_LOCK(lock->lock_type) &&
+                               lock->lock_flav == lock_flav &&
+                               lock->start == start &&
+                               lock->size == size) {
+                       break;
+               }
+       }
+
+       if (i == br_lck->num_locks) {
+               /* Didn't find it. */
+               return False;
+       }
+
+       if (i < br_lck->num_locks - 1) {
+               /* Found this particular pending lock - delete it */
+               memmove(&locks[i], &locks[i+1], 
+                       sizeof(*locks)*((br_lck->num_locks-1) - i));
+       }
+
+       br_lck->num_locks -= 1;
+       br_lck->modified = True;
+       return True;
 }
 
 /****************************************************************************
  Remove any locks associated with an open file.
+ If this process owns Windows locks on other fnums for this dev/ino
+ pair, this fnum's locks are released individually so the underlying
+ system POSIX locks stay correct.
 ****************************************************************************/
 
-void brl_close(SMB_DEV_T dev, SMB_INO_T ino, pid_t pid, int tid, int fnum)
+void brl_close_fnum(struct messaging_context *msg_ctx,
+                   struct byte_range_lock *br_lck)
 {
-       TDB_DATA kbuf, dbuf;
-       int count, i, dcount=0;
-       struct lock_struct *locks;
+       files_struct *fsp = br_lck->fsp;
+       uint16 tid = fsp->conn->cnum;
+       int fnum = fsp->fnum;
+       unsigned int i, j, dcount=0;
+       int num_deleted_windows_locks = 0;
+       struct lock_struct *locks = br_lck->lock_data;
+       struct server_id pid = procid_self();
+       BOOL unlock_individually = False;
+
+       if(lp_posix_locking(fsp->conn->params)) {
+
+               /* Check if there are any Windows locks associated with this dev/ino
+                  pair that are not this fnum. If so we need to call unlock on each
+                  one in order to release the system POSIX locks correctly. */
+
+               for (i=0; i < br_lck->num_locks; i++) {
+                       struct lock_struct *lock = &locks[i];
+
+                       if (!procid_equal(&lock->context.pid, &pid)) {
+                               continue;
+                       }
+
+                       if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
+                               continue; /* Ignore pending. */
+                       }
 
-       kbuf = locking_key(dev,ino);
+                       if (lock->context.tid != tid || lock->fnum != fnum) {
+                               unlock_individually = True;
+                               break;
+                       }
+               }
 
-       dbuf.dptr = NULL;
+               if (unlock_individually) {
+                       struct lock_struct *locks_copy;
+                       unsigned int num_locks_copy;
+
+                       /* Copy the current lock array. */
+                       if (br_lck->num_locks) {
+                               locks_copy = (struct lock_struct *)TALLOC_MEMDUP(
+                                       br_lck, locks,
+                                       br_lck->num_locks * sizeof(struct lock_struct));
+                               if (!locks_copy) {
+                                       smb_panic("brl_close_fnum: talloc failed");
+                               }
+                       } else {
+                               locks_copy = NULL;
+                       }
 
-       tdb_lockchain(tdb, kbuf);
-       dbuf = tdb_fetch(tdb, kbuf);
+                       num_locks_copy = br_lck->num_locks;
 
-       if (!dbuf.dptr) goto fail;
+                       for (i=0; i < num_locks_copy; i++) {
+                               struct lock_struct *lock = &locks_copy[i];
 
-       /* there are existing locks - remove any for this fnum */
-       locks = (struct lock_struct *)dbuf.dptr;
-       count = dbuf.dsize / sizeof(*locks);
-       for (i=0; i<count; i++) {
+                               if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
+                                               (lock->fnum == fnum)) {
+                                       brl_unlock(msg_ctx,
+                                               br_lck,
+                                               lock->context.smbpid,
+                                               pid,
+                                               lock->start,
+                                               lock->size,
+                                               lock->lock_flav);
+                               }
+                       }
+                       return;
+               }
+       }
+
+       /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
+
+       /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
+
+       for (i=0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
+               BOOL del_this_lock = False;
+
+               if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
+                       if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
+                               del_this_lock = True;
+                               num_deleted_windows_locks++;
+                       } else if (lock->lock_flav == POSIX_LOCK) {
+                               del_this_lock = True;
+                       }
+               }
+
+               if (del_this_lock) {
+                       /* Send unlock messages to any pending waiters that overlap. */
+                       for (j=0; j < br_lck->num_locks; j++) {
+                               struct lock_struct *pend_lock = &locks[j];
+
+                               /* Ignore non-pending locks. */
+                               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
+                                       continue;
+                               }
+
+                               /* Optimisation - don't send to this fnum as we're
+                                  closing it. */
+                               if (pend_lock->context.tid == tid &&
+                                   procid_equal(&pend_lock->context.pid, &pid) &&
+                                   pend_lock->fnum == fnum) {
+                                       continue;
+                               }
+
+                               /* We could send specific lock info here... */
+                               if (brl_pending_overlap(lock, pend_lock)) {
+                                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                                      MSG_SMB_UNLOCK, &data_blob_null);
+                               }
+                       }
 
-               if (lock->context.tid == tid &&
-                   lock->context.pid == pid &&
-                   lock->fnum == fnum) {
                        /* found it - delete it */
-                       if (count > 1 && i < count-1) {
+                       if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
                                memmove(&locks[i], &locks[i+1], 
-                                       sizeof(*locks)*((count-1) - i));
+                                       sizeof(*locks)*((br_lck->num_locks-1) - i));
                        }
-                       count--;
+                       br_lck->num_locks--;
+                       br_lck->modified = True;
                        i--;
                        dcount++;
                }
        }
 
-       if (count == 0) {
-               tdb_delete(tdb, kbuf);
-       } else if (count < (dbuf.dsize / sizeof(*locks))) {
-               dbuf.dsize -= dcount * sizeof(*locks);
-               tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
+       if (lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
+               /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
+               reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
+       }
+}
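+
+/*
+ * Background for the individual-unlock path above (a sketch of
+ * standard POSIX semantics, not anything specific to this change):
+ * fcntl() locks are owned per process, not per file descriptor,
+ * so closing *any* fd on a file drops all of the process's locks
+ * on that file:
+ *
+ *	fd1 = open(path, O_RDWR);
+ *	fd2 = open(path, O_RDWR);
+ *	fcntl(fd1, F_SETLK, &fl);	// lock taken via fd1
+ *	close(fd2);			// fd1's lock is dropped too
+ *
+ * Hence, while other fnums on this dev/ino still hold Windows
+ * locks, each lock is released via brl_unlock() so the remaining
+ * POSIX ranges can be re-established, rather than bulk-deleted.
+ */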
+
+/****************************************************************************
+ Ensure this set of lock entries is valid.
+****************************************************************************/
+
+static BOOL validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
+{
+       unsigned int i;
+       unsigned int num_valid_entries = 0;
+       struct lock_struct *locks = *pplocks;
+
+       for (i = 0; i < *pnum_entries; i++) {
+               struct lock_struct *lock_data = &locks[i];
+               if (!process_exists(lock_data->context.pid)) {
+                       /* This process no longer exists - mark this
+                          entry as invalid by zeroing it. */
+                       ZERO_STRUCTP(lock_data);
+               } else {
+                       num_valid_entries++;
+               }
+       }
+
+       if (num_valid_entries != *pnum_entries) {
+               struct lock_struct *new_lock_data = NULL;
+
+               if (num_valid_entries) {
+                       new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
+                       if (!new_lock_data) {
+                               DEBUG(3, ("validate_lock_entries: malloc failed\n"));
+                               return False;
+                       }
+
+                       num_valid_entries = 0;
+                       for (i = 0; i < *pnum_entries; i++) {
+                               struct lock_struct *lock_data = &locks[i];
+                               if (lock_data->context.smbpid &&
+                                               lock_data->context.tid) {
+                                       /* Valid (nonzero) entry - copy it. */
+                                       memcpy(&new_lock_data[num_valid_entries],
+                                               lock_data, sizeof(struct lock_struct));
+                                       num_valid_entries++;
+                               }
+                       }
+               }
+
+               SAFE_FREE(*pplocks);
+               *pplocks = new_lock_data;
+               *pnum_entries = num_valid_entries;
        }
 
-       /* we didn't find it */
- fail:
-       if (dbuf.dptr) free(dbuf.dptr);
-       tdb_unlockchain(tdb, kbuf);
+       return True;
 }
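+
+/*
+ * Usage sketch (hypothetical): the count and the array travel by
+ * reference because a compaction replaces both, e.g.
+ *
+ *	unsigned int n = br_lck->num_locks;
+ *	struct lock_struct *l = br_lck->lock_data;
+ *	if (validate_lock_entries(&n, &l)) {
+ *		br_lck->num_locks = n;
+ *		br_lck->lock_data = l;	// old array was SAFE_FREE'd
+ *	}
+ */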
 
+struct brl_forall_cb {
+       void (*fn)(struct file_id id, struct server_id pid,
+                  enum brl_type lock_type,
+                  enum brl_flavour lock_flav,
+                  br_off start, br_off size,
+                  void *private_data);
+       void *private_data;
+};
+
 /****************************************************************************
  Traverse the whole database with this function, calling traverse_callback
  on each lock.
 ****************************************************************************/
 
-static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
+static int traverse_fn(struct db_record *rec, void *state)
 {
+       struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
        struct lock_struct *locks;
-       struct lock_key *key;
-       int i;
+       struct file_id *key;
+       unsigned int i;
+       unsigned int num_locks = 0;
+       unsigned int orig_num_locks = 0;
+
+       /* In a traverse function we must make a copy of
+          the record's value before modifying it. */
+
+       locks = (struct lock_struct *)memdup(rec->value.dptr,
+                                            rec->value.dsize);
+       if (!locks) {
+               return -1; /* Terminate traversal. */
+       }
+
+       key = (struct file_id *)rec->key.dptr;
+       orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
 
-       BRLOCK_FN(traverse_callback) = (BRLOCK_FN_CAST())state;
+       /* Ensure the lock db is clean of entries from invalid processes. */
+
+       if (!validate_lock_entries(&num_locks, &locks)) {
+               SAFE_FREE(locks);
+               return -1; /* Terminate traversal */
+       }
 
-       locks = (struct lock_struct *)dbuf.dptr;
-       key = (struct lock_key *)kbuf.dptr;
+       if (orig_num_locks != num_locks) {
+               if (num_locks) {
+                       TDB_DATA data;
+                       data.dptr = (uint8_t *)locks;
+                       data.dsize = num_locks*sizeof(struct lock_struct);
+                       rec->store(rec, data, TDB_REPLACE);
+               } else {
+                       rec->delete_rec(rec);
+               }
+       }
 
-       for (i=0;i<dbuf.dsize/sizeof(*locks);i++) {
-               traverse_callback(key->device, key->inode,
-                                 locks[i].context.pid,
-                                 locks[i].lock_type,
-                                 locks[i].start,
-                                 locks[i].size);
+       for (i=0; i<num_locks; i++) {
+               cb->fn(*key,
+                      locks[i].context.pid,
+                      locks[i].lock_type,
+                      locks[i].lock_flav,
+                      locks[i].start,
+                      locks[i].size,
+                      cb->private_data);
        }
+
+       SAFE_FREE(locks);
        return 0;
 }
 
@@ -437,8 +1518,289 @@ static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *st
  Call the specified function on each lock in the database.
 ********************************************************************/
 
-int brl_forall(BRLOCK_FN(fn))
+int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
+                         enum brl_type lock_type,
+                         enum brl_flavour lock_flav,
+                         br_off start, br_off size,
+                         void *private_data),
+              void *private_data)
+{
+       struct brl_forall_cb cb;
+
+       if (!brlock_db) {
+               return 0;
+       }
+       cb.fn = fn;
+       cb.private_data = private_data;
+       return brlock_db->traverse(brlock_db, traverse_fn, &cb);
+}
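+
+/*
+ * Usage sketch (hypothetical callback): count every lock in the
+ * database via the private_data cookie, in the same style as
+ * brl_revalidate_collect() below.
+ *
+ *	static void count_fn(struct file_id id, struct server_id pid,
+ *			     enum brl_type lock_type,
+ *			     enum brl_flavour lock_flav,
+ *			     br_off start, br_off size,
+ *			     void *private_data)
+ *	{
+ *		unsigned int *count = (unsigned int *)private_data;
+ *		(*count)++;
+ *	}
+ *
+ *	unsigned int count = 0;
+ *	brl_forall(count_fn, &count);
+ */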
+
+/*******************************************************************
+ Store a potentially modified set of byte range lock data back into
+ the database.
+ Unlock the record.
+********************************************************************/
+
+static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
+{
+       TDB_DATA key;
+
+       key.dptr = (uint8 *)&br_lck->key;
+       key.dsize = sizeof(struct file_id);
+
+       if (br_lck->read_only) {
+               SMB_ASSERT(!br_lck->modified);
+       }
+
+       if (!br_lck->modified) {
+               goto done;
+       }
+
+       if (br_lck->num_locks == 0) {
+               /* No locks - delete this entry. */
+               NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
+               if (!NT_STATUS_IS_OK(status)) {
+                       DEBUG(0, ("delete_rec returned %s\n",
+                                 nt_errstr(status)));
+                       smb_panic("Could not delete byte range lock entry");
+               }
+       } else {
+               TDB_DATA data;
+               NTSTATUS status;
+
+               data.dptr = (uint8 *)br_lck->lock_data;
+               data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
+
+               status = br_lck->record->store(br_lck->record, data,
+                                              TDB_REPLACE);
+               if (!NT_STATUS_IS_OK(status)) {
+                       DEBUG(0, ("store returned %s\n", nt_errstr(status)));
+                       smb_panic("Could not store byte range lock entry");
+               }
+       }
+
+ done:
+
+       SAFE_FREE(br_lck->lock_data);
+       TALLOC_FREE(br_lck->record);
+       return 0;
+}
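+
+/*
+ * Write-back is driven entirely by talloc: callers never store the
+ * record explicitly, they just free the handle. Lifecycle sketch
+ * (hypothetical caller):
+ *
+ *	struct byte_range_lock *br_lck = brl_get_locks(ctx, fsp);
+ *	brl_unlock(msg_ctx, br_lck, ...);  // marks br_lck->modified
+ *	TALLOC_FREE(br_lck);               // destructor stores or
+ *	                                   // deletes the record and
+ *	                                   // releases the record lock
+ */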
+
+/*******************************************************************
+ Fetch a set of byte range lock data from the database.
+ Leave the record locked.
+ TALLOC_FREE(brl) will release the lock in the destructor.
+********************************************************************/
+
+static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
+                                       files_struct *fsp, BOOL read_only)
+{
+       TDB_DATA key, data;
+       struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
+
+       if (br_lck == NULL) {
+               return NULL;
+       }
+
+       br_lck->fsp = fsp;
+       br_lck->num_locks = 0;
+       br_lck->modified = False;
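+       /* Zero fill first so any padding bytes in struct file_id are
+          deterministic when the struct is used as the tdb key. */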
+       memset(&br_lck->key, '\0', sizeof(struct file_id));
+       br_lck->key = fsp->file_id;
+
+       key.dptr = (uint8 *)&br_lck->key;
+       key.dsize = sizeof(struct file_id);
+
+       if (!fsp->lockdb_clean) {
+               /* We must be read/write to clean
+                  the dead entries. */
+               read_only = False;
+       }
+
+       if (read_only) {
+               if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
+                       DEBUG(3, ("Could not fetch byte range lock record\n"));
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+               br_lck->record = NULL;
+       } else {
+               br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
+
+               if (br_lck->record == NULL) {
+                       DEBUG(3, ("Could not lock byte range lock entry\n"));
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+
+               data = br_lck->record->value;
+       }
+
+       br_lck->read_only = read_only;
+
+       talloc_set_destructor(br_lck, byte_range_lock_destructor);
+
+       br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
+       if (!(br_lck->lock_data = SMB_MALLOC_ARRAY(
+                     struct lock_struct, br_lck->num_locks))) {
+               DEBUG(0, ("malloc failed\n"));
+               TALLOC_FREE(br_lck);
+               return NULL;
+       }
+
+       memcpy(br_lck->lock_data, data.dptr, data.dsize);
+
+       if (!fsp->lockdb_clean) {
+               int orig_num_locks = br_lck->num_locks;
+
+               /* This is the first time we've accessed this. */
+               /* Go through and ensure all entries exist - remove any that don't. */
+               /* Makes the lockdb self cleaning at low cost. */
+
+               if (!validate_lock_entries(&br_lck->num_locks,
+                                          &br_lck->lock_data)) {
+                       SAFE_FREE(br_lck->lock_data);
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+
+               /* Ensure invalid locks are cleaned up in the destructor. */
+               if (orig_num_locks != br_lck->num_locks) {
+                       br_lck->modified = True;
+               }
+
+               /* Mark the lockdb as "clean" as seen from this open file. */
+               fsp->lockdb_clean = True;
+       }
+
+       if (DEBUGLEVEL >= 10) {
+               unsigned int i;
+               struct lock_struct *locks = br_lck->lock_data;
+               DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
+                       br_lck->num_locks,
+                       file_id_static_string(&fsp->file_id)));
+               for (i = 0; i < br_lck->num_locks; i++) {
+                       print_lock_struct(i, &locks[i]);
+               }
+       }
+       return br_lck;
+}
+
+struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
+                                       files_struct *fsp)
+{
+       return brl_get_locks_internal(mem_ctx, fsp, False);
+}
+
+struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
+                                       files_struct *fsp)
+{
+       return brl_get_locks_internal(mem_ctx, fsp, True);
+}
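+
+/*
+ * Note on the two entry points (hedged sketch; talloc_tos() here is
+ * an assumed caller context): brl_get_locks() takes the dbwrap
+ * record lock and suits paths that may modify the lock array, while
+ * brl_get_locks_readonly() fetches without locking (unless the db
+ * still needs cleaning for this fsp) and suits query-only paths:
+ *
+ *	br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
+ *	// ... inspect br_lck->lock_data ...
+ *	TALLOC_FREE(br_lck);
+ */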
+
+struct brl_revalidate_state {
+       ssize_t array_size;
+       uint32 num_pids;
+       struct server_id *pids;
+};
+
+/*
+ * Collect PIDs of all processes with pending entries
+ */
+
+static void brl_revalidate_collect(struct file_id id, struct server_id pid,
+                                  enum brl_type lock_type,
+                                  enum brl_flavour lock_flav,
+                                  br_off start, br_off size,
+                                  void *private_data)
+{
+       struct brl_revalidate_state *state =
+               (struct brl_revalidate_state *)private_data;
+
+       if (!IS_PENDING_LOCK(lock_type)) {
+               return;
+       }
+
+       add_to_large_array(state, sizeof(pid), (void *)&pid,
+                          &state->pids, &state->num_pids,
+                          &state->array_size);
+}
+
+/*
+ * qsort callback to sort the processes
+ */
+
+static int compare_procids(const void *p1, const void *p2)
+{
+       const struct server_id *i1 = (const struct server_id *)p1;
+       const struct server_id *i2 = (const struct server_id *)p2;
+
+       if (i1->pid < i2->pid) return -1;
+       if (i1->pid > i2->pid) return 1;
+       return 0;
+}
+
+/*
+ * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
+ * locks so that they retry. Mainly used in the cluster code after a node has
+ * died.
+ *
+ * Done in two steps to avoid double-sends: First we collect all entries in an
+ * array, then qsort that array and only send to non-dupes.
+ */
+
+static void brl_revalidate(struct messaging_context *msg_ctx,
+                          void *private_data,
+                          uint32_t msg_type,
+                          struct server_id server_id,
+                          DATA_BLOB *data)
+{
+       struct brl_revalidate_state *state;
+       uint32 i;
+       struct server_id last_pid;
+
+       if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
+               DEBUG(0, ("talloc failed\n"));
+               return;
+       }
+
+       brl_forall(brl_revalidate_collect, state);
+
+       if (state->array_size == -1) {
+               DEBUG(0, ("talloc failed\n"));
+               goto done;
+       }
+
+       if (state->num_pids == 0) {
+               goto done;
+       }
+
+       qsort(state->pids, state->num_pids, sizeof(state->pids[0]),
+             compare_procids);
+
+       ZERO_STRUCT(last_pid);
+
+       for (i=0; i<state->num_pids; i++) {
+               if (procid_equal(&last_pid, &state->pids[i])) {
+                       /*
+                        * We've seen that one already
+                        */
+                       continue;
+               }
+
+               messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
+                              &data_blob_null);
+               last_pid = state->pids[i];
+       }
+
+ done:
+       TALLOC_FREE(state);
+       return;
+}
+
+void brl_register_msgs(struct messaging_context *msg_ctx)
 {
-       if (!tdb) return 0;
-       return tdb_traverse(tdb, traverse_fn, (BRLOCK_FN_CAST())fn);
+       messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
+                          brl_revalidate);
 }
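+
+/*
+ * Expected wiring (sketch; smbd_messaging_context() is an assumed
+ * accessor): some smbd startup path registers the handler once the
+ * messaging context exists, e.g.
+ *
+ *	brl_register_msgs(smbd_messaging_context());
+ *
+ * after which MSG_SMB_BRL_VALIDATE (sent by the cluster code when a
+ * node dies) makes every smbd retry its pending byte range locks.
+ */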