s3-talloc Change TALLOC_P() to talloc()
[nivanova/samba-autobuild/.git] / source3 / locking / brlock.c
index 07e4b4c16b763817b36ff08d2aa8232e75bfbc8e..9a9fd157892f95716e05b30d6320acccebccc0aa 100644 (file)
@@ -5,20 +5,19 @@
 
    Copyright (C) Andrew Tridgell 1992-2000
    Copyright (C) Jeremy Allison 1992-2000
-   
+
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
-   
+
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
-   
+
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 /* This module implements a tdb based byte range locking service,
    used. This allows us to provide the same semantics as NT */
 
 #include "includes.h"
+#include "system/filesys.h"
+#include "locking/proto.h"
+#include "smbd/globals.h"
+#include "dbwrap.h"
+#include "serverid.h"
+#include "messages.h"
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_LOCKING
@@ -34,7 +39,7 @@
 
 /* The open brlock.tdb database. */
 
-static TDB_CONTEXT *tdb;
+static struct db_context *brlock_db;
 
 /****************************************************************************
  Debug info at level 10 for lock struct.
@@ -42,12 +47,12 @@ static TDB_CONTEXT *tdb;
 
 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
 {
-       DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %u, ",
+       DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
                        i,
-                       (unsigned int)pls->context.smbpid,
+                       (unsigned long long)pls->context.smblctx,
                        (unsigned int)pls->context.tid,
-                       (unsigned int)procid_to_pid(&pls->context.pid) ));
-       
+                       procid_str(talloc_tos(), &pls->context.pid) ));
+
        DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
                (double)pls->start,
                (double)pls->size,
@@ -60,11 +65,11 @@ static void print_lock_struct(unsigned int i, struct lock_struct *pls)
  See if two locking contexts are equal.
 ****************************************************************************/
 
-BOOL brl_same_context(const struct lock_context *ctx1, 
+bool brl_same_context(const struct lock_context *ctx1, 
                             const struct lock_context *ctx2)
 {
        return (procid_equal(&ctx1->pid, &ctx2->pid) &&
-               (ctx1->smbpid == ctx2->smbpid) &&
+               (ctx1->smblctx == ctx2->smblctx) &&
                (ctx1->tid == ctx2->tid));
 }
 
@@ -72,9 +77,10 @@ BOOL brl_same_context(const struct lock_context *ctx1,
  See if lck1 and lck2 overlap.
 ****************************************************************************/
 
-static BOOL brl_overlap(const struct lock_struct *lck1,
+static bool brl_overlap(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
 {
+       /* XXX Remove for Win7 compatibility. */
        /* this extra check is not redundent - it copes with locks
           that go beyond the end of 64 bit file space */
        if (lck1->size != 0 &&
@@ -94,11 +100,11 @@ static BOOL brl_overlap(const struct lock_struct *lck1,
  See if lock2 can be added when lock1 is in place.
 ****************************************************************************/
 
-static BOOL brl_conflict(const struct lock_struct *lck1, 
+static bool brl_conflict(const struct lock_struct *lck1, 
                         const struct lock_struct *lck2)
 {
        /* Ignore PENDING locks. */
-       if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;
 
        /* Read locks never conflict. */
@@ -106,8 +112,11 @@ static BOOL brl_conflict(const struct lock_struct *lck1,
                return False;
        }
 
-       if (brl_same_context(&lck1->context, &lck2->context) &&
-           lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
+       /* A READ lock can stack on top of a WRITE lock if they have the same
+        * context & fnum. */
+       if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
+           brl_same_context(&lck1->context, &lck2->context) &&
+           lck1->fnum == lck2->fnum) {
                return False;
        }
 
@@ -120,7 +129,7 @@ static BOOL brl_conflict(const struct lock_struct *lck1,
  know already match.
 ****************************************************************************/
 
-static BOOL brl_conflict_posix(const struct lock_struct *lck1, 
+static bool brl_conflict_posix(const struct lock_struct *lck1, 
                                const struct lock_struct *lck2)
 {
 #if defined(DEVELOPER)
@@ -129,7 +138,7 @@ static BOOL brl_conflict_posix(const struct lock_struct *lck1,
 #endif
 
        /* Ignore PENDING locks. */
-       if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;
 
        /* Read locks never conflict. */
@@ -148,10 +157,10 @@ static BOOL brl_conflict_posix(const struct lock_struct *lck1,
 } 
 
 #if ZERO_ZERO
-static BOOL brl_conflict1(const struct lock_struct *lck1, 
+static bool brl_conflict1(const struct lock_struct *lck1, 
                         const struct lock_struct *lck2)
 {
-       if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;
 
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
@@ -171,7 +180,7 @@ static BOOL brl_conflict1(const struct lock_struct *lck1,
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }
-           
+
        return True;
 } 
 #endif
@@ -182,9 +191,9 @@ static BOOL brl_conflict1(const struct lock_struct *lck1,
  This is never used in the POSIX lock case.
 ****************************************************************************/
 
-static BOOL brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
+static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
 {
-       if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
+       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;
 
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
@@ -210,13 +219,26 @@ static BOOL brl_conflict_other(const struct lock_struct *lck1, const struct lock
        return brl_overlap(lck1, lck2);
 } 
 
+/****************************************************************************
+ Check if an unlock overlaps a pending lock.
+****************************************************************************/
+
+static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
+{
+       if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
+               return True;
+       if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
+               return True;
+       return False;
+}
+
 /****************************************************************************
  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
  is the same as this one and changes its error code. I wonder if any
  app depends on this ?
 ****************************************************************************/
 
-static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, BOOL blocking_lock)
+NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
 {
        if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
                /* amazing the little things you learn with a test
@@ -246,16 +268,29 @@ static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *loc
  Open up the brlock.tdb database.
 ****************************************************************************/
 
-void brl_init(int read_only)
+void brl_init(bool read_only)
 {
-       if (tdb) {
+       int tdb_flags;
+
+       if (brlock_db) {
                return;
        }
-       tdb = tdb_open_log(lock_path("brlock.tdb"),
-                       lp_open_files_db_hash_size(),
-                       TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST),
-                       read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
-       if (!tdb) {
+
+       tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;
+
+       if (!lp_clustering()) {
+               /*
+                * We can't use the SEQNUM trick to cache brlock
+                * entries in the clustering case because ctdb seqnum
+                * propagation has a delay.
+                */
+               tdb_flags |= TDB_SEQNUM;
+       }
+
+       brlock_db = db_open(NULL, lock_path("brlock.tdb"),
+                           lp_open_files_db_hash_size(), tdb_flags,
+                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
+       if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
                        lock_path("brlock.tdb")));
                return;
@@ -266,12 +301,9 @@ void brl_init(int read_only)
  Close down the brlock.tdb database.
 ****************************************************************************/
 
-void brl_shutdown(int read_only)
+void brl_shutdown(void)
 {
-       if (!tdb) {
-               return;
-       }
-       tdb_close(tdb);
+       TALLOC_FREE(brlock_db);
 }
 
 #if ZERO_ZERO
@@ -296,16 +328,26 @@ static int lock_compare(const struct lock_struct *lck1,
  Lock a range of bytes - Windows lock semantics.
 ****************************************************************************/
 
-static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
-                       const struct lock_struct *plock, BOOL blocking_lock)
+NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
+    struct lock_struct *plock, bool blocking_lock)
 {
        unsigned int i;
        files_struct *fsp = br_lck->fsp;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
+       struct lock_struct *locks = br_lck->lock_data;
+       NTSTATUS status;
+
+       SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
+
+       if ((plock->start + plock->size - 1 < plock->start) &&
+                       plock->size != 0) {
+               return NT_STATUS_INVALID_LOCK_RANGE;
+       }
 
        for (i=0; i < br_lck->num_locks; i++) {
                /* Do any Windows or POSIX locks conflict ? */
                if (brl_conflict(&locks[i], plock)) {
+                       /* Remember who blocked us. */
+                       plock->context.smblctx = locks[i].context.smblctx;
                        return brl_lock_failed(fsp,plock,blocking_lock);
                }
 #if ZERO_ZERO
@@ -316,11 +358,15 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
 #endif
        }
 
+       if (!IS_PENDING_LOCK(plock->lock_type)) {
+               contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+       }
+
        /* We can get the Windows lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */
 
-       if ((plock->lock_type != PENDING_LOCK) && lp_posix_locking(SNUM(fsp->conn))) {
+       if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
                int errno_ret;
                if (!set_posix_lock_windows_flavour(fsp,
                                plock->start,
@@ -330,10 +376,16 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
                                locks,
                                br_lck->num_locks,
                                &errno_ret)) {
+
+                       /* We don't know who blocked us. */
+                       plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
+
                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
-                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                               status = NT_STATUS_FILE_LOCK_CONFLICT;
+                               goto fail;
                        } else {
-                               return map_nt_error_from_unix(errno);
+                               status = map_nt_error_from_unix(errno);
+                               goto fail;
                        }
                }
        }
@@ -341,27 +393,32 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
        /* no conflicts - add it to the list of locks */
        locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
        if (!locks) {
-               return NT_STATUS_NO_MEMORY;
+               status = NT_STATUS_NO_MEMORY;
+               goto fail;
        }
 
        memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
        br_lck->num_locks += 1;
-       br_lck->lock_data = (void *)locks;
+       br_lck->lock_data = locks;
        br_lck->modified = True;
 
        return NT_STATUS_OK;
+ fail:
+       if (!IS_PENDING_LOCK(plock->lock_type)) {
+               contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+       }
+       return status;
 }
 
 /****************************************************************************
  Cope with POSIX range splits and merges.
 ****************************************************************************/
 
-static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,              /* Output array. */
-                                               const struct lock_struct *ex,           /* existing lock. */
-                                               const struct lock_struct *plock,        /* proposed lock. */
-                                               BOOL *lock_was_added)
+static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,      /* Output array. */
+                                               struct lock_struct *ex,         /* existing lock. */
+                                               struct lock_struct *plock)      /* proposed lock. */
 {
-       BOOL lock_types_differ = (ex->lock_type != plock->lock_type);
+       bool lock_types_differ = (ex->lock_type != plock->lock_type);
 
        /* We can't merge non-conflicting locks on different context - ignore fnum. */
 
@@ -376,21 +433,23 @@ static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,         /* Ou
        /* Did we overlap ? */
 
 /*********************************************
-                                             +---------+
-                                             | ex      |
-                                             +---------+
-                              +-------+
-                              | plock |
-                              +-------+
+                                        +---------+
+                                        | ex      |
+                                        +---------+
+                         +-------+
+                         | plock |
+                         +-------+
 OR....
-             +---------+
-             |  ex     |
-             +---------+
+        +---------+
+        |  ex     |
+        +---------+
 **********************************************/
 
        if ( (ex->start > (plock->start + plock->size)) ||
-                       (plock->start > (ex->start + ex->size))) {
+               (plock->start > (ex->start + ex->size))) {
+
                /* No overlap with this lock - copy existing. */
+
                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }
@@ -402,26 +461,109 @@ OR....
         +---------------------------+
         |       plock               | -> replace with plock.
         +---------------------------+
+OR
+             +---------------+
+             |       ex      |
+             +---------------+
+        +---------------------------+
+        |       plock               | -> replace with plock.
+        +---------------------------+
+
 **********************************************/
 
        if ( (ex->start >= plock->start) &&
-                       (ex->start + ex->size <= plock->start + plock->size) ) {
-               memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
-               *lock_was_added = True;
-               return 1;
+               (ex->start + ex->size <= plock->start + plock->size) ) {
+
+               /* Replace - discard existing lock. */
+
+               return 0;
        }
 
 /*********************************************
+Adjacent after.
+                        +-------+
+                        |  ex   |
+                        +-------+
+        +---------------+
+        |   plock       |
+        +---------------+
+
+BECOMES....
+        +---------------+-------+
+        |   plock       | ex    | - different lock types.
+        +---------------+-------+
+OR.... (merge)
+        +-----------------------+
+        |   plock               | - same lock type.
+        +-----------------------+
+**********************************************/
+
+       if (plock->start + plock->size == ex->start) {
+
+               /* If the lock types are the same, we merge, if different, we
+                  add the remainder of the old lock. */
+
+               if (lock_types_differ) {
+                       /* Add existing. */
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       return 1;
+               } else {
+                       /* Merge - adjust incoming lock as we may have more
+                        * merging to come. */
+                       plock->size += ex->size;
+                       return 0;
+               }
+       }
+
+/*********************************************
+Adjacent before.
+        +-------+
+        |  ex   |
+        +-------+
+                +---------------+
+                |   plock       |
+                +---------------+
+BECOMES....
+        +-------+---------------+
+        | ex    |   plock       | - different lock types
+        +-------+---------------+
+
+OR.... (merge)
+        +-----------------------+
+        |      plock            | - same lock type.
+        +-----------------------+
+
+**********************************************/
+
+       if (ex->start + ex->size == plock->start) {
+
+               /* If the lock types are the same, we merge, if different, we
+                  add the existing lock. */
+
+               if (lock_types_differ) {
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+                       return 1;
+               } else {
+                       /* Merge - adjust incoming lock as we may have more
+                        * merging to come. */
+                       plock->start = ex->start;
+                       plock->size += ex->size;
+                       return 0;
+               }
+       }
+
+/*********************************************
+Overlap after.
         +-----------------------+
         |          ex           |
         +-----------------------+
         +---------------+
         |   plock       |
         +---------------+
-OR....
-                        +-------+
-                        |  ex   |
-                        +-------+
+OR
+               +----------------+
+               |       ex       |
+               +----------------+
         +---------------+
         |   plock       |
         +---------------+
@@ -432,60 +574,57 @@ BECOMES....
         +---------------+-------+
 OR.... (merge)
         +-----------------------+
-        |   ex                  | - same lock type.
+        |   plock               | - same lock type.
         +-----------------------+
 **********************************************/
 
        if ( (ex->start >= plock->start) &&
-                               (ex->start <= plock->start + plock->size) &&
-                               (ex->start + ex->size > plock->start + plock->size) ) {
-
-               *lock_was_added = True;
+               (ex->start <= plock->start + plock->size) &&
+               (ex->start + ex->size > plock->start + plock->size) ) {
 
                /* If the lock types are the same, we merge, if different, we
-                  add the new lock before the old. */
+                  add the remainder of the old lock. */
 
                if (lock_types_differ) {
-                       /* Add new. */
-                       memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
-                       memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
+                       /* Add remaining existing. */
+                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing start and size. */
-                       lck_arr[1].start = plock->start + plock->size;
-                       lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
-                       return 2;
-               } else {
-                       /* Merge. */
-                       memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
-                       /* Set new start and size. */
-                       lck_arr[0].start = plock->start;
-                       lck_arr[0].size = (ex->start + ex->size) - plock->start;
+                       lck_arr[0].start = plock->start + plock->size;
+                       lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 1;
+               } else {
+                       /* Merge - adjust incoming lock as we may have more
+                        * merging to come. */
+                       plock->size += (ex->start + ex->size) - (plock->start + plock->size);
+                       return 0;
                }
        }
 
 /*********************************************
-   +-----------------------+
-   |  ex                   |
-   +-----------------------+
-           +---------------+
-           |   plock       |
-           +---------------+
-OR....
-   +-------+        
-   |  ex   |
-   +-------+
-           +---------------+
-           |   plock       |
-           +---------------+
+Overlap before.
+        +-----------------------+
+        |  ex                   |
+        +-----------------------+
+                +---------------+
+                |   plock       |
+                +---------------+
+OR
+        +-------------+
+        |  ex         |
+        +-------------+
+                +---------------+
+                |   plock       |
+                +---------------+
+
 BECOMES....
-   +-------+---------------+
-   | ex    |   plock       | - different lock types
-   +-------+---------------+
+        +-------+---------------+
+        | ex    |   plock       | - different lock types
+        +-------+---------------+
 
 OR.... (merge)
-   +-----------------------+
-   | ex                    | - same lock type.
-   +-----------------------+
+        +-----------------------+
+        |      plock            | - same lock type.
+        +-----------------------+
 
 **********************************************/
 
@@ -493,27 +632,25 @@ OR.... (merge)
                        (ex->start + ex->size >= plock->start) &&
                        (ex->start + ex->size <= plock->start + plock->size) ) {
 
-               *lock_was_added = True;
-
                /* If the lock types are the same, we merge, if different, we
-                  add the new lock after the old. */
+                  add the truncated old lock. */
 
                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
-                       memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
                        /* Adjust existing size. */
                        lck_arr[0].size = plock->start - ex->start;
-                       return 2;
-               } else {
-                       /* Merge. */
-                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
-                       /* Adjust existing size. */
-                       lck_arr[0].size = (plock->start + plock->size) - ex->start;
                        return 1;
+               } else {
+                       /* Merge - adjust incoming lock as we may have more
+                        * merging to come. MUST ADJUST plock SIZE FIRST ! */
+                       plock->size += (plock->start - ex->start);
+                       plock->start = ex->start;
+                       return 0;
                }
        }
 
 /*********************************************
+Complete overlap.
         +---------------------------+
         |        ex                 |
         +---------------------------+
@@ -526,39 +663,38 @@ BECOMES.....
         +-------+---------+---------+
 OR
         +---------------------------+
-        |        ex                 | - same lock type.
+        |        plock              | - same lock type.
         +---------------------------+
 **********************************************/
 
        if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
-               *lock_was_added = True;
 
                if (lock_types_differ) {
 
                        /* We have to split ex into two locks here. */
 
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
-                       memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
-                       memcpy(&lck_arr[2], ex, sizeof(struct lock_struct));
+                       memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
 
                        /* Adjust first existing size. */
                        lck_arr[0].size = plock->start - ex->start;
 
                        /* Adjust second existing start and size. */
-                       lck_arr[2].start = plock->start + plock->size;
-                       lck_arr[2].size = (ex->start + ex->size) - (plock->start + plock->size);
-                       return 3;
+                       lck_arr[1].start = plock->start + plock->size;
+                       lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
+                       return 2;
                } else {
-                       /* Just eat plock. */
-                       memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
-                       return 1;
+                       /* Just eat the existing locks, merge them into plock. */
+                       plock->start = ex->start;
+                       plock->size = ex->size;
+                       return 0;
                }
        }
 
        /* Never get here. */
-       smb_panic("brlock_posix_split_merge\n");
+       smb_panic("brlock_posix_split_merge");
        /* Notreached. */
-       abort();
+
        /* Keep some compilers happy. */
        return 0;
 }
@@ -568,13 +704,16 @@ OR
  We must cope with range splits and merges.
 ****************************************************************************/
 
-static NTSTATUS brl_lock_posix(struct byte_range_lock *br_lck,
-                       const struct lock_struct *plock)
+static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
+                              struct byte_range_lock *br_lck,
+                              struct lock_struct *plock)
 {
-       unsigned int i, count;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
+       unsigned int i, count, posix_count;
+       struct lock_struct *locks = br_lck->lock_data;
        struct lock_struct *tp;
-       BOOL lock_was_added = False;
+       bool signal_pending_read = False;
+       bool break_oplocks = false;
+       NTSTATUS status;
 
        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
@@ -582,8 +721,7 @@ static NTSTATUS brl_lock_posix(struct byte_range_lock *br_lck,
        }
 
        /* Don't allow 64-bit lock wrap. */
-       if (plock->start + plock->size < plock->start ||
-                       plock->start + plock->size < plock->size) {
+       if (plock->start + plock->size - 1 < plock->start) {
                return NT_STATUS_INVALID_PARAMETER;
        }
 
@@ -595,43 +733,87 @@ static NTSTATUS brl_lock_posix(struct byte_range_lock *br_lck,
        if (!tp) {
                return NT_STATUS_NO_MEMORY;
        }
-       
-       count = 0;
+
+       count = posix_count = 0;
+
        for (i=0; i < br_lck->num_locks; i++) {
-               if (locks[i].lock_flav == WINDOWS_LOCK) {
+               struct lock_struct *curr_lock = &locks[i];
+
+               /* If we have a pending read lock, a lock downgrade should
+                  trigger a lock re-evaluation. */
+               if (curr_lock->lock_type == PENDING_READ_LOCK &&
+                               brl_pending_overlap(plock, curr_lock)) {
+                       signal_pending_read = True;
+               }
+
+               if (curr_lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
-                       if (brl_conflict(&locks[i], plock)) {
+                       if (brl_conflict(curr_lock, plock)) {
                                /* No games with error messages. */
                                SAFE_FREE(tp);
+                               /* Remember who blocked us. */
+                               plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }
                        /* Just copy the Windows lock into the new array. */
-                       memcpy(&tp[count], &locks[i], sizeof(struct lock_struct));
+                       memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
                        count++;
                } else {
+                       unsigned int tmp_count = 0;
+
                        /* POSIX conflict semantics are different. */
-                       if (brl_conflict_posix(&locks[i], plock)) {
+                       if (brl_conflict_posix(curr_lock, plock)) {
                                /* Can't block ourselves with POSIX locks. */
                                /* No games with error messages. */
                                SAFE_FREE(tp);
+                               /* Remember who blocked us. */
+                               plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }
 
                        /* Work out overlaps. */
-                       count += brlock_posix_split_merge(&tp[count], &locks[i], plock, &lock_was_added);
+                       tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
+                       posix_count += tmp_count;
+                       count += tmp_count;
+               }
+       }
+
+       /*
+        * Break oplocks while we hold a brl. Since lock() and unlock() calls
+        * are not symmetric with POSIX semantics, we cannot guarantee our
+        * contend_level2_oplocks_begin/end calls will be acquired and
+        * released one-for-one as with Windows semantics. Therefore we only
+        * call contend_level2_oplocks_begin if this is the first POSIX brl on
+        * the file.
+        */
+       break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
+                        posix_count == 0);
+       if (break_oplocks) {
+               contend_level2_oplocks_begin(br_lck->fsp,
+                                            LEVEL2_CONTEND_POSIX_BRL);
+       }
+
+       /* Try and add the lock in order, sorted by lock start. */
+       for (i=0; i < count; i++) {
+               struct lock_struct *curr_lock = &tp[i];
+
+               if (curr_lock->start <= plock->start) {
+                       continue;
                }
        }
 
-       if (!lock_was_added) {
-               memcpy(&tp[count], plock, sizeof(struct lock_struct));
-               count++;
+       if (i < count) {
+               memmove(&tp[i+1], &tp[i],
+                       (count - i)*sizeof(struct lock_struct));
        }
+       memcpy(&tp[i], plock, sizeof(struct lock_struct));
+       count++;
 
        /* We can get the POSIX lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */
 
-       if ((plock->lock_type != PENDING_LOCK) && lp_posix_locking(SNUM(br_lck->fsp->conn))) {
+       if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
                int errno_ret;
 
                /* The lower layer just needs to attempt to
@@ -643,40 +825,97 @@ static NTSTATUS brl_lock_posix(struct byte_range_lock *br_lck,
                                plock->size,
                                plock->lock_type,
                                &errno_ret)) {
+
+                       /* We don't know who blocked us. */
+                       plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
+
                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                SAFE_FREE(tp);
-                               return NT_STATUS_FILE_LOCK_CONFLICT;
+                               status = NT_STATUS_FILE_LOCK_CONFLICT;
+                               goto fail;
                        } else {
                                SAFE_FREE(tp);
-                               return map_nt_error_from_unix(errno);
+                               status = map_nt_error_from_unix(errno);
+                               goto fail;
                        }
                }
        }
 
-       /* Realloc so we don't leak entries per lock call. */
-       tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
-       if (!tp) {
-               return NT_STATUS_NO_MEMORY;
+       /* If we didn't use all the allocated size,
+        * realloc so we don't leak entries per lock call. */
+       if (count < br_lck->num_locks + 2) {
+               tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
+               if (!tp) {
+                       status = NT_STATUS_NO_MEMORY;
+                       goto fail;
+               }
        }
+
        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
-       br_lck->lock_data = (void *)tp;
+       br_lck->lock_data = tp;
+       locks = tp;
        br_lck->modified = True;
+
+       /* A successful downgrade from write to read lock can trigger a lock
+          re-evaluation where waiting readers can now proceed. */
+
+       if (signal_pending_read) {
+               /* Send unlock messages to any pending read waiters that overlap. */
+               for (i=0; i < br_lck->num_locks; i++) {
+                       struct lock_struct *pend_lock = &locks[i];
+
+                       /* Ignore non-pending locks. */
+                       if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
+                               continue;
+                       }
+
+                       if (pend_lock->lock_type == PENDING_READ_LOCK &&
+                                       brl_pending_overlap(plock, pend_lock)) {
+                               DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
+                                       procid_str_static(&pend_lock->context.pid )));
+
+                               messaging_send(msg_ctx, pend_lock->context.pid,
+                                              MSG_SMB_UNLOCK, &data_blob_null);
+                       }
+               }
+       }
+
        return NT_STATUS_OK;
+ fail:
+       if (break_oplocks) {
+               contend_level2_oplocks_end(br_lck->fsp,
+                                          LEVEL2_CONTEND_POSIX_BRL);
+       }
+       return status;
+}
+
+NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
+                                      struct byte_range_lock *br_lck,
+                                      struct lock_struct *plock,
+                                      bool blocking_lock,
+                                      struct blocking_lock_record *blr)
+{
+       VFS_FIND(brl_lock_windows);
+       return handle->fns->brl_lock_windows(handle, br_lck, plock,
+                                            blocking_lock, blr);
 }
 
 /****************************************************************************
  Lock a range of bytes.
 ****************************************************************************/
 
-NTSTATUS brl_lock(struct byte_range_lock *br_lck,
-               uint32 smbpid,
-               struct process_id pid,
+NTSTATUS brl_lock(struct messaging_context *msg_ctx,
+               struct byte_range_lock *br_lck,
+               uint64_t smblctx,
+               struct server_id pid,
                br_off start,
                br_off size, 
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
-               BOOL blocking_lock)
+               bool blocking_lock,
+               uint64_t *psmblctx,
+               struct blocking_lock_record *blr)
 {
        NTSTATUS ret;
        struct lock_struct lock;
@@ -687,7 +926,12 @@ NTSTATUS brl_lock(struct byte_range_lock *br_lck,
        }
 #endif
 
-       lock.context.smbpid = smbpid;
+#ifdef DEVELOPER
+       /* Quieten valgrind on test. */
+       memset(&lock, '\0', sizeof(lock));
+#endif
+
+       lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
@@ -697,42 +941,38 @@ NTSTATUS brl_lock(struct byte_range_lock *br_lck,
        lock.lock_flav = lock_flav;
 
        if (lock_flav == WINDOWS_LOCK) {
-               ret = brl_lock_windows(br_lck, &lock, blocking_lock);
+               ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
+                   &lock, blocking_lock, blr);
        } else {
-               ret = brl_lock_posix(br_lck, &lock);
+               ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }
 
 #if ZERO_ZERO
        /* sort the lock list */
-       qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
+       TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
 #endif
 
+       /* If we're returning an error, return who blocked us. */
+       if (!NT_STATUS_IS_OK(ret) && psmblctx) {
+               *psmblctx = lock.context.smblctx;
+       }
        return ret;
 }
 
-/****************************************************************************
- Check if an unlock overlaps a pending lock.
-****************************************************************************/
-
-static BOOL brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
-{
-       if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
-               return True;
-       if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
-               return True;
-       return False;
-}
-
 /****************************************************************************
  Unlock a range of bytes - Windows semantics.
 ****************************************************************************/
 
-static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock_struct *plock)
+bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
+                              struct byte_range_lock *br_lck,
+                              const struct lock_struct *plock)
 {
        unsigned int i, j;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
+       struct lock_struct *locks = br_lck->lock_data;
        enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
 
+       SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
+
 #if ZERO_ZERO
        /* Delete write locks by preference... The lock list
           is sorted in the zero zero case. */
@@ -762,6 +1002,10 @@ static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
 
+               if (IS_PENDING_LOCK(lock->lock_type)) {
+                       continue;
+               }
+
                /* Only remove our own locks that match in start, size, and flavour. */
                if (brl_same_context(&lock->context, &plock->context) &&
                                        lock->fnum == plock->fnum &&
@@ -792,7 +1036,7 @@ static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock
        br_lck->modified = True;
 
        /* Unlock the underlying POSIX regions. */
-       if(lp_posix_locking(br_lck->fsp->conn->cnum)) {
+       if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_windows_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
@@ -807,7 +1051,7 @@ static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock
                struct lock_struct *pend_lock = &locks[j];
 
                /* Ignore non-pending locks. */
-               if (pend_lock->lock_type != PENDING_LOCK) {
+               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }
 
@@ -816,14 +1060,12 @@ static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));
 
-                       become_root();
-                       message_send_pid(pend_lock->context.pid,
-                                       MSG_SMB_UNLOCK,
-                                       NULL, 0, True);
-                       unbecome_root();
+                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                      MSG_SMB_UNLOCK, &data_blob_null);
                }
        }
 
+       contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        return True;
 }
 
@@ -831,12 +1073,14 @@ static BOOL brl_unlock_windows(struct byte_range_lock *br_lck, const struct lock
  Unlock a range of bytes - POSIX semantics.
 ****************************************************************************/
 
-static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_struct *plock)
+static bool brl_unlock_posix(struct messaging_context *msg_ctx,
+                            struct byte_range_lock *br_lck,
+                            struct lock_struct *plock)
 {
        unsigned int i, j, count;
        struct lock_struct *tp;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
-       BOOL overlap_found = False;
+       struct lock_struct *locks = br_lck->lock_data;
+       bool overlap_found = False;
 
        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
@@ -863,76 +1107,60 @@ static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_s
        count = 0;
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
-               struct lock_struct tmp_lock[3];
-               BOOL lock_was_added = False;
                unsigned int tmp_count;
 
                /* Only remove our own locks - ignore fnum. */
-               if (lock->lock_type == PENDING_LOCK ||
+               if (IS_PENDING_LOCK(lock->lock_type) ||
                                !brl_same_context(&lock->context, &plock->context)) {
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }
 
-               /* Work out overlaps. */
-               tmp_count = brlock_posix_split_merge(&tmp_lock[0], &locks[i], plock, &lock_was_added);
-
-               if (tmp_count == 1) {
-                       /* Ether the locks didn't overlap, or the unlock completely
-                          overlapped this lock. If it didn't overlap, then there's
-                          no change in the locks. */
-                       if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
-                               SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
-                               /* No change in this lock. */
-                               memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
-                               count++;
-                       } else {
-                               SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
-                               overlap_found = True;
-                       }
-                       continue;
-               } else if (tmp_count == 2) {
-                       /* The unlock overlapped an existing lock. Copy the truncated
-                          lock into the lock array. */
-                       if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
-                               SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
-                               SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
-                               memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
-                               if (tmp_lock[0].size != locks[i].size) {
-                                       overlap_found = True;
-                               }
-                       } else {
-                               SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
-                               SMB_ASSERT(tmp_lock[1].lock_type == locks[i].lock_type);
-                               memcpy(&tp[count], &tmp_lock[1], sizeof(struct lock_struct));
-                               if (tmp_lock[1].start != locks[i].start) {
-                                       overlap_found = True;
-                               }
+               if (lock->lock_flav == WINDOWS_LOCK) {
+                       /* Do any Windows flavour locks conflict ? */
+                       if (brl_conflict(lock, plock)) {
+                               SAFE_FREE(tp);
+                               return false;
                        }
+                       /* Just copy the Windows lock into the new array. */
+                       memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
-               } else {
-                       /* tmp_count == 3 - (we split a lock range in two). */
-                       SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
-                       SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
-                       SMB_ASSERT(tmp_lock[2].lock_type == locks[i].lock_type);
+               }
+
+               /* Work out overlaps. */
+               tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
+
+               if (tmp_count == 0) {
+                       /* plock overlapped the existing lock completely,
+                          or replaced it. Don't copy the existing lock. */
+                       overlap_found = true;
+               } else if (tmp_count == 1) {
+                       /* Either no overlap, (simple copy of existing lock) or
+                        * an overlap of an existing lock. */
+                       /* If the lock changed size, we had an overlap. */
+                       if (tp[count].size != lock->size) {
+                               overlap_found = true;
+                       }
+                       count += tmp_count;
+               } else if (tmp_count == 2) {
+                       /* We split a lock range in two. */
+                       overlap_found = true;
+                       count += tmp_count;
 
-                       memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
-                       count++;
-                       memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
-                       count++;
-                       overlap_found = True;
                        /* Optimisation... */
                        /* We know we're finished here as we can't overlap any
                           more POSIX locks. Copy the rest of the lock array. */
+
                        if (i < br_lck->num_locks - 1) {
-                               memcpy(&tp[count], &locks[i+1], 
+                               memcpy(&tp[count], &locks[i+1],
                                        sizeof(*locks)*((br_lck->num_locks-1) - i));
                                count += ((br_lck->num_locks-1) - i);
                        }
                        break;
                }
+
        }
 
        if (!overlap_found) {
@@ -943,7 +1171,7 @@ static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_s
        }
 
        /* Unlock any POSIX regions. */
-       if(lp_posix_locking(br_lck->fsp->conn->cnum)) {
+       if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_posix_flavour(br_lck->fsp,
                                                plock->start,
                                                plock->size,
@@ -965,9 +1193,13 @@ static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_s
                tp = NULL;
        }
 
+       contend_level2_oplocks_end(br_lck->fsp,
+                                  LEVEL2_CONTEND_POSIX_BRL);
+
        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
-       locks = br_lck->lock_data = (void *)tp;
+       locks = tp;
+       br_lck->lock_data = tp;
        br_lck->modified = True;
 
        /* Send unlock messages to any pending waiters that overlap. */
@@ -976,7 +1208,7 @@ static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_s
                struct lock_struct *pend_lock = &locks[j];
 
                /* Ignore non-pending locks. */
-               if (pend_lock->lock_type != PENDING_LOCK) {
+               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }
 
@@ -985,31 +1217,38 @@ static BOOL brl_unlock_posix(struct byte_range_lock *br_lck, const struct lock_s
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));
 
-                       become_root();
-                       message_send_pid(pend_lock->context.pid,
-                                       MSG_SMB_UNLOCK,
-                                       NULL, 0, True);
-                       unbecome_root();
+                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                      MSG_SMB_UNLOCK, &data_blob_null);
                }
        }
 
        return True;
 }
 
+bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
+                                    struct messaging_context *msg_ctx,
+                                    struct byte_range_lock *br_lck,
+                                    const struct lock_struct *plock)
+{
+       VFS_FIND(brl_unlock_windows);
+       return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
+}
+
 /****************************************************************************
  Unlock a range of bytes.
 ****************************************************************************/
 
-BOOL brl_unlock(struct byte_range_lock *br_lck,
-               uint32 smbpid,
-               struct process_id pid,
+bool brl_unlock(struct messaging_context *msg_ctx,
+               struct byte_range_lock *br_lck,
+               uint64_t smblctx,
+               struct server_id pid,
                br_off start,
                br_off size,
                enum brl_flavour lock_flav)
 {
        struct lock_struct lock;
 
-       lock.context.smbpid = smbpid;
+       lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
@@ -1019,9 +1258,10 @@ BOOL brl_unlock(struct byte_range_lock *br_lck,
        lock.lock_flav = lock_flav;
 
        if (lock_flav == WINDOWS_LOCK) {
-               return brl_unlock_windows(br_lck, &lock);
+               return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
+                   br_lck, &lock);
        } else {
-               return brl_unlock_posix(br_lck, &lock);
+               return brl_unlock_posix(msg_ctx, br_lck, &lock);
        }
 }
 
@@ -1030,21 +1270,21 @@ BOOL brl_unlock(struct byte_range_lock *br_lck,
  Returns True if the region required is currently unlocked, False if locked.
 ****************************************************************************/
 
-BOOL brl_locktest(struct byte_range_lock *br_lck,
-               uint32 smbpid,
-               struct process_id pid,
+bool brl_locktest(struct byte_range_lock *br_lck,
+               uint64_t smblctx,
+               struct server_id pid,
                br_off start,
                br_off size, 
                enum brl_type lock_type,
                enum brl_flavour lock_flav)
 {
-       BOOL ret = True;
+       bool ret = True;
        unsigned int i;
        struct lock_struct lock;
-       const struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
+       const struct lock_struct *locks = br_lck->lock_data;
        files_struct *fsp = br_lck->fsp;
 
-       lock.context.smbpid = smbpid;
+       lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
@@ -1069,12 +1309,12 @@ BOOL brl_locktest(struct byte_range_lock *br_lck,
         * This only conflicts with Windows locks, not POSIX locks.
         */
 
-       if(lp_posix_locking(fsp->conn->cnum) && (lock_flav == WINDOWS_LOCK)) {
+       if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
                ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
 
                DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
                        (double)start, (double)size, ret ? "locked" : "unlocked",
-                       fsp->fnum, fsp->fsp_name ));
+                       fsp->fnum, fsp_str_dbg(fsp)));
 
                /* We need to return the inverse of is_posix_locked. */
                ret = !ret;
@@ -1089,8 +1329,8 @@ BOOL brl_locktest(struct byte_range_lock *br_lck,
 ****************************************************************************/
 
 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
-               uint32 *psmbpid,
-               struct process_id pid,
+               uint64_t *psmblctx,
+               struct server_id pid,
                br_off *pstart,
                br_off *psize, 
                enum brl_type *plock_type,
@@ -1098,10 +1338,10 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
 {
        unsigned int i;
        struct lock_struct lock;
-       const struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
+       const struct lock_struct *locks = br_lck->lock_data;
        files_struct *fsp = br_lck->fsp;
 
-       lock.context.smbpid = *psmbpid;
+       lock.context.smblctx = *psmblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = *pstart;
@@ -1113,7 +1353,7 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
        /* Make sure existing locks don't conflict */
        for (i=0; i < br_lck->num_locks; i++) {
                const struct lock_struct *exlock = &locks[i];
-               BOOL conflict = False;
+               bool conflict = False;
 
                if (exlock->lock_flav == WINDOWS_LOCK) {
                        conflict = brl_conflict(exlock, &lock);
@@ -1122,7 +1362,7 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
                }
 
                if (conflict) {
-                       *psmbpid = exlock->context.smbpid;
+                       *psmblctx = exlock->context.smblctx;
                        *pstart = exlock->start;
                        *psize = exlock->size;
                        *plock_type = exlock->lock_type;
@@ -1135,16 +1375,16 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
         * see if there is a POSIX lock from a UNIX or NFS process.
         */
 
-       if(lp_posix_locking(fsp->conn->cnum)) {
-               BOOL ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
+       if(lp_posix_locking(fsp->conn->params)) {
+               bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
 
                DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
                        (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
-                       fsp->fnum, fsp->fsp_name ));
+                       fsp->fnum, fsp_str_dbg(fsp)));
 
                if (ret) {
-                       /* Hmmm. No clue what to set smbpid to - use -1. */
-                       *psmbpid = 0xFFFF;
+                       /* Hmmm. No clue what to set smblctx to - use -1. */
+                       *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
                        return NT_STATUS_LOCK_NOT_GRANTED;
                }
         }
@@ -1152,35 +1392,67 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
        return NT_STATUS_OK;
 }
 
+
+bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
+                                    struct byte_range_lock *br_lck,
+                                    struct lock_struct *plock,
+                                    struct blocking_lock_record *blr)
+{
+       VFS_FIND(brl_cancel_windows);
+       return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
+}
+
 /****************************************************************************
  Remove a particular pending lock.
 ****************************************************************************/
-
-BOOL brl_lock_cancel(struct byte_range_lock *br_lck,
-               uint32 smbpid,
-               struct process_id pid,
+bool brl_lock_cancel(struct byte_range_lock *br_lck,
+               uint64_t smblctx,
+               struct server_id pid,
                br_off start,
                br_off size,
-               enum brl_flavour lock_flav)
+               enum brl_flavour lock_flav,
+               struct blocking_lock_record *blr)
+{
+       bool ret;
+       struct lock_struct lock;
+
+       lock.context.smblctx = smblctx;
+       lock.context.pid = pid;
+       lock.context.tid = br_lck->fsp->conn->cnum;
+       lock.start = start;
+       lock.size = size;
+       lock.fnum = br_lck->fsp->fnum;
+       lock.lock_flav = lock_flav;
+       /* lock.lock_type doesn't matter */
+
+       if (lock_flav == WINDOWS_LOCK) {
+               ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
+                   &lock, blr);
+       } else {
+               ret = brl_lock_cancel_default(br_lck, &lock);
+       }
+
+       return ret;
+}
+
+bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
+               struct lock_struct *plock)
 {
        unsigned int i;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
-       struct lock_context context;
+       struct lock_struct *locks = br_lck->lock_data;
 
-       context.smbpid = smbpid;
-       context.pid = pid;
-       context.tid = br_lck->fsp->conn->cnum;
+       SMB_ASSERT(plock);
 
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
 
                /* For pending locks we *always* care about the fnum. */
-               if (brl_same_context(&lock->context, &context) &&
-                               lock->fnum == br_lck->fsp->fnum &&
-                               lock->lock_type == PENDING_LOCK &&
-                               lock->lock_flav == lock_flav &&
-                               lock->start == start &&
-                               lock->size == size) {
+               if (brl_same_context(&lock->context, &plock->context) &&
+                               lock->fnum == plock->fnum &&
+                               IS_PENDING_LOCK(lock->lock_type) &&
+                               lock->lock_flav == plock->lock_flav &&
+                               lock->start == plock->start &&
+                               lock->size == plock->size) {
                        break;
                }
        }
@@ -1207,18 +1479,20 @@ BOOL brl_lock_cancel(struct byte_range_lock *br_lck,
  fd and so we should not immediately close the fd.
 ****************************************************************************/
 
-void brl_close_fnum(struct byte_range_lock *br_lck)
+void brl_close_fnum(struct messaging_context *msg_ctx,
+                   struct byte_range_lock *br_lck)
 {
        files_struct *fsp = br_lck->fsp;
        uint16 tid = fsp->conn->cnum;
        int fnum = fsp->fnum;
        unsigned int i, j, dcount=0;
        int num_deleted_windows_locks = 0;
-       struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
-       struct process_id pid = procid_self();
-       BOOL unlock_individually = False;
+       struct lock_struct *locks = br_lck->lock_data;
+       struct server_id pid = sconn_server_id(fsp->conn->sconn);
+       bool unlock_individually = False;
+       bool posix_level2_contention_ended = false;
 
-       if(lp_posix_locking(fsp->conn->cnum)) {
+       if(lp_posix_locking(fsp->conn->params)) {
 
                /* Check if there are any Windows locks associated with this dev/ino
                   pair that are not this fnum. If so we need to call unlock on each
@@ -1246,10 +1520,15 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
                        unsigned int num_locks_copy;
 
                        /* Copy the current lock array. */
-                       locks_copy = TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
-                       if (!locks_copy) {
-                               smb_panic("brl_close_fnum: talloc fail.\n");
+                       if (br_lck->num_locks) {
+                               locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
+                               if (!locks_copy) {
+                                       smb_panic("brl_close_fnum: talloc failed");
+                               }
+                       } else {        
+                               locks_copy = NULL;
                        }
+
                        num_locks_copy = br_lck->num_locks;
 
                        for (i=0; i < num_locks_copy; i++) {
@@ -1257,8 +1536,9 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
 
                                if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
                                                (lock->fnum == fnum)) {
-                                       brl_unlock(br_lck,
-                                               lock->context.smbpid,
+                                       brl_unlock(msg_ctx,
+                                               br_lck,
+                                               lock->context.smblctx,
                                                pid,
                                                lock->start,
                                                lock->size,
@@ -1275,14 +1555,23 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
 
        for (i=0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
-               BOOL del_this_lock = False;
+               bool del_this_lock = False;
 
                if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
                        if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
                                del_this_lock = True;
                                num_deleted_windows_locks++;
+                               contend_level2_oplocks_end(br_lck->fsp,
+                                   LEVEL2_CONTEND_WINDOWS_BRL);
                        } else if (lock->lock_flav == POSIX_LOCK) {
                                del_this_lock = True;
+
+                               /* Only end level2 contention once for posix */
+                               if (!posix_level2_contention_ended) {
+                                       posix_level2_contention_ended = true;
+                                       contend_level2_oplocks_end(br_lck->fsp,
+                                           LEVEL2_CONTEND_POSIX_BRL);
+                               }
                        }
                }
 
@@ -1292,7 +1581,7 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
                                struct lock_struct *pend_lock = &locks[j];
 
                                /* Ignore our own or non-pending locks. */
-                               if (pend_lock->lock_type != PENDING_LOCK) {
+                               if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                                        continue;
                                }
 
@@ -1306,11 +1595,8 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
 
                                /* We could send specific lock info here... */
                                if (brl_pending_overlap(lock, pend_lock)) {
-                                       become_root();
-                                       message_send_pid(pend_lock->context.pid,
-                                                       MSG_SMB_UNLOCK,
-                                                       NULL, 0, True);
-                                       unbecome_root();
+                                       messaging_send(msg_ctx, pend_lock->context.pid,
+                                                      MSG_SMB_UNLOCK, &data_blob_null);
                                }
                        }
 
@@ -1326,8 +1612,8 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
                }
        }
 
-       if (num_deleted_windows_locks) {
-               /* Reduce the Windows lock reference count on this dev/ino pair. */
+       if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
+               /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
                reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
        }
 }
@@ -1335,8 +1621,7 @@ void brl_close_fnum(struct byte_range_lock *br_lck)
 /****************************************************************************
  Ensure this set of lock entries is valid.
 ****************************************************************************/
-
-static BOOL validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
+static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
 {
        unsigned int i;
        unsigned int num_valid_entries = 0;
@@ -1344,7 +1629,7 @@ static BOOL validate_lock_entries(unsigned int *pnum_entries, struct lock_struct
 
        for (i = 0; i < *pnum_entries; i++) {
                struct lock_struct *lock_data = &locks[i];
-               if (!process_exists(lock_data->context.pid)) {
+               if (!serverid_exists(&lock_data->context.pid)) {
                        /* This process no longer exists - mark this
                           entry as invalid by zeroing it. */
                        ZERO_STRUCTP(lock_data);
@@ -1366,7 +1651,7 @@ static BOOL validate_lock_entries(unsigned int *pnum_entries, struct lock_struct
                        num_valid_entries = 0;
                        for (i = 0; i < *pnum_entries; i++) {
                                struct lock_struct *lock_data = &locks[i];
-                               if (lock_data->context.smbpid &&
+                               if (lock_data->context.smblctx &&
                                                lock_data->context.tid) {
                                        /* Valid (nonzero) entry - copy it. */
                                        memcpy(&new_lock_data[num_valid_entries],
@@ -1384,31 +1669,40 @@ static BOOL validate_lock_entries(unsigned int *pnum_entries, struct lock_struct
        return True;
 }
 
+/* Carries the user's per-lock callback and its opaque argument through
+ * the database traverse (packed into the void *state of traverse_fn,
+ * unpacked there and invoked once per lock record). */
+struct brl_forall_cb {
+       void (*fn)(struct file_id id, struct server_id pid,
+                  enum brl_type lock_type,
+                  enum brl_flavour lock_flav,
+                  br_off start, br_off size,
+                  void *private_data);
+       void *private_data;     /* passed through unmodified to fn */
+};
+
 /****************************************************************************
  Traverse the whole database with this function, calling traverse_callback
  on each lock.
 ****************************************************************************/
 
-static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
+static int traverse_fn(struct db_record *rec, void *state)
 {
+       struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
        struct lock_struct *locks;
-       struct lock_key *key;
+       struct file_id *key;
        unsigned int i;
        unsigned int num_locks = 0;
        unsigned int orig_num_locks = 0;
 
-       BRLOCK_FN(traverse_callback) = (BRLOCK_FN_CAST())state;
-
        /* In a traverse function we must make a copy of
           dbuf before modifying it. */
 
-       locks = (struct lock_struct *)memdup(dbuf.dptr, dbuf.dsize);
+       locks = (struct lock_struct *)memdup(rec->value.dptr,
+                                            rec->value.dsize);
        if (!locks) {
                return -1; /* Terminate traversal. */
        }
 
-       key = (struct lock_key *)kbuf.dptr;
-       orig_num_locks = num_locks = dbuf.dsize/sizeof(*locks);
+       key = (struct file_id *)rec->key.dptr;
+       orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
 
        /* Ensure the lock db is clean of entries from invalid processes. */
 
@@ -1418,24 +1712,26 @@ static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *st
        }
 
        if (orig_num_locks != num_locks) {
-               dbuf.dptr = (char *)locks;
-               dbuf.dsize = num_locks * sizeof(*locks);
-
-               if (dbuf.dsize) {
-                       tdb_store(ttdb, kbuf, dbuf, TDB_REPLACE);
+               if (num_locks) {
+                       TDB_DATA data;
+                       data.dptr = (uint8_t *)locks;
+                       data.dsize = num_locks*sizeof(struct lock_struct);
+                       rec->store(rec, data, TDB_REPLACE);
                } else {
-                       tdb_delete(ttdb, kbuf);
+                       rec->delete_rec(rec);
                }
        }
 
-       for ( i=0; i<num_locks; i++) {
-               traverse_callback(key->device,
-                                 key->inode,
-                                 locks[i].context.pid,
-                                 locks[i].lock_type,
-                                 locks[i].lock_flav,
-                                 locks[i].start,
-                                 locks[i].size);
+       if (cb->fn) {
+               for ( i=0; i<num_locks; i++) {
+                       cb->fn(*key,
+                               locks[i].context.pid,
+                               locks[i].lock_type,
+                               locks[i].lock_flav,
+                               locks[i].start,
+                               locks[i].size,
+                               cb->private_data);
+               }
        }
 
        SAFE_FREE(locks);
@@ -1446,12 +1742,21 @@ static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *st
  Call the specified function on each lock in the database.
 ********************************************************************/
 
-int brl_forall(BRLOCK_FN(fn))
+int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
+                         enum brl_type lock_type,
+                         enum brl_flavour lock_flav,
+                         br_off start, br_off size,
+                         void *private_data),
+              void *private_data)
 {
-       if (!tdb) {
+       struct brl_forall_cb cb;
+
+       if (!brlock_db) {
                return 0;
        }
-       return tdb_traverse(tdb, traverse_fn, (void *)fn);
+       cb.fn = fn;
+       cb.private_data = private_data;
+       return brlock_db->traverse(brlock_db, traverse_fn, &cb);
 }
 
 /*******************************************************************
@@ -1460,14 +1765,11 @@ int brl_forall(BRLOCK_FN(fn))
  Unlock the record.
 ********************************************************************/
 
-static int byte_range_lock_destructor(void *p)
+static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 {
-       struct byte_range_lock *br_lck =
-               talloc_get_type_abort(p, struct byte_range_lock);
-       TDB_DATA key;
-
-       key.dptr = (char *)&br_lck->key;
-       key.dsize = sizeof(struct lock_key);
+       if (br_lck->read_only) {
+               SMB_ASSERT(!br_lck->modified);
+       }
 
        if (!br_lck->modified) {
                goto done;
@@ -1475,22 +1777,38 @@ static int byte_range_lock_destructor(void *p)
 
        if (br_lck->num_locks == 0) {
                /* No locks - delete this entry. */
-               if (tdb_delete(tdb, key) == -1) {
-                       smb_panic("Could not delete byte range lock entry\n");
+               NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
+               if (!NT_STATUS_IS_OK(status)) {
+                       DEBUG(0, ("delete_rec returned %s\n",
+                                 nt_errstr(status)));
+                       smb_panic("Could not delete byte range lock entry");
                }
        } else {
                TDB_DATA data;
-               data.dptr = (char *)br_lck->lock_data;
+               NTSTATUS status;
+
+               data.dptr = (uint8 *)br_lck->lock_data;
                data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
 
-               if (tdb_store(tdb, key, data, TDB_REPLACE) == -1) {
-                       smb_panic("Could not store byte range mode entry\n");
+               status = br_lck->record->store(br_lck->record, data,
+                                              TDB_REPLACE);
+               if (!NT_STATUS_IS_OK(status)) {
+                       DEBUG(0, ("store returned %s\n", nt_errstr(status)));
+                       smb_panic("Could not store byte range mode entry");
                }
        }
 
  done:
 
-       tdb_chainunlock(tdb, key);
+       br_lck->read_only = true;
+       br_lck->modified = false;
+
+       TALLOC_FREE(br_lck->record);
+}
+
+static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
+{
+       byte_range_lock_flush(br_lck);
        SAFE_FREE(br_lck->lock_data);
        return 0;
 }
@@ -1501,12 +1819,12 @@ static int byte_range_lock_destructor(void *p)
  TALLOC_FREE(brl) will release the lock in the destructor.
 ********************************************************************/
 
-struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
-                                       files_struct *fsp)
+static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
+                                       files_struct *fsp, bool read_only)
 {
-       TDB_DATA key;
-       TDB_DATA data;
-       struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
+       TDB_DATA key, data;
+       struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
+       bool do_read_only = read_only;
 
        if (br_lck == NULL) {
                return NULL;
@@ -1515,46 +1833,73 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
        br_lck->fsp = fsp;
        br_lck->num_locks = 0;
        br_lck->modified = False;
-       memset(&br_lck->key, '\0', sizeof(struct lock_key));
-       br_lck->key.device = fsp->dev;
-       br_lck->key.inode = fsp->inode;
+       br_lck->key = fsp->file_id;
 
-       key.dptr = (char *)&br_lck->key;
-       key.dsize = sizeof(struct lock_key);
+       key.dptr = (uint8 *)&br_lck->key;
+       key.dsize = sizeof(struct file_id);
 
-       if (tdb_chainlock(tdb, key) != 0) {
-               DEBUG(3, ("Could not lock byte range lock entry\n"));
-               TALLOC_FREE(br_lck);
-               return NULL;
+       if (!fsp->lockdb_clean) {
+               /* We must be read/write to clean
+                  the dead entries. */
+               do_read_only = false;
+       }
+
+       if (do_read_only) {
+               if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
+                       DEBUG(3, ("Could not fetch byte range lock record\n"));
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+               br_lck->record = NULL;
+       } else {
+               br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
+
+               if (br_lck->record == NULL) {
+                       DEBUG(3, ("Could not lock byte range lock entry\n"));
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+
+               data = br_lck->record->value;
        }
 
+       br_lck->read_only = do_read_only;
+       br_lck->lock_data = NULL;
+
        talloc_set_destructor(br_lck, byte_range_lock_destructor);
 
-       data = tdb_fetch(tdb, key);
-       br_lck->lock_data = (void *)data.dptr;
        br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
 
+       if (br_lck->num_locks != 0) {
+               br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
+                                                    br_lck->num_locks);
+               if (br_lck->lock_data == NULL) {
+                       DEBUG(0, ("malloc failed\n"));
+                       TALLOC_FREE(br_lck);
+                       return NULL;
+               }
+
+               memcpy(br_lck->lock_data, data.dptr, data.dsize);
+       }
+
        if (!fsp->lockdb_clean) {
+               int orig_num_locks = br_lck->num_locks;
 
                /* This is the first time we've accessed this. */
                /* Go through and ensure all entries exist - remove any that don't. */
                /* Makes the lockdb self cleaning at low cost. */
 
-               struct lock_struct *locks =
-                       (struct lock_struct *)br_lck->lock_data;
-
-               if (!validate_lock_entries(&br_lck->num_locks, &locks)) {
+               if (!validate_lock_entries(&br_lck->num_locks,
+                                          &br_lck->lock_data)) {
                        SAFE_FREE(br_lck->lock_data);
                        TALLOC_FREE(br_lck);
                        return NULL;
                }
 
-               /*
-                * validate_lock_entries might have changed locks. We can't
-                * use a direct pointer here because otherwise gcc warnes
-                * about strict aliasing rules being violated.
-                */
-               br_lck->lock_data = locks;
+               /* Ensure invalid locks are cleaned up in the destructor. */
+               if (orig_num_locks != br_lck->num_locks) {
+                       br_lck->modified = True;
+               }
 
                /* Mark the lockdb as "clean" as seen from this open file. */
                fsp->lockdb_clean = True;
@@ -1562,13 +1907,159 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
 
        if (DEBUGLEVEL >= 10) {
                unsigned int i;
-               struct lock_struct *locks = (struct lock_struct *)br_lck->lock_data;
-               DEBUG(10,("brl_get_locks: %u current locks on dev=%.0f, inode=%.0f\n",
+               struct lock_struct *locks = br_lck->lock_data;
+               DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
                        br_lck->num_locks,
-                       (double)fsp->dev, (double)fsp->inode ));
+                         file_id_string_tos(&fsp->file_id)));
                for( i = 0; i < br_lck->num_locks; i++) {
                        print_lock_struct(i, &locks[i]);
                }
        }
+
+       if (do_read_only != read_only) {
+               /*
+                * this stores the record and gets rid of
+                * the write lock that is needed for a cleanup
+                */
+               byte_range_lock_flush(br_lck);
+       }
+
        return br_lck;
 }
+
+/* Fetch the byte range lock set for fsp with a write-locked db record
+ * (read_only == False), so the caller may add/remove locks; the record
+ * is stored/unlocked from the talloc destructor. */
+struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
+                                       files_struct *fsp)
+{
+       return brl_get_locks_internal(mem_ctx, fsp, False);
+}
+
+/* Fetch a read-only view of fsp's byte range locks, caching the result
+ * on the fsp and reusing it while the database sequence number is
+ * unchanged.  NOTE(review): presumably the seqnum check is why the
+ * cache is safe — any writer bumps get_seqnum() — confirm against the
+ * dbwrap backend. */
+struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
+{
+       struct byte_range_lock *br_lock;
+
+       if (lp_clustering()) {
+               /* No seqnum-based caching when clustered; always refetch. */
+               return brl_get_locks_internal(talloc_tos(), fsp, true);
+       }
+
+       /* Cache hit: nothing changed in brlock.tdb since we last looked. */
+       if ((fsp->brlock_rec != NULL)
+           && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
+               return fsp->brlock_rec;
+       }
+
+       /* Stale (or absent) cache entry — drop it and refetch. */
+       TALLOC_FREE(fsp->brlock_rec);
+
+       br_lock = brl_get_locks_internal(talloc_tos(), fsp, true);
+       if (br_lock == NULL) {
+               return NULL;
+       }
+       fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
+
+       /* Re-parent the lock set onto the fsp so it survives talloc_tos(). */
+       fsp->brlock_rec = talloc_move(fsp, &br_lock);
+
+       return fsp->brlock_rec;
+}
+
+struct brl_revalidate_state {
+       ssize_t array_size;
+       uint32 num_pids;
+       struct server_id *pids;
+};
+
+/*
+ * Collect PIDs of all processes with pending entries
+ */
+
+/* brl_forall() callback: append the owner pid of every PENDING lock to
+ * state->pids.  The id/start/size/lock_flav arguments are unused here;
+ * only the lock type filter and the pid matter. */
+static void brl_revalidate_collect(struct file_id id, struct server_id pid,
+                                  enum brl_type lock_type,
+                                  enum brl_flavour lock_flav,
+                                  br_off start, br_off size,
+                                  void *private_data)
+{
+       struct brl_revalidate_state *state =
+               (struct brl_revalidate_state *)private_data;
+
+       /* Only processes waiting on a pending lock need a wakeup. */
+       if (!IS_PENDING_LOCK(lock_type)) {
+               return;
+       }
+
+       /* Grows state->pids as needed; failure is signalled via
+        * state->array_size (checked by the caller after the traverse). */
+       add_to_large_array(state, sizeof(pid), (void *)&pid,
+                          &state->pids, &state->num_pids,
+                          &state->array_size);
+}
+
+/*
+ * qsort callback to sort the processes
+ */
+
+/* qsort comparator ordering server_ids by pid so that duplicate
+ * entries end up adjacent and can be skipped in brl_revalidate(). */
+static int compare_procids(const void *p1, const void *p2)
+{
+       const struct server_id *i1 = (const struct server_id *)p1;
+       const struct server_id *i2 = (const struct server_id *)p2;
+
+       if (i1->pid < i2->pid) return -1;
+       /* Fixed: was "i2->pid > i2->pid" (self-comparison, always false),
+        * which made the comparator never return 1, leaving the array
+        * unsorted and defeating the adjacent-duplicate suppression. */
+       if (i1->pid > i2->pid) return 1;
+       return 0;
+}
+
+/*
+ * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
+ * locks so that they retry. Mainly used in the cluster code after a node has
+ * died.
+ *
+ * Done in two steps to avoid double-sends: First we collect all entries in an
+ * array, then qsort that array and only send to non-dupes.
+ */
+
+/* Messaging handler for MSG_SMB_BRL_VALIDATE: wake up every process
+ * that owns a pending byte range lock by sending it MSG_SMB_UNLOCK.
+ * The incoming msg_type/server_id/data arguments are ignored; the
+ * message is purely a trigger. */
+static void brl_revalidate(struct messaging_context *msg_ctx,
+                          void *private_data,
+                          uint32_t msg_type,
+                          struct server_id server_id,
+                          DATA_BLOB *data)
+{
+       struct brl_revalidate_state *state;
+       uint32 i;
+       struct server_id last_pid;
+
+       if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
+               DEBUG(0, ("talloc failed\n"));
+               return;
+       }
+
+       /* Step 1: collect the pid of every pending-lock owner. */
+       brl_forall(brl_revalidate_collect, state);
+
+       /* add_to_large_array flags allocation failure via array_size. */
+       if (state->array_size == -1) {
+               DEBUG(0, ("talloc failed\n"));
+               goto done;
+       }
+
+       if (state->num_pids == 0) {
+               goto done;
+       }
+
+       /* Step 2: sort so duplicates are adjacent, then notify each
+        * distinct pid exactly once. */
+       TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
+
+       ZERO_STRUCT(last_pid);
+
+       for (i=0; i<state->num_pids; i++) {
+               if (procid_equal(&last_pid, &state->pids[i])) {
+                       /*
+                        * We've seen that one already
+                        */
+                       continue;
+               }
+
+               messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
+                              &data_blob_null);
+               last_pid = state->pids[i];
+       }
+
+ done:
+       TALLOC_FREE(state);
+       return;
+}
+
+/* Register brl_revalidate() as the handler for MSG_SMB_BRL_VALIDATE
+ * (sent e.g. by the cluster code after a node dies). */
+void brl_register_msgs(struct messaging_context *msg_ctx)
+{
+       messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
+                          brl_revalidate);
+}