lib: Add lib/util/server_id.h
[kai/samba-autobuild/.git] / source3 / locking / brlock.c
index 2cb2652791a13ba087014266504d8823f5a6883f..33efe37810ba5524e950d4ca01383b5ba3abd7b5 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
    Unix SMB/CIFS implementation.
    byte range locking code
    Updated to handle range splits/merges.
 
 #include "includes.h"
 #include "system/filesys.h"
+#include "lib/util/server_id.h"
 #include "locking/proto.h"
 #include "smbd/globals.h"
 #include "dbwrap/dbwrap.h"
 #include "dbwrap/dbwrap_open.h"
 #include "serverid.h"
 #include "messages.h"
+#include "util_tdb.h"
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_LOCKING
 
 static struct db_context *brlock_db;
 
+struct byte_range_lock { /* Byte-range lock state for one open file, as used by the brl_* functions. */
+       struct files_struct *fsp; /* file the locks belong to; handed out by brl_fsp() */
+       unsigned int num_locks; /* number of entries in lock_data */
+       bool modified; /* set to true whenever the lock state is changed in memory */
+       uint32_t num_read_oplocks; /* counter accessed via brl_num_read_oplocks()/brl_set_num_read_oplocks() */
+       struct lock_struct *lock_data; /* array of num_locks lock entries */
+       struct db_record *record; /* database record; NULL means this object is read-only */
+};
+
 /****************************************************************************
  Debug info at level 10 for lock struct.
 ****************************************************************************/
 
-static void print_lock_struct(unsigned int i, struct lock_struct *pls)
+static void print_lock_struct(unsigned int i, const struct lock_struct *pls) /* Dump lock entry pls at debug level 10; i is the index shown in brackets. */
 {
+       struct server_id_buf tmp; /* buffer handed to server_id_str_buf() for the pid string */
+
        DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
                        i,
                        (unsigned long long)pls->context.smblctx,
                        (unsigned int)pls->context.tid,
-                       server_id_str(talloc_tos(), &pls->context.pid) ));
-
-       DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
-               (double)pls->start,
-               (double)pls->size,
-               pls->fnum,
-               lock_type_name(pls->lock_type),
-               lock_flav_name(pls->lock_flav) ));
+                       server_id_str_buf(pls->context.pid, &tmp) ));
+
+       DEBUG(10, ("start = %ju, size = %ju, fnum = %ju, %s %s\n", /* second half of the same log line */
+                  (uintmax_t)pls->start, /* casts to uintmax_t match the %ju conversions */
+                  (uintmax_t)pls->size,
+                  (uintmax_t)pls->fnum,
+                  lock_type_name(pls->lock_type),
+                  lock_flav_name(pls->lock_flav)));
+}
+
+unsigned int brl_num_locks(const struct byte_range_lock *brl)
+{ /* Accessor: returns the num_locks field of brl; brl must not be NULL. */
+       return brl->num_locks;
+}
+
+struct files_struct *brl_fsp(struct byte_range_lock *brl)
+{ /* Accessor: returns the files_struct pointer stored in brl; brl must not be NULL. */
+       return brl->fsp;
+}
+
+uint32_t brl_num_read_oplocks(const struct byte_range_lock *brl)
+{ /* Accessor: returns the num_read_oplocks counter stored in brl; brl must not be NULL. */
+       return brl->num_read_oplocks;
+}
+
+void brl_set_num_read_oplocks(struct byte_range_lock *brl,
+                             uint32_t num_read_oplocks)
+{ /* Store a new num_read_oplocks value in brl and flag brl as modified. */
+       DEBUG(10, ("Setting num_read_oplocks to %"PRIu32"\n",
+                  num_read_oplocks));
+       SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
+       brl->num_read_oplocks = num_read_oplocks;
+       brl->modified = true; /* presumably makes the change get written back later - TODO confirm against the code that stores the record */
 }
 
 /****************************************************************************
  See if two locking contexts are equal.
 ****************************************************************************/
 
-bool brl_same_context(const struct lock_context *ctx1, 
+static bool brl_same_context(const struct lock_context *ctx1, /* now file-local (static) */
                             const struct lock_context *ctx2)
 {
-       return (procid_equal(&ctx1->pid, &ctx2->pid) &&
+       return (serverid_equal(&ctx1->pid, &ctx2->pid) && /* equal only if pid, smblctx and tid all match */
                (ctx1->smblctx == ctx2->smblctx) &&
                (ctx1->tid == ctx2->tid));
 }
@@ -82,7 +120,7 @@ static bool brl_overlap(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
 {
        /* XXX Remove for Win7 compatibility. */
-       /* this extra check is not redundent - it copes with locks
+       /* this extra check is not redundant - it copes with locks
           that go beyond the end of 64 bit file space */
        if (lck1->size != 0 &&
            lck1->start == lck2->start &&
@@ -101,7 +139,7 @@ static bool brl_overlap(const struct lock_struct *lck1,
  See if lock2 can be added when lock1 is in place.
 ****************************************************************************/
 
-static bool brl_conflict(const struct lock_struct *lck1, 
+static bool brl_conflict(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
 {
        /* Ignore PENDING locks. */
@@ -122,7 +160,7 @@ static bool brl_conflict(const struct lock_struct *lck1,
        }
 
        return brl_overlap(lck1, lck2);
-} 
+}
 
 /****************************************************************************
  See if lock2 can be added when lock1 is in place - when both locks are POSIX
@@ -130,7 +168,7 @@ static bool brl_conflict(const struct lock_struct *lck1,
  know already match.
 ****************************************************************************/
 
-static bool brl_conflict_posix(const struct lock_struct *lck1, 
+static bool brl_conflict_posix(const struct lock_struct *lck1,
                                const struct lock_struct *lck2)
 {
 #if defined(DEVELOPER)
@@ -147,7 +185,7 @@ static bool brl_conflict_posix(const struct lock_struct *lck1,
                return False;
        }
 
-       /* Locks on the same context con't conflict. Ignore fnum. */
+       /* Locks on the same context don't conflict. Ignore fnum. */
        if (brl_same_context(&lck1->context, &lck2->context)) {
                return False;
        }
@@ -155,10 +193,10 @@ static bool brl_conflict_posix(const struct lock_struct *lck1,
        /* One is read, the other write, or the context is different,
           do they overlap ? */
        return brl_overlap(lck1, lck2);
-} 
+}
 
 #if ZERO_ZERO
-static bool brl_conflict1(const struct lock_struct *lck1, 
+static bool brl_conflict1(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
 {
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
@@ -183,7 +221,7 @@ static bool brl_conflict1(const struct lock_struct *lck1,
        }
 
        return True;
-} 
+}
 #endif
 
 /****************************************************************************
@@ -192,33 +230,66 @@ static bool brl_conflict1(const struct lock_struct *lck1,
  This is never used in the POSIX lock case.
 ****************************************************************************/
 
-static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
+static bool brl_conflict_other(const struct lock_struct *lock, /* lock: an existing lock entry */
+                              const struct lock_struct *rw_probe) /* rw_probe: the read/write request being tested; returns true on conflict */
 {
-       if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
+       if (IS_PENDING_LOCK(lock->lock_type) ||
+           IS_PENDING_LOCK(rw_probe->lock_type)) {
                return False;
+       }
 
-       if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
+       if (lock->lock_type == READ_LOCK && rw_probe->lock_type == READ_LOCK) {
                return False;
+       }
 
-       /* POSIX flavour locks never conflict here - this is only called
-          in the read/write path. */
-
-       if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
+       if (lock->lock_flav == POSIX_LOCK &&
+           rw_probe->lock_flav == POSIX_LOCK) {
+               /*
+                * POSIX flavour locks never conflict here - this is only called
+                * in the read/write path.
+                */
                return False;
+       }
 
-       /*
-        * Incoming WRITE locks conflict with existing READ locks even
-        * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
-        */
+       if (!brl_overlap(lock, rw_probe)) {
+               /*
+                * I/O can only conflict when overlapping a lock, thus let it
+                * pass
+                */
+               return false;
+       }
 
-       if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
-               if (brl_same_context(&lck1->context, &lck2->context) &&
-                                       lck1->fnum == lck2->fnum)
-                       return False;
+       if (!brl_same_context(&lock->context, &rw_probe->context)) {
+               /*
+                * Different process, conflict
+                */
+               return true;
        }
 
-       return brl_overlap(lck1, lck2);
-} 
+       if (lock->fnum != rw_probe->fnum) {
+               /*
+                * Different file handle, conflict
+                */
+               return true;
+       }
+
+       if ((lock->lock_type == READ_LOCK) &&
+           (rw_probe->lock_type == WRITE_LOCK)) {
+               /*
+                * Incoming WRITE locks conflict with existing READ locks even
+                * if the context is the same. JRA. See LOCKTEST7 in
+                * smbtorture.
+                */
+               return true;
+       }
+
+       /*
+        * I/O request compatible with existing lock, let it pass without
+        * conflict
+        */
+
+       return false;
+}
 
 /****************************************************************************
  Check if an unlock overlaps a pending lock.
@@ -228,7 +299,7 @@ static bool brl_pending_overlap(const struct lock_struct *lock, const struct loc
 {
        if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
                return True;
-       if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
+       if ((lock->start >= pend_lock->start) && (lock->start < pend_lock->start + pend_lock->size))
                return True;
        return False;
 }
@@ -239,7 +310,9 @@ static bool brl_pending_overlap(const struct lock_struct *lock, const struct loc
  app depends on this ?
 ****************************************************************************/
 
-NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
+static NTSTATUS brl_lock_failed(files_struct *fsp,
+                               const struct lock_struct *lock,
+                               bool blocking_lock)
 {
        if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
                /* amazing the little things you learn with a test
@@ -252,7 +325,7 @@ NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }
 
-       if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
+       if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
                        lock->context.tid == fsp->last_lock_failure.context.tid &&
                        lock->fnum == fsp->last_lock_failure.fnum &&
                        lock->start == fsp->last_lock_failure.start) {
@@ -272,6 +345,7 @@ NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool
 void brl_init(bool read_only)
 {
        int tdb_flags;
+       char *db_path;
 
        if (brlock_db) {
                return;
@@ -288,14 +362,23 @@ void brl_init(bool read_only)
                tdb_flags |= TDB_SEQNUM;
        }
 
-       brlock_db = db_open(NULL, lock_path("brlock.tdb"),
-                           lp_open_files_db_hash_size(), tdb_flags,
-                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
+       db_path = lock_path("brlock.tdb");
+       if (db_path == NULL) {
+               DEBUG(0, ("out of memory!\n"));
+               return;
+       }
+
+       brlock_db = db_open(NULL, db_path,
+                           SMB_OPEN_DATABASE_TDB_HASH_SIZE, tdb_flags,
+                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
+                           DBWRAP_LOCK_ORDER_2, DBWRAP_FLAG_NONE);
        if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
-                       lock_path("brlock.tdb")));
+                        db_path));
+               TALLOC_FREE(db_path);
                return;
        }
+       TALLOC_FREE(db_path);
 }
 
 /****************************************************************************
@@ -312,7 +395,7 @@ void brl_shutdown(void)
  Compare two locks for sorting.
 ****************************************************************************/
 
-static int lock_compare(const struct lock_struct *lck1, 
+static int lock_compare(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
 {
        if (lck1->start != lck2->start) {
@@ -347,12 +430,17 @@ NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
        for (i=0; i < br_lck->num_locks; i++) {
                /* Do any Windows or POSIX locks conflict ? */
                if (brl_conflict(&locks[i], plock)) {
+                       if (!serverid_exists(&locks[i].context.pid)) {
+                               locks[i].context.pid.pid = 0;
+                               br_lck->modified = true;
+                               continue;
+                       }
                        /* Remember who blocked us. */
                        plock->context.smblctx = locks[i].context.smblctx;
                        return brl_lock_failed(fsp,plock,blocking_lock);
                }
 #if ZERO_ZERO
-               if (plock->start == 0 && plock->size == 0 && 
+               if (plock->start == 0 && plock->size == 0 &&
                                locks[i].size == 0) {
                        break;
                }
@@ -392,7 +480,8 @@ NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
        }
 
        /* no conflicts - add it to the list of locks */
-       locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
+       locks = talloc_realloc(br_lck, locks, struct lock_struct,
+                              (br_lck->num_locks + 1));
        if (!locks) {
                status = NT_STATUS_NO_MEMORY;
                goto fail;
@@ -730,7 +819,7 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
           existing POSIX lock range into two, and add our lock,
           so we need at most 2 more entries. */
 
-       tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
+       tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 2);
        if (!tp) {
                return NT_STATUS_NO_MEMORY;
        }
@@ -750,8 +839,13 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
                if (curr_lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(curr_lock, plock)) {
+                               if (!serverid_exists(&curr_lock->context.pid)) {
+                                       curr_lock->context.pid.pid = 0;
+                                       br_lck->modified = true;
+                                       continue;
+                               }
                                /* No games with error messages. */
-                               SAFE_FREE(tp);
+                               TALLOC_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
@@ -764,9 +858,14 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
 
                        /* POSIX conflict semantics are different. */
                        if (brl_conflict_posix(curr_lock, plock)) {
+                               if (!serverid_exists(&curr_lock->context.pid)) {
+                                       curr_lock->context.pid.pid = 0;
+                                       br_lck->modified = true;
+                                       continue;
+                               }
                                /* Can't block ourselves with POSIX locks. */
                                /* No games with error messages. */
-                               SAFE_FREE(tp);
+                               TALLOC_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
@@ -825,17 +924,18 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
                                plock->start,
                                plock->size,
                                plock->lock_type,
+                               &plock->context,
                                &errno_ret)) {
 
                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
 
                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
-                               SAFE_FREE(tp);
+                               TALLOC_FREE(tp);
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
-                               SAFE_FREE(tp);
+                               TALLOC_FREE(tp);
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
@@ -845,7 +945,7 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
        /* If we didn't use all the allocated size,
         * Realloc so we don't leak entries per lock call. */
        if (count < br_lck->num_locks + 2) {
-               tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
+               tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
                if (!tp) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
@@ -853,7 +953,7 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
        }
 
        br_lck->num_locks = count;
-       SAFE_FREE(br_lck->lock_data);
+       TALLOC_FREE(br_lck->lock_data);
        br_lck->lock_data = tp;
        locks = tp;
        br_lck->modified = True;
@@ -873,8 +973,12 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
 
                        if (pend_lock->lock_type == PENDING_READ_LOCK &&
                                        brl_pending_overlap(plock, pend_lock)) {
-                               DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
-                                       procid_str_static(&pend_lock->context.pid )));
+                               struct server_id_buf tmp;
+
+                               DEBUG(10, ("brl_lock_posix: sending unlock "
+                                          "message to pid %s\n",
+                                          server_id_str_buf(pend_lock->context.pid,
+                                                            &tmp)));
 
                                messaging_send(msg_ctx, pend_lock->context.pid,
                                               MSG_SMB_UNLOCK, &data_blob_null);
@@ -894,12 +998,11 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
 NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
                                       struct byte_range_lock *br_lck,
                                       struct lock_struct *plock,
-                                      bool blocking_lock,
-                                      struct blocking_lock_record *blr)
+                                      bool blocking_lock) /* the blr parameter is dropped from this wrapper */
 {
        VFS_FIND(brl_lock_windows);
-       return handle->fns->brl_lock_windows(handle, br_lck, plock,
-                                            blocking_lock, blr);
+       return handle->fns->brl_lock_windows_fn(handle, br_lck, plock, /* forward the arguments unchanged to the handle's brl_lock_windows_fn hook */
+                                               blocking_lock);
 }
 
 /****************************************************************************
@@ -911,39 +1014,37 @@ NTSTATUS brl_lock(struct messaging_context *msg_ctx,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
-               br_off size, 
+               br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
                bool blocking_lock,
-               uint64_t *psmblctx,
-               struct blocking_lock_record *blr)
+               uint64_t *psmblctx)
 {
        NTSTATUS ret;
        struct lock_struct lock;
 
+       ZERO_STRUCT(lock);
+
 #if !ZERO_ZERO
        if (start == 0 && size == 0) {
                DEBUG(0,("client sent 0/0 lock - please report this\n"));
        }
 #endif
 
-#ifdef DEVELOPER
-       /* Quieten valgrind on test. */
-       memset(&lock, '\0', sizeof(lock));
-#endif
-
-       lock.context.smblctx = smblctx;
-       lock.context.pid = pid;
-       lock.context.tid = br_lck->fsp->conn->cnum;
-       lock.start = start;
-       lock.size = size;
-       lock.fnum = br_lck->fsp->fnum;
-       lock.lock_type = lock_type;
-       lock.lock_flav = lock_flav;
+       lock = (struct lock_struct) {
+               .context.smblctx = smblctx,
+               .context.pid = pid,
+               .context.tid = br_lck->fsp->conn->cnum,
+               .start = start,
+               .size = size,
+               .fnum = br_lck->fsp->fnum,
+               .lock_type = lock_type,
+               .lock_flav = lock_flav
+       };
 
        if (lock_flav == WINDOWS_LOCK) {
                ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
-                   &lock, blocking_lock, blr);
+                                              &lock, blocking_lock);
        } else {
                ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }
@@ -960,6 +1061,17 @@ NTSTATUS brl_lock(struct messaging_context *msg_ctx,
        return ret;
 }
 
+static void brl_delete_lock_struct(struct lock_struct *locks,
+                                  unsigned num_locks,
+                                  unsigned del_idx)
+{ /* Remove entry del_idx from locks[0..num_locks-1] by shifting the later entries down one slot. */
+       if (del_idx >= num_locks) { /* index out of range: nothing to delete */
+               return;
+       }
+       memmove(&locks[del_idx], &locks[del_idx+1], /* regions overlap, hence memmove; moves zero bytes when del_idx is the last entry */
+               sizeof(*locks) * (num_locks - del_idx - 1)); /* array is not resized and no count is updated here; callers decrement num_locks themselves */
+}
+
 /****************************************************************************
  Unlock a range of bytes - Windows semantics.
 ****************************************************************************/
@@ -1027,12 +1139,7 @@ bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
   unlock_continue:
 #endif
 
-       /* Actually delete the lock. */
-       if (i < br_lck->num_locks - 1) {
-               memmove(&locks[i], &locks[i+1], 
-                       sizeof(*locks)*((br_lck->num_locks-1) - i));
-       }
-
+       brl_delete_lock_struct(locks, br_lck->num_locks, i);
        br_lck->num_locks -= 1;
        br_lck->modified = True;
 
@@ -1058,8 +1165,12 @@ bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
 
                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
-                       DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
-                               procid_str_static(&pend_lock->context.pid )));
+                       struct server_id_buf tmp;
+
+                       DEBUG(10, ("brl_unlock: sending unlock message to "
+                                  "pid %s\n",
+                                  server_id_str_buf(pend_lock->context.pid,
+                                                    &tmp)));
 
                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
@@ -1099,7 +1210,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
           existing POSIX lock range into two, so we need at most
           1 more entry. */
 
-       tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
+       tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1);
        if (!tp) {
                DEBUG(10,("brl_unlock_posix: malloc fail\n"));
                return False;
@@ -1121,7 +1232,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
                if (lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(lock, plock)) {
-                               SAFE_FREE(tp);
+                               TALLOC_FREE(tp);
                                return false;
                        }
                        /* Just copy the Windows lock into the new array. */
@@ -1166,7 +1277,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
 
        if (!overlap_found) {
                /* Just ignore - no change. */
-               SAFE_FREE(tp);
+               TALLOC_FREE(tp);
                DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
                return True;
        }
@@ -1183,14 +1294,14 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
 
        /* Realloc so we don't leak entries per unlock call. */
        if (count) {
-               tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
+               tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
                if (!tp) {
                        DEBUG(10,("brl_unlock_posix: realloc fail\n"));
                        return False;
                }
        } else {
                /* We deleted the last lock. */
-               SAFE_FREE(tp);
+               TALLOC_FREE(tp);
                tp = NULL;
        }
 
@@ -1198,7 +1309,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
                                   LEVEL2_CONTEND_POSIX_BRL);
 
        br_lck->num_locks = count;
-       SAFE_FREE(br_lck->lock_data);
+       TALLOC_FREE(br_lck->lock_data);
        locks = tp;
        br_lck->lock_data = tp;
        br_lck->modified = True;
@@ -1215,8 +1326,12 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
 
                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
-                       DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
-                               procid_str_static(&pend_lock->context.pid )));
+                       struct server_id_buf tmp;
+
+                       DEBUG(10, ("brl_unlock: sending unlock message to "
+                                  "pid %s\n",
+                                  server_id_str_buf(pend_lock->context.pid,
+                                                    &tmp)));
 
                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
@@ -1232,7 +1347,8 @@ bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
                                     const struct lock_struct *plock)
 {
        VFS_FIND(brl_unlock_windows);
-       return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
+       return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck,
+                                                 plock);
 }
 
 /****************************************************************************
@@ -1272,34 +1388,30 @@ bool brl_unlock(struct messaging_context *msg_ctx,
 ****************************************************************************/
 
 bool brl_locktest(struct byte_range_lock *br_lck,
-               uint64_t smblctx,
-               struct server_id pid,
-               br_off start,
-               br_off size, 
-               enum brl_type lock_type,
-               enum brl_flavour lock_flav)
+                 const struct lock_struct *rw_probe)
 {
        bool ret = True;
        unsigned int i;
-       struct lock_struct lock;
-       const struct lock_struct *locks = br_lck->lock_data;
+       struct lock_struct *locks = br_lck->lock_data;
        files_struct *fsp = br_lck->fsp;
 
-       lock.context.smblctx = smblctx;
-       lock.context.pid = pid;
-       lock.context.tid = br_lck->fsp->conn->cnum;
-       lock.start = start;
-       lock.size = size;
-       lock.fnum = fsp->fnum;
-       lock.lock_type = lock_type;
-       lock.lock_flav = lock_flav;
-
        /* Make sure existing locks don't conflict */
        for (i=0; i < br_lck->num_locks; i++) {
                /*
                 * Our own locks don't conflict.
                 */
-               if (brl_conflict_other(&locks[i], &lock)) {
+               if (brl_conflict_other(&locks[i], rw_probe)) {
+                       if (br_lck->record == NULL) {
+                               /* readonly */
+                               return false;
+                       }
+
+                       if (!serverid_exists(&locks[i].context.pid)) {
+                               locks[i].context.pid.pid = 0;
+                               br_lck->modified = true;
+                               continue;
+                       }
+
                        return False;
                }
        }
@@ -1310,12 +1422,22 @@ bool brl_locktest(struct byte_range_lock *br_lck,
         * This only conflicts with Windows locks, not POSIX locks.
         */
 
-       if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
+       if(lp_posix_locking(fsp->conn->params) &&
+          (rw_probe->lock_flav == WINDOWS_LOCK)) {
+               /*
+                * Make copies -- is_posix_locked might modify the values
+                */
+
+               br_off start = rw_probe->start;
+               br_off size = rw_probe->size;
+               enum brl_type lock_type = rw_probe->lock_type;
+
                ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
 
-               DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
-                       (double)start, (double)size, ret ? "locked" : "unlocked",
-                       fsp->fnum, fsp_str_dbg(fsp)));
+               DEBUG(10, ("brl_locktest: posix start=%ju len=%ju %s for %s "
+                          "file %s\n", (uintmax_t)start, (uintmax_t)size,
+                          ret ? "locked" : "unlocked",
+                          fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
 
                /* We need to return the inverse of is_posix_locked. */
                ret = !ret;
@@ -1333,7 +1455,7 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
                uint64_t *psmblctx,
                struct server_id pid,
                br_off *pstart,
-               br_off *psize, 
+               br_off *psize,
                enum brl_type *plock_type,
                enum brl_flavour lock_flav)
 {
@@ -1358,7 +1480,7 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
 
                if (exlock->lock_flav == WINDOWS_LOCK) {
                        conflict = brl_conflict(exlock, &lock);
-               } else {        
+               } else {
                        conflict = brl_conflict_posix(exlock, &lock);
                }
 
@@ -1379,9 +1501,10 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
        if(lp_posix_locking(fsp->conn->params)) {
                bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
 
-               DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
-                       (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
-                       fsp->fnum, fsp_str_dbg(fsp)));
+               DEBUG(10, ("brl_lockquery: posix start=%ju len=%ju %s for %s "
+                          "file %s\n", (uintmax_t)*pstart,
+                          (uintmax_t)*psize, ret ? "locked" : "unlocked",
+                          fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
 
                if (ret) {
                        /* Hmmm. No clue what to set smblctx to - use -1. */
@@ -1396,11 +1519,10 @@ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
 
 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
                                      struct byte_range_lock *br_lck,
-                                    struct lock_struct *plock,
-                                    struct blocking_lock_record *blr)
+                                    struct lock_struct *plock)
 {
+       /*
+        * Forward the cancel request to the brl_cancel_windows_fn hook of
+        * the VFS handle and hand its result back unchanged. The
+        * blocking_lock_record argument is no longer passed down.
+        * NOTE(review): VFS_FIND() presumably re-points "handle" at the
+        * module implementing the operation - confirm against the macro.
+        */
        VFS_FIND(brl_cancel_windows);
-       return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
+       return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock);
 }
 
 /****************************************************************************
@@ -1411,8 +1533,7 @@ bool brl_lock_cancel(struct byte_range_lock *br_lck,
                struct server_id pid,
                br_off start,
                br_off size,
-               enum brl_flavour lock_flav,
-               struct blocking_lock_record *blr)
+               enum brl_flavour lock_flav)
 {
        bool ret;
        struct lock_struct lock;
@@ -1428,7 +1549,7 @@ bool brl_lock_cancel(struct byte_range_lock *br_lck,
 
        if (lock_flav == WINDOWS_LOCK) {
                ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
-                   &lock, blr);
+                                                &lock);
        } else {
                ret = brl_lock_cancel_default(br_lck, &lock);
        }
@@ -1463,12 +1584,7 @@ bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
                return False;
        }
 
-       if (i < br_lck->num_locks - 1) {
-               /* Found this particular pending lock - delete it */
-               memmove(&locks[i], &locks[i+1], 
-                       sizeof(*locks)*((br_lck->num_locks-1) - i));
-       }
-
+       brl_delete_lock_struct(locks, br_lck->num_locks, i);
        br_lck->num_locks -= 1;
        br_lck->modified = True;
        return True;
@@ -1484,11 +1600,11 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
                    struct byte_range_lock *br_lck)
 {
        files_struct *fsp = br_lck->fsp;
-       uint16 tid = fsp->conn->cnum;
-       int fnum = fsp->fnum;
+       uint32_t tid = fsp->conn->cnum;
+       uint64_t fnum = fsp->fnum;
        unsigned int i;
        struct lock_struct *locks = br_lck->lock_data;
-       struct server_id pid = sconn_server_id(fsp->conn->sconn);
+       struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
        struct lock_struct *locks_copy;
        unsigned int num_locks_copy;
 
@@ -1498,7 +1614,7 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
                if (!locks_copy) {
                        smb_panic("brl_close_fnum: talloc failed");
                        }
-       } else {        
+       } else {
                locks_copy = NULL;
        }
 
@@ -1507,7 +1623,7 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
        for (i=0; i < num_locks_copy; i++) {
                struct lock_struct *lock = &locks_copy[i];
 
-               if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
+               if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) &&
                                (lock->fnum == fnum)) {
                        brl_unlock(msg_ctx,
                                br_lck,
@@ -1520,55 +1636,144 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
        }
 }
 
-/****************************************************************************
- Ensure this set of lock entries is valid.
-****************************************************************************/
-static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
+/****************************************************************************
+ Mark every byte range lock of a durable file handle as disconnected.
+
+ Each lock entry gets its server id flagged via server_id_set_disconnected()
+ and its tid/fnum replaced by TID_FIELD_INVALID/FNUM_FIELD_INVALID.
+
+ Returns true if fsp->current_lock_count is 0 or all entries were marked.
+ Returns false if fsp->op is NULL, the open is not durable, the lock record
+ cannot be fetched, or an entry does not belong to this handle (smblctx,
+ tid, server id or fnum mismatch).
+****************************************************************************/
+
+bool brl_mark_disconnected(struct files_struct *fsp)
 {
+       uint32_t tid = fsp->conn->cnum;
+       uint64_t smblctx;
+       uint64_t fnum = fsp->fnum;
        unsigned int i;
-       unsigned int num_valid_entries = 0;
-       struct lock_struct *locks = *pplocks;
-
-       for (i = 0; i < *pnum_entries; i++) {
-               struct lock_struct *lock_data = &locks[i];
-               if (!serverid_exists(&lock_data->context.pid)) {
-                       /* This process no longer exists - mark this
-                          entry as invalid by zeroing it. */
-                       ZERO_STRUCTP(lock_data);
-               } else {
-                       num_valid_entries++;
+       struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
+       struct byte_range_lock *br_lck = NULL;
+
+       if (fsp->op == NULL) {
+               return false;
+       }
+
+       /* locks of this handle carry the persistent open id as smblctx */
+       smblctx = fsp->op->global->open_persistent_id;
+
+       if (!fsp->op->global->durable) {
+               return false;
+       }
+
+       if (fsp->current_lock_count == 0) {
+               return true;
+       }
+
+       br_lck = brl_get_locks(talloc_tos(), fsp);
+       if (br_lck == NULL) {
+               return false;
+       }
+
+       for (i=0; i < br_lck->num_locks; i++) {
+               struct lock_struct *lock = &br_lck->lock_data[i];
+
+               /*
+                * as this is a durable handle, we only expect locks
+                * of the current file handle!
+                */
+
+               if (lock->context.smblctx != smblctx) {
+                       TALLOC_FREE(br_lck);
+                       return false;
                }
+
+               if (lock->context.tid != tid) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               if (!serverid_equal(&lock->context.pid, &self)) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               if (lock->fnum != fnum) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               server_id_set_disconnected(&lock->context.pid);
+               lock->context.tid = TID_FIELD_INVALID;
+               lock->fnum = FNUM_FIELD_INVALID;
        }
 
-       if (num_valid_entries != *pnum_entries) {
-               struct lock_struct *new_lock_data = NULL;
+       /*
+        * Freeing br_lck runs its destructor, which writes the record back
+        * only when "modified" is set. The early returns above never set
+        * it, so a partially rewritten lock array is simply dropped.
+        */
+       br_lck->modified = true;
+       TALLOC_FREE(br_lck);
+       return true;
+}
+
+/****************************************************************************
+ Re-activate the disconnected byte range locks of a durable file handle.
+
+ Every entry must still be in the disconnected state (disconnected server
+ id, TID_FIELD_INVALID, FNUM_FIELD_INVALID) and carry this open's persistent
+ id as smblctx; it is then rewritten with our server id, tid and fnum.
+
+ Returns true if the record holds no locks or all entries were re-activated.
+ Returns false if fsp->op is NULL, the open is not durable, the lock record
+ cannot be fetched, or an entry is not in the expected state.
+****************************************************************************/
+
+bool brl_reconnect_disconnected(struct files_struct *fsp)
+{
+       uint32_t tid = fsp->conn->cnum;
+       uint64_t smblctx;
+       uint64_t fnum = fsp->fnum;
+       unsigned int i;
+       struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
+       struct byte_range_lock *br_lck = NULL;
 
-               if (num_valid_entries) {
-                       new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
-                       if (!new_lock_data) {
-                               DEBUG(3, ("malloc fail\n"));
-                               return False;
-                       }
+       if (fsp->op == NULL) {
+               return false;
+       }
 
-                       num_valid_entries = 0;
-                       for (i = 0; i < *pnum_entries; i++) {
-                               struct lock_struct *lock_data = &locks[i];
-                               if (lock_data->context.smblctx &&
-                                               lock_data->context.tid) {
-                                       /* Valid (nonzero) entry - copy it. */
-                                       memcpy(&new_lock_data[num_valid_entries],
-                                               lock_data, sizeof(struct lock_struct));
-                                       num_valid_entries++;
-                               }
-                       }
+       smblctx = fsp->op->global->open_persistent_id;
+
+       if (!fsp->op->global->durable) {
+               return false;
+       }
+
+       /*
+        * When reconnecting, we do not want to validate the brlock entries
+        * and thereby remove our own (disconnected) entries but reactivate
+        * them instead.
+        */
+
+       br_lck = brl_get_locks(talloc_tos(), fsp);
+       if (br_lck == NULL) {
+               return false;
+       }
+
+       if (br_lck->num_locks == 0) {
+               TALLOC_FREE(br_lck);
+               return true;
+       }
+
+       for (i=0; i < br_lck->num_locks; i++) {
+               struct lock_struct *lock = &br_lck->lock_data[i];
+
+               /*
+                * as this is a durable handle we only expect locks
+                * of the current file handle!
+                */
+
+               if (lock->context.smblctx != smblctx) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               if (lock->context.tid != TID_FIELD_INVALID) {
+                       TALLOC_FREE(br_lck);
+                       return false;
                }
 
-               SAFE_FREE(*pplocks);
-               *pplocks = new_lock_data;
-               *pnum_entries = num_valid_entries;
+               if (!server_id_is_disconnected(&lock->context.pid)) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               if (lock->fnum != FNUM_FIELD_INVALID) {
+                       TALLOC_FREE(br_lck);
+                       return false;
+               }
+
+               lock->context.pid = self;
+               lock->context.tid = tid;
+               lock->fnum = fnum;
        }
 
-       return True;
+       /*
+        * All entries now belong to this handle again: take over the lock
+        * count and flag the record so that freeing br_lck stores it.
+        */
+       fsp->current_lock_count = br_lck->num_locks;
+       br_lck->modified = true;
+       TALLOC_FREE(br_lck);
+       return true;
 }
 
 struct brl_forall_cb {
@@ -1585,14 +1790,13 @@ struct brl_forall_cb {
  on each lock.
 ****************************************************************************/
 
-static int traverse_fn(struct db_record *rec, void *state)
+static int brl_traverse_fn(struct db_record *rec, void *state)
 {
        struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
        struct lock_struct *locks;
        struct file_id *key;
        unsigned int i;
        unsigned int num_locks = 0;
-       unsigned int orig_num_locks = 0;
        TDB_DATA dbkey;
        TDB_DATA value;
 
@@ -1602,31 +1806,14 @@ static int traverse_fn(struct db_record *rec, void *state)
        /* In a traverse function we must make a copy of
           dbuf before modifying it. */
 
-       locks = (struct lock_struct *)memdup(value.dptr, value.dsize);
+       locks = (struct lock_struct *)talloc_memdup(
+               talloc_tos(), value.dptr, value.dsize);
        if (!locks) {
                return -1; /* Terminate traversal. */
        }
 
        key = (struct file_id *)dbkey.dptr;
-       orig_num_locks = num_locks = value.dsize/sizeof(*locks);
-
-       /* Ensure the lock db is clean of entries from invalid processes. */
-
-       if (!validate_lock_entries(&num_locks, &locks)) {
-               SAFE_FREE(locks);
-               return -1; /* Terminate traversal */
-       }
-
-       if (orig_num_locks != num_locks) {
-               if (num_locks) {
-                       TDB_DATA data;
-                       data.dptr = (uint8_t *)locks;
-                       data.dsize = num_locks*sizeof(struct lock_struct);
-                       dbwrap_record_store(rec, data, TDB_REPLACE);
-               } else {
-                       dbwrap_record_delete(rec);
-               }
-       }
+       num_locks = value.dsize/sizeof(*locks);
 
        if (cb->fn) {
                for ( i=0; i<num_locks; i++) {
@@ -1640,7 +1827,7 @@ static int traverse_fn(struct db_record *rec, void *state)
                }
        }
 
-       SAFE_FREE(locks);
+       TALLOC_FREE(locks);
        return 0;
 }
 
@@ -1664,7 +1851,7 @@ int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
        }
        cb.fn = fn;
        cb.private_data = private_data;
-       status = dbwrap_traverse(brlock_db, traverse_fn, &cb, &count);
+       status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
 
        if (!NT_STATUS_IS_OK(status)) {
                return -1;
@@ -1681,15 +1868,30 @@ int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
 
 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
 {
-       if (br_lck->read_only) {
-               SMB_ASSERT(!br_lck->modified);
-       }
+       unsigned i;
+       struct lock_struct *locks = br_lck->lock_data;
 
        if (!br_lck->modified) {
+               DEBUG(10, ("br_lck not modified\n"));
                goto done;
        }
 
-       if (br_lck->num_locks == 0) {
+       i = 0;
+
+       while (i < br_lck->num_locks) {
+               if (locks[i].context.pid.pid == 0) {
+                       /*
+                        * Autocleanup, the process conflicted and does not
+                        * exist anymore.
+                        */
+                       locks[i] = locks[br_lck->num_locks-1];
+                       br_lck->num_locks -= 1;
+               } else {
+                       i += 1;
+               }
+       }
+
+       if ((br_lck->num_locks == 0) && (br_lck->num_read_oplocks == 0)) {
                /* No locks - delete this entry. */
                NTSTATUS status = dbwrap_record_delete(br_lck->record);
                if (!NT_STATUS_IS_OK(status)) {
@@ -1698,127 +1900,105 @@ static void byte_range_lock_flush(struct byte_range_lock *br_lck)
                        smb_panic("Could not delete byte range lock entry");
                }
        } else {
+               size_t lock_len, data_len;
                TDB_DATA data;
                NTSTATUS status;
 
-               data.dptr = (uint8 *)br_lck->lock_data;
-               data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
+               lock_len = br_lck->num_locks * sizeof(struct lock_struct);
+               data_len = lock_len + sizeof(br_lck->num_read_oplocks);
+
+               data.dsize = data_len;
+               data.dptr = talloc_array(talloc_tos(), uint8_t, data_len);
+               SMB_ASSERT(data.dptr != NULL);
+
+               memcpy(data.dptr, br_lck->lock_data, lock_len);
+               memcpy(data.dptr + lock_len, &br_lck->num_read_oplocks,
+                      sizeof(br_lck->num_read_oplocks));
 
                status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
+               TALLOC_FREE(data.dptr);
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(0, ("store returned %s\n", nt_errstr(status)));
                        smb_panic("Could not store byte range mode entry");
                }
        }
 
- done:
+       DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db)));
 
-       br_lck->read_only = true;
+ done:
        br_lck->modified = false;
-
        TALLOC_FREE(br_lck->record);
 }
 
 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
 {
+       /*
+        * talloc destructor: write pending changes back before the object
+        * goes away. lock_data is no longer freed by hand here; in
+        * brl_parse_data() it is allocated as a talloc child of br_lck.
+        */
        byte_range_lock_flush(br_lck);
-       SAFE_FREE(br_lck->lock_data);
        return 0;
 }
 
+/*******************************************************************
+ Unpack a brlock database record into br_lck.
+
+ The record is an array of struct lock_struct immediately followed by
+ the num_read_oplocks counter. An empty record is accepted and leaves
+ br_lck untouched. On success lock_data is a talloc child of br_lck.
+
+ Returns false if the record size does not fit that layout or if the
+ copy of the lock array cannot be allocated.
+********************************************************************/
+
+static bool brl_parse_data(struct byte_range_lock *br_lck, TDB_DATA data)
+{
+       size_t data_len;
+
+       if (data.dsize == 0) {
+               return true;
+       }
+       /* what does not divide into lock_structs must be the counter */
+       if (data.dsize % sizeof(struct lock_struct) !=
+           sizeof(br_lck->num_read_oplocks)) {
+               DEBUG(1, ("Invalid data size: %u\n", (unsigned)data.dsize));
+               return false;
+       }
+
+       br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
+       data_len = br_lck->num_locks * sizeof(struct lock_struct);
+
+       br_lck->lock_data = talloc_memdup(br_lck, data.dptr, data_len);
+       if (br_lck->lock_data == NULL) {
+               DEBUG(1, ("talloc_memdup failed\n"));
+               return false;
+       }
+       /* the counter sits right behind the lock array */
+       memcpy(&br_lck->num_read_oplocks, data.dptr + data_len,
+              sizeof(br_lck->num_read_oplocks));
+       return true;
+}
+
 /*******************************************************************
  Fetch a set of byte range lock data from the database.
  Leave the record locked.
  TALLOC_FREE(brl) will release the lock in the destructor.
 ********************************************************************/
 
-static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
-                                       files_struct *fsp, bool read_only)
+struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
 {
        TDB_DATA key, data;
-       struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
-       bool do_read_only = read_only;
+       struct byte_range_lock *br_lck;
 
+       br_lck = talloc_zero(mem_ctx, struct byte_range_lock);
        if (br_lck == NULL) {
                return NULL;
        }
 
        br_lck->fsp = fsp;
-       br_lck->num_locks = 0;
-       br_lck->modified = False;
-       br_lck->key = fsp->file_id;
 
-       key.dptr = (uint8 *)&br_lck->key;
+       key.dptr = (uint8_t *)&fsp->file_id;
        key.dsize = sizeof(struct file_id);
 
-       if (!fsp->lockdb_clean) {
-               /* We must be read/write to clean
-                  the dead entries. */
-               do_read_only = false;
-       }
-
-       if (do_read_only) {
-               NTSTATUS status;
-               status = dbwrap_fetch(brlock_db, br_lck, key, &data);
-               if (!NT_STATUS_IS_OK(status)) {
-                       DEBUG(3, ("Could not fetch byte range lock record\n"));
-                       TALLOC_FREE(br_lck);
-                       return NULL;
-               }
-               br_lck->record = NULL;
-       } else {
-               br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
-
-               if (br_lck->record == NULL) {
-                       DEBUG(3, ("Could not lock byte range lock entry\n"));
-                       TALLOC_FREE(br_lck);
-                       return NULL;
-               }
+       br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
 
-               data = dbwrap_record_get_value(br_lck->record);
+       if (br_lck->record == NULL) {
+               DEBUG(3, ("Could not lock byte range lock entry\n"));
+               TALLOC_FREE(br_lck);
+               return NULL;
        }
 
-       br_lck->read_only = do_read_only;
-       br_lck->lock_data = NULL;
-
-       talloc_set_destructor(br_lck, byte_range_lock_destructor);
-
-       br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
-
-       if (br_lck->num_locks != 0) {
-               br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
-                                                    br_lck->num_locks);
-               if (br_lck->lock_data == NULL) {
-                       DEBUG(0, ("malloc failed\n"));
-                       TALLOC_FREE(br_lck);
-                       return NULL;
-               }
+       data = dbwrap_record_get_value(br_lck->record);
 
-               memcpy(br_lck->lock_data, data.dptr, data.dsize);
+       if (!brl_parse_data(br_lck, data)) {
+               TALLOC_FREE(br_lck);
+               return NULL;
        }
 
-       if (!fsp->lockdb_clean) {
-               int orig_num_locks = br_lck->num_locks;
-
-               /* This is the first time we've accessed this. */
-               /* Go through and ensure all entries exist - remove any that don't. */
-               /* Makes the lockdb self cleaning at low cost. */
-
-               if (!validate_lock_entries(&br_lck->num_locks,
-                                          &br_lck->lock_data)) {
-                       SAFE_FREE(br_lck->lock_data);
-                       TALLOC_FREE(br_lck);
-                       return NULL;
-               }
-
-               /* Ensure invalid locks are cleaned up in the destructor. */
-               if (orig_num_locks != br_lck->num_locks) {
-                       br_lck->modified = True;
-               }
-
-               /* Mark the lockdb as "clean" as seen from this open file. */
-               fsp->lockdb_clean = True;
-       }
+       talloc_set_destructor(br_lck, byte_range_lock_destructor);
 
        if (DEBUGLEVEL >= 10) {
                unsigned int i;
@@ -1831,52 +2011,115 @@ static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
                }
        }
 
-       if (do_read_only != read_only) {
-               /*
-                * this stores the record and gets rid of
-                * the write lock that is needed for a cleanup
-                */
-               byte_range_lock_flush(br_lck);
-       }
-
        return br_lck;
 }
 
-struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
-                                       files_struct *fsp)
+/*
+ * Carries the arguments of brl_get_locks_readonly() into its
+ * dbwrap_parse_record() callback and the result back out.
+ */
+struct brl_get_locks_readonly_state {
+       /* talloc parent for the parsed byte_range_lock */
+       TALLOC_CTX *mem_ctx;
+       /* where the callback stores the parsed object, NULL on failure */
+       struct byte_range_lock **br_lock;
+};
+
+/*
+ * dbwrap_parse_record() callback used by brl_get_locks_readonly().
+ *
+ * Allocates a zeroed byte_range_lock under state->mem_ctx, fills it from
+ * the raw record via brl_parse_data() and stores it in *state->br_lock.
+ * On allocation or parse failure *state->br_lock is set to NULL and
+ * nothing stays allocated under state->mem_ctx.
+ *
+ * "key" is not used; private_data must point to a
+ * struct brl_get_locks_readonly_state.
+ */
+static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
+                                         void *private_data)
 {
-       return brl_get_locks_internal(mem_ctx, fsp, False);
+       struct brl_get_locks_readonly_state *state =
+               (struct brl_get_locks_readonly_state *)private_data;
+       struct byte_range_lock *br_lck;
+
+       /*
+        * Reserve room for one sub-object of data.dsize bytes.
+        * NOTE(review): presumably so that the lock array copied by
+        * brl_parse_data() comes out of the same allocation - confirm.
+        */
+       br_lck = talloc_pooled_object(
+               state->mem_ctx, struct byte_range_lock, 1, data.dsize);
+       if (br_lck == NULL) {
+               *state->br_lock = NULL;
+               return;
+       }
+       *br_lck = (struct byte_range_lock) { 0 };
+       if (!brl_parse_data(br_lck, data)) {
+               /*
+                * Do not leave the half-built object hanging off
+                * state->mem_ctx: nobody holds a pointer to it once we
+                * return, so it would stay allocated until mem_ctx dies.
+                */
+               TALLOC_FREE(br_lck);
+               *state->br_lock = NULL;
+               return;
+       }
+       *state->br_lock = br_lck;
 }
 
 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
 {
+       /*
+        * Return a read-only view of the byte range locks on fsp.
+        *
+        * The result has no db record attached and no destructor installed
+        * here. It is either the object cached on fsp (non-clustered) or
+        * one moved to the current talloc stackframe (clustered).
+        * Returns NULL if the record cannot be parsed or memory runs out.
+        */
-       struct byte_range_lock *br_lock;
+       struct byte_range_lock *br_lock = NULL;
+       struct brl_get_locks_readonly_state state;
+       NTSTATUS status;
 
-       if (lp_clustering()) {
-               return brl_get_locks_internal(talloc_tos(), fsp, true);
-       }
+       DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n",
+                  dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum));
 
        if ((fsp->brlock_rec != NULL)
            && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
+               /*
+                * We have cached the brlock_rec and the database did not
+                * change.
+                */
                return fsp->brlock_rec;
        }
 
-       TALLOC_FREE(fsp->brlock_rec);
+       /*
+        * Parse the record fresh from the database
+        */
+
+       state.mem_ctx = fsp;
+       state.br_lock = &br_lock;
+
+       status = dbwrap_parse_record(
+               brlock_db,
+               make_tdb_data((uint8_t *)&fsp->file_id,
+                             sizeof(fsp->file_id)),
+               brl_get_locks_readonly_parser, &state);
 
-       br_lock = brl_get_locks_internal(talloc_tos(), fsp, true);
+       if (NT_STATUS_EQUAL(status,NT_STATUS_NOT_FOUND)) {
+               /*
+                * No locks on this file. Return an empty br_lock.
+                */
+               br_lock = talloc(fsp, struct byte_range_lock);
+               if (br_lock == NULL) {
+                       return NULL;
+               }
+
+               br_lock->num_read_oplocks = 0;
+               br_lock->num_locks = 0;
+               br_lock->lock_data = NULL;
+
+       } else if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(3, ("Could not parse byte range lock record: "
+                         "%s\n", nt_errstr(status)));
+               return NULL;
+       }
+       /*
+        * The parser callback signals allocation or parse failure by
+        * leaving br_lock NULL.
+        */
        if (br_lock == NULL) {
                return NULL;
        }
-       fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
 
-       fsp->brlock_rec = talloc_move(fsp, &br_lock);
+       br_lock->fsp = fsp;
+       br_lock->modified = false;
+       br_lock->record = NULL;
 
-       return fsp->brlock_rec;
+       if (lp_clustering()) {
+               /*
+                * In the cluster case we can't cache the brlock struct
+                * because dbwrap_get_seqnum does not work reliably over
+                * ctdb. Thus we have to throw away the brlock struct soon.
+                */
+               talloc_steal(talloc_tos(), br_lock);
+       } else {
+               /*
+                * Cache the brlock struct, invalidated when the dbwrap_seqnum
+                * changes. See beginning of this routine.
+                */
+               TALLOC_FREE(fsp->brlock_rec);
+               fsp->brlock_rec = br_lock;
+               fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
+       }
+
+       return br_lock;
 }
 
 struct brl_revalidate_state {
+       /* NOTE(review): presumably the allocated capacity of pids - confirm */
        ssize_t array_size;
-       uint32 num_pids;
+       /* number of entries in pids that brl_revalidate() walks */
+       uint32_t num_pids;
        struct server_id *pids;
 };
 
@@ -1912,7 +2155,7 @@ static int compare_procids(const void *p1, const void *p2)
        const struct server_id *i2 = (const struct server_id *)p2;
 
        if (i1->pid < i2->pid) return -1;
-       if (i2->pid > i2->pid) return 1;
+       if (i1->pid > i2->pid) return 1;
        return 0;
 }
 
@@ -1925,14 +2168,14 @@ static int compare_procids(const void *p1, const void *p2)
  * array, then qsort that array and only send to non-dupes.
  */
 
-static void brl_revalidate(struct messaging_context *msg_ctx,
-                          void *private_data,
-                          uint32_t msg_type,
-                          struct server_id server_id,
-                          DATA_BLOB *data)
+void brl_revalidate(struct messaging_context *msg_ctx,
+                   void *private_data,
+                   uint32_t msg_type,
+                   struct server_id server_id,
+                   DATA_BLOB *data)
 {
        struct brl_revalidate_state *state;
-       uint32 i;
+       uint32_t i;
        struct server_id last_pid;
 
        if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
@@ -1956,7 +2199,7 @@ static void brl_revalidate(struct messaging_context *msg_ctx,
        ZERO_STRUCT(last_pid);
 
        for (i=0; i<state->num_pids; i++) {
-               if (procid_equal(&last_pid, &state->pids[i])) {
+               if (serverid_equal(&last_pid, &state->pids[i])) {
                        /*
                         * We've seen that one already
                         */
@@ -1973,8 +2216,75 @@ static void brl_revalidate(struct messaging_context *msg_ctx,
        return;
 }
 
-void brl_register_msgs(struct messaging_context *msg_ctx)
+/****************************************************************************
+ Delete the brlock record of a file if it only holds disconnected locks
+ of one particular open.
+
+ The record for "fid" is deleted when every lock entry in it has a
+ disconnected server id and carries "open_persistent_id" as smblctx.
+
+ Returns true if the record was deleted or holds no data. Returns false if
+ the record cannot be fetched, an entry is still owned by a connected
+ server or by a different open, or the delete fails.
+****************************************************************************/
+
+bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
 {
-       messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
-                          brl_revalidate);
+       bool ret = false;
+       TALLOC_CTX *frame = talloc_stackframe();
+       TDB_DATA key, val;
+       struct db_record *rec;
+       struct lock_struct *lock;
+       unsigned n, num;
+       NTSTATUS status;
+
+       key = make_tdb_data((void*)&fid, sizeof(fid));
+
+       /* rec lives on frame and is freed together with it at "done" */
+       rec = dbwrap_fetch_locked(brlock_db, frame, key);
+       if (rec == NULL) {
+               DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
+                         "for file %s\n", file_id_string(frame, &fid)));
+               goto done;
+       }
+
+       val = dbwrap_record_get_value(rec);
+       lock = (struct lock_struct*)val.dptr;
+       /* integer division: a trailing partial entry is not counted */
+       num = val.dsize / sizeof(struct lock_struct);
+       if (lock == NULL) {
+               DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
+                          "file %s\n", file_id_string(frame, &fid)));
+               ret = true;
+               goto done;
+       }
+
+       /* a single foreign or still connected entry vetoes the cleanup */
+       for (n=0; n<num; n++) {
+               struct lock_context *ctx = &lock[n].context;
+
+               if (!server_id_is_disconnected(&ctx->pid)) {
+                       struct server_id_buf tmp;
+                       DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
+                                 "%s used by server %s, do not cleanup\n",
+                                 file_id_string(frame, &fid),
+                                 server_id_str_buf(ctx->pid, &tmp)));
+                       goto done;
+               }
+
+               if (ctx->smblctx != open_persistent_id) {
+                       DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
+                                 "%s expected smblctx %llu but found %llu"
+                                 ", do not cleanup\n",
+                                 file_id_string(frame, &fid),
+                                 (unsigned long long)open_persistent_id,
+                                 (unsigned long long)ctx->smblctx));
+                       goto done;
+               }
+       }
+
+       status = dbwrap_record_delete(rec);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
+                         "for file %s from %s, open %llu: %s\n",
+                         file_id_string(frame, &fid), dbwrap_name(brlock_db),
+                         (unsigned long long)open_persistent_id,
+                         nt_errstr(status)));
+               goto done;
+       }
+
+       DEBUG(10, ("brl_cleanup_disconnected: "
+                  "file %s cleaned up %u entries from open %llu\n",
+                  file_id_string(frame, &fid), num,
+                  (unsigned long long)open_persistent_id));
+
+       ret = true;
+done:
+       talloc_free(frame);
+       return ret;
 }