smbd: Remove byte_range_lock->read_only
source3/locking/brlock.c
/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "smbd/globals.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_open.h"
#include "serverid.h"
#include "messages.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

struct byte_range_lock {
	struct files_struct *fsp;
	unsigned int num_locks;
	bool modified;
	struct lock_struct *lock_data;
	struct db_record *record;
};
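
/*
 * Editor's note (inferred from this file plus the commit subject above,
 * "smbd: Remove byte_range_lock->read_only"): holding "record" means we
 * hold the dbwrap lock on this file's brlock.tdb entry, so every
 * byte_range_lock is implicitly writable and the old read_only flag is
 * no longer needed. "modified" marks lock_data dirty so the record is
 * written back when the structure is freed.
 */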

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
{
	DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
			i,
			(unsigned long long)pls->context.smblctx,
			(unsigned int)pls->context.tid,
			server_id_str(talloc_tos(), &pls->context.pid) ));

	DEBUG(10,("start = %.0f, size = %.0f, fnum = %llu, %s %s\n",
		(double)pls->start,
		(double)pls->size,
		(unsigned long long)pls->fnum,
		lock_type_name(pls->lock_type),
		lock_flav_name(pls->lock_flav) ));
}

unsigned int brl_num_locks(const struct byte_range_lock *brl)
{
	return brl->num_locks;
}

struct files_struct *brl_fsp(struct byte_range_lock *brl)
{
	return brl->fsp;
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

static bool brl_same_context(const struct lock_context *ctx1,
			     const struct lock_context *ctx2)
{
	return (serverid_equal(&ctx1->pid, &ctx2->pid) &&
		(ctx1->smblctx == ctx2->smblctx) &&
		(ctx1->tid == ctx2->tid));
}

/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
			const struct lock_struct *lck2)
{
	/* XXX Remove for Win7 compatibility. */
	/* this extra check is not redundant - it copes with locks
	   that go beyond the end of 64 bit file space */
	if (lck1->size != 0 &&
	    lck1->start == lck2->start &&
	    lck1->size == lck2->size) {
		return True;
	}

	if (lck1->start >= (lck2->start+lck2->size) ||
	    lck2->start >= (lck1->start+lck1->size)) {
		return False;
	}
	return True;
}
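
/*
 * Editor's example of why the first test matters: for two identical
 * locks with start = 0xFFFFFFFFFFFFFFF0 and size = 0x20, start + size
 * wraps around 64 bits, so the interval test above would wrongly report
 * no overlap; the exact start/size match returns True before the
 * wrapping arithmetic is consulted.
 */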

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A READ lock can stack on top of a WRITE lock if they have the same
	 * context & fnum. */
	if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
	    brl_same_context(&lck1->context, &lck2->context) &&
	    lck1->fnum == lck2->fnum) {
		return False;
	}

	return brl_overlap(lck1, lck2);
}
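
/*
 * Editor's worked example: a handle holding a WRITE lock on [100, 110)
 * may stack a READ lock on the same range from the same context and
 * fnum (no conflict above); a READ request from any other context falls
 * through to brl_overlap() and conflicts.
 */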

/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1,
				const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
	SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
	SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}
	/* Locks on the same context don't conflict. Ignore fnum. */
	if (brl_same_context(&lck1->context, &lck2->context)) {
		return False;
	}

	/* One is read, the other write, or the context is different,
	   do they overlap ? */
	return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
static bool brl_conflict1(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	if (brl_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
		return False;
	}

	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
		return True;
	}

	if (lck1->start >= (lck2->start + lck2->size) ||
	    lck2->start >= (lck1->start + lck1->size)) {
		return False;
	}

	return True;
}
#endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
{
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
		return False;

	/* POSIX flavour locks never conflict here - this is only called
	   in the read/write path. */

	if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
		return False;

	/*
	 * Incoming WRITE locks conflict with existing READ locks even
	 * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
	 */

	if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
		if (brl_same_context(&lck1->context, &lck2->context) &&
					lck1->fnum == lck2->fnum)
			return False;
	}

	return brl_overlap(lck1, lck2);
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
	if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
		return True;
	if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
		return True;
	return False;
}

/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

static NTSTATUS brl_lock_failed(files_struct *fsp,
				const struct lock_struct *lock,
				bool blocking_lock)
{
	if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
		/* amazing the little things you learn with a test
		   suite. Locks beyond this offset (as a 64 bit
		   number!) always generate the conflict error code,
		   unless the top bit is set */
		if (!blocking_lock) {
			fsp->last_lock_failure = *lock;
		}
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
			lock->context.tid == fsp->last_lock_failure.context.tid &&
			lock->fnum == fsp->last_lock_failure.fnum &&
			lock->start == fsp->last_lock_failure.start) {
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (!blocking_lock) {
		fsp->last_lock_failure = *lock;
	}
	return NT_STATUS_LOCK_NOT_GRANTED;
}
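
/*
 * Editor's example of the emulated behaviour: the first failed attempt
 * on a range below 0xEF000000 returns NT_STATUS_LOCK_NOT_GRANTED and is
 * remembered in fsp->last_lock_failure; an identical retry (same pid,
 * tid, fnum and start) then returns NT_STATUS_FILE_LOCK_CONFLICT.
 */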

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
	int tdb_flags;

	if (brlock_db) {
		return;
	}

	tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;

	if (!lp_clustering()) {
		/*
		 * We can't use the SEQNUM trick to cache brlock
		 * entries in the clustering case because ctdb seqnum
		 * propagation has a delay.
		 */
		tdb_flags |= TDB_SEQNUM;
	}

	brlock_db = db_open(NULL, lock_path("brlock.tdb"),
			    lp_open_files_db_hash_size(), tdb_flags,
			    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
			    DBWRAP_LOCK_ORDER_2);
	if (!brlock_db) {
		DEBUG(0,("Failed to open byte range locking database %s\n",
			lock_path("brlock.tdb")));
		return;
	}
}

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
	TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	if (lck1->start != lck2->start) {
		return (lck1->start - lck2->start);
	}
	if (lck2->size != lck1->size) {
		return ((int)lck1->size - (int)lck2->size);
	}
	return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
	unsigned int i;
	files_struct *fsp = br_lck->fsp;
	struct lock_struct *locks = br_lck->lock_data;
	NTSTATUS status;

	SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

	if ((plock->start + plock->size - 1 < plock->start) &&
			plock->size != 0) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	for (i=0; i < br_lck->num_locks; i++) {
		/* Do any Windows or POSIX locks conflict ? */
		if (brl_conflict(&locks[i], plock)) {
			/* Remember who blocked us. */
			plock->context.smblctx = locks[i].context.smblctx;
			return brl_lock_failed(fsp,plock,blocking_lock);
		}
#if ZERO_ZERO
		if (plock->start == 0 && plock->size == 0 &&
				locks[i].size == 0) {
			break;
		}
#endif
	}

	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}

	/* We can get the Windows lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
		int errno_ret;
		if (!set_posix_lock_windows_flavour(fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&plock->context,
				locks,
				br_lck->num_locks,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* no conflicts - add it to the list of locks */
	locks = talloc_realloc(br_lck, locks, struct lock_struct,
			       (br_lck->num_locks + 1));
	if (!locks) {
		status = NT_STATUS_NO_MEMORY;
		goto fail;
	}

	memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
	br_lck->num_locks += 1;
	br_lck->lock_data = locks;
	br_lck->modified = True;

	return NT_STATUS_OK;
 fail:
	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}
	return status;
}

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,	/* Output array. */
						struct lock_struct *ex,		/* existing lock. */
						struct lock_struct *plock)	/* proposed lock. */
{
	bool lock_types_differ = (ex->lock_type != plock->lock_type);

	/* We can't merge non-conflicting locks on different context - ignore fnum. */

	if (!brl_same_context(&ex->context, &plock->context)) {
		/* Just copy. */
		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

	/* We now know we have the same context. */

	/* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

	if ( (ex->start > (plock->start + plock->size)) ||
		(plock->start > (ex->start + ex->size))) {

		/* No overlap with this lock - copy existing. */

		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start + ex->size <= plock->start + plock->size) ) {

		/* Replace - discard existing lock. */

		return 0;
	}

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if (plock->start + plock->size == ex->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if (ex->start + ex->size == plock->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the existing lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->start = ex->start;
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start <= plock->start + plock->size) &&
		(ex->start + ex->size > plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add remaining existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing start and size. */
			lck_arr[0].start = plock->start + plock->size;
			lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += (ex->start + ex->size) - (plock->start + plock->size);
			return 0;
		}
	}

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if ( (ex->start < plock->start) &&
			(ex->start + ex->size >= plock->start) &&
			(ex->start + ex->size <= plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the truncated old lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing size. */
			lck_arr[0].size = plock->start - ex->start;
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. MUST ADJUST plock SIZE FIRST ! */
			plock->size += (plock->start - ex->start);
			plock->start = ex->start;
			return 0;
		}
	}

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

	if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

		if (lock_types_differ) {

			/* We have to split ex into two locks here. */

			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

			/* Adjust first existing size. */
			lck_arr[0].size = plock->start - ex->start;

			/* Adjust second existing start and size. */
			lck_arr[1].start = plock->start + plock->size;
			lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 2;
		} else {
			/* Just eat the existing locks, merge them into plock. */
			plock->start = ex->start;
			plock->size = ex->size;
			return 0;
		}
	}

	/* Never get here. */
	smb_panic("brlock_posix_split_merge");
	/* Notreached. */

	/* Keep some compilers happy. */
	return 0;
}
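
/*
 * Editor's worked example ("complete overlap", differing types): given
 * an existing WRITE lock ex = [100, 150) and an incoming READ lock
 * plock = [120, 130) in the same context, this returns 2 with
 * lck_arr[0] = [100, 120) and lck_arr[1] = [130, 150); the caller then
 * adds plock itself, leaving three locks covering the range.
 */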

/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       struct lock_struct *plock)
{
	unsigned int i, count, posix_count;
	struct lock_struct *locks = br_lck->lock_data;
	struct lock_struct *tp;
	bool signal_pending_read = False;
	bool break_oplocks = false;
	NTSTATUS status;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size - 1 < plock->start) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, and add our lock,
	   so we need at most 2 more entries. */

	tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 2);
	if (!tp) {
		return NT_STATUS_NO_MEMORY;
	}

	count = posix_count = 0;

	for (i=0; i < br_lck->num_locks; i++) {
		struct lock_struct *curr_lock = &locks[i];

		/* If we have a pending read lock, a lock downgrade should
		   trigger a lock re-evaluation. */
		if (curr_lock->lock_type == PENDING_READ_LOCK &&
				brl_pending_overlap(plock, curr_lock)) {
			signal_pending_read = True;
		}

		if (curr_lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(curr_lock, plock)) {
				/* No games with error messages. */
				TALLOC_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smblctx = curr_lock->context.smblctx;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
			count++;
		} else {
			unsigned int tmp_count = 0;

			/* POSIX conflict semantics are different. */
			if (brl_conflict_posix(curr_lock, plock)) {
				/* Can't block ourselves with POSIX locks. */
				/* No games with error messages. */
				TALLOC_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smblctx = curr_lock->context.smblctx;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}

			/* Work out overlaps. */
			tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
			posix_count += tmp_count;
			count += tmp_count;
		}
	}

	/*
	 * Break oplocks while we hold a brl. Since lock() and unlock() calls
	 * are not symmetric with POSIX semantics, we cannot guarantee our
	 * contend_level2_oplocks_begin/end calls will be acquired and
	 * released one-for-one as with Windows semantics. Therefore we only
	 * call contend_level2_oplocks_begin if this is the first POSIX brl on
	 * the file.
	 */
	break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
			 posix_count == 0);
	if (break_oplocks) {
		contend_level2_oplocks_begin(br_lck->fsp,
					     LEVEL2_CONTEND_POSIX_BRL);
	}

	/* Try and add the lock in order, sorted by lock start. */
	for (i=0; i < count; i++) {
		struct lock_struct *curr_lock = &tp[i];

		if (curr_lock->start <= plock->start) {
			continue;
		}
		/* Found the insertion point. */
		break;
	}

	if (i < count) {
		memmove(&tp[i+1], &tp[i],
			(count - i)*sizeof(struct lock_struct));
	}
	memcpy(&tp[i], plock, sizeof(struct lock_struct));
	count++;

	/* We can get the POSIX lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
		int errno_ret;

		/* The lower layer just needs to attempt to
		   get the system POSIX lock. We've weeded out
		   any conflicts above. */

		if (!set_posix_lock_posix_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				TALLOC_FREE(tp);
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				TALLOC_FREE(tp);
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* If we didn't use all the allocated size,
	 * Realloc so we don't leak entries per lock call. */
	if (count < br_lck->num_locks + 2) {
		tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
		if (!tp) {
			status = NT_STATUS_NO_MEMORY;
			goto fail;
		}
	}

	br_lck->num_locks = count;
	TALLOC_FREE(br_lck->lock_data);
	br_lck->lock_data = tp;
	locks = tp;
	br_lck->modified = True;

	/* A successful downgrade from write to read lock can trigger a lock
	   re-evaluation where waiting readers can now proceed. */

	if (signal_pending_read) {
		/* Send unlock messages to any pending read waiters that overlap. */
		for (i=0; i < br_lck->num_locks; i++) {
			struct lock_struct *pend_lock = &locks[i];

			/* Ignore non-pending locks. */
			if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
				continue;
			}

			if (pend_lock->lock_type == PENDING_READ_LOCK &&
					brl_pending_overlap(plock, pend_lock)) {
				DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
					procid_str_static(&pend_lock->context.pid )));

				messaging_send(msg_ctx, pend_lock->context.pid,
					       MSG_SMB_UNLOCK, &data_blob_null);
			}
		}
	}

	return NT_STATUS_OK;
 fail:
	if (break_oplocks) {
		contend_level2_oplocks_end(br_lck->fsp,
					   LEVEL2_CONTEND_POSIX_BRL);
	}
	return status;
}

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
				       struct byte_range_lock *br_lck,
				       struct lock_struct *plock,
				       bool blocking_lock,
				       struct blocking_lock_record *blr)
{
	VFS_FIND(brl_lock_windows);
	return handle->fns->brl_lock_windows_fn(handle, br_lck, plock,
						blocking_lock, blr);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
		struct byte_range_lock *br_lck,
		uint64_t smblctx,
		struct server_id pid,
		br_off start,
		br_off size,
		enum brl_type lock_type,
		enum brl_flavour lock_flav,
		bool blocking_lock,
		uint64_t *psmblctx,
		struct blocking_lock_record *blr)
{
	NTSTATUS ret;
	struct lock_struct lock;

#if !ZERO_ZERO
	if (start == 0 && size == 0) {
		DEBUG(0,("client sent 0/0 lock - please report this\n"));
	}
#endif

#ifdef DEVELOPER
	/* Quieten valgrind on test. */
	ZERO_STRUCT(lock);
#endif

	lock.context.smblctx = smblctx;
	lock.context.pid = pid;
	lock.context.tid = br_lck->fsp->conn->cnum;
	lock.start = start;
	lock.size = size;
	lock.fnum = br_lck->fsp->fnum;
	lock.lock_type = lock_type;
	lock.lock_flav = lock_flav;

	if (lock_flav == WINDOWS_LOCK) {
		ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
		    &lock, blocking_lock, blr);
	} else {
		ret = brl_lock_posix(msg_ctx, br_lck, &lock);
	}

#if ZERO_ZERO
	/* sort the lock list */
	TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
#endif

	/* If we're returning an error, return who blocked us. */
	if (!NT_STATUS_IS_OK(ret) && psmblctx) {
		*psmblctx = lock.context.smblctx;
	}
	return ret;
}
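
/*
 * Typical call sequence (editor's sketch only - the real callers live
 * in locking/locking.c; names match the prototypes in locking/proto.h):
 *
 *	struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
 *	uint64_t blocker = 0;
 *	NTSTATUS status = brl_lock(msg_ctx, br_lck, smblctx,
 *				   messaging_server_id(msg_ctx),
 *				   start, size, WRITE_LOCK, WINDOWS_LOCK,
 *				   false, &blocker, NULL);
 *	TALLOC_FREE(br_lck);	(freeing stores the record if modified)
 */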

/****************************************************************************
 Unlock a range of bytes - Windows semantics.
****************************************************************************/

bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       const struct lock_struct *plock)
{
	unsigned int i, j;
	struct lock_struct *locks = br_lck->lock_data;
	enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

	SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
	/* Delete write locks by preference... The lock list
	   is sorted in the zero zero case. */

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->lock_type == WRITE_LOCK &&
		    brl_same_context(&lock->context, &plock->context) &&
		    lock->fnum == plock->fnum &&
		    lock->lock_flav == WINDOWS_LOCK &&
		    lock->start == plock->start &&
		    lock->size == plock->size) {

			/* found it - delete it */
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i != br_lck->num_locks) {
		/* We found it - don't search again. */
		goto unlock_continue;
	}
#endif

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		if (IS_PENDING_LOCK(lock->lock_type)) {
			continue;
		}

		/* Only remove our own locks that match in start, size, and flavour. */
		if (brl_same_context(&lock->context, &plock->context) &&
					lock->fnum == plock->fnum &&
					lock->lock_flav == WINDOWS_LOCK &&
					lock->start == plock->start &&
					lock->size == plock->size ) {
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i == br_lck->num_locks) {
		/* we didn't find it */
		return False;
	}

#if ZERO_ZERO
  unlock_continue:
#endif

	/* Actually delete the lock. */
	if (i < br_lck->num_locks - 1) {
		memmove(&locks[i], &locks[i+1],
			sizeof(*locks)*((br_lck->num_locks-1) - i));
	}

	br_lck->num_locks -= 1;
	br_lck->modified = True;

	/* Unlock the underlying POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_windows_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				deleted_lock_type,
				&plock->context,
				locks,
				br_lck->num_locks);
	}

	/* Send unlock messages to any pending waiters that overlap. */
	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	return True;
}

/****************************************************************************
 Unlock a range of bytes - POSIX semantics.
****************************************************************************/

static bool brl_unlock_posix(struct messaging_context *msg_ctx,
			     struct byte_range_lock *br_lck,
			     struct lock_struct *plock)
{
	unsigned int i, j, count;
	struct lock_struct *tp;
	struct lock_struct *locks = br_lck->lock_data;
	bool overlap_found = False;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return False;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size < plock->start ||
			plock->start + plock->size < plock->size) {
		DEBUG(10,("brl_unlock_posix: lock wrap\n"));
		return False;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, so we need at most
	   1 more entry. */

	tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1);
	if (!tp) {
		DEBUG(10,("brl_unlock_posix: malloc fail\n"));
		return False;
	}

	count = 0;
	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];
		unsigned int tmp_count;

		/* Only remove our own locks - ignore fnum. */
		if (IS_PENDING_LOCK(lock->lock_type) ||
				!brl_same_context(&lock->context, &plock->context)) {
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		if (lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(lock, plock)) {
				TALLOC_FREE(tp);
				return false;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		/* Work out overlaps. */
		tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

		if (tmp_count == 0) {
			/* plock overlapped the existing lock completely,
			   or replaced it. Don't copy the existing lock. */
			overlap_found = true;
		} else if (tmp_count == 1) {
			/* Either no overlap, (simple copy of existing lock) or
			 * an overlap of an existing lock. */
			/* If the lock changed size, we had an overlap. */
			if (tp[count].size != lock->size) {
				overlap_found = true;
			}
			count += tmp_count;
		} else if (tmp_count == 2) {
			/* We split a lock range in two. */
			overlap_found = true;
			count += tmp_count;

			/* Optimisation... */
			/* We know we're finished here as we can't overlap any
			   more POSIX locks. Copy the rest of the lock array. */

			if (i < br_lck->num_locks - 1) {
				memcpy(&tp[count], &locks[i+1],
					sizeof(*locks)*((br_lck->num_locks-1) - i));
				count += ((br_lck->num_locks-1) - i);
			}
			break;
		}

	}

	if (!overlap_found) {
		/* Just ignore - no change. */
		TALLOC_FREE(tp);
		DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
		return True;
	}

	/* Unlock any POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_posix_flavour(br_lck->fsp,
						plock->start,
						plock->size,
						&plock->context,
						tp,
						count);
	}

	/* Realloc so we don't leak entries per unlock call. */
	if (count) {
		tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
		if (!tp) {
			DEBUG(10,("brl_unlock_posix: realloc fail\n"));
			return False;
		}
	} else {
		/* We deleted the last lock. */
		TALLOC_FREE(tp);
		tp = NULL;
	}

	contend_level2_oplocks_end(br_lck->fsp,
				   LEVEL2_CONTEND_POSIX_BRL);

	br_lck->num_locks = count;
	TALLOC_FREE(br_lck->lock_data);
	locks = tp;
	br_lck->lock_data = tp;
	br_lck->modified = True;

	/* Send unlock messages to any pending waiters that overlap. */

	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	return True;
}
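
/*
 * Editor's worked example: unlocking [40, 50) out of a held POSIX WRITE
 * lock [0, 100) hits the "complete overlap" split in
 * brlock_posix_split_merge(), which emits [0, 40) and [50, 100); since
 * plock is an unlock it is not re-added, so those two fragments are all
 * that remain.
 */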
1251
1252 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1253                                      struct messaging_context *msg_ctx,
1254                                      struct byte_range_lock *br_lck,
1255                                      const struct lock_struct *plock)
1256 {
1257         VFS_FIND(brl_unlock_windows);
1258         return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck,
1259                                                   plock);
1260 }
1261
1262 /****************************************************************************
1263  Unlock a range of bytes.
1264 ****************************************************************************/
1265
1266 bool brl_unlock(struct messaging_context *msg_ctx,
1267                 struct byte_range_lock *br_lck,
1268                 uint64_t smblctx,
1269                 struct server_id pid,
1270                 br_off start,
1271                 br_off size,
1272                 enum brl_flavour lock_flav)
1273 {
1274         struct lock_struct lock;
1275
1276         lock.context.smblctx = smblctx;
1277         lock.context.pid = pid;
1278         lock.context.tid = br_lck->fsp->conn->cnum;
1279         lock.start = start;
1280         lock.size = size;
1281         lock.fnum = br_lck->fsp->fnum;
1282         lock.lock_type = UNLOCK_LOCK;
1283         lock.lock_flav = lock_flav;
1284
1285         if (lock_flav == WINDOWS_LOCK) {
1286                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1287                     br_lck, &lock);
1288         } else {
1289                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1290         }
1291 }
1292
1293 /****************************************************************************
1294  Test if we could add a lock if we wanted to.
1295  Returns True if the region required is currently unlocked, False if locked.
1296 ****************************************************************************/
1297
1298 bool brl_locktest(struct byte_range_lock *br_lck,
1299                 uint64_t smblctx,
1300                 struct server_id pid,
1301                 br_off start,
1302                 br_off size,
1303                 enum brl_type lock_type,
1304                 enum brl_flavour lock_flav)
1305 {
1306         bool ret = True;
1307         unsigned int i;
1308         struct lock_struct lock;
1309         const struct lock_struct *locks = br_lck->lock_data;
1310         files_struct *fsp = br_lck->fsp;
1311
1312         lock.context.smblctx = smblctx;
1313         lock.context.pid = pid;
1314         lock.context.tid = br_lck->fsp->conn->cnum;
1315         lock.start = start;
1316         lock.size = size;
1317         lock.fnum = fsp->fnum;
1318         lock.lock_type = lock_type;
1319         lock.lock_flav = lock_flav;
1320
1321         /* Make sure existing locks don't conflict */
1322         for (i=0; i < br_lck->num_locks; i++) {
1323                 /*
1324                  * Our own locks don't conflict.
1325                  */
1326                 if (brl_conflict_other(&locks[i], &lock)) {
1327                         return False;
1328                 }
1329         }
1330
1331         /*
1332          * There is no lock held by an SMB daemon, check to
1333          * see if there is a POSIX lock from a UNIX or NFS process.
1334          * This only conflicts with Windows locks, not POSIX locks.
1335          */
1336
1337         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1338                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1339
1340                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for %s file %s\n",
1341                         (double)start, (double)size, ret ? "locked" : "unlocked",
1342                         fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1343
1344                 /* We need to return the inverse of is_posix_locked. */
1345                 ret = !ret;
1346         }
1347
1348         /* no conflicts - we could have added it */
1349         return ret;
1350 }
1351
1352 /****************************************************************************
1353  Query for existing locks.
1354 ****************************************************************************/
1355
1356 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1357                 uint64_t *psmblctx,
1358                 struct server_id pid,
1359                 br_off *pstart,
1360                 br_off *psize,
1361                 enum brl_type *plock_type,
1362                 enum brl_flavour lock_flav)
1363 {
1364         unsigned int i;
1365         struct lock_struct lock;
1366         const struct lock_struct *locks = br_lck->lock_data;
1367         files_struct *fsp = br_lck->fsp;
1368
1369         lock.context.smblctx = *psmblctx;
1370         lock.context.pid = pid;
1371         lock.context.tid = br_lck->fsp->conn->cnum;
1372         lock.start = *pstart;
1373         lock.size = *psize;
1374         lock.fnum = fsp->fnum;
1375         lock.lock_type = *plock_type;
1376         lock.lock_flav = lock_flav;
1377
1378         /* Make sure existing locks don't conflict */
1379         for (i=0; i < br_lck->num_locks; i++) {
1380                 const struct lock_struct *exlock = &locks[i];
1381                 bool conflict = False;
1382
1383                 if (exlock->lock_flav == WINDOWS_LOCK) {
1384                         conflict = brl_conflict(exlock, &lock);
1385                 } else {
1386                         conflict = brl_conflict_posix(exlock, &lock);
1387                 }
1388
1389                 if (conflict) {
1390                         *psmblctx = exlock->context.smblctx;
1391                         *pstart = exlock->start;
1392                         *psize = exlock->size;
1393                         *plock_type = exlock->lock_type;
1394                         return NT_STATUS_LOCK_NOT_GRANTED;
1395                 }
1396         }
1397
1398         /*
1399          * No lock held by an SMB daemon conflicts. Now check whether
1400          * a POSIX lock from a UNIX or NFS process exists.
1401          */
1402
1403         if (lp_posix_locking(fsp->conn->params)) {
1404                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1405
1406                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for %s file %s\n",
1407                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1408                         fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1409
1410                 if (ret) {
1411                         /* A POSIX lock from outside smbd has no smblctx - report (uint64_t)-1. */
1412                         *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1413                         return NT_STATUS_LOCK_NOT_GRANTED;
1414                 }
1415         }
1416
1417         return NT_STATUS_OK;
1418 }
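
/*
 * Illustrative sketch (not compiled): querying for a conflicting lock.
 * brl_lockquery() takes in/out parameters; on NT_STATUS_LOCK_NOT_GRANTED
 * they are overwritten with the details of the conflicting SMB lock.
 * The probe values below are assumptions.
 *
 *      uint64_t smblctx = 0;
 *      br_off start = 0, size = 100;
 *      enum brl_type lock_type = WRITE_LOCK;
 *      NTSTATUS status = brl_lockquery(br_lck, &smblctx,
 *                                      messaging_server_id(
 *                                              fsp->conn->sconn->msg_ctx),
 *                                      &start, &size, &lock_type,
 *                                      WINDOWS_LOCK);
 *      if (NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
 *              // smblctx/start/size/lock_type now describe the blocker
 *      }
 */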
1419
1420
1421 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1422                                      struct byte_range_lock *br_lck,
1423                                      struct lock_struct *plock,
1424                                      struct blocking_lock_record *blr)
1425 {
1426         VFS_FIND(brl_cancel_windows);
1427         return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock, blr);
1428 }
1429
1430 /****************************************************************************
1431  Remove a particular pending lock.
1432 ****************************************************************************/
1433 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1434                 uint64_t smblctx,
1435                 struct server_id pid,
1436                 br_off start,
1437                 br_off size,
1438                 enum brl_flavour lock_flav,
1439                 struct blocking_lock_record *blr)
1440 {
1441         bool ret;
1442         struct lock_struct lock;
1443
1444         lock.context.smblctx = smblctx;
1445         lock.context.pid = pid;
1446         lock.context.tid = br_lck->fsp->conn->cnum;
1447         lock.start = start;
1448         lock.size = size;
1449         lock.fnum = br_lck->fsp->fnum;
1450         lock.lock_flav = lock_flav;
1451         /* lock.lock_type doesn't matter */
1452
1453         if (lock_flav == WINDOWS_LOCK) {
1454                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1455                     &lock, blr);
1456         } else {
1457                 ret = brl_lock_cancel_default(br_lck, &lock);
1458         }
1459
1460         return ret;
1461 }
1462
1463 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1464                 struct lock_struct *plock)
1465 {
1466         unsigned int i;
1467         struct lock_struct *locks = br_lck->lock_data;
1468
1469         SMB_ASSERT(plock);
1470
1471         for (i = 0; i < br_lck->num_locks; i++) {
1472                 struct lock_struct *lock = &locks[i];
1473
1474                 /* For pending locks we *always* care about the fnum. */
1475                 if (brl_same_context(&lock->context, &plock->context) &&
1476                                 lock->fnum == plock->fnum &&
1477                                 IS_PENDING_LOCK(lock->lock_type) &&
1478                                 lock->lock_flav == plock->lock_flav &&
1479                                 lock->start == plock->start &&
1480                                 lock->size == plock->size) {
1481                         break;
1482                 }
1483         }
1484
1485         if (i == br_lck->num_locks) {
1486                 /* Didn't find it. */
1487                 return False;
1488         }
1489
1490         if (i < br_lck->num_locks - 1) {
1491                 /* Close the gap by shifting the remaining entries down. */
1492                 memmove(&locks[i], &locks[i+1],
1493                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1494         }
1495
1496         br_lck->num_locks -= 1;
1497         br_lck->modified = True;
1498         return True;
1499 }
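
/*
 * The deletion above uses the standard "shift down" idiom for removing
 * element i from a packed array of num entries. A minimal generic sketch:
 *
 *      // remove locks[i] from an array of num entries
 *      if (i < num - 1) {
 *              memmove(&locks[i], &locks[i+1],
 *                      sizeof(locks[0]) * (num - 1 - i));
 *      }
 *      num -= 1;
 *
 * No gap is left, so the rest of this module can keep treating
 * lock_data as a dense array.
 */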
1500
1501 /****************************************************************************
1502  Remove any locks associated with an open file.
1503  This function returns nothing; it simply releases every byte range
1504  lock this process holds on the file before the fd is closed.
1505 ****************************************************************************/
1506
1507 void brl_close_fnum(struct messaging_context *msg_ctx,
1508                     struct byte_range_lock *br_lck)
1509 {
1510         files_struct *fsp = br_lck->fsp;
1511         uint32_t tid = fsp->conn->cnum;
1512         uint64_t fnum = fsp->fnum;
1513         unsigned int i;
1514         struct lock_struct *locks = br_lck->lock_data;
1515         struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
1516         struct lock_struct *locks_copy;
1517         unsigned int num_locks_copy;
1518
1519         /* Copy the current lock array: brl_unlock() below modifies br_lck->lock_data, so we must not walk the live array. */
1520         if (br_lck->num_locks) {
1521                 locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1522                 if (!locks_copy) {
1523                         smb_panic("brl_close_fnum: talloc failed");
1524                 }
1525         } else {
1526                 locks_copy = NULL;
1527         }
1528
1529         num_locks_copy = br_lck->num_locks;
1530
1531         for (i=0; i < num_locks_copy; i++) {
1532                 struct lock_struct *lock = &locks_copy[i];
1533
1534                 if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) &&
1535                                 (lock->fnum == fnum)) {
1536                         brl_unlock(msg_ctx,
1537                                 br_lck,
1538                                 lock->context.smblctx,
1539                                 pid,
1540                                 lock->start,
1541                                 lock->size,
1542                                 lock->lock_flav);
1543                 }
1544         }
1545 }
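
/*
 * Sketch of the expected call pattern on file close (simplified and
 * illustrative; the real caller lives in the wider locking layer):
 *
 *      struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
 *      if (br_lck != NULL) {
 *              brl_close_fnum(msg_ctx, br_lck);
 *              TALLOC_FREE(br_lck);    // flushes and unlocks the record
 *      }
 */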
1546
1547 bool brl_mark_disconnected(struct files_struct *fsp)
1548 {
1549         uint32_t tid = fsp->conn->cnum;
1550         uint64_t smblctx = fsp->op->global->open_persistent_id;
1551         uint64_t fnum = fsp->fnum;
1552         unsigned int i;
1553         struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1554         struct byte_range_lock *br_lck = NULL;
1555
1556         if (!fsp->op->global->durable) {
1557                 return false;
1558         }
1559
1560         if (fsp->current_lock_count == 0) {
1561                 return true;
1562         }
1563
1564         br_lck = brl_get_locks(talloc_tos(), fsp);
1565         if (br_lck == NULL) {
1566                 return false;
1567         }
1568
1569         for (i=0; i < br_lck->num_locks; i++) {
1570                 struct lock_struct *lock = &br_lck->lock_data[i];
1571
1572                 /*
1573                  * As this is a durable handle, we only expect locks
1574                  * belonging to the current file handle.
1575                  */
1576
1577                 if (lock->context.smblctx != smblctx) {
1578                         TALLOC_FREE(br_lck);
1579                         return false;
1580                 }
1581
1582                 if (lock->context.tid != tid) {
1583                         TALLOC_FREE(br_lck);
1584                         return false;
1585                 }
1586
1587                 if (!serverid_equal(&lock->context.pid, &self)) {
1588                         TALLOC_FREE(br_lck);
1589                         return false;
1590                 }
1591
1592                 if (lock->fnum != fnum) {
1593                         TALLOC_FREE(br_lck);
1594                         return false;
1595                 }
1596
1597                 server_id_set_disconnected(&lock->context.pid);
1598                 lock->context.tid = TID_FIELD_INVALID;
1599                 lock->fnum = FNUM_FIELD_INVALID;
1600         }
1601
1602         br_lck->modified = true;
1603         TALLOC_FREE(br_lck);
1604         return true;
1605 }
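
/*
 * Sketch (caller context assumed): when a durable handle is preserved at
 * disconnect, the caller is expected to do something like:
 *
 *      if (!brl_mark_disconnected(fsp)) {
 *              // locks could not be marked - treat the handle as
 *              // non-durable and clean up normally
 *      }
 *
 * On success every lock now carries a disconnected server_id,
 * TID_FIELD_INVALID and FNUM_FIELD_INVALID, so validate_lock_entries()
 * with keep_disconnected=true will preserve it.
 */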
1606
1607 bool brl_reconnect_disconnected(struct files_struct *fsp)
1608 {
1609         uint32_t tid = fsp->conn->cnum;
1610         uint64_t smblctx = fsp->op->global->open_persistent_id;
1611         uint64_t fnum = fsp->fnum;
1612         unsigned int i;
1613         struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1614         struct byte_range_lock *br_lck = NULL;
1615
1616         if (!fsp->op->global->durable) {
1617                 return false;
1618         }
1619
1620         /*
1621          * When reconnecting, we must not let brlock-entry validation
1622          * remove our own (disconnected) entries; instead we reactivate
1623          * them below.
1624          */
1625         fsp->lockdb_clean = true;
1626
1627         br_lck = brl_get_locks(talloc_tos(), fsp);
1628         if (br_lck == NULL) {
1629                 return false;
1630         }
1631
1632         if (br_lck->num_locks == 0) {
1633                 TALLOC_FREE(br_lck);
1634                 return true;
1635         }
1636
1637         for (i=0; i < br_lck->num_locks; i++) {
1638                 struct lock_struct *lock = &br_lck->lock_data[i];
1639
1640                 /*
1641                  * As this is a durable handle, we only expect locks
1642                  * belonging to the current file handle.
1643                  */
1644
1645                 if (lock->context.smblctx != smblctx) {
1646                         TALLOC_FREE(br_lck);
1647                         return false;
1648                 }
1649
1650                 if (lock->context.tid != TID_FIELD_INVALID) {
1651                         TALLOC_FREE(br_lck);
1652                         return false;
1653                 }
1654
1655                 if (!server_id_is_disconnected(&lock->context.pid)) {
1656                         TALLOC_FREE(br_lck);
1657                         return false;
1658                 }
1659
1660                 if (lock->fnum != FNUM_FIELD_INVALID) {
1661                         TALLOC_FREE(br_lck);
1662                         return false;
1663                 }
1664
1665                 lock->context.pid = self;
1666                 lock->context.tid = tid;
1667                 lock->fnum = fnum;
1668         }
1669
1670         fsp->current_lock_count = br_lck->num_locks;
1671         br_lck->modified = true;
1672         TALLOC_FREE(br_lck);
1673         return true;
1674 }
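
/*
 * Sketch of the matching durable-reconnect side (caller context assumed):
 *
 *      if (!brl_reconnect_disconnected(fsp)) {
 *              // the brlock record did not match this durable handle;
 *              // the reconnect attempt should be failed
 *      }
 *
 * Note the asymmetry with brl_mark_disconnected(): here every entry is
 * rewritten with our live server_id, tid and fnum, and
 * fsp->current_lock_count is restored from the database.
 */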
1675
1676 /****************************************************************************
1677  Ensure this set of lock entries is valid.
1678 ****************************************************************************/
1679 static bool validate_lock_entries(TALLOC_CTX *mem_ctx,
1680                                   unsigned int *pnum_entries, struct lock_struct **pplocks,
1681                                   bool keep_disconnected)
1682 {
1683         unsigned int i;
1684         unsigned int num_valid_entries = 0;
1685         struct lock_struct *locks = *pplocks;
1686         TALLOC_CTX *frame = talloc_stackframe();
1687         struct server_id *ids;
1688         bool *exists;
1689
1690         ids = talloc_array(frame, struct server_id, *pnum_entries);
1691         if (ids == NULL) {
1692                 DEBUG(0, ("validate_lock_entries: "
1693                           "talloc_array(struct server_id, %u) failed\n",
1694                           *pnum_entries));
1695                 talloc_free(frame);
1696                 return false;
1697         }
1698
1699         exists = talloc_array(frame, bool, *pnum_entries);
1700         if (exists == NULL) {
1701                 DEBUG(0, ("validate_lock_entries: "
1702                           "talloc_array(bool, %u) failed\n",
1703                           *pnum_entries));
1704                 talloc_free(frame);
1705                 return false;
1706         }
1707
1708         for (i = 0; i < *pnum_entries; i++) {
1709                 ids[i] = locks[i].context.pid;
1710         }
1711
1712         if (!serverids_exist(ids, *pnum_entries, exists)) {
1713                 DEBUG(3, ("validate_lock_entries: serverids_exists failed\n"));
1714                 talloc_free(frame);
1715                 return false;
1716         }
1717
1718         for (i = 0; i < *pnum_entries; i++) {
1719                 if (exists[i]) {
1720                         num_valid_entries++;
1721                         continue;
1722                 }
1723
1724                 if (keep_disconnected &&
1725                     server_id_is_disconnected(&ids[i]))
1726                 {
1727                         num_valid_entries++;
1728                         continue;
1729                 }
1730
1731                 /* This process no longer exists - mark this
1732                    entry as invalid by zeroing it. */
1733                 ZERO_STRUCTP(&locks[i]);
1734         }
1735         TALLOC_FREE(frame);
1736
1737         if (num_valid_entries != *pnum_entries) {
1738                 struct lock_struct *new_lock_data = NULL;
1739
1740                 if (num_valid_entries) {
1741                         new_lock_data = talloc_array(
1742                                 mem_ctx, struct lock_struct,
1743                                 num_valid_entries);
1744                         if (!new_lock_data) {
1745                                 DEBUG(3, ("talloc_array failed\n"));
1746                                 return False;
1747                         }
1748
1749                         num_valid_entries = 0;
1750                         for (i = 0; i < *pnum_entries; i++) {
1751                                 struct lock_struct *lock_data = &locks[i];
1752                                 if (lock_data->context.smblctx &&
1753                                                 lock_data->context.tid) {
1754                                         /* Valid (nonzero) entry - copy it. */
1755                                         memcpy(&new_lock_data[num_valid_entries],
1756                                                 lock_data, sizeof(struct lock_struct));
1757                                         num_valid_entries++;
1758                                 }
1759                         }
1760                 }
1761
1762                 TALLOC_FREE(*pplocks);
1763                 *pplocks = new_lock_data;
1764                 *pnum_entries = num_valid_entries;
1765         }
1766
1767         return True;
1768 }
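
/*
 * Minimal sketch of the validation contract (illustrative only; the real
 * call sites are brl_traverse_fn() and brl_get_locks() below):
 *
 *      unsigned int n = br_lck->num_locks;
 *      if (validate_lock_entries(br_lck, &n, &br_lck->lock_data, false)) {
 *              // n may have shrunk: entries owned by dead server ids
 *              // were zeroed and then compacted away
 *              br_lck->num_locks = n;
 *      }
 */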
1769
1770 struct brl_forall_cb {
1771         void (*fn)(struct file_id id, struct server_id pid,
1772                    enum brl_type lock_type,
1773                    enum brl_flavour lock_flav,
1774                    br_off start, br_off size,
1775                    void *private_data);
1776         void *private_data;
1777 };
1778
1779 /****************************************************************************
1780  Traverse the whole database with this function, calling traverse_callback
1781  on each lock.
1782 ****************************************************************************/
1783
1784 static int brl_traverse_fn(struct db_record *rec, void *state)
1785 {
1786         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1787         struct lock_struct *locks;
1788         struct file_id *key;
1789         unsigned int i;
1790         unsigned int num_locks = 0;
1791         unsigned int orig_num_locks = 0;
1792         TDB_DATA dbkey;
1793         TDB_DATA value;
1794
1795         dbkey = dbwrap_record_get_key(rec);
1796         value = dbwrap_record_get_value(rec);
1797
1798         /* In a traverse function we must make a copy of the
1799            record value before modifying it. */
1800
1801         locks = (struct lock_struct *)talloc_memdup(
1802                 talloc_tos(), value.dptr, value.dsize);
1803         if (!locks) {
1804                 return -1; /* Terminate traversal. */
1805         }
1806
1807         key = (struct file_id *)dbkey.dptr;
1808         orig_num_locks = num_locks = value.dsize/sizeof(*locks);
1809
1810         /* Ensure the lock db is clean of entries from invalid processes. */
1811
1812         if (!validate_lock_entries(talloc_tos(), &num_locks, &locks, true)) {
1813                 TALLOC_FREE(locks);
1814                 return -1; /* Terminate traversal */
1815         }
1816
1817         if (orig_num_locks != num_locks) {
1818                 if (num_locks) {
1819                         TDB_DATA data;
1820                         data.dptr = (uint8_t *)locks;
1821                         data.dsize = num_locks*sizeof(struct lock_struct);
1822                         dbwrap_record_store(rec, data, TDB_REPLACE);
1823                 } else {
1824                         dbwrap_record_delete(rec);
1825                 }
1826         }
1827
1828         if (cb->fn) {
1829                 for (i = 0; i < num_locks; i++) {
1830                         cb->fn(*key,
1831                                 locks[i].context.pid,
1832                                 locks[i].lock_type,
1833                                 locks[i].lock_flav,
1834                                 locks[i].start,
1835                                 locks[i].size,
1836                                 cb->private_data);
1837                 }
1838         }
1839
1840         TALLOC_FREE(locks);
1841         return 0;
1842 }
1843
1844 /*******************************************************************
1845  Call the specified function on each lock in the database.
1846 ********************************************************************/
1847
1848 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1849                           enum brl_type lock_type,
1850                           enum brl_flavour lock_flav,
1851                           br_off start, br_off size,
1852                           void *private_data),
1853                void *private_data)
1854 {
1855         struct brl_forall_cb cb;
1856         NTSTATUS status;
1857         int count = 0;
1858
1859         if (!brlock_db) {
1860                 return 0;
1861         }
1862         cb.fn = fn;
1863         cb.private_data = private_data;
1864         status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
1865
1866         if (!NT_STATUS_IS_OK(status)) {
1867                 return -1;
1868         } else {
1869                 return count;
1870         }
1871 }
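
/*
 * Illustrative sketch: walking every lock in the database with
 * brl_forall(). The callback signature matches brl_forall_cb.fn;
 * "count_cb" and "n" are hypothetical names.
 *
 *      static void count_cb(struct file_id id, struct server_id pid,
 *                           enum brl_type lock_type,
 *                           enum brl_flavour lock_flav,
 *                           br_off start, br_off size,
 *                           void *private_data)
 *      {
 *              unsigned int *n = (unsigned int *)private_data;
 *              (*n)++;
 *      }
 *
 *      unsigned int n = 0;
 *      int records = brl_forall(count_cb, &n);
 *      // records == number of tdb records traversed (-1 on error),
 *      // n == number of individual locks seen
 */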
1872
1873 /*******************************************************************
1874  Store a potentially modified set of byte range lock data back into
1875  the database.
1876  Unlock the record.
1877 ********************************************************************/
1878
1879 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1880 {
1881         if (!br_lck->modified) {
1882                 goto done;
1883         }
1884
1885         if (br_lck->num_locks == 0) {
1886                 /* No locks - delete this entry. */
1887                 NTSTATUS status = dbwrap_record_delete(br_lck->record);
1888                 if (!NT_STATUS_IS_OK(status)) {
1889                         DEBUG(0, ("delete_rec returned %s\n",
1890                                   nt_errstr(status)));
1891                         smb_panic("Could not delete byte range lock entry");
1892                 }
1893         } else {
1894                 TDB_DATA data;
1895                 NTSTATUS status;
1896
1897                 data.dptr = (uint8_t *)br_lck->lock_data;
1898                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1899
1900                 status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
1901                 if (!NT_STATUS_IS_OK(status)) {
1902                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1903                         smb_panic("Could not store byte range mode entry");
1904                 }
1905         }
1906
1907  done:
1908         br_lck->modified = false;
1909         TALLOC_FREE(br_lck->record);
1910 }
1911
1912 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1913 {
1914         byte_range_lock_flush(br_lck);
1915         return 0;
1916 }
1917
1918 /*******************************************************************
1919  Fetch a set of byte range lock data from the database.
1920  Leave the record locked.
1921  TALLOC_FREE(brl) will release the lock in the destructor.
1922 ********************************************************************/
1923
1924 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
1925 {
1926         TDB_DATA key, data;
1927         struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
1928
1929         if (br_lck == NULL) {
1930                 return NULL;
1931         }
1932
1933         br_lck->fsp = fsp;
1934         br_lck->num_locks = 0;
1935         br_lck->modified = False;
1936
1937         key.dptr = (uint8_t *)&fsp->file_id;
1938         key.dsize = sizeof(struct file_id);
1939
1940         br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
1941
1942         if (br_lck->record == NULL) {
1943                 DEBUG(3, ("Could not lock byte range lock entry\n"));
1944                 TALLOC_FREE(br_lck);
1945                 return NULL;
1946         }
1947
1948         data = dbwrap_record_get_value(br_lck->record);
1949
1950         if ((data.dsize % sizeof(struct lock_struct)) != 0) {
1951                 DEBUG(3, ("Got invalid brlock data\n"));
1952                 TALLOC_FREE(br_lck);
1953                 return NULL;
1954         }
1955
1956         br_lck->lock_data = NULL;
1957
1958         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1959
1960         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1961
1962         if (br_lck->num_locks != 0) {
1963                 br_lck->lock_data = talloc_array(
1964                         br_lck, struct lock_struct, br_lck->num_locks);
1965                 if (br_lck->lock_data == NULL) {
1966                         DEBUG(0, ("malloc failed\n"));
1967                         TALLOC_FREE(br_lck);
1968                         return NULL;
1969                 }
1970
1971                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1972         }
1973
1974         if (!fsp->lockdb_clean) {
1975                 int orig_num_locks = br_lck->num_locks;
1976
1977                 /*
1978                  * This is the first time we access the byte range lock
1979                  * record with this fsp. Go through and ensure all entries
1980                  * are valid - remove any that don't.
1981                  * This makes the lockdb self cleaning at low cost.
1982                  *
1983                  * Note: Disconnected entries belong to disconnected
1984                  * durable handles. So at this point, we have a new
1985                  * handle on the file and the disconnected durable has
1986                  * already been closed (we are not a durable reconnect).
1987                  * So we need to clean the disconnected brl entry.
1988                  */
1989
1990                 if (!validate_lock_entries(br_lck, &br_lck->num_locks,
1991                                            &br_lck->lock_data, false)) {
1992                         TALLOC_FREE(br_lck);
1993                         return NULL;
1994                 }
1995
1996                 /* Ensure invalid locks are cleaned up in the destructor. */
1997                 if (orig_num_locks != br_lck->num_locks) {
1998                         br_lck->modified = True;
1999                 }
2000
2001                 /* Mark the lockdb as "clean" as seen from this open file. */
2002                 fsp->lockdb_clean = True;
2003         }
2004
2005         if (DEBUGLEVEL >= 10) {
2006                 unsigned int i;
2007                 struct lock_struct *locks = br_lck->lock_data;
2008                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
2009                         br_lck->num_locks,
2010                           file_id_string_tos(&fsp->file_id)));
2011                 for( i = 0; i < br_lck->num_locks; i++) {
2012                         print_lock_struct(i, &locks[i]);
2013                 }
2014         }
2015
2016         return br_lck;
2017 }
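
/*
 * Read-modify-write sketch using the accessors above (illustrative; the
 * NT_STATUS_NO_MEMORY handling is a hypothetical caller's choice):
 *
 *      struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
 *      if (br_lck == NULL) {
 *              return NT_STATUS_NO_MEMORY;
 *      }
 *      DEBUG(10, ("%u locks on %s\n", brl_num_locks(br_lck),
 *                 fsp_str_dbg(brl_fsp(br_lck))));
 *      TALLOC_FREE(br_lck);    // destructor flushes any changes and
 *                              // releases the dbwrap record lock
 */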
2018
2019 struct brl_get_locks_readonly_state {
2020         TALLOC_CTX *mem_ctx;
2021         struct byte_range_lock **br_lock;
2022 };
2023
2024 static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
2025                                           void *private_data)
2026 {
2027         struct brl_get_locks_readonly_state *state =
2028                 (struct brl_get_locks_readonly_state *)private_data;
2029         struct byte_range_lock *br_lock;
2030
2031         br_lock = talloc_pooled_object(
2032                 state->mem_ctx, struct byte_range_lock, 1, data.dsize);
2033         if (br_lock == NULL) {
2034                 *state->br_lock = NULL;
2035                 return;
2036         }
2037         br_lock->lock_data = (struct lock_struct *)talloc_memdup(
2038                 br_lock, data.dptr, data.dsize);
        if (br_lock->lock_data == NULL) {
                /* Don't hand back a half-initialized struct. */
                TALLOC_FREE(br_lock);
                *state->br_lock = NULL;
                return;
        }
2039         br_lock->num_locks = data.dsize / sizeof(struct lock_struct);
2040
2041         *state->br_lock = br_lock;
2042 }
2043
2044 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
2045 {
2046         struct byte_range_lock *br_lock = NULL;
2047         struct byte_range_lock *rw = NULL;
2048
2049         if ((fsp->brlock_rec != NULL)
2050             && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
2051                 /*
2052                  * We have cached the brlock_rec and the database did not
2053                  * change.
2054                  */
2055                 return fsp->brlock_rec;
2056         }
2057
2058         if (!fsp->lockdb_clean) {
2059                 /*
2060                  * Fetch the record in R/W mode to give validate_lock_entries
2061                  * a chance to kick in once.
2062                  */
2063                 rw = brl_get_locks(talloc_tos(), fsp);
2064                 if (rw == NULL) {
2065                         return NULL;
2066                 }
2067                 fsp->lockdb_clean = true;
2068         }
2069
2070         if (rw != NULL) {
2071                 size_t lock_data_size;
2072
2073                 /*
2074                  * Make a copy of the already retrieved and sanitized rw record
2075                  */
2076                 lock_data_size = rw->num_locks * sizeof(struct lock_struct);
2077                 br_lock = talloc_pooled_object(
2078                         fsp, struct byte_range_lock, 1, lock_data_size);
2079                 if (br_lock == NULL) {
2080                         goto fail;
2081                 }
2082                 br_lock->num_locks = rw->num_locks;
2083                 br_lock->lock_data = (struct lock_struct *)talloc_memdup(
2084                         br_lock, rw->lock_data, lock_data_size);
2085         } else {
2086                 struct brl_get_locks_readonly_state state;
2087                 NTSTATUS status;
2088
2089                 /*
2090                  * Parse the record fresh from the database
2091                  */
2092
2093                 state.mem_ctx = fsp;
2094                 state.br_lock = &br_lock;
2095
2096                 status = dbwrap_parse_record(
2097                         brlock_db,
2098                         make_tdb_data((uint8_t *)&fsp->file_id,
2099                                       sizeof(fsp->file_id)),
2100                         brl_get_locks_readonly_parser, &state);
2101                 if (!NT_STATUS_IS_OK(status)) {
2102                         DEBUG(3, ("Could not parse byte range lock record: "
2103                                   "%s\n", nt_errstr(status)));
2104                         goto fail;
2105                 }
2106                 if (br_lock == NULL) {
2107                         goto fail;
2108                 }
2109         }
2110
2111         br_lock->fsp = fsp;
2112         br_lock->modified = false;
2113         br_lock->record = NULL;
2114
2115         if (lp_clustering()) {
2116                 /*
2117                  * In the cluster case we can't cache the brlock struct
2118                  * because dbwrap_get_seqnum does not work reliably over
2119                  * ctdb. Thus we have to throw away the brlock struct soon.
2120                  */
2121                 talloc_steal(talloc_tos(), br_lock);
2122         } else {
2123                 /*
2124                  * Cache the brlock struct, invalidated when the dbwrap_seqnum
2125                  * changes. See beginning of this routine.
2126                  */
2127                 TALLOC_FREE(fsp->brlock_rec);
2128                 fsp->brlock_rec = br_lock;
2129                 fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
2130         }
2131
2132 fail:
2133         TALLOC_FREE(rw);
2134         return br_lock;
2135 }
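
/*
 * Usage note (illustrative): the result of brl_get_locks_readonly() is
 * owned by the fsp cache in the non-clustered case (or by the current
 * stackframe under clustering), so callers must not free it themselves:
 *
 *      struct byte_range_lock *brl = brl_get_locks_readonly(fsp);
 *      if (brl != NULL) {
 *              unsigned int n = brl_num_locks(brl);
 *              // cheap on repeat calls while dbwrap_get_seqnum()
 *              // is unchanged; no TALLOC_FREE(brl) here
 *      }
 */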
2136
2137 struct brl_revalidate_state {
2138         ssize_t array_size;
2139         uint32_t num_pids;
2140         struct server_id *pids;
2141 };
2142
2143 /*
2144  * Collect PIDs of all processes with pending entries
2145  */
2146
2147 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
2148                                    enum brl_type lock_type,
2149                                    enum brl_flavour lock_flav,
2150                                    br_off start, br_off size,
2151                                    void *private_data)
2152 {
2153         struct brl_revalidate_state *state =
2154                 (struct brl_revalidate_state *)private_data;
2155
2156         if (!IS_PENDING_LOCK(lock_type)) {
2157                 return;
2158         }
2159
2160         add_to_large_array(state, sizeof(pid), (void *)&pid,
2161                            &state->pids, &state->num_pids,
2162                            &state->array_size);
2163 }
2164
2165 /*
2166  * qsort callback to sort the processes
2167  */
2168
2169 static int compare_procids(const void *p1, const void *p2)
2170 {
2171         const struct server_id *i1 = (const struct server_id *)p1;
2172         const struct server_id *i2 = (const struct server_id *)p2;
2173
2174         if (i1->pid < i2->pid) return -1;
2175         if (i1->pid > i2->pid) return 1;
2176         return 0;
2177 }
2178
2179 /*
2180  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2181  * locks so that they retry. Mainly used in the cluster code after a node has
2182  * died.
2183  *
2184  * Done in two steps to avoid double-sends: First we collect all entries in an
2185  * array, then qsort that array and only send to non-dupes.
2186  */
2187
2188 void brl_revalidate(struct messaging_context *msg_ctx,
2189                     void *private_data,
2190                     uint32_t msg_type,
2191                     struct server_id server_id,
2192                     DATA_BLOB *data)
2193 {
2194         struct brl_revalidate_state *state;
2195         uint32_t i;
2196         struct server_id last_pid;
2197
2198         if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
2199                 DEBUG(0, ("talloc failed\n"));
2200                 return;
2201         }
2202
2203         brl_forall(brl_revalidate_collect, state);
2204
2205         if (state->array_size == -1) {
2206                 DEBUG(0, ("talloc failed\n"));
2207                 goto done;
2208         }
2209
2210         if (state->num_pids == 0) {
2211                 goto done;
2212         }
2213
2214         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2215
2216         ZERO_STRUCT(last_pid);
2217
2218         for (i=0; i<state->num_pids; i++) {
2219                 if (serverid_equal(&last_pid, &state->pids[i])) {
2220                         /*
2221                          * We've seen that one already
2222                          */
2223                         continue;
2224                 }
2225
2226                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2227                                &data_blob_null);
2228                 last_pid = state->pids[i];
2229         }
2230
2231  done:
2232         TALLOC_FREE(state);
2233         return;
2234 }
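
/*
 * brl_revalidate() has the signature of a messaging handler. A sketch of
 * how it would be wired up (MSG_SMB_BRL_VALIDATE is the message type used
 * for this purpose; registration normally happens in smbd startup code,
 * not in this file):
 *
 *      messaging_register(msg_ctx, NULL,
 *                         MSG_SMB_BRL_VALIDATE, brl_revalidate);
 */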
2235
2236 bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
2237 {
2238         bool ret = false;
2239         TALLOC_CTX *frame = talloc_stackframe();
2240         TDB_DATA key, val;
2241         struct db_record *rec;
2242         struct lock_struct *lock;
2243         unsigned n, num;
2244         NTSTATUS status;
2245
2246         key = make_tdb_data((uint8_t *)&fid, sizeof(fid));
2247
2248         rec = dbwrap_fetch_locked(brlock_db, frame, key);
2249         if (rec == NULL) {
2250                 DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
2251                           "for file %s\n", file_id_string(frame, &fid)));
2252                 goto done;
2253         }
2254
2255         val = dbwrap_record_get_value(rec);
2256         lock = (struct lock_struct *)val.dptr;
2257         num = val.dsize / sizeof(struct lock_struct);
2258         if (lock == NULL) {
2259                 DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
2260                            "file %s\n", file_id_string(frame, &fid)));
2261                 ret = true;
2262                 goto done;
2263         }
2264
2265         for (n=0; n<num; n++) {
2266                 struct lock_context *ctx = &lock[n].context;
2267
2268                 if (!server_id_is_disconnected(&ctx->pid)) {
2269                         DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2270                                   "%s used by server %s, do not cleanup\n",
2271                                   file_id_string(frame, &fid),
2272                                   server_id_str(frame, &ctx->pid)));
2273                         goto done;
2274                 }
2275
2276                 if (ctx->smblctx != open_persistent_id) {
2277                         DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2278                                   "%s expected smblctx %llu but found %llu"
2279                                   ", do not cleanup\n",
2280                                   file_id_string(frame, &fid),
2281                                   (unsigned long long)open_persistent_id,
2282                                   (unsigned long long)ctx->smblctx));
2283                         goto done;
2284                 }
2285         }
2286
2287         status = dbwrap_record_delete(rec);
2288         if (!NT_STATUS_IS_OK(status)) {
2289                 DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
2290                           "for file %s from %s, open %llu: %s\n",
2291                           file_id_string(frame, &fid), dbwrap_name(brlock_db),
2292                           (unsigned long long)open_persistent_id,
2293                           nt_errstr(status)));
2294                 goto done;
2295         }
2296
2297         DEBUG(10, ("brl_cleanup_disconnected: "
2298                    "file %s cleaned up %u entries from open %llu\n",
2299                    file_id_string(frame, &fid), num,
2300                    (unsigned long long)open_persistent_id));
2301
2302         ret = true;
2303 done:
2304         talloc_free(frame);
2305         return ret;
2306 }
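
/*
 * Illustrative sketch: cleaning up after a disconnected durable handle
 * that is being scavenged. The file_id and the persistent open id are
 * assumed to come from the caller's smbXsrv open record ("op" is a
 * hypothetical name):
 *
 *      if (!brl_cleanup_disconnected(fid,
 *                                    op->global->open_persistent_id)) {
 *              DEBUG(1, ("failed to clean up byte range locks\n"));
 *      }
 *
 * The function deletes the whole record only if every entry belongs to
 * the given disconnected open; otherwise it leaves the record untouched.
 */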