Merge leftovers of 0e1a86bc845 in 3-0-ctdb
[nivanova/samba-autobuild/.git] / source3 / locking / brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28
29 #undef DBGC_CLASS
30 #define DBGC_CLASS DBGC_LOCKING
31
32 #define ZERO_ZERO 0
33
34 /* The open brlock.tdb database. */
35
36 static struct db_context *brlock_db;
37
38 /****************************************************************************
39  Debug info at level 10 for lock struct.
40 ****************************************************************************/
41
42 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
43 {
44         DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %u, ",
45                         i,
46                         (unsigned int)pls->context.smbpid,
47                         (unsigned int)pls->context.tid,
48                         (unsigned int)procid_to_pid(&pls->context.pid) ));
49         
50         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
51                 (double)pls->start,
52                 (double)pls->size,
53                 pls->fnum,
54                 lock_type_name(pls->lock_type),
55                 lock_flav_name(pls->lock_flav) ));
56 }
57
58 /****************************************************************************
59  See if two locking contexts are equal.
60 ****************************************************************************/
61
62 bool brl_same_context(const struct lock_context *ctx1, 
63                              const struct lock_context *ctx2)
64 {
65         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
66                 (ctx1->smbpid == ctx2->smbpid) &&
67                 (ctx1->tid == ctx2->tid));
68 }
69
70 /****************************************************************************
71  See if lck1 and lck2 overlap.
72 ****************************************************************************/
73
74 static bool brl_overlap(const struct lock_struct *lck1,
75                         const struct lock_struct *lck2)
76 {
77         /* this extra check is not redundent - it copes with locks
78            that go beyond the end of 64 bit file space */
79         if (lck1->size != 0 &&
80             lck1->start == lck2->start &&
81             lck1->size == lck2->size) {
82                 return True;
83         }
84
85         if (lck1->start >= (lck2->start+lck2->size) ||
86             lck2->start >= (lck1->start+lck1->size)) {
87                 return False;
88         }
89         return True;
90 }
91
92 /****************************************************************************
93  See if lock2 can be added when lock1 is in place.
94 ****************************************************************************/
95
96 static bool brl_conflict(const struct lock_struct *lck1, 
97                          const struct lock_struct *lck2)
98 {
99         /* Ignore PENDING locks. */
100         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
101                 return False;
102
103         /* Read locks never conflict. */
104         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
105                 return False;
106         }
107
108         if (brl_same_context(&lck1->context, &lck2->context) &&
109             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
110                 return False;
111         }
112
113         return brl_overlap(lck1, lck2);
114
115
116 /****************************************************************************
117  See if lock2 can be added when lock1 is in place - when both locks are POSIX
118  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
119  know already match.
120 ****************************************************************************/
121
122 static bool brl_conflict_posix(const struct lock_struct *lck1, 
123                                 const struct lock_struct *lck2)
124 {
125 #if defined(DEVELOPER)
126         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
127         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
128 #endif
129
130         /* Ignore PENDING locks. */
131         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
132                 return False;
133
134         /* Read locks never conflict. */
135         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
136                 return False;
137         }
138
139         /* Locks on the same context con't conflict. Ignore fnum. */
140         if (brl_same_context(&lck1->context, &lck2->context)) {
141                 return False;
142         }
143
144         /* One is read, the other write, or the context is different,
145            do they overlap ? */
146         return brl_overlap(lck1, lck2);
147
148
#if ZERO_ZERO
/****************************************************************************
 Variant of brl_conflict() that is only compiled when ZERO_ZERO is enabled.

 lck1 - the lock already in place.
 lck2 - the lock that is to be added.

 In addition to the normal rules, a lock2 with start 0 and size 0
 conflicts with any lock1 whose size is non-zero. The final range test is
 done inline and has no special case for identical ranges.
****************************************************************************/

static bool brl_conflict1(const struct lock_struct *lck1, 
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) {
		return False;
	}

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A new read lock from the same context on the same fnum is allowed. */
	if (brl_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
		return False;
	}

	/* A 0/0 lock2 conflicts with every non-empty lock1. */
	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
		return True;
	}

	/* Disjoint ranges do not conflict. */
	if (lck1->start >= (lck2->start + lck2->size) ||
	    lck2->start >= (lck1->start + lck1->size)) {
		return False;
	}

	return True;
}
#endif
177
178 /****************************************************************************
179  Check to see if this lock conflicts, but ignore our own locks on the
180  same fnum only. This is the read/write lock check code path.
181  This is never used in the POSIX lock case.
182 ****************************************************************************/
183
184 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
185 {
186         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
187                 return False;
188
189         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
190                 return False;
191
192         /* POSIX flavour locks never conflict here - this is only called
193            in the read/write path. */
194
195         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
196                 return False;
197
198         /*
199          * Incoming WRITE locks conflict with existing READ locks even
200          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
201          */
202
203         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
204                 if (brl_same_context(&lck1->context, &lck2->context) &&
205                                         lck1->fnum == lck2->fnum)
206                         return False;
207         }
208
209         return brl_overlap(lck1, lck2);
210
211
212 /****************************************************************************
213  Check if an unlock overlaps a pending lock.
214 ****************************************************************************/
215
216 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
217 {
218         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
219                 return True;
220         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
221                 return True;
222         return False;
223 }
224
225 /****************************************************************************
226  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
227  is the same as this one and changes its error code. I wonder if any
228  app depends on this ?
229 ****************************************************************************/
230
231 static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
232 {
233         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
234                 /* amazing the little things you learn with a test
235                    suite. Locks beyond this offset (as a 64 bit
236                    number!) always generate the conflict error code,
237                    unless the top bit is set */
238                 if (!blocking_lock) {
239                         fsp->last_lock_failure = *lock;
240                 }
241                 return NT_STATUS_FILE_LOCK_CONFLICT;
242         }
243
244         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
245                         lock->context.tid == fsp->last_lock_failure.context.tid &&
246                         lock->fnum == fsp->last_lock_failure.fnum &&
247                         lock->start == fsp->last_lock_failure.start) {
248                 return NT_STATUS_FILE_LOCK_CONFLICT;
249         }
250
251         if (!blocking_lock) {
252                 fsp->last_lock_failure = *lock;
253         }
254         return NT_STATUS_LOCK_NOT_GRANTED;
255 }
256
257 /****************************************************************************
258  Open up the brlock.tdb database.
259 ****************************************************************************/
260
261 void brl_init(bool read_only)
262 {
263         if (brlock_db) {
264                 return;
265         }
266         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
267                             lp_open_files_db_hash_size(),
268                             TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST,
269                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
270         if (!brlock_db) {
271                 DEBUG(0,("Failed to open byte range locking database %s\n",
272                         lock_path("brlock.tdb")));
273                 return;
274         }
275 }
276
277 /****************************************************************************
278  Close down the brlock.tdb database.
279 ****************************************************************************/
280
281 void brl_shutdown(void)
282 {
283         TALLOC_FREE(brlock_db);
284 }
285
#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.

 Orders by start offset first and by size second. Returns a negative
 value, zero or a positive value when lck1 sorts before, equal to or
 after lck2.

 The result is derived from comparisons rather than from a subtraction:
 the difference of two wide offsets does not fit into an int, so the
 truncated value could carry the wrong sign or even be zero for
 different offsets.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1, 
			 const struct lock_struct *lck2)
{
	if (lck1->start != lck2->start) {
		return (lck1->start < lck2->start) ? -1 : 1;
	}
	if (lck1->size != lck2->size) {
		return (lck1->size < lck2->size) ? -1 : 1;
	}
	return 0;
}
#endif
303
304 /****************************************************************************
305  Lock a range of bytes - Windows lock semantics.
306 ****************************************************************************/
307
308 static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
309                         struct lock_struct *plock, bool blocking_lock)
310 {
311         unsigned int i;
312         files_struct *fsp = br_lck->fsp;
313         struct lock_struct *locks = br_lck->lock_data;
314
315         for (i=0; i < br_lck->num_locks; i++) {
316                 /* Do any Windows or POSIX locks conflict ? */
317                 if (brl_conflict(&locks[i], plock)) {
318                         /* Remember who blocked us. */
319                         plock->context.smbpid = locks[i].context.smbpid;
320                         return brl_lock_failed(fsp,plock,blocking_lock);
321                 }
322 #if ZERO_ZERO
323                 if (plock->start == 0 && plock->size == 0 && 
324                                 locks[i].size == 0) {
325                         break;
326                 }
327 #endif
328         }
329
330         /* We can get the Windows lock, now see if it needs to
331            be mapped into a lower level POSIX one, and if so can
332            we get it ? */
333
334         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
335                 int errno_ret;
336                 if (!set_posix_lock_windows_flavour(fsp,
337                                 plock->start,
338                                 plock->size,
339                                 plock->lock_type,
340                                 &plock->context,
341                                 locks,
342                                 br_lck->num_locks,
343                                 &errno_ret)) {
344
345                         /* We don't know who blocked us. */
346                         plock->context.smbpid = 0xFFFFFFFF;
347
348                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
349                                 return NT_STATUS_FILE_LOCK_CONFLICT;
350                         } else {
351                                 return map_nt_error_from_unix(errno);
352                         }
353                 }
354         }
355
356         /* no conflicts - add it to the list of locks */
357         locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
358         if (!locks) {
359                 return NT_STATUS_NO_MEMORY;
360         }
361
362         memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
363         br_lck->num_locks += 1;
364         br_lck->lock_data = locks;
365         br_lck->modified = True;
366
367         return NT_STATUS_OK;
368 }
369
370 /****************************************************************************
371  Cope with POSIX range splits and merges.
372 ****************************************************************************/
373
374 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,               /* Output array. */
375                                                 const struct lock_struct *ex,           /* existing lock. */
376                                                 const struct lock_struct *plock,        /* proposed lock. */
377                                                 bool *lock_was_added)
378 {
379         bool lock_types_differ = (ex->lock_type != plock->lock_type);
380
381         /* We can't merge non-conflicting locks on different context - ignore fnum. */
382
383         if (!brl_same_context(&ex->context, &plock->context)) {
384                 /* Just copy. */
385                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
386                 return 1;
387         }
388
389         /* We now know we have the same context. */
390
391         /* Did we overlap ? */
392
393 /*********************************************
394                                              +---------+
395                                              | ex      |
396                                              +---------+
397                               +-------+
398                               | plock |
399                               +-------+
400 OR....
401              +---------+
402              |  ex     |
403              +---------+
404 **********************************************/
405
406         if ( (ex->start > (plock->start + plock->size)) ||
407                         (plock->start > (ex->start + ex->size))) {
408                 /* No overlap with this lock - copy existing. */
409                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
410                 return 1;
411         }
412
413 /*********************************************
414         +---------------------------+
415         |          ex               |
416         +---------------------------+
417         +---------------------------+
418         |       plock               | -> replace with plock.
419         +---------------------------+
420 **********************************************/
421
422         if ( (ex->start >= plock->start) &&
423                         (ex->start + ex->size <= plock->start + plock->size) ) {
424                 memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
425                 *lock_was_added = True;
426                 return 1;
427         }
428
429 /*********************************************
430         +-----------------------+
431         |          ex           |
432         +-----------------------+
433         +---------------+
434         |   plock       |
435         +---------------+
436 OR....
437                         +-------+
438                         |  ex   |
439                         +-------+
440         +---------------+
441         |   plock       |
442         +---------------+
443
444 BECOMES....
445         +---------------+-------+
446         |   plock       | ex    | - different lock types.
447         +---------------+-------+
448 OR.... (merge)
449         +-----------------------+
450         |   ex                  | - same lock type.
451         +-----------------------+
452 **********************************************/
453
454         if ( (ex->start >= plock->start) &&
455                                 (ex->start <= plock->start + plock->size) &&
456                                 (ex->start + ex->size > plock->start + plock->size) ) {
457
458                 *lock_was_added = True;
459
460                 /* If the lock types are the same, we merge, if different, we
461                    add the new lock before the old. */
462
463                 if (lock_types_differ) {
464                         /* Add new. */
465                         memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
466                         memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
467                         /* Adjust existing start and size. */
468                         lck_arr[1].start = plock->start + plock->size;
469                         lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
470                         return 2;
471                 } else {
472                         /* Merge. */
473                         memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
474                         /* Set new start and size. */
475                         lck_arr[0].start = plock->start;
476                         lck_arr[0].size = (ex->start + ex->size) - plock->start;
477                         return 1;
478                 }
479         }
480
481 /*********************************************
482    +-----------------------+
483    |  ex                   |
484    +-----------------------+
485            +---------------+
486            |   plock       |
487            +---------------+
488 OR....
489    +-------+        
490    |  ex   |
491    +-------+
492            +---------------+
493            |   plock       |
494            +---------------+
495 BECOMES....
496    +-------+---------------+
497    | ex    |   plock       | - different lock types
498    +-------+---------------+
499
500 OR.... (merge)
501    +-----------------------+
502    | ex                    | - same lock type.
503    +-----------------------+
504
505 **********************************************/
506
507         if ( (ex->start < plock->start) &&
508                         (ex->start + ex->size >= plock->start) &&
509                         (ex->start + ex->size <= plock->start + plock->size) ) {
510
511                 *lock_was_added = True;
512
513                 /* If the lock types are the same, we merge, if different, we
514                    add the new lock after the old. */
515
516                 if (lock_types_differ) {
517                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
518                         memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
519                         /* Adjust existing size. */
520                         lck_arr[0].size = plock->start - ex->start;
521                         return 2;
522                 } else {
523                         /* Merge. */
524                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
525                         /* Adjust existing size. */
526                         lck_arr[0].size = (plock->start + plock->size) - ex->start;
527                         return 1;
528                 }
529         }
530
531 /*********************************************
532         +---------------------------+
533         |        ex                 |
534         +---------------------------+
535                 +---------+
536                 |  plock  |
537                 +---------+
538 BECOMES.....
539         +-------+---------+---------+
540         | ex    |  plock  | ex      | - different lock types.
541         +-------+---------+---------+
542 OR
543         +---------------------------+
544         |        ex                 | - same lock type.
545         +---------------------------+
546 **********************************************/
547
548         if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
549                 *lock_was_added = True;
550
551                 if (lock_types_differ) {
552
553                         /* We have to split ex into two locks here. */
554
555                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
556                         memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
557                         memcpy(&lck_arr[2], ex, sizeof(struct lock_struct));
558
559                         /* Adjust first existing size. */
560                         lck_arr[0].size = plock->start - ex->start;
561
562                         /* Adjust second existing start and size. */
563                         lck_arr[2].start = plock->start + plock->size;
564                         lck_arr[2].size = (ex->start + ex->size) - (plock->start + plock->size);
565                         return 3;
566                 } else {
567                         /* Just eat plock. */
568                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
569                         return 1;
570                 }
571         }
572
573         /* Never get here. */
574         smb_panic("brlock_posix_split_merge");
575         /* Notreached. */
576
577         /* Keep some compilers happy. */
578         return 0;
579 }
580
581 /****************************************************************************
582  Lock a range of bytes - POSIX lock semantics.
583  We must cope with range splits and merges.
584 ****************************************************************************/
585
586 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
587                                struct byte_range_lock *br_lck,
588                                struct lock_struct *plock)
589 {
590         unsigned int i, count;
591         struct lock_struct *locks = br_lck->lock_data;
592         struct lock_struct *tp;
593         bool lock_was_added = False;
594         bool signal_pending_read = False;
595
596         /* No zero-zero locks for POSIX. */
597         if (plock->start == 0 && plock->size == 0) {
598                 return NT_STATUS_INVALID_PARAMETER;
599         }
600
601         /* Don't allow 64-bit lock wrap. */
602         if (plock->start + plock->size < plock->start ||
603                         plock->start + plock->size < plock->size) {
604                 return NT_STATUS_INVALID_PARAMETER;
605         }
606
607         /* The worst case scenario here is we have to split an
608            existing POSIX lock range into two, and add our lock,
609            so we need at most 2 more entries. */
610
611         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
612         if (!tp) {
613                 return NT_STATUS_NO_MEMORY;
614         }
615         
616         count = 0;
617         for (i=0; i < br_lck->num_locks; i++) {
618                 struct lock_struct *curr_lock = &locks[i];
619
620                 /* If we have a pending read lock, a lock downgrade should
621                    trigger a lock re-evaluation. */
622                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
623                                 brl_pending_overlap(plock, curr_lock)) {
624                         signal_pending_read = True;
625                 }
626
627                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
628                         /* Do any Windows flavour locks conflict ? */
629                         if (brl_conflict(curr_lock, plock)) {
630                                 /* No games with error messages. */
631                                 SAFE_FREE(tp);
632                                 /* Remember who blocked us. */
633                                 plock->context.smbpid = curr_lock->context.smbpid;
634                                 return NT_STATUS_FILE_LOCK_CONFLICT;
635                         }
636                         /* Just copy the Windows lock into the new array. */
637                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
638                         count++;
639                 } else {
640                         /* POSIX conflict semantics are different. */
641                         if (brl_conflict_posix(curr_lock, plock)) {
642                                 /* Can't block ourselves with POSIX locks. */
643                                 /* No games with error messages. */
644                                 SAFE_FREE(tp);
645                                 /* Remember who blocked us. */
646                                 plock->context.smbpid = curr_lock->context.smbpid;
647                                 return NT_STATUS_FILE_LOCK_CONFLICT;
648                         }
649
650                         /* Work out overlaps. */
651                         count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
652                 }
653         }
654
655         if (!lock_was_added) {
656                 memcpy(&tp[count], plock, sizeof(struct lock_struct));
657                 count++;
658         }
659
660         /* We can get the POSIX lock, now see if it needs to
661            be mapped into a lower level POSIX one, and if so can
662            we get it ? */
663
664         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
665                 int errno_ret;
666
667                 /* The lower layer just needs to attempt to
668                    get the system POSIX lock. We've weeded out
669                    any conflicts above. */
670
671                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
672                                 plock->start,
673                                 plock->size,
674                                 plock->lock_type,
675                                 &errno_ret)) {
676
677                         /* We don't know who blocked us. */
678                         plock->context.smbpid = 0xFFFFFFFF;
679
680                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
681                                 SAFE_FREE(tp);
682                                 return NT_STATUS_FILE_LOCK_CONFLICT;
683                         } else {
684                                 SAFE_FREE(tp);
685                                 return map_nt_error_from_unix(errno);
686                         }
687                 }
688         }
689
690         /* Realloc so we don't leak entries per lock call. */
691         tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
692         if (!tp) {
693                 return NT_STATUS_NO_MEMORY;
694         }
695         br_lck->num_locks = count;
696         SAFE_FREE(br_lck->lock_data);
697         br_lck->lock_data = tp;
698         locks = tp;
699         br_lck->modified = True;
700
701         /* A successful downgrade from write to read lock can trigger a lock
702            re-evalutation where waiting readers can now proceed. */
703
704         if (signal_pending_read) {
705                 /* Send unlock messages to any pending read waiters that overlap. */
706                 for (i=0; i < br_lck->num_locks; i++) {
707                         struct lock_struct *pend_lock = &locks[i];
708
709                         /* Ignore non-pending locks. */
710                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
711                                 continue;
712                         }
713
714                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
715                                         brl_pending_overlap(plock, pend_lock)) {
716                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
717                                         procid_str_static(&pend_lock->context.pid )));
718
719                                 messaging_send(msg_ctx, pend_lock->context.pid,
720                                                MSG_SMB_UNLOCK, &data_blob_null);
721                         }
722                 }
723         }
724
725         return NT_STATUS_OK;
726 }
727
728 /****************************************************************************
729  Lock a range of bytes.
730 ****************************************************************************/
731
732 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
733                 struct byte_range_lock *br_lck,
734                 uint32 smbpid,
735                 struct server_id pid,
736                 br_off start,
737                 br_off size, 
738                 enum brl_type lock_type,
739                 enum brl_flavour lock_flav,
740                 bool blocking_lock,
741                 uint32 *psmbpid)
742 {
743         NTSTATUS ret;
744         struct lock_struct lock;
745
746 #if !ZERO_ZERO
747         if (start == 0 && size == 0) {
748                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
749         }
750 #endif
751
752         lock.context.smbpid = smbpid;
753         lock.context.pid = pid;
754         lock.context.tid = br_lck->fsp->conn->cnum;
755         lock.start = start;
756         lock.size = size;
757         lock.fnum = br_lck->fsp->fnum;
758         lock.lock_type = lock_type;
759         lock.lock_flav = lock_flav;
760
761         if (lock_flav == WINDOWS_LOCK) {
762                 ret = brl_lock_windows(br_lck, &lock, blocking_lock);
763         } else {
764                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
765         }
766
767 #if ZERO_ZERO
768         /* sort the lock list */
769         qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
770 #endif
771
772         /* If we're returning an error, return who blocked us. */
773         if (!NT_STATUS_IS_OK(ret) && psmbpid) {
774                 *psmbpid = lock.context.smbpid;
775         }
776         return ret;
777 }
778
779 /****************************************************************************
780  Unlock a range of bytes - Windows semantics.
781 ****************************************************************************/
782
783 static bool brl_unlock_windows(struct messaging_context *msg_ctx,
784                                struct byte_range_lock *br_lck,
785                                const struct lock_struct *plock)
786 {
787         unsigned int i, j;
788         struct lock_struct *locks = br_lck->lock_data;
789         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
790
791 #if ZERO_ZERO
792         /* Delete write locks by preference... The lock list
793            is sorted in the zero zero case. */
794
795         for (i = 0; i < br_lck->num_locks; i++) {
796                 struct lock_struct *lock = &locks[i];
797
798                 if (lock->lock_type == WRITE_LOCK &&
799                     brl_same_context(&lock->context, &plock->context) &&
800                     lock->fnum == plock->fnum &&
801                     lock->lock_flav == WINDOWS_LOCK &&
802                     lock->start == plock->start &&
803                     lock->size == plock->size) {
804
805                         /* found it - delete it */
806                         deleted_lock_type = lock->lock_type;
807                         break;
808                 }
809         }
810
811         if (i != br_lck->num_locks) {
812                 /* We found it - don't search again. */
813                 goto unlock_continue;
814         }
815 #endif
816
817         for (i = 0; i < br_lck->num_locks; i++) {
818                 struct lock_struct *lock = &locks[i];
819
820                 /* Only remove our own locks that match in start, size, and flavour. */
821                 if (brl_same_context(&lock->context, &plock->context) &&
822                                         lock->fnum == plock->fnum &&
823                                         lock->lock_flav == WINDOWS_LOCK &&
824                                         lock->start == plock->start &&
825                                         lock->size == plock->size ) {
826                         deleted_lock_type = lock->lock_type;
827                         break;
828                 }
829         }
830
831         if (i == br_lck->num_locks) {
832                 /* we didn't find it */
833                 return False;
834         }
835
836 #if ZERO_ZERO
837   unlock_continue:
838 #endif
839
840         /* Actually delete the lock. */
841         if (i < br_lck->num_locks - 1) {
842                 memmove(&locks[i], &locks[i+1], 
843                         sizeof(*locks)*((br_lck->num_locks-1) - i));
844         }
845
846         br_lck->num_locks -= 1;
847         br_lck->modified = True;
848
849         /* Unlock the underlying POSIX regions. */
850         if(lp_posix_locking(br_lck->fsp->conn->params)) {
851                 release_posix_lock_windows_flavour(br_lck->fsp,
852                                 plock->start,
853                                 plock->size,
854                                 deleted_lock_type,
855                                 &plock->context,
856                                 locks,
857                                 br_lck->num_locks);
858         }
859
860         /* Send unlock messages to any pending waiters that overlap. */
861         for (j=0; j < br_lck->num_locks; j++) {
862                 struct lock_struct *pend_lock = &locks[j];
863
864                 /* Ignore non-pending locks. */
865                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
866                         continue;
867                 }
868
869                 /* We could send specific lock info here... */
870                 if (brl_pending_overlap(plock, pend_lock)) {
871                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
872                                 procid_str_static(&pend_lock->context.pid )));
873
874                         messaging_send(msg_ctx, pend_lock->context.pid,
875                                        MSG_SMB_UNLOCK, &data_blob_null);
876                 }
877         }
878
879         return True;
880 }
881
882 /****************************************************************************
883  Unlock a range of bytes - POSIX semantics.
884 ****************************************************************************/
885
886 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
887                              struct byte_range_lock *br_lck,
888                              const struct lock_struct *plock)
889 {
890         unsigned int i, j, count;
891         struct lock_struct *tp;
892         struct lock_struct *locks = br_lck->lock_data;
893         bool overlap_found = False;
894
895         /* No zero-zero locks for POSIX. */
896         if (plock->start == 0 && plock->size == 0) {
897                 return False;
898         }
899
900         /* Don't allow 64-bit lock wrap. */
901         if (plock->start + plock->size < plock->start ||
902                         plock->start + plock->size < plock->size) {
903                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
904                 return False;
905         }
906
907         /* The worst case scenario here is we have to split an
908            existing POSIX lock range into two, so we need at most
909            1 more entry. */
910
911         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
912         if (!tp) {
913                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
914                 return False;
915         }
916
917         count = 0;
918         for (i = 0; i < br_lck->num_locks; i++) {
919                 struct lock_struct *lock = &locks[i];
920                 struct lock_struct tmp_lock[3];
921                 bool lock_was_added = False;
922                 unsigned int tmp_count;
923
924                 /* Only remove our own locks - ignore fnum. */
925                 if (IS_PENDING_LOCK(lock->lock_type) ||
926                                 !brl_same_context(&lock->context, &plock->context)) {
927                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
928                         count++;
929                         continue;
930                 }
931
932                 /* Work out overlaps. */
933                 tmp_count = brlock_posix_split_merge(&tmp_lock[0], &locks[i], plock, &lock_was_added);
934
935                 if (tmp_count == 1) {
936                         /* Ether the locks didn't overlap, or the unlock completely
937                            overlapped this lock. If it didn't overlap, then there's
938                            no change in the locks. */
939                         if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
940                                 SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
941                                 /* No change in this lock. */
942                                 memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
943                                 count++;
944                         } else {
945                                 SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
946                                 overlap_found = True;
947                         }
948                         continue;
949                 } else if (tmp_count == 2) {
950                         /* The unlock overlapped an existing lock. Copy the truncated
951                            lock into the lock array. */
952                         if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
953                                 SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
954                                 SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
955                                 memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
956                                 if (tmp_lock[0].size != locks[i].size) {
957                                         overlap_found = True;
958                                 }
959                         } else {
960                                 SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
961                                 SMB_ASSERT(tmp_lock[1].lock_type == locks[i].lock_type);
962                                 memcpy(&tp[count], &tmp_lock[1], sizeof(struct lock_struct));
963                                 if (tmp_lock[1].start != locks[i].start) {
964                                         overlap_found = True;
965                                 }
966                         }
967                         count++;
968                         continue;
969                 } else {
970                         /* tmp_count == 3 - (we split a lock range in two). */
971                         SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
972                         SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
973                         SMB_ASSERT(tmp_lock[2].lock_type == locks[i].lock_type);
974
975                         memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
976                         count++;
977                         memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
978                         count++;
979                         overlap_found = True;
980                         /* Optimisation... */
981                         /* We know we're finished here as we can't overlap any
982                            more POSIX locks. Copy the rest of the lock array. */
983                         if (i < br_lck->num_locks - 1) {
984                                 memcpy(&tp[count], &locks[i+1], 
985                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
986                                 count += ((br_lck->num_locks-1) - i);
987                         }
988                         break;
989                 }
990         }
991
992         if (!overlap_found) {
993                 /* Just ignore - no change. */
994                 SAFE_FREE(tp);
995                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
996                 return True;
997         }
998
999         /* Unlock any POSIX regions. */
1000         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1001                 release_posix_lock_posix_flavour(br_lck->fsp,
1002                                                 plock->start,
1003                                                 plock->size,
1004                                                 &plock->context,
1005                                                 tp,
1006                                                 count);
1007         }
1008
1009         /* Realloc so we don't leak entries per unlock call. */
1010         if (count) {
1011                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1012                 if (!tp) {
1013                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1014                         return False;
1015                 }
1016         } else {
1017                 /* We deleted the last lock. */
1018                 SAFE_FREE(tp);
1019                 tp = NULL;
1020         }
1021
1022         br_lck->num_locks = count;
1023         SAFE_FREE(br_lck->lock_data);
1024         locks = tp;
1025         br_lck->lock_data = tp;
1026         br_lck->modified = True;
1027
1028         /* Send unlock messages to any pending waiters that overlap. */
1029
1030         for (j=0; j < br_lck->num_locks; j++) {
1031                 struct lock_struct *pend_lock = &locks[j];
1032
1033                 /* Ignore non-pending locks. */
1034                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1035                         continue;
1036                 }
1037
1038                 /* We could send specific lock info here... */
1039                 if (brl_pending_overlap(plock, pend_lock)) {
1040                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1041                                 procid_str_static(&pend_lock->context.pid )));
1042
1043                         messaging_send(msg_ctx, pend_lock->context.pid,
1044                                        MSG_SMB_UNLOCK, &data_blob_null);
1045                 }
1046         }
1047
1048         return True;
1049 }
1050
1051 /****************************************************************************
1052  Unlock a range of bytes.
1053 ****************************************************************************/
1054
1055 bool brl_unlock(struct messaging_context *msg_ctx,
1056                 struct byte_range_lock *br_lck,
1057                 uint32 smbpid,
1058                 struct server_id pid,
1059                 br_off start,
1060                 br_off size,
1061                 enum brl_flavour lock_flav)
1062 {
1063         struct lock_struct lock;
1064
1065         lock.context.smbpid = smbpid;
1066         lock.context.pid = pid;
1067         lock.context.tid = br_lck->fsp->conn->cnum;
1068         lock.start = start;
1069         lock.size = size;
1070         lock.fnum = br_lck->fsp->fnum;
1071         lock.lock_type = UNLOCK_LOCK;
1072         lock.lock_flav = lock_flav;
1073
1074         if (lock_flav == WINDOWS_LOCK) {
1075                 return brl_unlock_windows(msg_ctx, br_lck, &lock);
1076         } else {
1077                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1078         }
1079 }
1080
1081 /****************************************************************************
1082  Test if we could add a lock if we wanted to.
1083  Returns True if the region required is currently unlocked, False if locked.
1084 ****************************************************************************/
1085
1086 bool brl_locktest(struct byte_range_lock *br_lck,
1087                 uint32 smbpid,
1088                 struct server_id pid,
1089                 br_off start,
1090                 br_off size, 
1091                 enum brl_type lock_type,
1092                 enum brl_flavour lock_flav)
1093 {
1094         bool ret = True;
1095         unsigned int i;
1096         struct lock_struct lock;
1097         const struct lock_struct *locks = br_lck->lock_data;
1098         files_struct *fsp = br_lck->fsp;
1099
1100         lock.context.smbpid = smbpid;
1101         lock.context.pid = pid;
1102         lock.context.tid = br_lck->fsp->conn->cnum;
1103         lock.start = start;
1104         lock.size = size;
1105         lock.fnum = fsp->fnum;
1106         lock.lock_type = lock_type;
1107         lock.lock_flav = lock_flav;
1108
1109         /* Make sure existing locks don't conflict */
1110         for (i=0; i < br_lck->num_locks; i++) {
1111                 /*
1112                  * Our own locks don't conflict.
1113                  */
1114                 if (brl_conflict_other(&locks[i], &lock)) {
1115                         return False;
1116                 }
1117         }
1118
1119         /*
1120          * There is no lock held by an SMB daemon, check to
1121          * see if there is a POSIX lock from a UNIX or NFS process.
1122          * This only conflicts with Windows locks, not POSIX locks.
1123          */
1124
1125         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1126                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1127
1128                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1129                         (double)start, (double)size, ret ? "locked" : "unlocked",
1130                         fsp->fnum, fsp->fsp_name ));
1131
1132                 /* We need to return the inverse of is_posix_locked. */
1133                 ret = !ret;
1134         }
1135
1136         /* no conflicts - we could have added it */
1137         return ret;
1138 }
1139
1140 /****************************************************************************
1141  Query for existing locks.
1142 ****************************************************************************/
1143
1144 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1145                 uint32 *psmbpid,
1146                 struct server_id pid,
1147                 br_off *pstart,
1148                 br_off *psize, 
1149                 enum brl_type *plock_type,
1150                 enum brl_flavour lock_flav)
1151 {
1152         unsigned int i;
1153         struct lock_struct lock;
1154         const struct lock_struct *locks = br_lck->lock_data;
1155         files_struct *fsp = br_lck->fsp;
1156
1157         lock.context.smbpid = *psmbpid;
1158         lock.context.pid = pid;
1159         lock.context.tid = br_lck->fsp->conn->cnum;
1160         lock.start = *pstart;
1161         lock.size = *psize;
1162         lock.fnum = fsp->fnum;
1163         lock.lock_type = *plock_type;
1164         lock.lock_flav = lock_flav;
1165
1166         /* Make sure existing locks don't conflict */
1167         for (i=0; i < br_lck->num_locks; i++) {
1168                 const struct lock_struct *exlock = &locks[i];
1169                 bool conflict = False;
1170
1171                 if (exlock->lock_flav == WINDOWS_LOCK) {
1172                         conflict = brl_conflict(exlock, &lock);
1173                 } else {        
1174                         conflict = brl_conflict_posix(exlock, &lock);
1175                 }
1176
1177                 if (conflict) {
1178                         *psmbpid = exlock->context.smbpid;
1179                         *pstart = exlock->start;
1180                         *psize = exlock->size;
1181                         *plock_type = exlock->lock_type;
1182                         return NT_STATUS_LOCK_NOT_GRANTED;
1183                 }
1184         }
1185
1186         /*
1187          * There is no lock held by an SMB daemon, check to
1188          * see if there is a POSIX lock from a UNIX or NFS process.
1189          */
1190
1191         if(lp_posix_locking(fsp->conn->params)) {
1192                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1193
1194                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1195                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1196                         fsp->fnum, fsp->fsp_name ));
1197
1198                 if (ret) {
1199                         /* Hmmm. No clue what to set smbpid to - use -1. */
1200                         *psmbpid = 0xFFFF;
1201                         return NT_STATUS_LOCK_NOT_GRANTED;
1202                 }
1203         }
1204
1205         return NT_STATUS_OK;
1206 }
1207
1208 /****************************************************************************
1209  Remove a particular pending lock.
1210 ****************************************************************************/
1211
1212 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1213                 uint32 smbpid,
1214                 struct server_id pid,
1215                 br_off start,
1216                 br_off size,
1217                 enum brl_flavour lock_flav)
1218 {
1219         unsigned int i;
1220         struct lock_struct *locks = br_lck->lock_data;
1221         struct lock_context context;
1222
1223         context.smbpid = smbpid;
1224         context.pid = pid;
1225         context.tid = br_lck->fsp->conn->cnum;
1226
1227         for (i = 0; i < br_lck->num_locks; i++) {
1228                 struct lock_struct *lock = &locks[i];
1229
1230                 /* For pending locks we *always* care about the fnum. */
1231                 if (brl_same_context(&lock->context, &context) &&
1232                                 lock->fnum == br_lck->fsp->fnum &&
1233                                 IS_PENDING_LOCK(lock->lock_type) &&
1234                                 lock->lock_flav == lock_flav &&
1235                                 lock->start == start &&
1236                                 lock->size == size) {
1237                         break;
1238                 }
1239         }
1240
1241         if (i == br_lck->num_locks) {
1242                 /* Didn't find it. */
1243                 return False;
1244         }
1245
1246         if (i < br_lck->num_locks - 1) {
1247                 /* Found this particular pending lock - delete it */
1248                 memmove(&locks[i], &locks[i+1], 
1249                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1250         }
1251
1252         br_lck->num_locks -= 1;
1253         br_lck->modified = True;
1254         return True;
1255 }
1256
1257 /****************************************************************************
1258  Remove any locks associated with a open file.
1259  We return True if this process owns any other Windows locks on this
1260  fd and so we should not immediately close the fd.
1261 ****************************************************************************/
1262
1263 void brl_close_fnum(struct messaging_context *msg_ctx,
1264                     struct byte_range_lock *br_lck)
1265 {
1266         files_struct *fsp = br_lck->fsp;
1267         uint16 tid = fsp->conn->cnum;
1268         int fnum = fsp->fnum;
1269         unsigned int i, j, dcount=0;
1270         int num_deleted_windows_locks = 0;
1271         struct lock_struct *locks = br_lck->lock_data;
1272         struct server_id pid = procid_self();
1273         bool unlock_individually = False;
1274
1275         if(lp_posix_locking(fsp->conn->params)) {
1276
1277                 /* Check if there are any Windows locks associated with this dev/ino
1278                    pair that are not this fnum. If so we need to call unlock on each
1279                    one in order to release the system POSIX locks correctly. */
1280
1281                 for (i=0; i < br_lck->num_locks; i++) {
1282                         struct lock_struct *lock = &locks[i];
1283
1284                         if (!procid_equal(&lock->context.pid, &pid)) {
1285                                 continue;
1286                         }
1287
1288                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1289                                 continue; /* Ignore pending. */
1290                         }
1291
1292                         if (lock->context.tid != tid || lock->fnum != fnum) {
1293                                 unlock_individually = True;
1294                                 break;
1295                         }
1296                 }
1297
1298                 if (unlock_individually) {
1299                         struct lock_struct *locks_copy;
1300                         unsigned int num_locks_copy;
1301
1302                         /* Copy the current lock array. */
1303                         if (br_lck->num_locks) {
1304                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1305                                 if (!locks_copy) {
1306                                         smb_panic("brl_close_fnum: talloc failed");
1307                                 }
1308                         } else {        
1309                                 locks_copy = NULL;
1310                         }
1311
1312                         num_locks_copy = br_lck->num_locks;
1313
1314                         for (i=0; i < num_locks_copy; i++) {
1315                                 struct lock_struct *lock = &locks_copy[i];
1316
1317                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1318                                                 (lock->fnum == fnum)) {
1319                                         brl_unlock(msg_ctx,
1320                                                 br_lck,
1321                                                 lock->context.smbpid,
1322                                                 pid,
1323                                                 lock->start,
1324                                                 lock->size,
1325                                                 lock->lock_flav);
1326                                 }
1327                         }
1328                         return;
1329                 }
1330         }
1331
1332         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1333
1334         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1335
1336         for (i=0; i < br_lck->num_locks; i++) {
1337                 struct lock_struct *lock = &locks[i];
1338                 bool del_this_lock = False;
1339
1340                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1341                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1342                                 del_this_lock = True;
1343                                 num_deleted_windows_locks++;
1344                         } else if (lock->lock_flav == POSIX_LOCK) {
1345                                 del_this_lock = True;
1346                         }
1347                 }
1348
1349                 if (del_this_lock) {
1350                         /* Send unlock messages to any pending waiters that overlap. */
1351                         for (j=0; j < br_lck->num_locks; j++) {
1352                                 struct lock_struct *pend_lock = &locks[j];
1353
1354                                 /* Ignore our own or non-pending locks. */
1355                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1356                                         continue;
1357                                 }
1358
1359                                 /* Optimisation - don't send to this fnum as we're
1360                                    closing it. */
1361                                 if (pend_lock->context.tid == tid &&
1362                                     procid_equal(&pend_lock->context.pid, &pid) &&
1363                                     pend_lock->fnum == fnum) {
1364                                         continue;
1365                                 }
1366
1367                                 /* We could send specific lock info here... */
1368                                 if (brl_pending_overlap(lock, pend_lock)) {
1369                                         messaging_send(msg_ctx, pend_lock->context.pid,
1370                                                        MSG_SMB_UNLOCK, &data_blob_null);
1371                                 }
1372                         }
1373
1374                         /* found it - delete it */
1375                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1376                                 memmove(&locks[i], &locks[i+1], 
1377                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1378                         }
1379                         br_lck->num_locks--;
1380                         br_lck->modified = True;
1381                         i--;
1382                         dcount++;
1383                 }
1384         }
1385
1386         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1387                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1388                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1389         }
1390 }
1391
1392 /****************************************************************************
1393  Ensure this set of lock entries is valid.
1394 ****************************************************************************/
1395
1396 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1397 {
1398         unsigned int i;
1399         unsigned int num_valid_entries = 0;
1400         struct lock_struct *locks = *pplocks;
1401
1402         for (i = 0; i < *pnum_entries; i++) {
1403                 struct lock_struct *lock_data = &locks[i];
1404                 if (!process_exists(lock_data->context.pid)) {
1405                         /* This process no longer exists - mark this
1406                            entry as invalid by zeroing it. */
1407                         ZERO_STRUCTP(lock_data);
1408                 } else {
1409                         num_valid_entries++;
1410                 }
1411         }
1412
1413         if (num_valid_entries != *pnum_entries) {
1414                 struct lock_struct *new_lock_data = NULL;
1415
1416                 if (num_valid_entries) {
1417                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1418                         if (!new_lock_data) {
1419                                 DEBUG(3, ("malloc fail\n"));
1420                                 return False;
1421                         }
1422
1423                         num_valid_entries = 0;
1424                         for (i = 0; i < *pnum_entries; i++) {
1425                                 struct lock_struct *lock_data = &locks[i];
1426                                 if (lock_data->context.smbpid &&
1427                                                 lock_data->context.tid) {
1428                                         /* Valid (nonzero) entry - copy it. */
1429                                         memcpy(&new_lock_data[num_valid_entries],
1430                                                 lock_data, sizeof(struct lock_struct));
1431                                         num_valid_entries++;
1432                                 }
1433                         }
1434                 }
1435
1436                 SAFE_FREE(*pplocks);
1437                 *pplocks = new_lock_data;
1438                 *pnum_entries = num_valid_entries;
1439         }
1440
1441         return True;
1442 }
1443
1444 struct brl_forall_cb {
1445         void (*fn)(struct file_id id, struct server_id pid,
1446                    enum brl_type lock_type,
1447                    enum brl_flavour lock_flav,
1448                    br_off start, br_off size,
1449                    void *private_data);
1450         void *private_data;
1451 };
1452
1453 /****************************************************************************
1454  Traverse the whole database with this function, calling traverse_callback
1455  on each lock.
1456 ****************************************************************************/
1457
1458 static int traverse_fn(struct db_record *rec, void *state)
1459 {
1460         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1461         struct lock_struct *locks;
1462         struct file_id *key;
1463         unsigned int i;
1464         unsigned int num_locks = 0;
1465         unsigned int orig_num_locks = 0;
1466
1467         /* In a traverse function we must make a copy of
1468            dbuf before modifying it. */
1469
1470         locks = (struct lock_struct *)memdup(rec->value.dptr,
1471                                              rec->value.dsize);
1472         if (!locks) {
1473                 return -1; /* Terminate traversal. */
1474         }
1475
1476         key = (struct file_id *)rec->key.dptr;
1477         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1478
1479         /* Ensure the lock db is clean of entries from invalid processes. */
1480
1481         if (!validate_lock_entries(&num_locks, &locks)) {
1482                 SAFE_FREE(locks);
1483                 return -1; /* Terminate traversal */
1484         }
1485
1486         if (orig_num_locks != num_locks) {
1487                 if (num_locks) {
1488                         TDB_DATA data;
1489                         data.dptr = (uint8_t *)locks;
1490                         data.dsize = num_locks*sizeof(struct lock_struct);
1491                         rec->store(rec, data, TDB_REPLACE);
1492                 } else {
1493                         rec->delete_rec(rec);
1494                 }
1495         }
1496
1497         if (cb->fn) {
1498                 for ( i=0; i<num_locks; i++) {
1499                         cb->fn(*key,
1500                                 locks[i].context.pid,
1501                                 locks[i].lock_type,
1502                                 locks[i].lock_flav,
1503                                 locks[i].start,
1504                                 locks[i].size,
1505                                 cb->private_data);
1506                 }
1507         }
1508
1509         SAFE_FREE(locks);
1510         return 0;
1511 }
1512
1513 /*******************************************************************
1514  Call the specified function on each lock in the database.
1515 ********************************************************************/
1516
1517 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1518                           enum brl_type lock_type,
1519                           enum brl_flavour lock_flav,
1520                           br_off start, br_off size,
1521                           void *private_data),
1522                void *private_data)
1523 {
1524         struct brl_forall_cb cb;
1525
1526         if (!brlock_db) {
1527                 return 0;
1528         }
1529         cb.fn = fn;
1530         cb.private_data = private_data;
1531         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1532 }
1533
1534 /*******************************************************************
1535  Store a potentially modified set of byte range lock data back into
1536  the database.
1537  Unlock the record.
1538 ********************************************************************/
1539
1540 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1541 {
1542         if (br_lck->read_only) {
1543                 SMB_ASSERT(!br_lck->modified);
1544         }
1545
1546         if (!br_lck->modified) {
1547                 goto done;
1548         }
1549
1550         if (br_lck->num_locks == 0) {
1551                 /* No locks - delete this entry. */
1552                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1553                 if (!NT_STATUS_IS_OK(status)) {
1554                         DEBUG(0, ("delete_rec returned %s\n",
1555                                   nt_errstr(status)));
1556                         smb_panic("Could not delete byte range lock entry");
1557                 }
1558         } else {
1559                 TDB_DATA data;
1560                 NTSTATUS status;
1561
1562                 data.dptr = (uint8 *)br_lck->lock_data;
1563                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1564
1565                 status = br_lck->record->store(br_lck->record, data,
1566                                                TDB_REPLACE);
1567                 if (!NT_STATUS_IS_OK(status)) {
1568                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1569                         smb_panic("Could not store byte range mode entry");
1570                 }
1571         }
1572
1573  done:
1574
1575         SAFE_FREE(br_lck->lock_data);
1576         TALLOC_FREE(br_lck->record);
1577         return 0;
1578 }
1579
1580 /*******************************************************************
1581  Fetch a set of byte range lock data from the database.
1582  Leave the record locked.
1583  TALLOC_FREE(brl) will release the lock in the destructor.
1584 ********************************************************************/
1585
1586 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1587                                         files_struct *fsp, bool read_only)
1588 {
1589         TDB_DATA key, data;
1590         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1591
1592         if (br_lck == NULL) {
1593                 return NULL;
1594         }
1595
1596         br_lck->fsp = fsp;
1597         br_lck->num_locks = 0;
1598         br_lck->modified = False;
1599         memset(&br_lck->key, '\0', sizeof(struct file_id));
1600         br_lck->key = fsp->file_id;
1601
1602         key.dptr = (uint8 *)&br_lck->key;
1603         key.dsize = sizeof(struct file_id);
1604
1605         if (!fsp->lockdb_clean) {
1606                 /* We must be read/write to clean
1607                    the dead entries. */
1608                 read_only = False;
1609         }
1610
1611         if (read_only) {
1612                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1613                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1614                         TALLOC_FREE(br_lck);
1615                         return NULL;
1616                 }
1617                 br_lck->record = NULL;
1618         }
1619         else {
1620                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1621
1622                 if (br_lck->record == NULL) {
1623                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1624                         TALLOC_FREE(br_lck);
1625                         return NULL;
1626                 }
1627
1628                 data = br_lck->record->value;
1629         }
1630
1631         br_lck->read_only = read_only;
1632         br_lck->lock_data = NULL;
1633
1634         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1635
1636         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1637
1638         if (br_lck->num_locks != 0) {
1639                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1640                                                      br_lck->num_locks);
1641                 if (br_lck->lock_data == NULL) {
1642                         DEBUG(0, ("malloc failed\n"));
1643                         TALLOC_FREE(br_lck);
1644                         return NULL;
1645                 }
1646
1647                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1648         }
1649         
1650         if (!fsp->lockdb_clean) {
1651                 int orig_num_locks = br_lck->num_locks;
1652
1653                 /* This is the first time we've accessed this. */
1654                 /* Go through and ensure all entries exist - remove any that don't. */
1655                 /* Makes the lockdb self cleaning at low cost. */
1656
1657                 if (!validate_lock_entries(&br_lck->num_locks,
1658                                            &br_lck->lock_data)) {
1659                         SAFE_FREE(br_lck->lock_data);
1660                         TALLOC_FREE(br_lck);
1661                         return NULL;
1662                 }
1663
1664                 /* Ensure invalid locks are cleaned up in the destructor. */
1665                 if (orig_num_locks != br_lck->num_locks) {
1666                         br_lck->modified = True;
1667                 }
1668
1669                 /* Mark the lockdb as "clean" as seen from this open file. */
1670                 fsp->lockdb_clean = True;
1671         }
1672
1673         if (DEBUGLEVEL >= 10) {
1674                 unsigned int i;
1675                 struct lock_struct *locks = br_lck->lock_data;
1676                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1677                         br_lck->num_locks,
1678                           file_id_string_tos(&fsp->file_id)));
1679                 for( i = 0; i < br_lck->num_locks; i++) {
1680                         print_lock_struct(i, &locks[i]);
1681                 }
1682         }
1683         return br_lck;
1684 }
1685
1686 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1687                                         files_struct *fsp)
1688 {
1689         return brl_get_locks_internal(mem_ctx, fsp, False);
1690 }
1691
1692 struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
1693                                         files_struct *fsp)
1694 {
1695         return brl_get_locks_internal(mem_ctx, fsp, True);
1696 }
1697
1698 struct brl_revalidate_state {
1699         ssize_t array_size;
1700         uint32 num_pids;
1701         struct server_id *pids;
1702 };
1703
1704 /*
1705  * Collect PIDs of all processes with pending entries
1706  */
1707
1708 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1709                                    enum brl_type lock_type,
1710                                    enum brl_flavour lock_flav,
1711                                    br_off start, br_off size,
1712                                    void *private_data)
1713 {
1714         struct brl_revalidate_state *state =
1715                 (struct brl_revalidate_state *)private_data;
1716
1717         if (!IS_PENDING_LOCK(lock_type)) {
1718                 return;
1719         }
1720
1721         add_to_large_array(state, sizeof(pid), (void *)&pid,
1722                            &state->pids, &state->num_pids,
1723                            &state->array_size);
1724 }
1725
1726 /*
1727  * qsort callback to sort the processes
1728  */
1729
1730 static int compare_procids(const void *p1, const void *p2)
1731 {
1732         const struct server_id *i1 = (struct server_id *)p1;
1733         const struct server_id *i2 = (struct server_id *)p2;
1734
1735         if (i1->pid < i2->pid) return -1;
1736         if (i2->pid > i2->pid) return 1;
1737         return 0;
1738 }
1739
1740 /*
1741  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1742  * locks so that they retry. Mainly used in the cluster code after a node has
1743  * died.
1744  *
1745  * Done in two steps to avoid double-sends: First we collect all entries in an
1746  * array, then qsort that array and only send to non-dupes.
1747  */
1748
1749 static void brl_revalidate(struct messaging_context *msg_ctx,
1750                            void *private_data,
1751                            uint32_t msg_type,
1752                            struct server_id server_id,
1753                            DATA_BLOB *data)
1754 {
1755         struct brl_revalidate_state *state;
1756         uint32 i;
1757         struct server_id last_pid;
1758
1759         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
1760                 DEBUG(0, ("talloc failed\n"));
1761                 return;
1762         }
1763
1764         brl_forall(brl_revalidate_collect, state);
1765
1766         if (state->array_size == -1) {
1767                 DEBUG(0, ("talloc failed\n"));
1768                 goto done;
1769         }
1770
1771         if (state->num_pids == 0) {
1772                 goto done;
1773         }
1774
1775         qsort(state->pids, state->num_pids, sizeof(state->pids[0]),
1776               compare_procids);
1777
1778         ZERO_STRUCT(last_pid);
1779
1780         for (i=0; i<state->num_pids; i++) {
1781                 if (procid_equal(&last_pid, &state->pids[i])) {
1782                         /*
1783                          * We've seen that one already
1784                          */
1785                         continue;
1786                 }
1787
1788                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
1789                                &data_blob_null);
1790                 last_pid = state->pids[i];
1791         }
1792
1793  done:
1794         TALLOC_FREE(state);
1795         return;
1796 }
1797
1798 void brl_register_msgs(struct messaging_context *msg_ctx)
1799 {
1800         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
1801                            brl_revalidate);
1802 }