Move to MS-FSA algorithm when checking for invalid lock range.
source3/locking/brlock.c
/* 
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

#include "includes.h"
#include "librpc/gen_ndr/messaging.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, struct lock_struct *pls)
{
	DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %s, ",
			i,
			(unsigned int)pls->context.smbpid,
			(unsigned int)pls->context.tid,
			procid_str(talloc_tos(), &pls->context.pid) ));

	DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
		(double)pls->start,
		(double)pls->size,
		pls->fnum,
		lock_type_name(pls->lock_type),
		lock_flav_name(pls->lock_flav) ));
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

bool brl_same_context(const struct lock_context *ctx1, 
		      const struct lock_context *ctx2)
{
	return (procid_equal(&ctx1->pid, &ctx2->pid) &&
		(ctx1->smbpid == ctx2->smbpid) &&
		(ctx1->tid == ctx2->tid));
}

/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
			const struct lock_struct *lck2)
{
	/* XXX Remove for Win7 compatibility. */
	/* this extra check is not redundant - it copes with locks
	   that go beyond the end of 64 bit file space */
	if (lck1->size != 0 &&
	    lck1->start == lck2->start &&
	    lck1->size == lck2->size) {
		return True;
	}

	if (lck1->start >= (lck2->start+lck2->size) ||
	    lck2->start >= (lck1->start+lck1->size)) {
		return False;
	}
	return True;
}
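
/* Worked example (illustrative values): lck1 = {start=100, size=10}
   covers bytes 100..109 and lck2 = {start=110, size=5} covers 110..114,
   so lck2->start >= lck1->start + lck1->size and we return False. The
   identical-range test above matters for ranges whose start+size wraps
   past 2^64: for those the generic comparison would wrongly report no
   overlap. */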

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1, 
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A READ lock can stack on top of a WRITE lock if they have the same
	 * context & fnum. */
	if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
	    brl_same_context(&lck1->context, &lck2->context) &&
	    lck1->fnum == lck2->fnum) {
		return False;
	}

	return brl_overlap(lck1, lck2);
}

/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1, 
				const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
	SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
	SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* Locks on the same context don't conflict. Ignore fnum. */
	if (brl_same_context(&lck1->context, &lck2->context)) {
		return False;
	}

	/* One is read, the other write, or the context is different,
	   do they overlap ? */
	return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
154 #if ZERO_ZERO
155 static bool brl_conflict1(const struct lock_struct *lck1, 
156                          const struct lock_struct *lck2)
157 {
158         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
159                 return False;
160
161         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
162                 return False;
163         }
164
165         if (brl_same_context(&lck1->context, &lck2->context) &&
166             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
167                 return False;
168         }
169
170         if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
171                 return True;
172         }
173
174         if (lck1->start >= (lck2->start + lck2->size) ||
175             lck2->start >= (lck1->start + lck1->size)) {
176                 return False;
177         }
178             
179         return True;
180
181 #endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
{
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
		return False;

	/* POSIX flavour locks never conflict here - this is only called
	   in the read/write path. */

	if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
		return False;

	/*
	 * Incoming WRITE locks conflict with existing READ locks even
	 * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
	 */

	if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
		if (brl_same_context(&lck1->context, &lck2->context) &&
					lck1->fnum == lck2->fnum)
			return False;
	}

	return brl_overlap(lck1, lck2);
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
	if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
		return True;
	if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
		return True;
	return False;
}
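
/* Worked example (illustrative values): an unlock of {start=0, size=100}
   overlaps a pending lock starting at offset 50, because 0 <= 50 and
   0 + 100 > 50, so the blocked waiter will be signalled to retry its
   lock. */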

/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
{
	if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
		/* amazing the little things you learn with a test
		   suite. Locks beyond this offset (as a 64 bit
		   number!) always generate the conflict error code,
		   unless the top bit is set */
		if (!blocking_lock) {
			fsp->last_lock_failure = *lock;
		}
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
			lock->context.tid == fsp->last_lock_failure.context.tid &&
			lock->fnum == fsp->last_lock_failure.fnum &&
			lock->start == fsp->last_lock_failure.start) {
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (!blocking_lock) {
		fsp->last_lock_failure = *lock;
	}
	return NT_STATUS_LOCK_NOT_GRANTED;
}
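
/* Illustrative sequence: the first failure on a range returns
   NT_STATUS_LOCK_NOT_GRANTED and is recorded in fsp->last_lock_failure;
   an immediate retry of the same range by the same context then returns
   NT_STATUS_FILE_LOCK_CONFLICT instead, mimicking the w2k3 behaviour
   described above. */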

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
	int tdb_flags;

	if (brlock_db) {
		return;
	}

	tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;

	if (!lp_clustering()) {
		/*
		 * We can't use the SEQNUM trick to cache brlock
		 * entries in the clustering case because ctdb seqnum
		 * propagation has a delay.
		 */
		tdb_flags |= TDB_SEQNUM;
	}

	brlock_db = db_open(NULL, lock_path("brlock.tdb"),
			    lp_open_files_db_hash_size(), tdb_flags,
			    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
	if (!brlock_db) {
		DEBUG(0,("Failed to open byte range locking database %s\n",
			lock_path("brlock.tdb")));
		return;
	}
}

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
	TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1, 
			 const struct lock_struct *lck2)
{
	if (lck1->start != lck2->start) {
		return (lck1->start - lck2->start);
	}
	if (lck2->size != lck1->size) {
		return ((int)lck1->size - (int)lck2->size);
	}
	return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
	unsigned int i;
	files_struct *fsp = br_lck->fsp;
	struct lock_struct *locks = br_lck->lock_data;
	NTSTATUS status;

	SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

	if ((plock->start + plock->size - 1 < plock->start) &&
			plock->size != 0) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}
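
	/* Example of a range rejected above (illustrative values):
	   start = 0xFFFFFFFFFFFFFFFF with size = 2 gives
	   start + size - 1 == 0 < start, i.e. the range wraps the
	   64-bit space, so we return NT_STATUS_INVALID_LOCK_RANGE
	   per the MS-FSA check. */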

	for (i=0; i < br_lck->num_locks; i++) {
		/* Do any Windows or POSIX locks conflict ? */
		if (brl_conflict(&locks[i], plock)) {
			/* Remember who blocked us. */
			plock->context.smbpid = locks[i].context.smbpid;
			return brl_lock_failed(fsp,plock,blocking_lock);
		}
#if ZERO_ZERO
		if (plock->start == 0 && plock->size == 0 && 
				locks[i].size == 0) {
			break;
		}
#endif
	}

	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}

	/* We can get the Windows lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
		int errno_ret;
		if (!set_posix_lock_windows_flavour(fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&plock->context,
				locks,
				br_lck->num_locks,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smbpid = 0xFFFFFFFF;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* no conflicts - add it to the list of locks */
	locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
	if (!locks) {
		status = NT_STATUS_NO_MEMORY;
		goto fail;
	}

	memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
	br_lck->num_locks += 1;
	br_lck->lock_data = locks;
	br_lck->modified = True;

	return NT_STATUS_OK;
 fail:
	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}
	return status;
}

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,	/* Output array. */
						struct lock_struct *ex,		/* existing lock. */
						struct lock_struct *plock)	/* proposed lock. */
{
	bool lock_types_differ = (ex->lock_type != plock->lock_type);

	/* We can't merge non-conflicting locks on different context - ignore fnum. */

	if (!brl_same_context(&ex->context, &plock->context)) {
		/* Just copy. */
		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

	/* We now know we have the same context. */

	/* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

	if ( (ex->start > (plock->start + plock->size)) ||
		(plock->start > (ex->start + ex->size))) {

		/* No overlap with this lock - copy existing. */

		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start + ex->size <= plock->start + plock->size) ) {

		/* Replace - discard existing lock. */

		return 0;
	}

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if (plock->start + plock->size == ex->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if (ex->start + ex->size == plock->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the existing lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->start = ex->start;
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start <= plock->start + plock->size) &&
		(ex->start + ex->size > plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add remaining existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing start and size. */
			lck_arr[0].start = plock->start + plock->size;
			lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += (ex->start + ex->size) - (plock->start + plock->size);
			return 0;
		}
	}

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if ( (ex->start < plock->start) &&
			(ex->start + ex->size >= plock->start) &&
			(ex->start + ex->size <= plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the truncated old lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing size. */
			lck_arr[0].size = plock->start - ex->start;
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. MUST ADJUST plock SIZE FIRST ! */
			plock->size += (plock->start - ex->start);
			plock->start = ex->start;
			return 0;
		}
	}

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

	if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

		if (lock_types_differ) {

			/* We have to split ex into two locks here. */

			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

			/* Adjust first existing size. */
			lck_arr[0].size = plock->start - ex->start;

			/* Adjust second existing start and size. */
			lck_arr[1].start = plock->start + plock->size;
			lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 2;
		} else {
			/* Just eat the existing locks, merge them into plock. */
			plock->start = ex->start;
			plock->size = ex->size;
			return 0;
		}
	}

	/* Never get here. */
	smb_panic("brlock_posix_split_merge");
	/* Notreached. */

	/* Keep some compilers happy. */
	return 0;
}
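
/* Worked merge example (illustrative values, same context and lock type):
   ex = {start=100, size=50} and plock = {start=120, size=100} fall into
   the "overlap before" case above, so plock is widened to
   {start=100, size=120} and 0 is returned - ex is discarded and only the
   merged range survives. */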

/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       struct lock_struct *plock)
{
	unsigned int i, count, posix_count;
	struct lock_struct *locks = br_lck->lock_data;
	struct lock_struct *tp;
	bool signal_pending_read = False;
	bool break_oplocks = false;
	NTSTATUS status;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size - 1 < plock->start) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, and add our lock,
	   so we need at most 2 more entries. */

	tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
	if (!tp) {
		return NT_STATUS_NO_MEMORY;
	}

	count = posix_count = 0;

	for (i=0; i < br_lck->num_locks; i++) {
		struct lock_struct *curr_lock = &locks[i];

		/* If we have a pending read lock, a lock downgrade should
		   trigger a lock re-evaluation. */
		if (curr_lock->lock_type == PENDING_READ_LOCK &&
				brl_pending_overlap(plock, curr_lock)) {
			signal_pending_read = True;
		}

		if (curr_lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(curr_lock, plock)) {
				/* No games with error messages. */
				SAFE_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smbpid = curr_lock->context.smbpid;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
			count++;
		} else {
			unsigned int tmp_count = 0;

			/* POSIX conflict semantics are different. */
			if (brl_conflict_posix(curr_lock, plock)) {
				/* Can't block ourselves with POSIX locks. */
				/* No games with error messages. */
				SAFE_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smbpid = curr_lock->context.smbpid;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}

			/* Work out overlaps. */
			tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
			posix_count += tmp_count;
			count += tmp_count;
		}
	}

	/*
	 * Break oplocks while we hold a brl. Since lock() and unlock() calls
	 * are not symmetric with POSIX semantics, we cannot guarantee our
	 * contend_level2_oplocks_begin/end calls will be acquired and
	 * released one-for-one as with Windows semantics. Therefore we only
	 * call contend_level2_oplocks_begin if this is the first POSIX brl on
	 * the file.
	 */
	break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
			 posix_count == 0);
	if (break_oplocks) {
		contend_level2_oplocks_begin(br_lck->fsp,
					     LEVEL2_CONTEND_POSIX_BRL);
	}

	/* Try and add the lock in order, sorted by lock start. */
	for (i=0; i < count; i++) {
		struct lock_struct *curr_lock = &tp[i];

		if (curr_lock->start <= plock->start) {
			continue;
		}
		/* Found the insertion point. */
		break;
	}

	if (i < count) {
		memmove(&tp[i+1], &tp[i],
			(count - i)*sizeof(struct lock_struct));
	}
	memcpy(&tp[i], plock, sizeof(struct lock_struct));
	count++;

	/* We can get the POSIX lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
		int errno_ret;

		/* The lower layer just needs to attempt to
		   get the system POSIX lock. We've weeded out
		   any conflicts above. */

		if (!set_posix_lock_posix_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smbpid = 0xFFFFFFFF;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				SAFE_FREE(tp);
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				SAFE_FREE(tp);
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* If we didn't use all the allocated size,
	 * realloc so we don't leak entries per lock call. */
	if (count < br_lck->num_locks + 2) {
		tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
		if (!tp) {
			status = NT_STATUS_NO_MEMORY;
			goto fail;
		}
	}

	br_lck->num_locks = count;
	SAFE_FREE(br_lck->lock_data);
	br_lck->lock_data = tp;
	locks = tp;
	br_lck->modified = True;

	/* A successful downgrade from write to read lock can trigger a lock
	   re-evaluation where waiting readers can now proceed. */

	if (signal_pending_read) {
		/* Send unlock messages to any pending read waiters that overlap. */
		for (i=0; i < br_lck->num_locks; i++) {
			struct lock_struct *pend_lock = &locks[i];

			/* Ignore non-pending locks. */
			if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
				continue;
			}

			if (pend_lock->lock_type == PENDING_READ_LOCK &&
					brl_pending_overlap(plock, pend_lock)) {
				DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
					procid_str_static(&pend_lock->context.pid )));

				messaging_send(msg_ctx, pend_lock->context.pid,
					       MSG_SMB_UNLOCK, &data_blob_null);
			}
		}
	}

	return NT_STATUS_OK;
 fail:
	if (break_oplocks) {
		contend_level2_oplocks_end(br_lck->fsp,
					   LEVEL2_CONTEND_POSIX_BRL);
	}
	return status;
}

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
				       struct byte_range_lock *br_lck,
				       struct lock_struct *plock,
				       bool blocking_lock,
				       struct blocking_lock_record *blr)
{
	VFS_FIND(brl_lock_windows);
	return handle->fns->brl_lock_windows(handle, br_lck, plock,
					     blocking_lock, blr);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
		struct byte_range_lock *br_lck,
		uint32 smbpid,
		struct server_id pid,
		br_off start,
		br_off size, 
		enum brl_type lock_type,
		enum brl_flavour lock_flav,
		bool blocking_lock,
		uint32 *psmbpid,
		struct blocking_lock_record *blr)
{
	NTSTATUS ret;
	struct lock_struct lock;

#if !ZERO_ZERO
	if (start == 0 && size == 0) {
		DEBUG(0,("client sent 0/0 lock - please report this\n"));
	}
#endif

#ifdef DEVELOPER
	/* Quieten valgrind on test. */
	memset(&lock, '\0', sizeof(lock));
#endif

	lock.context.smbpid = smbpid;
	lock.context.pid = pid;
	lock.context.tid = br_lck->fsp->conn->cnum;
	lock.start = start;
	lock.size = size;
	lock.fnum = br_lck->fsp->fnum;
	lock.lock_type = lock_type;
	lock.lock_flav = lock_flav;

	if (lock_flav == WINDOWS_LOCK) {
		ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
		    &lock, blocking_lock, blr);
	} else {
		ret = brl_lock_posix(msg_ctx, br_lck, &lock);
	}

#if ZERO_ZERO
	/* sort the lock list */
	TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
#endif

	/* If we're returning an error, return who blocked us. */
	if (!NT_STATUS_IS_OK(ret) && psmbpid) {
		*psmbpid = lock.context.smbpid;
	}
	return ret;
}
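
/* Illustrative only - not part of this module. A caller that already
   holds the byte_range_lock record might request a non-blocking
   Windows-flavour write lock roughly as follows (the local variable
   names here are hypothetical):

	uint32 blocker_smbpid;
	NTSTATUS status = brl_lock(msg_ctx, br_lck, smbpid,
				   procid_self(), offset, count,
				   WRITE_LOCK, WINDOWS_LOCK,
				   False, &blocker_smbpid, NULL);

   On failure, blocker_smbpid holds the smbpid of the lock holder that
   blocked us, as set in the paths above. */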
956
957 /****************************************************************************
958  Unlock a range of bytes - Windows semantics.
959 ****************************************************************************/
960
961 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
962                                struct byte_range_lock *br_lck,
963                                const struct lock_struct *plock)
964 {
965         unsigned int i, j;
966         struct lock_struct *locks = br_lck->lock_data;
967         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
968
969         SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
970
971 #if ZERO_ZERO
972         /* Delete write locks by preference... The lock list
973            is sorted in the zero zero case. */
974
975         for (i = 0; i < br_lck->num_locks; i++) {
976                 struct lock_struct *lock = &locks[i];
977
978                 if (lock->lock_type == WRITE_LOCK &&
979                     brl_same_context(&lock->context, &plock->context) &&
980                     lock->fnum == plock->fnum &&
981                     lock->lock_flav == WINDOWS_LOCK &&
982                     lock->start == plock->start &&
983                     lock->size == plock->size) {
984
985                         /* found it - delete it */
986                         deleted_lock_type = lock->lock_type;
987                         break;
988                 }
989         }
990
991         if (i != br_lck->num_locks) {
992                 /* We found it - don't search again. */
993                 goto unlock_continue;
994         }
995 #endif
996
997         for (i = 0; i < br_lck->num_locks; i++) {
998                 struct lock_struct *lock = &locks[i];
999
1000                 /* Only remove our own locks that match in start, size, and flavour. */
1001                 if (brl_same_context(&lock->context, &plock->context) &&
1002                                         lock->fnum == plock->fnum &&
1003                                         lock->lock_flav == WINDOWS_LOCK &&
1004                                         lock->start == plock->start &&
1005                                         lock->size == plock->size ) {
1006                         deleted_lock_type = lock->lock_type;
1007                         break;
1008                 }
1009         }
1010
1011         if (i == br_lck->num_locks) {
1012                 /* we didn't find it */
1013                 return False;
1014         }
1015
1016 #if ZERO_ZERO
1017   unlock_continue:
1018 #endif
1019
1020         /* Actually delete the lock. */
1021         if (i < br_lck->num_locks - 1) {
1022                 memmove(&locks[i], &locks[i+1], 
1023                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1024         }
1025
1026         br_lck->num_locks -= 1;
1027         br_lck->modified = True;
1028
1029         /* Unlock the underlying POSIX regions. */
1030         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1031                 release_posix_lock_windows_flavour(br_lck->fsp,
1032                                 plock->start,
1033                                 plock->size,
1034                                 deleted_lock_type,
1035                                 &plock->context,
1036                                 locks,
1037                                 br_lck->num_locks);
1038         }
1039
1040         /* Send unlock messages to any pending waiters that overlap. */
1041         for (j=0; j < br_lck->num_locks; j++) {
1042                 struct lock_struct *pend_lock = &locks[j];
1043
1044                 /* Ignore non-pending locks. */
1045                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1046                         continue;
1047                 }
1048
1049                 /* We could send specific lock info here... */
1050                 if (brl_pending_overlap(plock, pend_lock)) {
1051                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1052                                 procid_str_static(&pend_lock->context.pid )));
1053
1054                         messaging_send(msg_ctx, pend_lock->context.pid,
1055                                        MSG_SMB_UNLOCK, &data_blob_null);
1056                 }
1057         }
1058
1059         contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1060         return True;
1061 }
1062
1063 /****************************************************************************
1064  Unlock a range of bytes - POSIX semantics.
1065 ****************************************************************************/
1066
1067 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1068                              struct byte_range_lock *br_lck,
1069                              struct lock_struct *plock)
1070 {
1071         unsigned int i, j, count;
1072         struct lock_struct *tp;
1073         struct lock_struct *locks = br_lck->lock_data;
1074         bool overlap_found = False;
1075
1076         /* No zero-zero locks for POSIX. */
1077         if (plock->start == 0 && plock->size == 0) {
1078                 return False;
1079         }
1080
1081         /* Don't allow 64-bit lock wrap. */
1082         if (plock->start + plock->size < plock->start ||
1083                         plock->start + plock->size < plock->size) {
1084                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1085                 return False;
1086         }
1087
1088         /* The worst case scenario here is we have to split an
1089            existing POSIX lock range into two, so we need at most
1090            1 more entry. */
1091
1092         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
1093         if (!tp) {
1094                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1095                 return False;
1096         }
1097
1098         count = 0;
1099         for (i = 0; i < br_lck->num_locks; i++) {
1100                 struct lock_struct *lock = &locks[i];
1101                 unsigned int tmp_count;
1102
1103                 /* Only remove our own locks - ignore fnum. */
1104                 if (IS_PENDING_LOCK(lock->lock_type) ||
1105                                 !brl_same_context(&lock->context, &plock->context)) {
1106                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1107                         count++;
1108                         continue;
1109                 }
1110
1111                 if (lock->lock_flav == WINDOWS_LOCK) {
1112                         /* Do any Windows flavour locks conflict ? */
1113                         if (brl_conflict(lock, plock)) {
1114                                 SAFE_FREE(tp);
1115                                 return false;
1116                         }
1117                         /* Just copy the Windows lock into the new array. */
1118                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1119                         count++;
1120                         continue;
1121                 }
1122
1123                 /* Work out overlaps. */
1124                 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1125
1126                 if (tmp_count == 0) {
1127                         /* plock overlapped the existing lock completely,
1128                            or replaced it. Don't copy the existing lock. */
1129                         overlap_found = true;
1130                 } else if (tmp_count == 1) {
1131                         /* Either no overlap, (simple copy of existing lock) or
1132                          * an overlap of an existing lock. */
1133                         /* If the lock changed size, we had an overlap. */
1134                         if (tp[count].size != lock->size) {
1135                                 overlap_found = true;
1136                         }
1137                         count += tmp_count;
1138                 } else if (tmp_count == 2) {
1139                         /* We split a lock range in two. */
1140                         overlap_found = true;
1141                         count += tmp_count;
1142
1143                         /* Optimisation... */
1144                         /* We know we're finished here as we can't overlap any
1145                            more POSIX locks. Copy the rest of the lock array. */
1146
1147                         if (i < br_lck->num_locks - 1) {
1148                                 memcpy(&tp[count], &locks[i+1],
1149                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1150                                 count += ((br_lck->num_locks-1) - i);
1151                         }
1152                         break;
1153                 }
1154
1155         }
1156
1157         if (!overlap_found) {
1158                 /* Just ignore - no change. */
1159                 SAFE_FREE(tp);
1160                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1161                 return True;
1162         }
1163
1164         /* Unlock any POSIX regions. */
1165         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1166                 release_posix_lock_posix_flavour(br_lck->fsp,
1167                                                 plock->start,
1168                                                 plock->size,
1169                                                 &plock->context,
1170                                                 tp,
1171                                                 count);
1172         }
1173
1174         /* Realloc so we don't leak entries per unlock call. */
1175         if (count) {
1176                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1177                 if (!tp) {
1178                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1179                         return False;
1180                 }
1181         } else {
1182                 /* We deleted the last lock. */
1183                 SAFE_FREE(tp);
1184                 tp = NULL;
1185         }
1186
1187         contend_level2_oplocks_end(br_lck->fsp,
1188                                    LEVEL2_CONTEND_POSIX_BRL);
1189
1190         br_lck->num_locks = count;
1191         SAFE_FREE(br_lck->lock_data);
1192         locks = tp;
1193         br_lck->lock_data = tp;
1194         br_lck->modified = True;
1195
1196         /* Send unlock messages to any pending waiters that overlap. */
1197
1198         for (j=0; j < br_lck->num_locks; j++) {
1199                 struct lock_struct *pend_lock = &locks[j];
1200
1201                 /* Ignore non-pending locks. */
1202                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1203                         continue;
1204                 }
1205
1206                 /* We could send specific lock info here... */
1207                 if (brl_pending_overlap(plock, pend_lock)) {
1208                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1209                                 procid_str_static(&pend_lock->context.pid )));
1210
1211                         messaging_send(msg_ctx, pend_lock->context.pid,
1212                                        MSG_SMB_UNLOCK, &data_blob_null);
1213                 }
1214         }
1215
1216         return True;
1217 }
1218
1219 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1220                                      struct messaging_context *msg_ctx,
1221                                      struct byte_range_lock *br_lck,
1222                                      const struct lock_struct *plock)
1223 {
1224         VFS_FIND(brl_unlock_windows);
1225         return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
1226 }
1227
1228 /****************************************************************************
1229  Unlock a range of bytes.
1230 ****************************************************************************/
1231
1232 bool brl_unlock(struct messaging_context *msg_ctx,
1233                 struct byte_range_lock *br_lck,
1234                 uint32 smbpid,
1235                 struct server_id pid,
1236                 br_off start,
1237                 br_off size,
1238                 enum brl_flavour lock_flav)
1239 {
1240         struct lock_struct lock;
1241
1242         lock.context.smbpid = smbpid;
1243         lock.context.pid = pid;
1244         lock.context.tid = br_lck->fsp->conn->cnum;
1245         lock.start = start;
1246         lock.size = size;
1247         lock.fnum = br_lck->fsp->fnum;
1248         lock.lock_type = UNLOCK_LOCK;
1249         lock.lock_flav = lock_flav;
1250
1251         if (lock_flav == WINDOWS_LOCK) {
1252                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1253                     br_lck, &lock);
1254         } else {
1255                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1256         }
1257 }
1258
1259 /****************************************************************************
1260  Test if we could add a lock if we wanted to.
1261  Returns True if the region required is currently unlocked, False if locked.
1262 ****************************************************************************/
1263
1264 bool brl_locktest(struct byte_range_lock *br_lck,
1265                 uint32 smbpid,
1266                 struct server_id pid,
1267                 br_off start,
1268                 br_off size, 
1269                 enum brl_type lock_type,
1270                 enum brl_flavour lock_flav)
1271 {
1272         bool ret = True;
1273         unsigned int i;
1274         struct lock_struct lock;
1275         const struct lock_struct *locks = br_lck->lock_data;
1276         files_struct *fsp = br_lck->fsp;
1277
1278         lock.context.smbpid = smbpid;
1279         lock.context.pid = pid;
1280         lock.context.tid = br_lck->fsp->conn->cnum;
1281         lock.start = start;
1282         lock.size = size;
1283         lock.fnum = fsp->fnum;
1284         lock.lock_type = lock_type;
1285         lock.lock_flav = lock_flav;
1286
1287         /* Make sure existing locks don't conflict */
1288         for (i=0; i < br_lck->num_locks; i++) {
1289                 /*
1290                  * Our own locks don't conflict.
1291                  */
1292                 if (brl_conflict_other(&locks[i], &lock)) {
1293                         return False;
1294                 }
1295         }
1296
1297         /*
1298          * There is no lock held by an SMB daemon, check to
1299          * see if there is a POSIX lock from a UNIX or NFS process.
1300          * This only conflicts with Windows locks, not POSIX locks.
1301          */
1302
1303         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1304                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1305
1306                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1307                         (double)start, (double)size, ret ? "locked" : "unlocked",
1308                         fsp->fnum, fsp_str_dbg(fsp)));
1309
1310                 /* We need to return the inverse of is_posix_locked. */
1311                 ret = !ret;
1312         }
1313
1314         /* no conflicts - we could have added it */
1315         return ret;
1316 }
1317
1318 /****************************************************************************
1319  Query for existing locks.
1320 ****************************************************************************/
1321
1322 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1323                 uint32 *psmbpid,
1324                 struct server_id pid,
1325                 br_off *pstart,
1326                 br_off *psize, 
1327                 enum brl_type *plock_type,
1328                 enum brl_flavour lock_flav)
1329 {
1330         unsigned int i;
1331         struct lock_struct lock;
1332         const struct lock_struct *locks = br_lck->lock_data;
1333         files_struct *fsp = br_lck->fsp;
1334
1335         lock.context.smbpid = *psmbpid;
1336         lock.context.pid = pid;
1337         lock.context.tid = br_lck->fsp->conn->cnum;
1338         lock.start = *pstart;
1339         lock.size = *psize;
1340         lock.fnum = fsp->fnum;
1341         lock.lock_type = *plock_type;
1342         lock.lock_flav = lock_flav;
1343
1344         /* Make sure existing locks don't conflict */
1345         for (i=0; i < br_lck->num_locks; i++) {
1346                 const struct lock_struct *exlock = &locks[i];
1347                 bool conflict = False;
1348
1349                 if (exlock->lock_flav == WINDOWS_LOCK) {
1350                         conflict = brl_conflict(exlock, &lock);
1351                 } else {        
1352                         conflict = brl_conflict_posix(exlock, &lock);
1353                 }
1354
1355                 if (conflict) {
1356                         *psmbpid = exlock->context.smbpid;
1357                         *pstart = exlock->start;
1358                         *psize = exlock->size;
1359                         *plock_type = exlock->lock_type;
1360                         return NT_STATUS_LOCK_NOT_GRANTED;
1361                 }
1362         }
1363
1364         /*
1365          * There is no lock held by an SMB daemon, check to
1366          * see if there is a POSIX lock from a UNIX or NFS process.
1367          */
1368
1369         if(lp_posix_locking(fsp->conn->params)) {
1370                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1371
1372                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1373                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1374                         fsp->fnum, fsp_str_dbg(fsp)));
1375
1376                 if (ret) {
1377                         /* No smbd holds this lock, so there is no smbpid - flag with 0xFFFF (-1). */
1378                         *psmbpid = 0xFFFF;
1379                         return NT_STATUS_LOCK_NOT_GRANTED;
1380                 }
1381         }
1382
1383         return NT_STATUS_OK;
1384 }
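
/* Illustrative sketch only (not part of the original file): one way a
   caller might probe for a conflicting lock via brl_lockquery(). The
   query values are arbitrary; on conflict the holder's smbpid, start,
   size and lock type come back through the pointer arguments. */

static void example_brl_lockquery_usage(struct byte_range_lock *br_lck)
{
        uint32 smbpid = 0;
        br_off start = 0;
        br_off size = 100;
        enum brl_type lock_type = WRITE_LOCK;
        NTSTATUS status;

        status = brl_lockquery(br_lck, &smbpid, procid_self(),
                        &start, &size, &lock_type, WINDOWS_LOCK);
        if (NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
                DEBUG(10,("example: conflict with smbpid %u at %.0f/%.0f\n",
                        (unsigned int)smbpid, (double)start, (double)size));
        }
}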
1385
1386
1387 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1388                                      struct byte_range_lock *br_lck,
1389                                      struct lock_struct *plock,
1390                                      struct blocking_lock_record *blr)
1391 {
1392         VFS_FIND(brl_cancel_windows);
1393         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1394 }
1395
1396 /****************************************************************************
1397  Remove a particular pending lock.
1398 ****************************************************************************/
1399 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1400                 uint32 smbpid,
1401                 struct server_id pid,
1402                 br_off start,
1403                 br_off size,
1404                 enum brl_flavour lock_flav,
1405                 struct blocking_lock_record *blr)
1406 {
1407         bool ret;
1408         struct lock_struct lock;
1409
1410         lock.context.smbpid = smbpid;
1411         lock.context.pid = pid;
1412         lock.context.tid = br_lck->fsp->conn->cnum;
1413         lock.start = start;
1414         lock.size = size;
1415         lock.fnum = br_lck->fsp->fnum;
1416         lock.lock_flav = lock_flav;
1417         /* lock.lock_type doesn't matter */
1418
1419         if (lock_flav == WINDOWS_LOCK) {
1420                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1421                     &lock, blr);
1422         } else {
1423                 ret = brl_lock_cancel_default(br_lck, &lock);
1424         }
1425
1426         return ret;
1427 }
1428
1429 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1430                 struct lock_struct *plock)
1431 {
1432         unsigned int i;
1433         struct lock_struct *locks = br_lck->lock_data;
1434
1435         SMB_ASSERT(plock);
1436
1437         for (i = 0; i < br_lck->num_locks; i++) {
1438                 struct lock_struct *lock = &locks[i];
1439
1440                 /* For pending locks we *always* care about the fnum. */
1441                 if (brl_same_context(&lock->context, &plock->context) &&
1442                                 lock->fnum == plock->fnum &&
1443                                 IS_PENDING_LOCK(lock->lock_type) &&
1444                                 lock->lock_flav == plock->lock_flav &&
1445                                 lock->start == plock->start &&
1446                                 lock->size == plock->size) {
1447                         break;
1448                 }
1449         }
1450
1451         if (i == br_lck->num_locks) {
1452                 /* Didn't find it. */
1453                 return False;
1454         }
1455
1456         /* Found this particular pending lock - delete it. */
1457         if (i < br_lck->num_locks - 1) {
1458                 memmove(&locks[i], &locks[i+1],
1459                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1460         }
1461
1462         br_lck->num_locks -= 1;
1463         br_lck->modified = True;
1464         return True;
1465 }
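
/* A standalone miniature (illustrative, not part of the original file) of
   the memmove idiom used above: delete entry i from a packed lock array by
   shifting the tail left one slot and shrinking the count. Assumes the
   caller guarantees i < *pnum_locks. */

static void example_remove_lock_entry(struct lock_struct *locks,
                unsigned int *pnum_locks,
                unsigned int i)
{
        if (i < *pnum_locks - 1) {
                /* No tail to move when i is already the last entry. */
                memmove(&locks[i], &locks[i+1],
                        sizeof(*locks)*((*pnum_locks - 1) - i));
        }
        *pnum_locks -= 1;
}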
1466
1467 /****************************************************************************
1468  Remove any locks associated with an open file.
1469  If this process holds Windows locks on other fnums for the same dev/ino
1470  pair, our locks are released individually so the system POSIX locks stay correct.
1471 ****************************************************************************/
1472
1473 void brl_close_fnum(struct messaging_context *msg_ctx,
1474                     struct byte_range_lock *br_lck)
1475 {
1476         files_struct *fsp = br_lck->fsp;
1477         uint16 tid = fsp->conn->cnum;
1478         int fnum = fsp->fnum;
1479         unsigned int i, j, dcount=0;
1480         int num_deleted_windows_locks = 0;
1481         struct lock_struct *locks = br_lck->lock_data;
1482         struct server_id pid = procid_self();
1483         bool unlock_individually = False;
1484         bool posix_level2_contention_ended = false;
1485
1486         if(lp_posix_locking(fsp->conn->params)) {
1487
1488                 /* Check if there are any Windows locks associated with this dev/ino
1489                    pair that are not this fnum. If so we need to call unlock on each
1490                    one in order to release the system POSIX locks correctly. */
1491
1492                 for (i=0; i < br_lck->num_locks; i++) {
1493                         struct lock_struct *lock = &locks[i];
1494
1495                         if (!procid_equal(&lock->context.pid, &pid)) {
1496                                 continue;
1497                         }
1498
1499                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1500                                 continue; /* Ignore pending. */
1501                         }
1502
1503                         if (lock->context.tid != tid || lock->fnum != fnum) {
1504                                 unlock_individually = True;
1505                                 break;
1506                         }
1507                 }
1508
1509                 if (unlock_individually) {
1510                         struct lock_struct *locks_copy;
1511                         unsigned int num_locks_copy;
1512
1513                         /* Copy the current lock array. */
1514                         if (br_lck->num_locks) {
1515                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1516                                 if (!locks_copy) {
1517                                         smb_panic("brl_close_fnum: talloc failed");
1518                                 }
1519                         } else {        
1520                                 locks_copy = NULL;
1521                         }
1522
1523                         num_locks_copy = br_lck->num_locks;
1524
1525                         for (i=0; i < num_locks_copy; i++) {
1526                                 struct lock_struct *lock = &locks_copy[i];
1527
1528                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1529                                                 (lock->fnum == fnum)) {
1530                                         brl_unlock(msg_ctx,
1531                                                 br_lck,
1532                                                 lock->context.smbpid,
1533                                                 pid,
1534                                                 lock->start,
1535                                                 lock->size,
1536                                                 lock->lock_flav);
1537                                 }
1538                         }
1539                         return;
1540                 }
1541         }
1542
1543         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1544
1545         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1546
1547         for (i=0; i < br_lck->num_locks; i++) {
1548                 struct lock_struct *lock = &locks[i];
1549                 bool del_this_lock = False;
1550
1551                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1552                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1553                                 del_this_lock = True;
1554                                 num_deleted_windows_locks++;
1555                                 contend_level2_oplocks_end(br_lck->fsp,
1556                                     LEVEL2_CONTEND_WINDOWS_BRL);
1557                         } else if (lock->lock_flav == POSIX_LOCK) {
1558                                 del_this_lock = True;
1559
1560                                 /* Only end level2 contention once for posix */
1561                                 if (!posix_level2_contention_ended) {
1562                                         posix_level2_contention_ended = true;
1563                                         contend_level2_oplocks_end(br_lck->fsp,
1564                                             LEVEL2_CONTEND_POSIX_BRL);
1565                                 }
1566                         }
1567                 }
1568
1569                 if (del_this_lock) {
1570                         /* Send unlock messages to any pending waiters that overlap. */
1571                         for (j=0; j < br_lck->num_locks; j++) {
1572                                 struct lock_struct *pend_lock = &locks[j];
1573
1574                                 /* Ignore non-pending locks; our own fnum is skipped below. */
1575                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1576                                         continue;
1577                                 }
1578
1579                                 /* Optimisation - don't send to this fnum as we're
1580                                    closing it. */
1581                                 if (pend_lock->context.tid == tid &&
1582                                     procid_equal(&pend_lock->context.pid, &pid) &&
1583                                     pend_lock->fnum == fnum) {
1584                                         continue;
1585                                 }
1586
1587                                 /* We could send specific lock info here... */
1588                                 if (brl_pending_overlap(lock, pend_lock)) {
1589                                         messaging_send(msg_ctx, pend_lock->context.pid,
1590                                                        MSG_SMB_UNLOCK, &data_blob_null);
1591                                 }
1592                         }
1593
1594                         /* found it - delete it */
1595                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1596                                 memmove(&locks[i], &locks[i+1], 
1597                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1598                         }
1599                         br_lck->num_locks--;
1600                         br_lck->modified = True;
1601                         i--;
1602                         dcount++;
1603                 }
1604         }
1605
1606         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1607                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1608                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1609         }
1610 }
1611
1612 /****************************************************************************
1613  Ensure this set of lock entries is valid.
1614 ****************************************************************************/
1615 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1616 {
1617         unsigned int i;
1618         unsigned int num_valid_entries = 0;
1619         struct lock_struct *locks = *pplocks;
1620
1621         for (i = 0; i < *pnum_entries; i++) {
1622                 struct lock_struct *lock_data = &locks[i];
1623                 if (!serverid_exists(&lock_data->context.pid)) {
1624                         /* This process no longer exists - mark this
1625                            entry as invalid by zeroing it. */
1626                         ZERO_STRUCTP(lock_data);
1627                 } else {
1628                         num_valid_entries++;
1629                 }
1630         }
1631
1632         if (num_valid_entries != *pnum_entries) {
1633                 struct lock_struct *new_lock_data = NULL;
1634
1635                 if (num_valid_entries) {
1636                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1637                         if (!new_lock_data) {
1638                                 DEBUG(3, ("malloc fail\n"));
1639                                 return False;
1640                         }
1641
1642                         num_valid_entries = 0;
1643                         for (i = 0; i < *pnum_entries; i++) {
1644                                 struct lock_struct *lock_data = &locks[i];
1645                                 if (lock_data->context.smbpid &&
1646                                                 lock_data->context.tid) {
1647                                         /* Valid (nonzero) entry - copy it. */
1648                                         memcpy(&new_lock_data[num_valid_entries],
1649                                                 lock_data, sizeof(struct lock_struct));
1650                                         num_valid_entries++;
1651                                 }
1652                         }
1653                 }
1654
1655                 SAFE_FREE(*pplocks);
1656                 *pplocks = new_lock_data;
1657                 *pnum_entries = num_valid_entries;
1658         }
1659
1660         return True;
1661 }
1662
1663 struct brl_forall_cb {
1664         void (*fn)(struct file_id id, struct server_id pid,
1665                    enum brl_type lock_type,
1666                    enum brl_flavour lock_flav,
1667                    br_off start, br_off size,
1668                    void *private_data);
1669         void *private_data;
1670 };
1671
1672 /****************************************************************************
1673  Traverse the whole database with this function, calling traverse_callback
1674  on each lock.
1675 ****************************************************************************/
1676
1677 static int traverse_fn(struct db_record *rec, void *state)
1678 {
1679         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1680         struct lock_struct *locks;
1681         struct file_id *key;
1682         unsigned int i;
1683         unsigned int num_locks = 0;
1684         unsigned int orig_num_locks = 0;
1685
1686         /* In a traverse function we must make a copy of
1687            dbuf before modifying it. */
1688
1689         locks = (struct lock_struct *)memdup(rec->value.dptr,
1690                                              rec->value.dsize);
1691         if (!locks) {
1692                 return -1; /* Terminate traversal. */
1693         }
1694
1695         key = (struct file_id *)rec->key.dptr;
1696         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1697
1698         /* Ensure the lock db is clean of entries from invalid processes. */
1699
1700         if (!validate_lock_entries(&num_locks, &locks)) {
1701                 SAFE_FREE(locks);
1702                 return -1; /* Terminate traversal */
1703         }
1704
1705         if (orig_num_locks != num_locks) {
1706                 if (num_locks) {
1707                         TDB_DATA data;
1708                         data.dptr = (uint8_t *)locks;
1709                         data.dsize = num_locks*sizeof(struct lock_struct);
1710                         rec->store(rec, data, TDB_REPLACE);
1711                 } else {
1712                         rec->delete_rec(rec);
1713                 }
1714         }
1715
1716         if (cb->fn) {
1717                 for ( i=0; i<num_locks; i++) {
1718                         cb->fn(*key,
1719                                 locks[i].context.pid,
1720                                 locks[i].lock_type,
1721                                 locks[i].lock_flav,
1722                                 locks[i].start,
1723                                 locks[i].size,
1724                                 cb->private_data);
1725                 }
1726         }
1727
1728         SAFE_FREE(locks);
1729         return 0;
1730 }
1731
1732 /*******************************************************************
1733  Call the specified function on each lock in the database.
1734 ********************************************************************/
1735
1736 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1737                           enum brl_type lock_type,
1738                           enum brl_flavour lock_flav,
1739                           br_off start, br_off size,
1740                           void *private_data),
1741                void *private_data)
1742 {
1743         struct brl_forall_cb cb;
1744
1745         if (!brlock_db) {
1746                 return 0;
1747         }
1748         cb.fn = fn;
1749         cb.private_data = private_data;
1750         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1751 }
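
/* Sketch (illustrative only): dumping every lock in the database through
   brl_forall(). The callback matches the prototype above; the function
   names and DEBUG format here are hypothetical. */

static void example_dump_lock(struct file_id id, struct server_id pid,
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
                br_off start, br_off size,
                void *private_data)
{
        DEBUG(10,("example: %s %s lock start=%.0f size=%.0f\n",
                lock_type_name(lock_type), lock_flav_name(lock_flav),
                (double)start, (double)size));
}

static int example_dump_all_locks(void)
{
        return brl_forall(example_dump_lock, NULL);
}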
1752
1753 /*******************************************************************
1754  Store a potentially modified set of byte range lock data back into
1755  the database.
1756  Unlock the record.
1757 ********************************************************************/
1758
1759 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1760 {
1761         if (br_lck->read_only) {
1762                 SMB_ASSERT(!br_lck->modified);
1763         }
1764
1765         if (!br_lck->modified) {
1766                 goto done;
1767         }
1768
1769         if (br_lck->num_locks == 0) {
1770                 /* No locks - delete this entry. */
1771                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1772                 if (!NT_STATUS_IS_OK(status)) {
1773                         DEBUG(0, ("delete_rec returned %s\n",
1774                                   nt_errstr(status)));
1775                         smb_panic("Could not delete byte range lock entry");
1776                 }
1777         } else {
1778                 TDB_DATA data;
1779                 NTSTATUS status;
1780
1781                 data.dptr = (uint8 *)br_lck->lock_data;
1782                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1783
1784                 status = br_lck->record->store(br_lck->record, data,
1785                                                TDB_REPLACE);
1786                 if (!NT_STATUS_IS_OK(status)) {
1787                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1788                         smb_panic("Could not store byte range lock entry");
1789                 }
1790         }
1791
1792  done:
1793
1794         SAFE_FREE(br_lck->lock_data);
1795         TALLOC_FREE(br_lck->record);
1796         return 0;
1797 }
1798
1799 /*******************************************************************
1800  Fetch a set of byte range lock data from the database.
1801  Leave the record locked.
1802  TALLOC_FREE(brl) will release the lock in the destructor.
1803 ********************************************************************/
1804
1805 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1806                                         files_struct *fsp, bool read_only)
1807 {
1808         TDB_DATA key, data;
1809         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1810
1811         if (br_lck == NULL) {
1812                 return NULL;
1813         }
1814
1815         br_lck->fsp = fsp;
1816         br_lck->num_locks = 0;
1817         br_lck->modified = False;
1818         br_lck->key = fsp->file_id;
1819
1820         key.dptr = (uint8 *)&br_lck->key;
1821         key.dsize = sizeof(struct file_id);
1822
1823         if (!fsp->lockdb_clean) {
1824                 /* We must be read/write to clean
1825                    the dead entries. */
1826                 read_only = False;
1827         }
1828
1829         if (read_only) {
1830                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1831                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1832                         TALLOC_FREE(br_lck);
1833                         return NULL;
1834                 }
1835                 br_lck->record = NULL;
1836         }
1837         else {
1838                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1839
1840                 if (br_lck->record == NULL) {
1841                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1842                         TALLOC_FREE(br_lck);
1843                         return NULL;
1844                 }
1845
1846                 data = br_lck->record->value;
1847         }
1848
1849         br_lck->read_only = read_only;
1850         br_lck->lock_data = NULL;
1851
1852         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1853
1854         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1855
1856         if (br_lck->num_locks != 0) {
1857                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1858                                                      br_lck->num_locks);
1859                 if (br_lck->lock_data == NULL) {
1860                         DEBUG(0, ("malloc failed\n"));
1861                         TALLOC_FREE(br_lck);
1862                         return NULL;
1863                 }
1864
1865                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1866         }
1867         
1868         if (!fsp->lockdb_clean) {
1869                 unsigned int orig_num_locks = br_lck->num_locks;
1870
1871                 /* This is the first time we've accessed this. */
1872                 /* Go through and ensure all entries exist - remove any that don't. */
1873                 /* Makes the lockdb self cleaning at low cost. */
1874
1875                 if (!validate_lock_entries(&br_lck->num_locks,
1876                                            &br_lck->lock_data)) {
1877                         SAFE_FREE(br_lck->lock_data);
1878                         TALLOC_FREE(br_lck);
1879                         return NULL;
1880                 }
1881
1882                 /* Ensure invalid locks are cleaned up in the destructor. */
1883                 if (orig_num_locks != br_lck->num_locks) {
1884                         br_lck->modified = True;
1885                 }
1886
1887                 /* Mark the lockdb as "clean" as seen from this open file. */
1888                 fsp->lockdb_clean = True;
1889         }
1890
1891         if (DEBUGLEVEL >= 10) {
1892                 unsigned int i;
1893                 struct lock_struct *locks = br_lck->lock_data;
1894                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1895                         br_lck->num_locks,
1896                         file_id_string_tos(&fsp->file_id)));
1897                 for( i = 0; i < br_lck->num_locks; i++) {
1898                         print_lock_struct(i, &locks[i]);
1899                 }
1900         }
1901         return br_lck;
1902 }
1903
1904 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1905                                         files_struct *fsp)
1906 {
1907         return brl_get_locks_internal(mem_ctx, fsp, False);
1908 }
1909
1910 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1911 {
1912         struct byte_range_lock *br_lock;
1913
1914         if (lp_clustering()) {
1915                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1916         }
1917
1918         if ((fsp->brlock_rec != NULL)
1919             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1920                 return fsp->brlock_rec;
1921         }
1922
1923         TALLOC_FREE(fsp->brlock_rec);
1924
1925         br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
1926         if (br_lock == NULL) {
1927                 return NULL;
1928         }
1929         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1930
1931         fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
1932         if (fsp->brlock_rec == NULL) {
1933                 goto fail;
1934         }
1935         fsp->brlock_rec->fsp = fsp;
1936         fsp->brlock_rec->num_locks = br_lock->num_locks;
1937         fsp->brlock_rec->read_only = true;
1938         fsp->brlock_rec->key = br_lock->key;
1939
1940         fsp->brlock_rec->lock_data = (struct lock_struct *)
1941                 talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
1942                               sizeof(struct lock_struct) * br_lock->num_locks);
1943         if (fsp->brlock_rec->lock_data == NULL) {
1944                 goto fail;
1945         }
1946
1947         TALLOC_FREE(br_lock);
1948         return fsp->brlock_rec;
1949 fail:
1950         TALLOC_FREE(br_lock);
1951         TALLOC_FREE(fsp->brlock_rec);
1952         return NULL;
1953 }
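
/* Sketch of the intended lifecycle (illustrative, not part of the original
   file): fetch the locked record, mutate it via the brl_* calls, then
   TALLOC_FREE() the handle - byte_range_lock_destructor() stores any
   modifications and releases the record lock. */

static void example_lock_roundtrip(TALLOC_CTX *mem_ctx, files_struct *fsp)
{
        struct byte_range_lock *br_lck = brl_get_locks(mem_ctx, fsp);

        if (br_lck == NULL) {
                return;
        }

        /* ... call brl_lock()/brl_unlock() against br_lck here ... */

        TALLOC_FREE(br_lck);    /* Destructor stores and unlocks. */
}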
1954
1955 struct brl_revalidate_state {
1956         ssize_t array_size;
1957         uint32 num_pids;
1958         struct server_id *pids;
1959 };
1960
1961 /*
1962  * Collect PIDs of all processes with pending entries
1963  */
1964
1965 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1966                                    enum brl_type lock_type,
1967                                    enum brl_flavour lock_flav,
1968                                    br_off start, br_off size,
1969                                    void *private_data)
1970 {
1971         struct brl_revalidate_state *state =
1972                 (struct brl_revalidate_state *)private_data;
1973
1974         if (!IS_PENDING_LOCK(lock_type)) {
1975                 return;
1976         }
1977
1978         add_to_large_array(state, sizeof(pid), (void *)&pid,
1979                            &state->pids, &state->num_pids,
1980                            &state->array_size);
1981 }
1982
1983 /*
1984  * qsort callback to sort the processes
1985  */
1986
1987 static int compare_procids(const void *p1, const void *p2)
1988 {
1989         const struct server_id *i1 = (struct server_id *)p1;
1990         const struct server_id *i2 = (struct server_id *)p2;
1991
1992         if (i1->pid < i2->pid) return -1;
1993         if (i1->pid > i2->pid) return 1;
1994         return 0;
1995 }
1996
1997 /*
1998  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1999  * locks so that they retry. Mainly used in the cluster code after a node has
2000  * died.
2001  *
2002  * Done in two steps to avoid double-sends: first we collect all entries in an
2003  * array, then qsort it and send only once per unique PID.
2004  */
2005
2006 static void brl_revalidate(struct messaging_context *msg_ctx,
2007                            void *private_data,
2008                            uint32_t msg_type,
2009                            struct server_id server_id,
2010                            DATA_BLOB *data)
2011 {
2012         struct brl_revalidate_state *state;
2013         uint32 i;
2014         struct server_id last_pid;
2015
2016         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2017                 DEBUG(0, ("talloc failed\n"));
2018                 return;
2019         }
2020
2021         brl_forall(brl_revalidate_collect, state);
2022
2023         if (state->array_size == -1) {
2024                 DEBUG(0, ("talloc failed\n"));
2025                 goto done;
2026         }
2027
2028         if (state->num_pids == 0) {
2029                 goto done;
2030         }
2031
2032         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2033
2034         ZERO_STRUCT(last_pid);
2035
2036         for (i=0; i<state->num_pids; i++) {
2037                 if (procid_equal(&last_pid, &state->pids[i])) {
2038                         /*
2039                          * We've seen that one already
2040                          */
2041                         continue;
2042                 }
2043
2044                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2045                                &data_blob_null);
2046                 last_pid = state->pids[i];
2047         }
2048
2049  done:
2050         TALLOC_FREE(state);
2051         return;
2052 }
2053
2054 void brl_register_msgs(struct messaging_context *msg_ctx)
2055 {
2056         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2057                            brl_revalidate);
2058 }
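
/* Sketch (illustrative only): once brl_register_msgs() has run in the
   receiving process, any smbd can trigger a revalidation pass by sending
   MSG_SMB_BRL_VALIDATE. The function name is hypothetical. */

static void example_trigger_brl_revalidate(struct messaging_context *msg_ctx,
                struct server_id target)
{
        messaging_send(msg_ctx, target, MSG_SMB_BRL_VALIDATE,
                       &data_blob_null);
}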