s3: only include gen_ndr headers where needed.
kai/samba.git: source3/locking/brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28 #include "librpc/gen_ndr/messaging.h"
29
30 #undef DBGC_CLASS
31 #define DBGC_CLASS DBGC_LOCKING
32
33 #define ZERO_ZERO 0
34
35 /* The open brlock.tdb database. */
36
37 static struct db_context *brlock_db;
38
39 /****************************************************************************
40  Debug info at level 10 for lock struct.
41 ****************************************************************************/
42
43 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
44 {
45         DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %s, ",
46                         i,
47                         (unsigned int)pls->context.smbpid,
48                         (unsigned int)pls->context.tid,
49                         procid_str(talloc_tos(), &pls->context.pid) ));
50         
51         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
52                 (double)pls->start,
53                 (double)pls->size,
54                 pls->fnum,
55                 lock_type_name(pls->lock_type),
56                 lock_flav_name(pls->lock_flav) ));
57 }
58
59 /****************************************************************************
60  See if two locking contexts are equal.
61 ****************************************************************************/
62
63 bool brl_same_context(const struct lock_context *ctx1, 
64                              const struct lock_context *ctx2)
65 {
66         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
67                 (ctx1->smbpid == ctx2->smbpid) &&
68                 (ctx1->tid == ctx2->tid));
69 }
70
71 /****************************************************************************
72  See if lck1 and lck2 overlap.
73 ****************************************************************************/
74
75 static bool brl_overlap(const struct lock_struct *lck1,
76                         const struct lock_struct *lck2)
77 {
78         /* XXX Remove for Win7 compatibility. */
79         /* this extra check is not redundant - it copes with locks
80            that go beyond the end of 64 bit file space */
81         if (lck1->size != 0 &&
82             lck1->start == lck2->start &&
83             lck1->size == lck2->size) {
84                 return True;
85         }
86
87         if (lck1->start >= (lck2->start+lck2->size) ||
88             lck2->start >= (lck1->start+lck1->size)) {
89                 return False;
90         }
91         return True;
92 }
93
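/* ---- Editor's note: a minimal sketch (not part of the original file)
   of the overlap semantics above. Ranges behave as half-open intervals
   [start, start+size), so adjacent ranges do not overlap; the extra
   first test keeps identical ranges that extend past the end of 64-bit
   file space conflicting. Compiled out, illustrative only. ---- */
#if 0
static void brl_overlap_example(void)
{
	struct lock_struct a, b;

	ZERO_STRUCT(a);
	ZERO_STRUCT(b);

	a.start = 0;  a.size = 10;		/* covers bytes 0..9   */
	b.start = 10; b.size = 5;		/* covers bytes 10..14 */

	SMB_ASSERT(!brl_overlap(&a, &b));	/* adjacent, no overlap */

	b.start = 9;				/* now shares byte 9 */
	SMB_ASSERT(brl_overlap(&a, &b));
}
#endif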
94 /****************************************************************************
95  See if lock2 can be added when lock1 is in place.
96 ****************************************************************************/
97
98 static bool brl_conflict(const struct lock_struct *lck1, 
99                          const struct lock_struct *lck2)
100 {
101         /* Ignore PENDING locks. */
102         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
103                 return False;
104
105         /* Read locks never conflict. */
106         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
107                 return False;
108         }
109
110         /* A READ lock can stack on top of a WRITE lock if they have the same
111          * context & fnum. */
112         if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
113             brl_same_context(&lck1->context, &lck2->context) &&
114             lck1->fnum == lck2->fnum) {
115                 return False;
116         }
117
118         return brl_overlap(lck1, lck2);
119 }
120
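/* ---- Editor's note: a sketch (not part of the original file) of the
   read-on-write stacking rule checked above. All values are hypothetical.
   A READ lock stacks on an existing WRITE lock only when both the locking
   context and the fnum match; change either and the locks conflict again.
   Compiled out, illustrative only. ---- */
#if 0
static void brl_stacking_example(void)
{
	struct lock_struct wr, rd;

	ZERO_STRUCT(wr);
	wr.context.smbpid = 1234;	/* hypothetical context */
	wr.context.tid = 1;
	wr.fnum = 42;
	wr.start = 0;
	wr.size = 100;
	wr.lock_type = WRITE_LOCK;
	wr.lock_flav = WINDOWS_LOCK;

	rd = wr;
	rd.lock_type = READ_LOCK;

	/* Same context and fnum: the read stacks on the write. */
	SMB_ASSERT(!brl_conflict(&wr, &rd));

	/* Different fnum: now it conflicts. */
	rd.fnum = 43;
	SMB_ASSERT(brl_conflict(&wr, &rd));
}
#endif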
121 /****************************************************************************
122  See if lock2 can be added when lock1 is in place - when both locks are POSIX
123  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
124  know already match.
125 ****************************************************************************/
126
127 static bool brl_conflict_posix(const struct lock_struct *lck1, 
128                                 const struct lock_struct *lck2)
129 {
130 #if defined(DEVELOPER)
131         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
132         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
133 #endif
134
135         /* Ignore PENDING locks. */
136         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
137                 return False;
138
139         /* Read locks never conflict. */
140         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
141                 return False;
142         }
143
144         /* Locks on the same context don't conflict. Ignore fnum. */
145         if (brl_same_context(&lck1->context, &lck2->context)) {
146                 return False;
147         }
148
149         /* One is read, the other write, or the context is different,
150            do they overlap ? */
151         return brl_overlap(lck1, lck2);
152 }
153
154 #if ZERO_ZERO
155 static bool brl_conflict1(const struct lock_struct *lck1, 
156                          const struct lock_struct *lck2)
157 {
158         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
159                 return False;
160
161         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
162                 return False;
163         }
164
165         if (brl_same_context(&lck1->context, &lck2->context) &&
166             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
167                 return False;
168         }
169
170         if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
171                 return True;
172         }
173
174         if (lck1->start >= (lck2->start + lck2->size) ||
175             lck2->start >= (lck1->start + lck1->size)) {
176                 return False;
177         }
178             
179         return True;
180 }
181 #endif
182
183 /****************************************************************************
184  Check to see if this lock conflicts, but ignore our own locks on the
185  same fnum only. This is the read/write lock check code path.
186  This is never used in the POSIX lock case.
187 ****************************************************************************/
188
189 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
190 {
191         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
192                 return False;
193
194         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
195                 return False;
196
197         /* POSIX flavour locks never conflict here - this is only called
198            in the read/write path. */
199
200         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
201                 return False;
202
203         /*
204          * Incoming WRITE locks conflict with existing READ locks even
205          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
206          */
207
208         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
209                 if (brl_same_context(&lck1->context, &lck2->context) &&
210                                         lck1->fnum == lck2->fnum)
211                         return False;
212         }
213
214         return brl_overlap(lck1, lck2);
215 }
216
217 /****************************************************************************
218  Check if an unlock overlaps a pending lock.
219 ****************************************************************************/
220
221 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
222 {
223         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
224                 return True;
225         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
226                 return True;
227         return False;
228 }
229
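/* Editor's note (not in the original file): the second test above is
   inclusive at the end of the pending range, so an unlock that merely
   touches the byte just past a pending lock still wakes the waiter.
   This generosity appears deliberate - the worst case is a spurious
   lock retry by the woken process, never a missed wakeup. */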
230 /****************************************************************************
231  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
232  is the same as this one and changes its error code. I wonder if any
233  app depends on this ?
234 ****************************************************************************/
235
236 NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
237 {
238         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
239                 /* amazing the little things you learn with a test
240                    suite. Locks beyond this offset (as a 64 bit
241                    number!) always generate the conflict error code,
242                    unless the top bit is set */
243                 if (!blocking_lock) {
244                         fsp->last_lock_failure = *lock;
245                 }
246                 return NT_STATUS_FILE_LOCK_CONFLICT;
247         }
248
249         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
250                         lock->context.tid == fsp->last_lock_failure.context.tid &&
251                         lock->fnum == fsp->last_lock_failure.fnum &&
252                         lock->start == fsp->last_lock_failure.start) {
253                 return NT_STATUS_FILE_LOCK_CONFLICT;
254         }
255
256         if (!blocking_lock) {
257                 fsp->last_lock_failure = *lock;
258         }
259         return NT_STATUS_LOCK_NOT_GRANTED;
260 }
261
262 /****************************************************************************
263  Open up the brlock.tdb database.
264 ****************************************************************************/
265
266 void brl_init(bool read_only)
267 {
268         int tdb_flags;
269
270         if (brlock_db) {
271                 return;
272         }
273
274         tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;
275
276         if (!lp_clustering()) {
277                 /*
278                  * We can't use the SEQNUM trick to cache brlock
279                  * entries in the clustering case because ctdb seqnum
280                  * propagation has a delay.
281                  */
282                 tdb_flags |= TDB_SEQNUM;
283         }
284
285         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
286                             lp_open_files_db_hash_size(), tdb_flags,
287                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
288         if (!brlock_db) {
289                 DEBUG(0,("Failed to open byte range locking database %s\n",
290                         lock_path("brlock.tdb")));
291                 return;
292         }
293 }
294
295 /****************************************************************************
296  Close down the brlock.tdb database.
297 ****************************************************************************/
298
299 void brl_shutdown(void)
300 {
301         TALLOC_FREE(brlock_db);
302 }
303
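/* ---- Editor's note: a hypothetical caller sketch, not from the
   original file. smbd opens the database read/write once at startup;
   a tool that only inspects brlock.tdb would pass read_only = true.
   Compiled out, illustrative only. ---- */
#if 0
static void brl_init_example(void)
{
	brl_init(false);	/* open (creating if needed) brlock.tdb r/w */

	/* ... process SMB byte-range lock requests ... */

	brl_shutdown();		/* TALLOC_FREE the db context on exit */
}
#endif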
304 #if ZERO_ZERO
305 /****************************************************************************
306  Compare two locks for sorting.
307 ****************************************************************************/
308
309 static int lock_compare(const struct lock_struct *lck1, 
310                          const struct lock_struct *lck2)
311 {
312         if (lck1->start != lck2->start) {
313                 return (lck1->start - lck2->start);
314         }
315         if (lck2->size != lck1->size) {
316                 return ((int)lck1->size - (int)lck2->size);
317         }
318         return 0;
319 }
320 #endif
321
322 /****************************************************************************
323  Lock a range of bytes - Windows lock semantics.
324 ****************************************************************************/
325
326 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
327     struct lock_struct *plock, bool blocking_lock)
328 {
329         unsigned int i;
330         files_struct *fsp = br_lck->fsp;
331         struct lock_struct *locks = br_lck->lock_data;
332         NTSTATUS status;
333
334         SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
335
336         for (i=0; i < br_lck->num_locks; i++) {
337                 if (locks[i].start + locks[i].size < locks[i].start) {
338                         /* 64-bit wrap. Error. */
339                         return NT_STATUS_INVALID_LOCK_RANGE;
340                 }
341
342                 /* Do any Windows or POSIX locks conflict ? */
343                 if (brl_conflict(&locks[i], plock)) {
344                         /* Remember who blocked us. */
345                         plock->context.smbpid = locks[i].context.smbpid;
346                         return brl_lock_failed(fsp,plock,blocking_lock);
347                 }
348 #if ZERO_ZERO
349                 if (plock->start == 0 && plock->size == 0 && 
350                                 locks[i].size == 0) {
351                         break;
352                 }
353 #endif
354         }
355
356         if (!IS_PENDING_LOCK(plock->lock_type)) {
357                 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
358         }
359
360         /* We can get the Windows lock, now see if it needs to
361            be mapped into a lower level POSIX one, and if so can
362            we get it ? */
363
364         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
365                 int errno_ret;
366                 if (!set_posix_lock_windows_flavour(fsp,
367                                 plock->start,
368                                 plock->size,
369                                 plock->lock_type,
370                                 &plock->context,
371                                 locks,
372                                 br_lck->num_locks,
373                                 &errno_ret)) {
374
375                         /* We don't know who blocked us. */
376                         plock->context.smbpid = 0xFFFFFFFF;
377
378                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
379                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
380                                 goto fail;
381                         } else {
382                                 status = map_nt_error_from_unix(errno);
383                                 goto fail;
384                         }
385                 }
386         }
387
388         /* no conflicts - add it to the list of locks */
389         locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
390         if (!locks) {
391                 status = NT_STATUS_NO_MEMORY;
392                 goto fail;
393         }
394
395         memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
396         br_lck->num_locks += 1;
397         br_lck->lock_data = locks;
398         br_lck->modified = True;
399
400         return NT_STATUS_OK;
401  fail:
402         if (!IS_PENDING_LOCK(plock->lock_type)) {
403                 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
404         }
405         return status;
406 }
407
408 /****************************************************************************
409  Cope with POSIX range splits and merges.
410 ****************************************************************************/
411
412 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
413                                                 struct lock_struct *ex,         /* existing lock. */
414                                                 struct lock_struct *plock)      /* proposed lock. */
415 {
416         bool lock_types_differ = (ex->lock_type != plock->lock_type);
417
418         /* We can't merge non-conflicting locks on different context - ignore fnum. */
419
420         if (!brl_same_context(&ex->context, &plock->context)) {
421                 /* Just copy. */
422                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
423                 return 1;
424         }
425
426         /* We now know we have the same context. */
427
428         /* Did we overlap ? */
429
430 /*********************************************
431                                         +---------+
432                                         | ex      |
433                                         +---------+
434                          +-------+
435                          | plock |
436                          +-------+
437 OR....
438         +---------+
439         |  ex     |
440         +---------+
441 **********************************************/
442
443         if ( (ex->start > (plock->start + plock->size)) ||
444                 (plock->start > (ex->start + ex->size))) {
445
446                 /* No overlap with this lock - copy existing. */
447
448                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
449                 return 1;
450         }
451
452 /*********************************************
453         +---------------------------+
454         |          ex               |
455         +---------------------------+
456         +---------------------------+
457         |       plock               | -> replace with plock.
458         +---------------------------+
459 OR
460              +---------------+
461              |       ex      |
462              +---------------+
463         +---------------------------+
464         |       plock               | -> replace with plock.
465         +---------------------------+
466
467 **********************************************/
468
469         if ( (ex->start >= plock->start) &&
470                 (ex->start + ex->size <= plock->start + plock->size) ) {
471
472                 /* Replace - discard existing lock. */
473
474                 return 0;
475         }
476
477 /*********************************************
478 Adjacent after.
479                         +-------+
480                         |  ex   |
481                         +-------+
482         +---------------+
483         |   plock       |
484         +---------------+
485
486 BECOMES....
487         +---------------+-------+
488         |   plock       | ex    | - different lock types.
489         +---------------+-------+
490 OR.... (merge)
491         +-----------------------+
492         |   plock               | - same lock type.
493         +-----------------------+
494 **********************************************/
495
496         if (plock->start + plock->size == ex->start) {
497
498                 /* If the lock types are the same, we merge, if different, we
499                    add the remainder of the old lock. */
500
501                 if (lock_types_differ) {
502                         /* Add existing. */
503                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
504                         return 1;
505                 } else {
506                         /* Merge - adjust incoming lock as we may have more
507                          * merging to come. */
508                         plock->size += ex->size;
509                         return 0;
510                 }
511         }
512
513 /*********************************************
514 Adjacent before.
515         +-------+
516         |  ex   |
517         +-------+
518                 +---------------+
519                 |   plock       |
520                 +---------------+
521 BECOMES....
522         +-------+---------------+
523         | ex    |   plock       | - different lock types
524         +-------+---------------+
525
526 OR.... (merge)
527         +-----------------------+
528         |      plock            | - same lock type.
529         +-----------------------+
530
531 **********************************************/
532
533         if (ex->start + ex->size == plock->start) {
534
535                 /* If the lock types are the same, we merge, if different, we
536                    add the existing lock. */
537
538                 if (lock_types_differ) {
539                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
540                         return 1;
541                 } else {
542                         /* Merge - adjust incoming lock as we may have more
543                          * merging to come. */
544                         plock->start = ex->start;
545                         plock->size += ex->size;
546                         return 0;
547                 }
548         }
549
550 /*********************************************
551 Overlap after.
552         +-----------------------+
553         |          ex           |
554         +-----------------------+
555         +---------------+
556         |   plock       |
557         +---------------+
558 OR
559                +----------------+
560                |       ex       |
561                +----------------+
562         +---------------+
563         |   plock       |
564         +---------------+
565
566 BECOMES....
567         +---------------+-------+
568         |   plock       | ex    | - different lock types.
569         +---------------+-------+
570 OR.... (merge)
571         +-----------------------+
572         |   plock               | - same lock type.
573         +-----------------------+
574 **********************************************/
575
576         if ( (ex->start >= plock->start) &&
577                 (ex->start <= plock->start + plock->size) &&
578                 (ex->start + ex->size > plock->start + plock->size) ) {
579
580                 /* If the lock types are the same, we merge, if different, we
581                    add the remainder of the old lock. */
582
583                 if (lock_types_differ) {
584                         /* Add remaining existing. */
585                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
586                         /* Adjust existing start and size. */
587                         lck_arr[0].start = plock->start + plock->size;
588                         lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
589                         return 1;
590                 } else {
591                         /* Merge - adjust incoming lock as we may have more
592                          * merging to come. */
593                         plock->size += (ex->start + ex->size) - (plock->start + plock->size);
594                         return 0;
595                 }
596         }
597
598 /*********************************************
599 Overlap before.
600         +-----------------------+
601         |  ex                   |
602         +-----------------------+
603                 +---------------+
604                 |   plock       |
605                 +---------------+
606 OR
607         +-------------+
608         |  ex         |
609         +-------------+
610                 +---------------+
611                 |   plock       |
612                 +---------------+
613
614 BECOMES....
615         +-------+---------------+
616         | ex    |   plock       | - different lock types
617         +-------+---------------+
618
619 OR.... (merge)
620         +-----------------------+
621         |      plock            | - same lock type.
622         +-----------------------+
623
624 **********************************************/
625
626         if ( (ex->start < plock->start) &&
627                         (ex->start + ex->size >= plock->start) &&
628                         (ex->start + ex->size <= plock->start + plock->size) ) {
629
630                 /* If the lock types are the same, we merge, if different, we
631                    add the truncated old lock. */
632
633                 if (lock_types_differ) {
634                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
635                         /* Adjust existing size. */
636                         lck_arr[0].size = plock->start - ex->start;
637                         return 1;
638                 } else {
639                         /* Merge - adjust incoming lock as we may have more
640                          * merging to come. MUST ADJUST plock SIZE FIRST ! */
641                         plock->size += (plock->start - ex->start);
642                         plock->start = ex->start;
643                         return 0;
644                 }
645         }
646
647 /*********************************************
648 Complete overlap.
649         +---------------------------+
650         |        ex                 |
651         +---------------------------+
652                 +---------+
653                 |  plock  |
654                 +---------+
655 BECOMES.....
656         +-------+---------+---------+
657         | ex    |  plock  | ex      | - different lock types.
658         +-------+---------+---------+
659 OR
660         +---------------------------+
661         |        plock              | - same lock type.
662         +---------------------------+
663 **********************************************/
664
665         if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
666
667                 if (lock_types_differ) {
668
669                         /* We have to split ex into two locks here. */
670
671                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
672                         memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
673
674                         /* Adjust first existing size. */
675                         lck_arr[0].size = plock->start - ex->start;
676
677                         /* Adjust second existing start and size. */
678                         lck_arr[1].start = plock->start + plock->size;
679                         lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
680                         return 2;
681                 } else {
682                         /* Just eat the existing locks, merge them into plock. */
683                         plock->start = ex->start;
684                         plock->size = ex->size;
685                         return 0;
686                 }
687         }
688
689         /* Never get here. */
690         smb_panic("brlock_posix_split_merge");
691         /* Notreached. */
692
693         /* Keep some compilers happy. */
694         return 0;
695 }
696
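/* ---- Editor's note: a worked example (not part of the original file)
   of the "complete overlap" case above. An existing POSIX write lock
   covering [0,100) is split by a proposed read lock covering [40,50):
   two write fragments come back in lck_arr and the caller then inserts
   plock itself. Compiled out, illustrative only. ---- */
#if 0
static void brl_split_example(void)
{
	struct lock_struct ex, plock, out[2];
	unsigned int n;

	ZERO_STRUCT(ex);			/* same (zeroed) context for both */
	ex.start = 0;
	ex.size = 100;
	ex.lock_type = WRITE_LOCK;
	ex.lock_flav = POSIX_LOCK;

	plock = ex;
	plock.start = 40;
	plock.size = 10;
	plock.lock_type = READ_LOCK;		/* differing type forces a split */

	n = brlock_posix_split_merge(out, &ex, &plock);

	SMB_ASSERT(n == 2);
	SMB_ASSERT(out[0].start == 0 && out[0].size == 40);	/* [0,40)   */
	SMB_ASSERT(out[1].start == 50 && out[1].size == 50);	/* [50,100) */
}
#endif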
697 /****************************************************************************
698  Lock a range of bytes - POSIX lock semantics.
699  We must cope with range splits and merges.
700 ****************************************************************************/
701
702 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
703                                struct byte_range_lock *br_lck,
704                                struct lock_struct *plock)
705 {
706         unsigned int i, count, posix_count;
707         struct lock_struct *locks = br_lck->lock_data;
708         struct lock_struct *tp;
709         bool signal_pending_read = False;
710         bool break_oplocks = false;
711         NTSTATUS status;
712
713         /* No zero-zero locks for POSIX. */
714         if (plock->start == 0 && plock->size == 0) {
715                 return NT_STATUS_INVALID_PARAMETER;
716         }
717
718         /* Don't allow 64-bit lock wrap. */
719         if (plock->start + plock->size < plock->start ||
720                         plock->start + plock->size < plock->size) {
721                 return NT_STATUS_INVALID_PARAMETER;
722         }
723
724         /* The worst case scenario here is we have to split an
725            existing POSIX lock range into two, and add our lock,
726            so we need at most 2 more entries. */
727
728         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
729         if (!tp) {
730                 return NT_STATUS_NO_MEMORY;
731         }
732
733         count = posix_count = 0;
734
735         for (i=0; i < br_lck->num_locks; i++) {
736                 struct lock_struct *curr_lock = &locks[i];
737
738                 /* If we have a pending read lock, a lock downgrade should
739                    trigger a lock re-evaluation. */
740                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
741                                 brl_pending_overlap(plock, curr_lock)) {
742                         signal_pending_read = True;
743                 }
744
745                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
746                         /* Do any Windows flavour locks conflict ? */
747                         if (brl_conflict(curr_lock, plock)) {
748                                 /* No games with error messages. */
749                                 SAFE_FREE(tp);
750                                 /* Remember who blocked us. */
751                                 plock->context.smbpid = curr_lock->context.smbpid;
752                                 return NT_STATUS_FILE_LOCK_CONFLICT;
753                         }
754                         /* Just copy the Windows lock into the new array. */
755                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
756                         count++;
757                 } else {
758                         unsigned int tmp_count = 0;
759
760                         /* POSIX conflict semantics are different. */
761                         if (brl_conflict_posix(curr_lock, plock)) {
762                                 /* Can't block ourselves with POSIX locks. */
763                                 /* No games with error messages. */
764                                 SAFE_FREE(tp);
765                                 /* Remember who blocked us. */
766                                 plock->context.smbpid = curr_lock->context.smbpid;
767                                 return NT_STATUS_FILE_LOCK_CONFLICT;
768                         }
769
770                         /* Work out overlaps. */
771                         tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
772                         posix_count += tmp_count;
773                         count += tmp_count;
774                 }
775         }
776
777         /*
778          * Break oplocks while we hold a brl. Since lock() and unlock() calls
779          * are not symmetric with POSIX semantics, we cannot guarantee our
780          * contend_level2_oplocks_begin/end calls will be acquired and
781          * released one-for-one as with Windows semantics. Therefore we only
782          * call contend_level2_oplocks_begin if this is the first POSIX brl on
783          * the file.
784          */
785         break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
786                          posix_count == 0);
787         if (break_oplocks) {
788                 contend_level2_oplocks_begin(br_lck->fsp,
789                                              LEVEL2_CONTEND_POSIX_BRL);
790         }
791
792         /* Try and add the lock in order, sorted by lock start. */
793         for (i=0; i < count; i++) {
794                 struct lock_struct *curr_lock = &tp[i];
795
796                 if (curr_lock->start > plock->start) {
797                         break;
798                 }
799         }
800
801         if (i < count) {
802                 memmove(&tp[i+1], &tp[i],
803                         (count - i)*sizeof(struct lock_struct));
804         }
805         memcpy(&tp[i], plock, sizeof(struct lock_struct));
806         count++;
807
808         /* We can get the POSIX lock, now see if it needs to
809            be mapped into a lower level POSIX one, and if so can
810            we get it ? */
811
812         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
813                 int errno_ret;
814
815                 /* The lower layer just needs to attempt to
816                    get the system POSIX lock. We've weeded out
817                    any conflicts above. */
818
819                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
820                                 plock->start,
821                                 plock->size,
822                                 plock->lock_type,
823                                 &errno_ret)) {
824
825                         /* We don't know who blocked us. */
826                         plock->context.smbpid = 0xFFFFFFFF;
827
828                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
829                                 SAFE_FREE(tp);
830                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
831                                 goto fail;
832                         } else {
833                                 SAFE_FREE(tp);
834                                 status = map_nt_error_from_unix(errno);
835                                 goto fail;
836                         }
837                 }
838         }
839
840         /* If we didn't use all the allocated size,
841          * Realloc so we don't leak entries per lock call. */
842         if (count < br_lck->num_locks + 2) {
843                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
844                 if (!tp) {
845                         status = NT_STATUS_NO_MEMORY;
846                         goto fail;
847                 }
848         }
849
850         br_lck->num_locks = count;
851         SAFE_FREE(br_lck->lock_data);
852         br_lck->lock_data = tp;
853         locks = tp;
854         br_lck->modified = True;
855
856         /* A successful downgrade from write to read lock can trigger a lock
857            re-evaluation where waiting readers can now proceed. */
858
859         if (signal_pending_read) {
860                 /* Send unlock messages to any pending read waiters that overlap. */
861                 for (i=0; i < br_lck->num_locks; i++) {
862                         struct lock_struct *pend_lock = &locks[i];
863
864                         /* Ignore non-pending locks. */
865                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
866                                 continue;
867                         }
868
869                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
870                                         brl_pending_overlap(plock, pend_lock)) {
871                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
872                                         procid_str_static(&pend_lock->context.pid )));
873
874                                 messaging_send(msg_ctx, pend_lock->context.pid,
875                                                MSG_SMB_UNLOCK, &data_blob_null);
876                         }
877                 }
878         }
879
880         return NT_STATUS_OK;
881  fail:
882         if (break_oplocks) {
883                 contend_level2_oplocks_end(br_lck->fsp,
884                                            LEVEL2_CONTEND_POSIX_BRL);
885         }
886         return status;
887 }
888
889 NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
890                                        struct byte_range_lock *br_lck,
891                                        struct lock_struct *plock,
892                                        bool blocking_lock,
893                                        struct blocking_lock_record *blr)
894 {
895         VFS_FIND(brl_lock_windows);
896         return handle->fns->brl_lock_windows(handle, br_lck, plock,
897                                              blocking_lock, blr);
898 }
899
900 /****************************************************************************
901  Lock a range of bytes.
902 ****************************************************************************/
903
904 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
905                 struct byte_range_lock *br_lck,
906                 uint32 smbpid,
907                 struct server_id pid,
908                 br_off start,
909                 br_off size, 
910                 enum brl_type lock_type,
911                 enum brl_flavour lock_flav,
912                 bool blocking_lock,
913                 uint32 *psmbpid,
914                 struct blocking_lock_record *blr)
915 {
916         NTSTATUS ret;
917         struct lock_struct lock;
918
919 #if !ZERO_ZERO
920         if (start == 0 && size == 0) {
921                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
922         }
923 #endif
924
925 #ifdef DEVELOPER
926         /* Quieten valgrind on test. */
927         memset(&lock, '\0', sizeof(lock));
928 #endif
929
930         lock.context.smbpid = smbpid;
931         lock.context.pid = pid;
932         lock.context.tid = br_lck->fsp->conn->cnum;
933         lock.start = start;
934         lock.size = size;
935         lock.fnum = br_lck->fsp->fnum;
936         lock.lock_type = lock_type;
937         lock.lock_flav = lock_flav;
938
939         if (lock_flav == WINDOWS_LOCK) {
940                 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
941                     &lock, blocking_lock, blr);
942         } else {
943                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
944         }
945
946 #if ZERO_ZERO
947         /* sort the lock list */
948         TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
949 #endif
950
951         /* If we're returning an error, return who blocked us. */
952         if (!NT_STATUS_IS_OK(ret) && psmbpid) {
953                 *psmbpid = lock.context.smbpid;
954         }
955         return ret;
956 }
957
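/* ---- Editor's note: a hypothetical caller sketch, not from the
   original file. It assumes brl_get_locks() (defined further down in
   brlock.c) to fetch the lock record; all numeric values are made up.
   Compiled out, illustrative only. ---- */
#if 0
static NTSTATUS brl_lock_example(struct messaging_context *msg_ctx,
				 files_struct *fsp)
{
	struct byte_range_lock *br_lck;
	uint32 blocker_smbpid;
	NTSTATUS status;

	br_lck = brl_get_locks(talloc_tos(), fsp);
	if (br_lck == NULL) {
		return NT_STATUS_NO_MEMORY;
	}

	/* Try to write-lock bytes 0..99 on behalf of SMB pid 1234. */
	status = brl_lock(msg_ctx, br_lck,
			  1234,			/* smbpid */
			  procid_self(),	/* our server_id */
			  0,			/* start */
			  100,			/* size */
			  WRITE_LOCK,
			  WINDOWS_LOCK,
			  False,		/* not a blocking lock */
			  &blocker_smbpid,	/* who blocked us, on failure */
			  NULL);		/* no blocking_lock_record */

	TALLOC_FREE(br_lck);	/* destructor persists the record if modified */
	return status;
}
#endif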
958 /****************************************************************************
959  Unlock a range of bytes - Windows semantics.
960 ****************************************************************************/
961
962 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
963                                struct byte_range_lock *br_lck,
964                                const struct lock_struct *plock)
965 {
966         unsigned int i, j;
967         struct lock_struct *locks = br_lck->lock_data;
968         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
969
970         SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
971
972 #if ZERO_ZERO
973         /* Delete write locks by preference... The lock list
974            is sorted in the zero zero case. */
975
976         for (i = 0; i < br_lck->num_locks; i++) {
977                 struct lock_struct *lock = &locks[i];
978
979                 if (lock->lock_type == WRITE_LOCK &&
980                     brl_same_context(&lock->context, &plock->context) &&
981                     lock->fnum == plock->fnum &&
982                     lock->lock_flav == WINDOWS_LOCK &&
983                     lock->start == plock->start &&
984                     lock->size == plock->size) {
985
986                         /* found it - delete it */
987                         deleted_lock_type = lock->lock_type;
988                         break;
989                 }
990         }
991
992         if (i != br_lck->num_locks) {
993                 /* We found it - don't search again. */
994                 goto unlock_continue;
995         }
996 #endif
997
998         for (i = 0; i < br_lck->num_locks; i++) {
999                 struct lock_struct *lock = &locks[i];
1000
1001                 /* Only remove our own locks that match in start, size, and flavour. */
1002                 if (brl_same_context(&lock->context, &plock->context) &&
1003                                         lock->fnum == plock->fnum &&
1004                                         lock->lock_flav == WINDOWS_LOCK &&
1005                                         lock->start == plock->start &&
1006                                         lock->size == plock->size ) {
1007                         deleted_lock_type = lock->lock_type;
1008                         break;
1009                 }
1010         }
1011
1012         if (i == br_lck->num_locks) {
1013                 /* we didn't find it */
1014                 return False;
1015         }
1016
1017 #if ZERO_ZERO
1018   unlock_continue:
1019 #endif
1020
1021         /* Actually delete the lock. */
1022         if (i < br_lck->num_locks - 1) {
1023                 memmove(&locks[i], &locks[i+1], 
1024                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1025         }
1026
1027         br_lck->num_locks -= 1;
1028         br_lck->modified = True;
1029
1030         /* Unlock the underlying POSIX regions. */
1031         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1032                 release_posix_lock_windows_flavour(br_lck->fsp,
1033                                 plock->start,
1034                                 plock->size,
1035                                 deleted_lock_type,
1036                                 &plock->context,
1037                                 locks,
1038                                 br_lck->num_locks);
1039         }
1040
1041         /* Send unlock messages to any pending waiters that overlap. */
1042         for (j=0; j < br_lck->num_locks; j++) {
1043                 struct lock_struct *pend_lock = &locks[j];
1044
1045                 /* Ignore non-pending locks. */
1046                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1047                         continue;
1048                 }
1049
1050                 /* We could send specific lock info here... */
1051                 if (brl_pending_overlap(plock, pend_lock)) {
1052                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1053                                 procid_str_static(&pend_lock->context.pid )));
1054
1055                         messaging_send(msg_ctx, pend_lock->context.pid,
1056                                        MSG_SMB_UNLOCK, &data_blob_null);
1057                 }
1058         }
1059
1060         contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1061         return True;
1062 }
1063
1064 /****************************************************************************
1065  Unlock a range of bytes - POSIX semantics.
1066 ****************************************************************************/
1067
1068 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1069                              struct byte_range_lock *br_lck,
1070                              struct lock_struct *plock)
1071 {
1072         unsigned int i, j, count;
1073         struct lock_struct *tp;
1074         struct lock_struct *locks = br_lck->lock_data;
1075         bool overlap_found = False;
1076
1077         /* No zero-zero locks for POSIX. */
1078         if (plock->start == 0 && plock->size == 0) {
1079                 return False;
1080         }
1081
1082         /* Don't allow 64-bit lock wrap. */
1083         if (plock->start + plock->size < plock->start ||
1084                         plock->start + plock->size < plock->size) {
1085                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1086                 return False;
1087         }
1088
1089         /* The worst case scenario here is we have to split an
1090            existing POSIX lock range into two, so we need at most
1091            1 more entry. */
1092
1093         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
1094         if (!tp) {
1095                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1096                 return False;
1097         }
1098
1099         count = 0;
1100         for (i = 0; i < br_lck->num_locks; i++) {
1101                 struct lock_struct *lock = &locks[i];
1102                 unsigned int tmp_count;
1103
1104                 /* Only remove our own locks - ignore fnum. */
1105                 if (IS_PENDING_LOCK(lock->lock_type) ||
1106                                 !brl_same_context(&lock->context, &plock->context)) {
1107                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1108                         count++;
1109                         continue;
1110                 }
1111
1112                 if (lock->lock_flav == WINDOWS_LOCK) {
1113                         /* Do any Windows flavour locks conflict ? */
1114                         if (brl_conflict(lock, plock)) {
1115                                 SAFE_FREE(tp);
1116                                 return false;
1117                         }
1118                         /* Just copy the Windows lock into the new array. */
1119                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1120                         count++;
1121                         continue;
1122                 }
1123
1124                 /* Work out overlaps. */
1125                 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1126
1127                 if (tmp_count == 0) {
1128                         /* plock overlapped the existing lock completely,
1129                            or replaced it. Don't copy the existing lock. */
1130                         overlap_found = true;
1131                 } else if (tmp_count == 1) {
1132                         /* Either no overlap, (simple copy of existing lock) or
1133                          * an overlap of an existing lock. */
1134                         /* If the lock changed size, we had an overlap. */
1135                         if (tp[count].size != lock->size) {
1136                                 overlap_found = true;
1137                         }
1138                         count += tmp_count;
1139                 } else if (tmp_count == 2) {
1140                         /* We split a lock range in two. */
1141                         overlap_found = true;
1142                         count += tmp_count;
1143
1144                         /* Optimisation... */
1145                         /* We know we're finished here as we can't overlap any
1146                            more POSIX locks. Copy the rest of the lock array. */
1147
1148                         if (i < br_lck->num_locks - 1) {
1149                                 memcpy(&tp[count], &locks[i+1],
1150                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1151                                 count += ((br_lck->num_locks-1) - i);
1152                         }
1153                         break;
1154                 }
1155
1156         }
1157
1158         if (!overlap_found) {
1159                 /* Just ignore - no change. */
1160                 SAFE_FREE(tp);
1161                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1162                 return True;
1163         }
1164
1165         /* Unlock any POSIX regions. */
1166         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1167                 release_posix_lock_posix_flavour(br_lck->fsp,
1168                                                 plock->start,
1169                                                 plock->size,
1170                                                 &plock->context,
1171                                                 tp,
1172                                                 count);
1173         }
1174
1175         /* Realloc so we don't leak entries per unlock call. */
1176         if (count) {
1177                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1178                 if (!tp) {
1179                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1180                         return False;
1181                 }
1182         } else {
1183                 /* We deleted the last lock. */
1184                 SAFE_FREE(tp);
1185                 tp = NULL;
1186         }
1187
1188         contend_level2_oplocks_end(br_lck->fsp,
1189                                    LEVEL2_CONTEND_POSIX_BRL);
1190
1191         br_lck->num_locks = count;
1192         SAFE_FREE(br_lck->lock_data);
1193         locks = tp;
1194         br_lck->lock_data = tp;
1195         br_lck->modified = True;
1196
1197         /* Send unlock messages to any pending waiters that overlap. */
1198
1199         for (j=0; j < br_lck->num_locks; j++) {
1200                 struct lock_struct *pend_lock = &locks[j];
1201
1202                 /* Ignore non-pending locks. */
1203                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1204                         continue;
1205                 }
1206
1207                 /* We could send specific lock info here... */
1208                 if (brl_pending_overlap(plock, pend_lock)) {
1209                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1210                                 procid_str_static(&pend_lock->context.pid )));
1211
1212                         messaging_send(msg_ctx, pend_lock->context.pid,
1213                                        MSG_SMB_UNLOCK, &data_blob_null);
1214                 }
1215         }
1216
1217         return True;
1218 }
1219
1220 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1221                                      struct messaging_context *msg_ctx,
1222                                      struct byte_range_lock *br_lck,
1223                                      const struct lock_struct *plock)
1224 {
1225         VFS_FIND(brl_unlock_windows);
1226         return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
1227 }
1228
1229 /****************************************************************************
1230  Unlock a range of bytes.
1231 ****************************************************************************/
1232
1233 bool brl_unlock(struct messaging_context *msg_ctx,
1234                 struct byte_range_lock *br_lck,
1235                 uint32 smbpid,
1236                 struct server_id pid,
1237                 br_off start,
1238                 br_off size,
1239                 enum brl_flavour lock_flav)
1240 {
1241         struct lock_struct lock;
1242
1243         lock.context.smbpid = smbpid;
1244         lock.context.pid = pid;
1245         lock.context.tid = br_lck->fsp->conn->cnum;
1246         lock.start = start;
1247         lock.size = size;
1248         lock.fnum = br_lck->fsp->fnum;
1249         lock.lock_type = UNLOCK_LOCK;
1250         lock.lock_flav = lock_flav;
1251
1252         if (lock_flav == WINDOWS_LOCK) {
1253                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1254                     br_lck, &lock);
1255         } else {
1256                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1257         }
1258 }
1259
1260 /****************************************************************************
1261  Test if we could add a lock if we wanted to.
1262  Returns True if the region required is currently unlocked, False if locked.
1263 ****************************************************************************/
1264
1265 bool brl_locktest(struct byte_range_lock *br_lck,
1266                 uint32 smbpid,
1267                 struct server_id pid,
1268                 br_off start,
1269                 br_off size, 
1270                 enum brl_type lock_type,
1271                 enum brl_flavour lock_flav)
1272 {
1273         bool ret = True;
1274         unsigned int i;
1275         struct lock_struct lock;
1276         const struct lock_struct *locks = br_lck->lock_data;
1277         files_struct *fsp = br_lck->fsp;
1278
1279         lock.context.smbpid = smbpid;
1280         lock.context.pid = pid;
1281         lock.context.tid = br_lck->fsp->conn->cnum;
1282         lock.start = start;
1283         lock.size = size;
1284         lock.fnum = fsp->fnum;
1285         lock.lock_type = lock_type;
1286         lock.lock_flav = lock_flav;
1287
1288         /* Make sure existing locks don't conflict */
1289         for (i=0; i < br_lck->num_locks; i++) {
1290                 /*
1291                  * Our own locks don't conflict.
1292                  */
1293                 if (brl_conflict_other(&locks[i], &lock)) {
1294                         return False;
1295                 }
1296         }
1297
1298         /*
1299          * There is no lock held by an SMB daemon, check to
1300          * see if there is a POSIX lock from a UNIX or NFS process.
1301          * This only conflicts with Windows locks, not POSIX locks.
1302          */
1303
1304         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1305                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1306
1307                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1308                         (double)start, (double)size, ret ? "locked" : "unlocked",
1309                         fsp->fnum, fsp_str_dbg(fsp)));
1310
1311                 /* We need to return the inverse of is_posix_locked. */
1312                 ret = !ret;
1313         }
1314
1315         /* no conflicts - we could have added it */
1316         return ret;
1317 }
1318
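/* ---- Editor's note: a hypothetical probe, not from the original file.
   brl_locktest() never modifies the database - it only answers "could
   this lock be granted right now?". The read-only fetch below assumes
   brl_get_locks_readonly() from elsewhere in brlock.c. Compiled out,
   illustrative only. ---- */
#if 0
static bool brl_locktest_example(files_struct *fsp)
{
	struct byte_range_lock *br_lck;
	bool can_lock;

	br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
	if (br_lck == NULL) {
		return False;
	}

	can_lock = brl_locktest(br_lck, 1234, procid_self(),
				0, 100, READ_LOCK, WINDOWS_LOCK);

	TALLOC_FREE(br_lck);
	return can_lock;
}
#endif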
1319 /****************************************************************************
1320  Query for existing locks.
1321 ****************************************************************************/
1322
1323 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1324                 uint32 *psmbpid,
1325                 struct server_id pid,
1326                 br_off *pstart,
1327                 br_off *psize, 
1328                 enum brl_type *plock_type,
1329                 enum brl_flavour lock_flav)
1330 {
1331         unsigned int i;
1332         struct lock_struct lock;
1333         const struct lock_struct *locks = br_lck->lock_data;
1334         files_struct *fsp = br_lck->fsp;
1335
1336         lock.context.smbpid = *psmbpid;
1337         lock.context.pid = pid;
1338         lock.context.tid = br_lck->fsp->conn->cnum;
1339         lock.start = *pstart;
1340         lock.size = *psize;
1341         lock.fnum = fsp->fnum;
1342         lock.lock_type = *plock_type;
1343         lock.lock_flav = lock_flav;
1344
1345         /* Make sure existing locks don't conflict */
1346         for (i=0; i < br_lck->num_locks; i++) {
1347                 const struct lock_struct *exlock = &locks[i];
1348                 bool conflict = False;
1349
1350                 if (exlock->lock_flav == WINDOWS_LOCK) {
1351                         conflict = brl_conflict(exlock, &lock);
1352                 } else {        
1353                         conflict = brl_conflict_posix(exlock, &lock);
1354                 }
1355
1356                 if (conflict) {
1357                         *psmbpid = exlock->context.smbpid;
1358                         *pstart = exlock->start;
1359                         *psize = exlock->size;
1360                         *plock_type = exlock->lock_type;
1361                         return NT_STATUS_LOCK_NOT_GRANTED;
1362                 }
1363         }
1364
1365         /*
1366          * There is no lock held by an SMB daemon, check to
1367          * see if there is a POSIX lock from a UNIX or NFS process.
1368          */
1369
1370         if(lp_posix_locking(fsp->conn->params)) {
1371                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1372
1373                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1374                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1375                         fsp->fnum, fsp_str_dbg(fsp)));
1376
1377                 if (ret) {
1378                         /* Hmmm. No clue what to set smbpid to - use 0xFFFF. */
1379                         *psmbpid = 0xFFFF;
1380                         return NT_STATUS_LOCK_NOT_GRANTED;
1381                 }
1382         }
1383
1384         return NT_STATUS_OK;
1385 }
1386
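/* A minimal usage sketch (hypothetical caller, not part of this module):
   probe for a conflicting lock over a range. On NT_STATUS_LOCK_NOT_GRANTED
   the in/out parameters are overwritten with the conflicting lock's
   details. The values below are illustrative; the block is guarded by
   #if 0 so it is never compiled. */
#if 0
static void example_lockquery(struct byte_range_lock *br_lck)
{
        uint32 smbpid = 0;              /* illustrative values only */
        br_off start = 0;
        br_off size = 100;
        enum brl_type lock_type = WRITE_LOCK;
        NTSTATUS status;

        status = brl_lockquery(br_lck, &smbpid, procid_self(),
                               &start, &size, &lock_type, WINDOWS_LOCK);
        if (NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
                DEBUG(10,("conflict: smbpid=%u start=%.0f size=%.0f\n",
                        (unsigned int)smbpid, (double)start, (double)size));
        }
}
#endif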
1387
1388 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1389                                      struct byte_range_lock *br_lck,
1390                                      struct lock_struct *plock,
1391                                      struct blocking_lock_record *blr)
1392 {
1393         VFS_FIND(brl_cancel_windows);
1394         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1395 }
1396
1397 /****************************************************************************
1398  Remove a particular pending lock.
1399 ****************************************************************************/
1400 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1401                 uint32 smbpid,
1402                 struct server_id pid,
1403                 br_off start,
1404                 br_off size,
1405                 enum brl_flavour lock_flav,
1406                 struct blocking_lock_record *blr)
1407 {
1408         bool ret;
1409         struct lock_struct lock;
1410
1411         lock.context.smbpid = smbpid;
1412         lock.context.pid = pid;
1413         lock.context.tid = br_lck->fsp->conn->cnum;
1414         lock.start = start;
1415         lock.size = size;
1416         lock.fnum = br_lck->fsp->fnum;
1417         lock.lock_flav = lock_flav;
1418         /* lock.lock_type doesn't matter */
1419
1420         if (lock_flav == WINDOWS_LOCK) {
1421                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1422                     &lock, blr);
1423         } else {
1424                 ret = brl_lock_cancel_default(br_lck, &lock);
1425         }
1426
1427         return ret;
1428 }
1429
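/* A hedged usage sketch (hypothetical caller): cancel a pending lock that
   was queued earlier. The blocking_lock_record pointer must be the one the
   pending lock was queued with; the range values are illustrative. Guarded
   by #if 0 so it is never compiled. */
#if 0
static void example_cancel(struct byte_range_lock *br_lck,
                uint32 smbpid,
                struct blocking_lock_record *blr)
{
        if (!brl_lock_cancel(br_lck, smbpid, procid_self(),
                             0 /* start */, 100 /* size */,
                             WINDOWS_LOCK, blr)) {
                DEBUG(10,("example_cancel: pending lock not found\n"));
        }
}
#endif
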
1430 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1431                 struct lock_struct *plock)
1432 {
1433         unsigned int i;
1434         struct lock_struct *locks = br_lck->lock_data;
1435
1436         SMB_ASSERT(plock);
1437
1438         for (i = 0; i < br_lck->num_locks; i++) {
1439                 struct lock_struct *lock = &locks[i];
1440
1441                 /* For pending locks we *always* care about the fnum. */
1442                 if (brl_same_context(&lock->context, &plock->context) &&
1443                                 lock->fnum == plock->fnum &&
1444                                 IS_PENDING_LOCK(lock->lock_type) &&
1445                                 lock->lock_flav == plock->lock_flav &&
1446                                 lock->start == plock->start &&
1447                                 lock->size == plock->size) {
1448                         break;
1449                 }
1450         }
1451
1452         if (i == br_lck->num_locks) {
1453                 /* Didn't find it. */
1454                 return False;
1455         }
1456
1457         /* Found this particular pending lock - delete it. */
1458         if (i < br_lck->num_locks - 1) {
1459                 memmove(&locks[i], &locks[i+1],
1460                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1461         }
1462
1463         br_lck->num_locks -= 1;
1464         br_lck->modified = True;
1465         return True;
1466 }
1467
1468 /****************************************************************************
1469  Remove any locks associated with an open file.
1470  If this process owns other Windows locks on the same dev/ino pair, they
1471  are unlocked individually so the system POSIX locks are released correctly.
1472 ****************************************************************************/
1473
1474 void brl_close_fnum(struct messaging_context *msg_ctx,
1475                     struct byte_range_lock *br_lck)
1476 {
1477         files_struct *fsp = br_lck->fsp;
1478         uint16 tid = fsp->conn->cnum;
1479         int fnum = fsp->fnum;
1480         unsigned int i, j, dcount=0;
1481         int num_deleted_windows_locks = 0;
1482         struct lock_struct *locks = br_lck->lock_data;
1483         struct server_id pid = procid_self();
1484         bool unlock_individually = False;
1485         bool posix_level2_contention_ended = false;
1486
1487         if(lp_posix_locking(fsp->conn->params)) {
1488
1489                 /* Check if there are any Windows locks associated with this dev/ino
1490                    pair that are not this fnum. If so we need to call unlock on each
1491                    one in order to release the system POSIX locks correctly. */
1492
1493                 for (i=0; i < br_lck->num_locks; i++) {
1494                         struct lock_struct *lock = &locks[i];
1495
1496                         if (!procid_equal(&lock->context.pid, &pid)) {
1497                                 continue;
1498                         }
1499
1500                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1501                                 continue; /* Ignore pending. */
1502                         }
1503
1504                         if (lock->context.tid != tid || lock->fnum != fnum) {
1505                                 unlock_individually = True;
1506                                 break;
1507                         }
1508                 }
1509
1510                 if (unlock_individually) {
1511                         struct lock_struct *locks_copy;
1512                         unsigned int num_locks_copy;
1513
1514                         /* Copy the current lock array. */
1515                         if (br_lck->num_locks) {
1516                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1517                                 if (!locks_copy) {
1518                                         smb_panic("brl_close_fnum: talloc failed");
1519                                 }
1520                         } else {
1521                                 locks_copy = NULL;
1522                         }
1523
1524                         num_locks_copy = br_lck->num_locks;
1525
1526                         for (i=0; i < num_locks_copy; i++) {
1527                                 struct lock_struct *lock = &locks_copy[i];
1528
1529                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1530                                                 (lock->fnum == fnum)) {
1531                                         brl_unlock(msg_ctx,
1532                                                 br_lck,
1533                                                 lock->context.smbpid,
1534                                                 pid,
1535                                                 lock->start,
1536                                                 lock->size,
1537                                                 lock->lock_flav);
1538                                 }
1539                         }
1540                         return;
1541                 }
1542         }
1543
1544         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1545
1546         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1547
1548         for (i=0; i < br_lck->num_locks; i++) {
1549                 struct lock_struct *lock = &locks[i];
1550                 bool del_this_lock = False;
1551
1552                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1553                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1554                                 del_this_lock = True;
1555                                 num_deleted_windows_locks++;
1556                                 contend_level2_oplocks_end(br_lck->fsp,
1557                                     LEVEL2_CONTEND_WINDOWS_BRL);
1558                         } else if (lock->lock_flav == POSIX_LOCK) {
1559                                 del_this_lock = True;
1560
1561                                 /* Only end level2 contention once for posix */
1562                                 if (!posix_level2_contention_ended) {
1563                                         posix_level2_contention_ended = true;
1564                                         contend_level2_oplocks_end(br_lck->fsp,
1565                                             LEVEL2_CONTEND_POSIX_BRL);
1566                                 }
1567                         }
1568                 }
1569
1570                 if (del_this_lock) {
1571                         /* Send unlock messages to any pending waiters that overlap. */
1572                         for (j=0; j < br_lck->num_locks; j++) {
1573                                 struct lock_struct *pend_lock = &locks[j];
1574
1575                                 /* Ignore our own or non-pending locks. */
1576                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1577                                         continue;
1578                                 }
1579
1580                                 /* Optimisation - don't send to this fnum as we're
1581                                    closing it. */
1582                                 if (pend_lock->context.tid == tid &&
1583                                     procid_equal(&pend_lock->context.pid, &pid) &&
1584                                     pend_lock->fnum == fnum) {
1585                                         continue;
1586                                 }
1587
1588                                 /* We could send specific lock info here... */
1589                                 if (brl_pending_overlap(lock, pend_lock)) {
1590                                         messaging_send(msg_ctx, pend_lock->context.pid,
1591                                                        MSG_SMB_UNLOCK, &data_blob_null);
1592                                 }
1593                         }
1594
1595                         /* found it - delete it */
1596                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1597                                 memmove(&locks[i], &locks[i+1], 
1598                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1599                         }
1600                         br_lck->num_locks--;
1601                         br_lck->modified = True;
1602                         i--;
1603                         dcount++;
1604                 }
1605         }
1606
1607         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1608                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1609                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1610         }
1611 }
1612
1613 /****************************************************************************
1614  Ensure this set of lock entries is valid.
1615 ****************************************************************************/
1616 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1617 {
1618         unsigned int i;
1619         unsigned int num_valid_entries = 0;
1620         struct lock_struct *locks = *pplocks;
1621
1622         for (i = 0; i < *pnum_entries; i++) {
1623                 struct lock_struct *lock_data = &locks[i];
1624                 if (!serverid_exists(&lock_data->context.pid)) {
1625                         /* This process no longer exists - mark this
1626                            entry as invalid by zeroing it. */
1627                         ZERO_STRUCTP(lock_data);
1628                 } else {
1629                         num_valid_entries++;
1630                 }
1631         }
1632
1633         if (num_valid_entries != *pnum_entries) {
1634                 struct lock_struct *new_lock_data = NULL;
1635
1636                 if (num_valid_entries) {
1637                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1638                         if (!new_lock_data) {
1639                                 DEBUG(3, ("malloc fail\n"));
1640                                 return False;
1641                         }
1642
1643                         num_valid_entries = 0;
1644                         for (i = 0; i < *pnum_entries; i++) {
1645                                 struct lock_struct *lock_data = &locks[i];
1646                                 if (lock_data->context.smbpid &&
1647                                                 lock_data->context.tid) {
1648                                         /* Valid (nonzero) entry - copy it. */
1649                                         memcpy(&new_lock_data[num_valid_entries],
1650                                                 lock_data, sizeof(struct lock_struct));
1651                                         num_valid_entries++;
1652                                 }
1653                         }
1654                 }
1655
1656                 SAFE_FREE(*pplocks);
1657                 *pplocks = new_lock_data;
1658                 *pnum_entries = num_valid_entries;
1659         }
1660
1661         return True;
1662 }
1663
1664 struct brl_forall_cb {
1665         void (*fn)(struct file_id id, struct server_id pid,
1666                    enum brl_type lock_type,
1667                    enum brl_flavour lock_flav,
1668                    br_off start, br_off size,
1669                    void *private_data);
1670         void *private_data;
1671 };
1672
1673 /****************************************************************************
1674  Traverse the whole database with this function, calling traverse_callback
1675  on each lock.
1676 ****************************************************************************/
1677
1678 static int traverse_fn(struct db_record *rec, void *state)
1679 {
1680         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1681         struct lock_struct *locks;
1682         struct file_id *key;
1683         unsigned int i;
1684         unsigned int num_locks = 0;
1685         unsigned int orig_num_locks = 0;
1686
1687         /* In a traverse function we must make a copy of the
1688            record's value before modifying it. */
1689
1690         locks = (struct lock_struct *)memdup(rec->value.dptr,
1691                                              rec->value.dsize);
1692         if (!locks) {
1693                 return -1; /* Terminate traversal. */
1694         }
1695
1696         key = (struct file_id *)rec->key.dptr;
1697         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1698
1699         /* Ensure the lock db is clean of entries from invalid processes. */
1700
1701         if (!validate_lock_entries(&num_locks, &locks)) {
1702                 SAFE_FREE(locks);
1703                 return -1; /* Terminate traversal */
1704         }
1705
1706         if (orig_num_locks != num_locks) {
1707                 if (num_locks) {
1708                         TDB_DATA data;
1709                         data.dptr = (uint8_t *)locks;
1710                         data.dsize = num_locks*sizeof(struct lock_struct);
1711                         rec->store(rec, data, TDB_REPLACE);
1712                 } else {
1713                         rec->delete_rec(rec);
1714                 }
1715         }
1716
1717         if (cb->fn) {
1718                 for (i=0; i<num_locks; i++) {
1719                         cb->fn(*key,
1720                                 locks[i].context.pid,
1721                                 locks[i].lock_type,
1722                                 locks[i].lock_flav,
1723                                 locks[i].start,
1724                                 locks[i].size,
1725                                 cb->private_data);
1726                 }
1727         }
1728
1729         SAFE_FREE(locks);
1730         return 0;
1731 }
1732
1733 /*******************************************************************
1734  Call the specified function on each lock in the database.
1735 ********************************************************************/
1736
1737 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1738                           enum brl_type lock_type,
1739                           enum brl_flavour lock_flav,
1740                           br_off start, br_off size,
1741                           void *private_data),
1742                void *private_data)
1743 {
1744         struct brl_forall_cb cb;
1745
1746         if (!brlock_db) {
1747                 return 0;
1748         }
1749         cb.fn = fn;
1750         cb.private_data = private_data;
1751         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1752 }
1753
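/* A minimal sketch of a brl_forall() callback (hypothetical, for
   illustration only): count every lock currently in brlock.tdb. The
   function names are assumptions; guarded by #if 0 so it is never
   compiled. */
#if 0
static void count_locks_cb(struct file_id id, struct server_id pid,
                           enum brl_type lock_type,
                           enum brl_flavour lock_flav,
                           br_off start, br_off size,
                           void *private_data)
{
        unsigned int *count = (unsigned int *)private_data;
        (*count)++;
}

static unsigned int example_count_locks(void)
{
        unsigned int count = 0;
        brl_forall(count_locks_cb, &count);
        return count;
}
#endif
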
1754 /*******************************************************************
1755  Store a potentially modified set of byte range lock data back into
1756  the database.
1757  Unlock the record.
1758 ********************************************************************/
1759
1760 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1761 {
1762         if (br_lck->read_only) {
1763                 SMB_ASSERT(!br_lck->modified);
1764         }
1765
1766         if (!br_lck->modified) {
1767                 goto done;
1768         }
1769
1770         if (br_lck->num_locks == 0) {
1771                 /* No locks - delete this entry. */
1772                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1773                 if (!NT_STATUS_IS_OK(status)) {
1774                         DEBUG(0, ("delete_rec returned %s\n",
1775                                   nt_errstr(status)));
1776                         smb_panic("Could not delete byte range lock entry");
1777                 }
1778         } else {
1779                 TDB_DATA data;
1780                 NTSTATUS status;
1781
1782                 data.dptr = (uint8 *)br_lck->lock_data;
1783                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1784
1785                 status = br_lck->record->store(br_lck->record, data,
1786                                                TDB_REPLACE);
1787                 if (!NT_STATUS_IS_OK(status)) {
1788                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1789                         smb_panic("Could not store byte range mode entry");
1790                 }
1791         }
1792
1793  done:
1794
1795         SAFE_FREE(br_lck->lock_data);
1796         TALLOC_FREE(br_lck->record);
1797         return 0;
1798 }
1799
1800 /*******************************************************************
1801  Fetch a set of byte range lock data from the database.
1802  Leave the record locked.
1803  TALLOC_FREE(brl) will release the lock in the destructor.
1804 ********************************************************************/
1805
1806 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1807                                         files_struct *fsp, bool read_only)
1808 {
1809         TDB_DATA key, data;
1810         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1811
1812         if (br_lck == NULL) {
1813                 return NULL;
1814         }
1815
1816         br_lck->fsp = fsp;
1817         br_lck->num_locks = 0;
1818         br_lck->modified = False;
1819         br_lck->key = fsp->file_id;
1820
1821         key.dptr = (uint8 *)&br_lck->key;
1822         key.dsize = sizeof(struct file_id);
1823
1824         if (!fsp->lockdb_clean) {
1825                 /* We must be read/write to clean
1826                    the dead entries. */
1827                 read_only = False;
1828         }
1829
1830         if (read_only) {
1831                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1832                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1833                         TALLOC_FREE(br_lck);
1834                         return NULL;
1835                 }
1836                 br_lck->record = NULL;
1837         }
1838         else {
1839                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1840
1841                 if (br_lck->record == NULL) {
1842                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1843                         TALLOC_FREE(br_lck);
1844                         return NULL;
1845                 }
1846
1847                 data = br_lck->record->value;
1848         }
1849
1850         br_lck->read_only = read_only;
1851         br_lck->lock_data = NULL;
1852
1853         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1854
1855         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1856
1857         if (br_lck->num_locks != 0) {
1858                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1859                                                      br_lck->num_locks);
1860                 if (br_lck->lock_data == NULL) {
1861                         DEBUG(0, ("malloc failed\n"));
1862                         TALLOC_FREE(br_lck);
1863                         return NULL;
1864                 }
1865
1866                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1867         }
1868         
1869         if (!fsp->lockdb_clean) {
1870                 int orig_num_locks = br_lck->num_locks;
1871
1872                 /* This is the first time we've accessed this. */
1873                 /* Go through and ensure all entries exist - remove any that don't. */
1874                 /* Makes the lockdb self cleaning at low cost. */
1875
1876                 if (!validate_lock_entries(&br_lck->num_locks,
1877                                            &br_lck->lock_data)) {
1878                         SAFE_FREE(br_lck->lock_data);
1879                         TALLOC_FREE(br_lck);
1880                         return NULL;
1881                 }
1882
1883                 /* Ensure invalid locks are cleaned up in the destructor. */
1884                 if (orig_num_locks != br_lck->num_locks) {
1885                         br_lck->modified = True;
1886                 }
1887
1888                 /* Mark the lockdb as "clean" as seen from this open file. */
1889                 fsp->lockdb_clean = True;
1890         }
1891
1892         if (DEBUGLEVEL >= 10) {
1893                 unsigned int i;
1894                 struct lock_struct *locks = br_lck->lock_data;
1895                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1896                         br_lck->num_locks,
1897                         file_id_string_tos(&fsp->file_id)));
1898                 for (i = 0; i < br_lck->num_locks; i++) {
1899                         print_lock_struct(i, &locks[i]);
1900                 }
1901         }
1902         return br_lck;
1903 }
1904
1905 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1906                                         files_struct *fsp)
1907 {
1908         return brl_get_locks_internal(mem_ctx, fsp, False);
1909 }
1910
1911 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1912 {
1913         struct byte_range_lock *br_lock;
1914
1915         if (lp_clustering()) {
1916                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1917         }
1918
1919         if ((fsp->brlock_rec != NULL)
1920             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1921                 return fsp->brlock_rec;
1922         }
1923
1924         TALLOC_FREE(fsp->brlock_rec);
1925
1926         br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
1927         if (br_lock == NULL) {
1928                 return NULL;
1929         }
1930         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1931
1932         fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
1933         if (fsp->brlock_rec == NULL) {
1934                 goto fail;
1935         }
1936         fsp->brlock_rec->fsp = fsp;
1937         fsp->brlock_rec->num_locks = br_lock->num_locks;
1938         fsp->brlock_rec->read_only = true;
1939         fsp->brlock_rec->key = br_lock->key;
1940
1941         fsp->brlock_rec->lock_data = (struct lock_struct *)
1942                 talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
1943                               sizeof(struct lock_struct) * br_lock->num_locks);
1944         if (fsp->brlock_rec->lock_data == NULL) {
1945                 goto fail;
1946         }
1947
1948         TALLOC_FREE(br_lock);
1949         return fsp->brlock_rec;
1950 fail:
1951         TALLOC_FREE(br_lock);
1952         TALLOC_FREE(fsp->brlock_rec);
1953         return NULL;
1954 }
1955
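/* A hypothetical read-only inspection sketch: fetch the current lock set
   without write-locking the record and count the read locks. The cached
   record belongs to the fsp (or to talloc_tos() when clustering), so this
   caller does not free it. Guarded by #if 0 so it is never compiled. */
#if 0
static unsigned int example_count_read_locks(files_struct *fsp)
{
        unsigned int i, n = 0;
        struct byte_range_lock *br_lck = brl_get_locks_readonly(fsp);

        if (br_lck == NULL) {
                return 0;
        }
        for (i = 0; i < br_lck->num_locks; i++) {
                if (br_lck->lock_data[i].lock_type == READ_LOCK) {
                        n++;
                }
        }
        return n;
}
#endif
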
1956 struct brl_revalidate_state {
1957         ssize_t array_size;
1958         uint32 num_pids;
1959         struct server_id *pids;
1960 };
1961
1962 /*
1963  * Collect PIDs of all processes with pending entries
1964  */
1965
1966 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1967                                    enum brl_type lock_type,
1968                                    enum brl_flavour lock_flav,
1969                                    br_off start, br_off size,
1970                                    void *private_data)
1971 {
1972         struct brl_revalidate_state *state =
1973                 (struct brl_revalidate_state *)private_data;
1974
1975         if (!IS_PENDING_LOCK(lock_type)) {
1976                 return;
1977         }
1978
1979         add_to_large_array(state, sizeof(pid), (void *)&pid,
1980                            &state->pids, &state->num_pids,
1981                            &state->array_size);
1982 }
1983
1984 /*
1985  * qsort callback to sort the processes
1986  */
1987
1988 static int compare_procids(const void *p1, const void *p2)
1989 {
1990         const struct server_id *i1 = (struct server_id *)p1;
1991         const struct server_id *i2 = (struct server_id *)p2;
1992
1993         if (i1->pid < i2->pid) return -1;
1994         if (i1->pid > i2->pid) return 1;
1995         return 0;
1996 }
1997
1998 /*
1999  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2000  * locks so that they retry. Mainly used in the cluster code after a node has
2001  * died.
2002  *
2003  * Done in two steps to avoid double-sends: First we collect all entries in an
2004  * array, then qsort that array and only send to non-dupes.
2005  */
2006
2007 static void brl_revalidate(struct messaging_context *msg_ctx,
2008                            void *private_data,
2009                            uint32_t msg_type,
2010                            struct server_id server_id,
2011                            DATA_BLOB *data)
2012 {
2013         struct brl_revalidate_state *state;
2014         uint32 i;
2015         struct server_id last_pid;
2016
2017         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2018                 DEBUG(0, ("talloc failed\n"));
2019                 return;
2020         }
2021
2022         brl_forall(brl_revalidate_collect, state);
2023
2024         if (state->array_size == -1) {
2025                 DEBUG(0, ("talloc failed\n"));
2026                 goto done;
2027         }
2028
2029         if (state->num_pids == 0) {
2030                 goto done;
2031         }
2032
2033         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2034
2035         ZERO_STRUCT(last_pid);
2036
2037         for (i=0; i<state->num_pids; i++) {
2038                 if (procid_equal(&last_pid, &state->pids[i])) {
2039                         /*
2040                          * We've seen that one already
2041                          */
2042                         continue;
2043                 }
2044
2045                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2046                                &data_blob_null);
2047                 last_pid = state->pids[i];
2048         }
2049
2050  done:
2051         TALLOC_FREE(state);
2052         return;
2053 }
2054
2055 void brl_register_msgs(struct messaging_context *msg_ctx)
2056 {
2057         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2058                            brl_revalidate);
2059 }
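
/* A hedged trigger sketch (hypothetical helper): cluster recovery code can
   make an smbd run brl_revalidate() by sending it MSG_SMB_BRL_VALIDATE,
   the message registered just above. Guarded by #if 0 so it is never
   compiled. */
#if 0
static void example_trigger_revalidate(struct messaging_context *msg_ctx,
                                       struct server_id target_smbd)
{
        messaging_send(msg_ctx, target_smbd, MSG_SMB_BRL_VALIDATE,
                       &data_blob_null);
}
#endif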