source3/locking/brlock.c
/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT. */

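/*
 * Orientation note (a sketch, not a normative description): each record
 * in brlock.tdb is keyed by the file's device/inode pair (struct file_id)
 * and holds a flat array of struct lock_struct entries, one per
 * byte-range lock on that file:
 *
 *      key:   struct file_id (device, inode)
 *      data:  struct lock_struct[num_locks]
 *
 * The routines below fetch the whole array, edit it in memory, and rely
 * on the byte_range_lock destructor to write it back when "modified"
 * is set.
 */
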
#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "smbd/globals.h"
#include "dbwrap.h"
#include "serverid.h"
#include "messages.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, struct lock_struct *pls)
{
        DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
                        i,
                        (unsigned long long)pls->context.smblctx,
                        (unsigned int)pls->context.tid,
                        server_id_str(talloc_tos(), &pls->context.pid) ));

        DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
                (double)pls->start,
                (double)pls->size,
                pls->fnum,
                lock_type_name(pls->lock_type),
                lock_flav_name(pls->lock_flav) ));
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

bool brl_same_context(const struct lock_context *ctx1,
                      const struct lock_context *ctx2)
{
        return (procid_equal(&ctx1->pid, &ctx2->pid) &&
                (ctx1->smblctx == ctx2->smblctx) &&
                (ctx1->tid == ctx2->tid));
}

/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
                        const struct lock_struct *lck2)
{
        /* XXX Remove for Win7 compatibility. */
        /* this extra check is not redundant - it copes with locks
           that go beyond the end of 64 bit file space */
        if (lck1->size != 0 &&
            lck1->start == lck2->start &&
            lck1->size == lck2->size) {
                return True;
        }

        if (lck1->start >= (lck2->start+lck2->size) ||
            lck2->start >= (lck1->start+lck1->size)) {
                return False;
        }
        return True;
}
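
/*
 * Worked example (illustrative only): lck1 = {start 10, size 5} covers
 * bytes 10..14 and lck2 = {start 15, size 5} covers 15..19.  Here
 * lck2->start (15) >= lck1->start + lck1->size (15), so the ranges are
 * merely adjacent and brl_overlap() returns False.
 */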

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* A READ lock can stack on top of a WRITE lock if they have the same
         * context & fnum. */
        if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
            brl_same_context(&lck1->context, &lck2->context) &&
            lck1->fnum == lck2->fnum) {
                return False;
        }

        return brl_overlap(lck1, lck2);
}

/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1,
                               const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
        SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
        SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* Locks on the same context don't conflict. Ignore fnum. */
        if (brl_same_context(&lck1->context, &lck2->context)) {
                return False;
        }

        /* One is read, the other write, or the context is different,
           do they overlap ? */
        return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
static bool brl_conflict1(const struct lock_struct *lck1,
                          const struct lock_struct *lck2)
{
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
                return True;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }

        return True;
}
#endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
{
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
                return False;

        /* POSIX flavour locks never conflict here - this is only called
           in the read/write path. */

        if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
                return False;

        /*
         * Incoming WRITE locks conflict with existing READ locks even
         * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
         */

        if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
                if (brl_same_context(&lck1->context, &lck2->context) &&
                                        lck1->fnum == lck2->fnum)
                        return False;
        }

        return brl_overlap(lck1, lck2);
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
        if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
                return True;
        if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
                return True;
        return False;
}
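
/*
 * Example (illustrative only): an unlock of [0, 10) overlaps a pending
 * lock at [5, 10) via the first test (0 <= 5 && 0 + 10 > 5), but matches
 * neither test for a pending lock at [20, 5), so only the first waiter
 * would be woken.
 */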

/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
{
        if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
                /* amazing the little things you learn with a test
                   suite. Locks beyond this offset (as a 64 bit
                   number!) always generate the conflict error code,
                   unless the top bit is set */
                if (!blocking_lock) {
                        fsp->last_lock_failure = *lock;
                }
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
                        lock->context.tid == fsp->last_lock_failure.context.tid &&
                        lock->fnum == fsp->last_lock_failure.fnum &&
                        lock->start == fsp->last_lock_failure.start) {
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (!blocking_lock) {
                fsp->last_lock_failure = *lock;
        }
        return NT_STATUS_LOCK_NOT_GRANTED;
}
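
/*
 * Example (illustrative only): a lock at start = 0xF0000000 satisfies
 * both tests above (>= 0xEF000000, top bit clear) and always yields
 * NT_STATUS_FILE_LOCK_CONFLICT, while start = 0x8000000000000000 has
 * the top bit set and falls through to the last-failure comparison.
 */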

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
        int tdb_flags;

        if (brlock_db) {
                return;
        }

        tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;

        if (!lp_clustering()) {
                /*
                 * We can't use the SEQNUM trick to cache brlock
                 * entries in the clustering case because ctdb seqnum
                 * propagation has a delay.
                 */
                tdb_flags |= TDB_SEQNUM;
        }

        brlock_db = db_open(NULL, lock_path("brlock.tdb"),
                            lp_open_files_db_hash_size(), tdb_flags,
                            read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
        if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
                        lock_path("brlock.tdb")));
                return;
        }
}
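
/*
 * Note on the flag choice above (partly inference, not from the original
 * comments): TDB_CLEAR_IF_FIRST wipes stale records when the first opener
 * attaches after a restart, TDB_VOLATILE tunes the tdb for records with
 * short lifetimes, and TDB_SEQNUM bumps a global sequence number on every
 * change so readers can cache a fetched record until the seqnum moves -
 * safe on a single host, but skipped with ctdb because seqnum propagation
 * across cluster nodes lags.
 */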

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
        TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
                        const struct lock_struct *lck2)
{
        if (lck1->start != lck2->start) {
                return (lck1->start - lck2->start);
        }
        if (lck2->size != lck1->size) {
                return ((int)lck1->size - (int)lck2->size);
        }
        return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
        unsigned int i;
        files_struct *fsp = br_lck->fsp;
        struct lock_struct *locks = br_lck->lock_data;
        NTSTATUS status;

        SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

        if ((plock->start + plock->size - 1 < plock->start) &&
                        plock->size != 0) {
                return NT_STATUS_INVALID_LOCK_RANGE;
        }

        for (i=0; i < br_lck->num_locks; i++) {
                /* Do any Windows or POSIX locks conflict ? */
                if (brl_conflict(&locks[i], plock)) {
                        /* Remember who blocked us. */
                        plock->context.smblctx = locks[i].context.smblctx;
                        return brl_lock_failed(fsp,plock,blocking_lock);
                }
#if ZERO_ZERO
                if (plock->start == 0 && plock->size == 0 &&
                                locks[i].size == 0) {
                        break;
                }
#endif
        }

        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }

        /* We can get the Windows lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
                int errno_ret;
                if (!set_posix_lock_windows_flavour(fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* no conflicts - add it to the list of locks */
        locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
        if (!locks) {
                status = NT_STATUS_NO_MEMORY;
                goto fail;
        }

        memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
        br_lck->num_locks += 1;
        br_lck->lock_data = locks;
        br_lck->modified = True;

        return NT_STATUS_OK;
 fail:
        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }
        return status;
}

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
                                                struct lock_struct *ex,         /* existing lock. */
                                                struct lock_struct *plock)      /* proposed lock. */
{
        bool lock_types_differ = (ex->lock_type != plock->lock_type);

        /* We can't merge non-conflicting locks on different contexts - ignore fnum. */

        if (!brl_same_context(&ex->context, &plock->context)) {
                /* Just copy. */
                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

        /* We now know we have the same context. */

        /* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

        if ( (ex->start > (plock->start + plock->size)) ||
                (plock->start > (ex->start + ex->size))) {

                /* No overlap with this lock - copy existing. */

                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start + ex->size <= plock->start + plock->size) ) {

                /* Replace - discard existing lock. */

                return 0;
        }

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if (plock->start + plock->size == ex->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if (ex->start + ex->size == plock->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the existing lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->start = ex->start;
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start <= plock->start + plock->size) &&
                (ex->start + ex->size > plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add remaining existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing start and size. */
                        lck_arr[0].start = plock->start + plock->size;
                        lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += (ex->start + ex->size) - (plock->start + plock->size);
                        return 0;
                }
        }

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if ( (ex->start < plock->start) &&
                        (ex->start + ex->size >= plock->start) &&
                        (ex->start + ex->size <= plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the truncated old lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing size. */
                        lck_arr[0].size = plock->start - ex->start;
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. MUST ADJUST plock SIZE FIRST ! */
                        plock->size += (plock->start - ex->start);
                        plock->start = ex->start;
                        return 0;
                }
        }

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

        if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

                if (lock_types_differ) {

                        /* We have to split ex into two locks here. */

                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

                        /* Adjust first existing size. */
                        lck_arr[0].size = plock->start - ex->start;

                        /* Adjust second existing start and size. */
                        lck_arr[1].start = plock->start + plock->size;
                        lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 2;
                } else {
                        /* Just eat the existing locks, merge them into plock. */
                        plock->start = ex->start;
                        plock->size = ex->size;
                        return 0;
                }
        }

        /* Never get here. */
        smb_panic("brlock_posix_split_merge");
        /* Notreached. */

        /* Keep some compilers happy. */
        return 0;
}
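
/*
 * Summary of the contract above (derived from the cases in the code, not
 * a separate spec): the function writes 0, 1 or 2 entries into lck_arr
 * and returns that count.  0 means "ex" was discarded or merged into
 * *plock; 1 means one (possibly trimmed) existing lock survives alongside
 * plock; 2 means plock punched a hole in "ex", splitting it in two.
 * Note that *plock may be grown by the merge cases, which is why callers
 * add it to the array only after scanning all existing locks.
 */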

/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               struct lock_struct *plock)
{
        unsigned int i, count, posix_count;
        struct lock_struct *locks = br_lck->lock_data;
        struct lock_struct *tp;
        bool signal_pending_read = False;
        bool break_oplocks = false;
        NTSTATUS status;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size - 1 < plock->start) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, and add our lock,
           so we need at most 2 more entries. */

        tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
        if (!tp) {
                return NT_STATUS_NO_MEMORY;
        }

        count = posix_count = 0;

        for (i=0; i < br_lck->num_locks; i++) {
                struct lock_struct *curr_lock = &locks[i];

                /* If we have a pending read lock, a lock downgrade should
                   trigger a lock re-evaluation. */
                if (curr_lock->lock_type == PENDING_READ_LOCK &&
                                brl_pending_overlap(plock, curr_lock)) {
                        signal_pending_read = True;
                }

                if (curr_lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(curr_lock, plock)) {
                                /* No games with error messages. */
                                SAFE_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
                        count++;
                } else {
                        unsigned int tmp_count = 0;

                        /* POSIX conflict semantics are different. */
                        if (brl_conflict_posix(curr_lock, plock)) {
                                /* Can't block ourselves with POSIX locks. */
                                /* No games with error messages. */
                                SAFE_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }

                        /* Work out overlaps. */
                        tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
                        posix_count += tmp_count;
                        count += tmp_count;
                }
        }

        /*
         * Break oplocks while we hold a brl. Since lock() and unlock() calls
         * are not symmetric with POSIX semantics, we cannot guarantee our
         * contend_level2_oplocks_begin/end calls will be acquired and
         * released one-for-one as with Windows semantics. Therefore we only
         * call contend_level2_oplocks_begin if this is the first POSIX brl on
         * the file.
         */
        break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
                         posix_count == 0);
        if (break_oplocks) {
                contend_level2_oplocks_begin(br_lck->fsp,
                                             LEVEL2_CONTEND_POSIX_BRL);
        }

        /* Try and add the lock in order, sorted by lock start. */
        for (i=0; i < count; i++) {
                struct lock_struct *curr_lock = &tp[i];

                if (curr_lock->start <= plock->start) {
                        continue;
                }
                /* Found the insertion point - stop so "i" indexes it. */
                break;
        }

        if (i < count) {
                memmove(&tp[i+1], &tp[i],
                        (count - i)*sizeof(struct lock_struct));
        }
        memcpy(&tp[i], plock, sizeof(struct lock_struct));
        count++;

        /* We can get the POSIX lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
                int errno_ret;

                /* The lower layer just needs to attempt to
                   get the system POSIX lock. We've weeded out
                   any conflicts above. */

                if (!set_posix_lock_posix_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                SAFE_FREE(tp);
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                SAFE_FREE(tp);
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* If we didn't use all the allocated size,
         * Realloc so we don't leak entries per lock call. */
        if (count < br_lck->num_locks + 2) {
                tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
                if (!tp) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
        }

        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
        br_lck->lock_data = tp;
        locks = tp;
        br_lck->modified = True;

        /* A successful downgrade from write to read lock can trigger a lock
           re-evaluation where waiting readers can now proceed. */

        if (signal_pending_read) {
                /* Send unlock messages to any pending read waiters that overlap. */
                for (i=0; i < br_lck->num_locks; i++) {
                        struct lock_struct *pend_lock = &locks[i];

                        /* Ignore non-pending locks. */
                        if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                                continue;
                        }

                        if (pend_lock->lock_type == PENDING_READ_LOCK &&
                                        brl_pending_overlap(plock, pend_lock)) {
                                DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
                                        procid_str_static(&pend_lock->context.pid )));

                                messaging_send(msg_ctx, pend_lock->context.pid,
                                               MSG_SMB_UNLOCK, &data_blob_null);
                        }
                }
        }

        return NT_STATUS_OK;
 fail:
        if (break_oplocks) {
                contend_level2_oplocks_end(br_lck->fsp,
                                           LEVEL2_CONTEND_POSIX_BRL);
        }
        return status;
}

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
                                       struct byte_range_lock *br_lck,
                                       struct lock_struct *plock,
                                       bool blocking_lock,
                                       struct blocking_lock_record *blr)
{
        VFS_FIND(brl_lock_windows);
        return handle->fns->brl_lock_windows(handle, br_lck, plock,
                                             blocking_lock, blr);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
                struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
                bool blocking_lock,
                uint64_t *psmblctx,
                struct blocking_lock_record *blr)
{
        NTSTATUS ret;
        struct lock_struct lock;

#if !ZERO_ZERO
        if (start == 0 && size == 0) {
                DEBUG(0,("client sent 0/0 lock - please report this\n"));
        }
#endif

#ifdef DEVELOPER
        /* Quieten valgrind on test. */
        memset(&lock, '\0', sizeof(lock));
#endif

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = br_lck->fsp->fnum;
        lock.lock_type = lock_type;
        lock.lock_flav = lock_flav;

        if (lock_flav == WINDOWS_LOCK) {
                ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
                    &lock, blocking_lock, blr);
        } else {
                ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }

#if ZERO_ZERO
        /* sort the lock list */
        TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
#endif

        /* If we're returning an error, return who blocked us. */
        if (!NT_STATUS_IS_OK(ret) && psmblctx) {
                *psmblctx = lock.context.smblctx;
        }
        return ret;
}
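
/*
 * Illustrative caller sketch (an assumption for orientation, written from
 * memory of how locking/locking.c drives this API, not verbatim): a caller
 * fetches the record, attempts the lock, and lets the destructor persist
 * any change back to brlock.tdb.
 *
 *      struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
 *      uint64_t blocker = 0;
 *      NTSTATUS status = brl_lock(msg_ctx, br_lck, smblctx, procid_self(),
 *                                 0, 100, WRITE_LOCK, WINDOWS_LOCK,
 *                                 False, &blocker, NULL);
 *      TALLOC_FREE(br_lck);    (the destructor writes back if modified)
 */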

/****************************************************************************
 Unlock a range of bytes - Windows semantics.
****************************************************************************/

bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               const struct lock_struct *plock)
{
        unsigned int i, j;
        struct lock_struct *locks = br_lck->lock_data;
        enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

        SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
        /* Delete write locks by preference... The lock list
           is sorted in the zero zero case. */

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (lock->lock_type == WRITE_LOCK &&
                    brl_same_context(&lock->context, &plock->context) &&
                    lock->fnum == plock->fnum &&
                    lock->lock_flav == WINDOWS_LOCK &&
                    lock->start == plock->start &&
                    lock->size == plock->size) {

                        /* found it - delete it */
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i != br_lck->num_locks) {
                /* We found it - don't search again. */
                goto unlock_continue;
        }
#endif

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (IS_PENDING_LOCK(lock->lock_type)) {
                        continue;
                }

                /* Only remove our own locks that match in start, size, and flavour. */
                if (brl_same_context(&lock->context, &plock->context) &&
                                lock->fnum == plock->fnum &&
                                lock->lock_flav == WINDOWS_LOCK &&
                                lock->start == plock->start &&
                                lock->size == plock->size ) {
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i == br_lck->num_locks) {
                /* we didn't find it */
                return False;
        }

#if ZERO_ZERO
  unlock_continue:
#endif

        /* Actually delete the lock. */
        if (i < br_lck->num_locks - 1) {
                memmove(&locks[i], &locks[i+1],
                        sizeof(*locks)*((br_lck->num_locks-1) - i));
        }

        br_lck->num_locks -= 1;
        br_lck->modified = True;

        /* Unlock the underlying POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_windows_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                deleted_lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks);
        }

        /* Send unlock messages to any pending waiters that overlap. */
        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        return True;
}

/****************************************************************************
 Unlock a range of bytes - POSIX semantics.
****************************************************************************/

static bool brl_unlock_posix(struct messaging_context *msg_ctx,
                             struct byte_range_lock *br_lck,
                             struct lock_struct *plock)
{
        unsigned int i, j, count;
        struct lock_struct *tp;
        struct lock_struct *locks = br_lck->lock_data;
        bool overlap_found = False;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return False;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size < plock->start ||
                        plock->start + plock->size < plock->size) {
                DEBUG(10,("brl_unlock_posix: lock wrap\n"));
                return False;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, so we need at most
           1 more entry. */

        tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
        if (!tp) {
                DEBUG(10,("brl_unlock_posix: malloc fail\n"));
                return False;
        }

        count = 0;
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
                unsigned int tmp_count;

                /* Only remove our own locks - ignore fnum. */
                if (IS_PENDING_LOCK(lock->lock_type) ||
                                !brl_same_context(&lock->context, &plock->context)) {
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                if (lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(lock, plock)) {
                                SAFE_FREE(tp);
                                return false;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                /* Work out overlaps. */
                tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

                if (tmp_count == 0) {
                        /* plock overlapped the existing lock completely,
                           or replaced it. Don't copy the existing lock. */
                        overlap_found = true;
                } else if (tmp_count == 1) {
                        /* Either no overlap (a simple copy of the existing
                         * lock) or an overlap that trimmed the existing lock. */
                        /* If the lock changed size, we had an overlap. */
                        if (tp[count].size != lock->size) {
                                overlap_found = true;
                        }
                        count += tmp_count;
                } else if (tmp_count == 2) {
                        /* We split a lock range in two. */
                        overlap_found = true;
                        count += tmp_count;

                        /* Optimisation... */
                        /* We know we're finished here as we can't overlap any
                           more POSIX locks. Copy the rest of the lock array. */

                        if (i < br_lck->num_locks - 1) {
                                memcpy(&tp[count], &locks[i+1],
                                        sizeof(*locks)*((br_lck->num_locks-1) - i));
                                count += ((br_lck->num_locks-1) - i);
                        }
                        break;
                }

        }

        if (!overlap_found) {
                /* Just ignore - no change. */
                SAFE_FREE(tp);
                DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
                return True;
        }

        /* Unlock any POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_posix_flavour(br_lck->fsp,
                                                plock->start,
                                                plock->size,
                                                &plock->context,
                                                tp,
                                                count);
        }

        /* Realloc so we don't leak entries per unlock call. */
        if (count) {
                tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
                if (!tp) {
                        DEBUG(10,("brl_unlock_posix: realloc fail\n"));
                        return False;
                }
        } else {
                /* We deleted the last lock. */
                SAFE_FREE(tp);
                tp = NULL;
        }

        contend_level2_oplocks_end(br_lck->fsp,
                                   LEVEL2_CONTEND_POSIX_BRL);

        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
        locks = tp;
        br_lck->lock_data = tp;
        br_lck->modified = True;

        /* Send unlock messages to any pending waiters that overlap. */

        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        return True;
}

bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
                                     struct messaging_context *msg_ctx,
                                     struct byte_range_lock *br_lck,
                                     const struct lock_struct *plock)
{
        VFS_FIND(brl_unlock_windows);
        return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
}

/****************************************************************************
 Unlock a range of bytes.
****************************************************************************/

bool brl_unlock(struct messaging_context *msg_ctx,
                struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_flavour lock_flav)
{
        struct lock_struct lock;

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = br_lck->fsp->fnum;
        lock.lock_type = UNLOCK_LOCK;
        lock.lock_flav = lock_flav;

        if (lock_flav == WINDOWS_LOCK) {
                return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
                    br_lck, &lock);
        } else {
                return brl_unlock_posix(msg_ctx, br_lck, &lock);
        }
}

/****************************************************************************
 Test if we could add a lock if we wanted to.
 Returns True if the region required is currently unlocked, False if locked.
****************************************************************************/

bool brl_locktest(struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav)
{
        bool ret = True;
        unsigned int i;
        struct lock_struct lock;
        const struct lock_struct *locks = br_lck->lock_data;
        files_struct *fsp = br_lck->fsp;

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = fsp->fnum;
        lock.lock_type = lock_type;
        lock.lock_flav = lock_flav;

        /* Make sure existing locks don't conflict */
        for (i=0; i < br_lck->num_locks; i++) {
                /*
                 * Our own locks don't conflict.
                 */
                if (brl_conflict_other(&locks[i], &lock)) {
                        return False;
                }
        }

        /*
         * There is no lock held by an SMB daemon, check to
         * see if there is a POSIX lock from a UNIX or NFS process.
         * This only conflicts with Windows locks, not POSIX locks.
         */

        if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
                ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);

                DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
                        (double)start, (double)size, ret ? "locked" : "unlocked",
                        fsp->fnum, fsp_str_dbg(fsp)));

                /* We need to return the inverse of is_posix_locked. */
                ret = !ret;
        }

        /* no conflicts - we could have added it */
        return ret;
}
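
/*
 * Worked note (illustrative only): if an NFS process holds a POSIX write
 * lock on [0, 100) and a Windows-flavour locktest asks about [50, 10),
 * then is_posix_locked() reports the region as locked, so the inversion
 * above makes brl_locktest() return False - the lock could not be added.
 */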
1326
1327 /****************************************************************************
1328  Query for existing locks.
1329 ****************************************************************************/
1330
1331 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1332                 uint64_t *psmblctx,
1333                 struct server_id pid,
1334                 br_off *pstart,
1335                 br_off *psize, 
1336                 enum brl_type *plock_type,
1337                 enum brl_flavour lock_flav)
1338 {
1339         unsigned int i;
1340         struct lock_struct lock;
1341         const struct lock_struct *locks = br_lck->lock_data;
1342         files_struct *fsp = br_lck->fsp;
1343
1344         lock.context.smblctx = *psmblctx;
1345         lock.context.pid = pid;
1346         lock.context.tid = br_lck->fsp->conn->cnum;
1347         lock.start = *pstart;
1348         lock.size = *psize;
1349         lock.fnum = fsp->fnum;
1350         lock.lock_type = *plock_type;
1351         lock.lock_flav = lock_flav;
1352
1353         /* Make sure existing locks don't conflict */
1354         for (i=0; i < br_lck->num_locks; i++) {
1355                 const struct lock_struct *exlock = &locks[i];
1356                 bool conflict = False;
1357
1358                 if (exlock->lock_flav == WINDOWS_LOCK) {
1359                         conflict = brl_conflict(exlock, &lock);
1360                 } else {        
1361                         conflict = brl_conflict_posix(exlock, &lock);
1362                 }
1363
1364                 if (conflict) {
1365                         *psmblctx = exlock->context.smblctx;
1366                         *pstart = exlock->start;
1367                         *psize = exlock->size;
1368                         *plock_type = exlock->lock_type;
1369                         return NT_STATUS_LOCK_NOT_GRANTED;
1370                 }
1371         }
1372
1373         /*
1374          * There is no lock held by an SMB daemon, check to
1375          * see if there is a POSIX lock from a UNIX or NFS process.
1376          */
1377
1378         if(lp_posix_locking(fsp->conn->params)) {
1379                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1380
1381                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1382                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1383                         fsp->fnum, fsp_str_dbg(fsp)));
1384
1385                 if (ret) {
1386                         /* A non-SMB process holds this lock, so there is no smblctx - report (uint64_t)-1. */
1387                         *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1388                         return NT_STATUS_LOCK_NOT_GRANTED;
1389                 }
1390         }
1391
1392         return NT_STATUS_OK;
1393 }
1394
1395
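/****************************************************************************
 Dispatch a Windows lock cancel through the VFS layer, so a VFS module can
 override the default brl_lock_cancel_default() behaviour.
****************************************************************************/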
1396 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1397                                      struct byte_range_lock *br_lck,
1398                                      struct lock_struct *plock,
1399                                      struct blocking_lock_record *blr)
1400 {
1401         VFS_FIND(brl_cancel_windows);
1402         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1403 }
1404
1405 /****************************************************************************
1406  Remove a particular pending lock.
1407 ****************************************************************************/
1408 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1409                 uint64_t smblctx,
1410                 struct server_id pid,
1411                 br_off start,
1412                 br_off size,
1413                 enum brl_flavour lock_flav,
1414                 struct blocking_lock_record *blr)
1415 {
1416         bool ret;
1417         struct lock_struct lock;
1418
1419         lock.context.smblctx = smblctx;
1420         lock.context.pid = pid;
1421         lock.context.tid = br_lck->fsp->conn->cnum;
1422         lock.start = start;
1423         lock.size = size;
1424         lock.fnum = br_lck->fsp->fnum;
1425         lock.lock_flav = lock_flav;
1426         /* lock.lock_type doesn't matter */
1427
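        /* Windows-flavour cancels are dispatched through the VFS so
           that a module can intercept them. */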
1428         if (lock_flav == WINDOWS_LOCK) {
1429                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1430                     &lock, blr);
1431         } else {
1432                 ret = brl_lock_cancel_default(br_lck, &lock);
1433         }
1434
1435         return ret;
1436 }
1437
1438 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1439                 struct lock_struct *plock)
1440 {
1441         unsigned int i;
1442         struct lock_struct *locks = br_lck->lock_data;
1443
1444         SMB_ASSERT(plock);
1445
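        /* Find the pending lock that exactly matches the context,
           fnum, flavour and range of the cancel request. */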
1446         for (i = 0; i < br_lck->num_locks; i++) {
1447                 struct lock_struct *lock = &locks[i];
1448
1449                 /* For pending locks we *always* care about the fnum. */
1450                 if (brl_same_context(&lock->context, &plock->context) &&
1451                                 lock->fnum == plock->fnum &&
1452                                 IS_PENDING_LOCK(lock->lock_type) &&
1453                                 lock->lock_flav == plock->lock_flav &&
1454                                 lock->start == plock->start &&
1455                                 lock->size == plock->size) {
1456                         break;
1457                 }
1458         }
1459
1460         if (i == br_lck->num_locks) {
1461                 /* Didn't find it. */
1462                 return False;
1463         }
1464
1465         /* Found this particular pending lock - delete it. */
1466         if (i < br_lck->num_locks - 1) {
1467                 memmove(&locks[i], &locks[i+1], 
1468                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1469         }
1470
1471         br_lck->num_locks -= 1;
1472         br_lck->modified = True;
1473         return True;
1474 }
1475
1476 /****************************************************************************
1477  Remove any locks associated with an open file.
1478  If this process holds Windows locks on this dev/ino pair through other
1479  fnums, locks are released individually so the system POSIX locks stay correct.
1480 ****************************************************************************/
1481
1482 void brl_close_fnum(struct messaging_context *msg_ctx,
1483                     struct byte_range_lock *br_lck)
1484 {
1485         files_struct *fsp = br_lck->fsp;
1486         uint16 tid = fsp->conn->cnum;
1487         int fnum = fsp->fnum;
1488         unsigned int i, j, dcount=0;
1489         int num_deleted_windows_locks = 0;
1490         struct lock_struct *locks = br_lck->lock_data;
1491         struct server_id pid = sconn_server_id(fsp->conn->sconn);
1492         bool unlock_individually = False;
1493         bool posix_level2_contention_ended = false;
1494
1495         if (lp_posix_locking(fsp->conn->params)) {
1496
1497                 /* Check if there are any Windows locks associated with this dev/ino
1498                    pair that are not this fnum. If so we need to call unlock on each
1499                    one in order to release the system POSIX locks correctly. */
1500
1501                 for (i=0; i < br_lck->num_locks; i++) {
1502                         struct lock_struct *lock = &locks[i];
1503
1504                         if (!procid_equal(&lock->context.pid, &pid)) {
1505                                 continue;
1506                         }
1507
1508                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1509                                 continue; /* Ignore pending. */
1510                         }
1511
1512                         if (lock->context.tid != tid || lock->fnum != fnum) {
1513                                 unlock_individually = True;
1514                                 break;
1515                         }
1516                 }
1517
1518                 if (unlock_individually) {
1519                         struct lock_struct *locks_copy;
1520                         unsigned int num_locks_copy;
1521
1522                         /* Copy the current lock array, since brl_unlock() below modifies it. */
1523                         if (br_lck->num_locks) {
1524                                 locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1525                                 if (!locks_copy) {
1526                                         smb_panic("brl_close_fnum: talloc failed");
1527                                 }
1528                         } else {        
1529                                 locks_copy = NULL;
1530                         }
1531
1532                         num_locks_copy = br_lck->num_locks;
1533
1534                         for (i=0; i < num_locks_copy; i++) {
1535                                 struct lock_struct *lock = &locks_copy[i];
1536
1537                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1538                                                 (lock->fnum == fnum)) {
1539                                         brl_unlock(msg_ctx,
1540                                                 br_lck,
1541                                                 lock->context.smblctx,
1542                                                 pid,
1543                                                 lock->start,
1544                                                 lock->size,
1545                                                 lock->lock_flav);
1546                                 }
1547                         }
1548                         return;
1549                 }
1550         }
1551
1552         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1553
1554         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1555
1556         for (i=0; i < br_lck->num_locks; i++) {
1557                 struct lock_struct *lock = &locks[i];
1558                 bool del_this_lock = False;
1559
1560                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1561                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1562                                 del_this_lock = True;
1563                                 num_deleted_windows_locks++;
1564                                 contend_level2_oplocks_end(br_lck->fsp,
1565                                     LEVEL2_CONTEND_WINDOWS_BRL);
1566                         } else if (lock->lock_flav == POSIX_LOCK) {
1567                                 del_this_lock = True;
1568
1569                                 /* Only end level2 contention once for posix */
1570                                 if (!posix_level2_contention_ended) {
1571                                         posix_level2_contention_ended = true;
1572                                         contend_level2_oplocks_end(br_lck->fsp,
1573                                             LEVEL2_CONTEND_POSIX_BRL);
1574                                 }
1575                         }
1576                 }
1577
1578                 if (del_this_lock) {
1579                         /* Send unlock messages to any pending waiters that overlap. */
1580                         for (j=0; j < br_lck->num_locks; j++) {
1581                                 struct lock_struct *pend_lock = &locks[j];
1582
1583                                 /* Ignore non-pending locks. */
1584                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1585                                         continue;
1586                                 }
1587
1588                                 /* Optimisation - don't send to this fnum as we're
1589                                    closing it. */
1590                                 if (pend_lock->context.tid == tid &&
1591                                     procid_equal(&pend_lock->context.pid, &pid) &&
1592                                     pend_lock->fnum == fnum) {
1593                                         continue;
1594                                 }
1595
1596                                 /* We could send specific lock info here... */
1597                                 if (brl_pending_overlap(lock, pend_lock)) {
1598                                         messaging_send(msg_ctx, pend_lock->context.pid,
1599                                                        MSG_SMB_UNLOCK, &data_blob_null);
1600                                 }
1601                         }
1602
1603                         /* found it - delete it */
1604                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1605                                 memmove(&locks[i], &locks[i+1], 
1606                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1607                         }
1608                         br_lck->num_locks--;
1609                         br_lck->modified = True;
1610                         i--; /* the array shifted down - recheck this index */
1611                         dcount++;
1612                 }
1613         }
1614
1615         if (lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1616                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1617                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1618         }
1619 }
1620
1621 /****************************************************************************
1622  Ensure this set of lock entries is valid - remove entries for dead processes.
1623 ****************************************************************************/
1624 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1625 {
1626         unsigned int i;
1627         unsigned int num_valid_entries = 0;
1628         struct lock_struct *locks = *pplocks;
1629
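        /* First pass: zero out entries belonging to dead processes.
           The compaction pass below recognises these by their zero
           smblctx and tid fields. */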
1630         for (i = 0; i < *pnum_entries; i++) {
1631                 struct lock_struct *lock_data = &locks[i];
1632                 if (!serverid_exists(&lock_data->context.pid)) {
1633                         /* This process no longer exists - mark this
1634                            entry as invalid by zeroing it. */
1635                         ZERO_STRUCTP(lock_data);
1636                 } else {
1637                         num_valid_entries++;
1638                 }
1639         }
1640
1641         if (num_valid_entries != *pnum_entries) {
1642                 struct lock_struct *new_lock_data = NULL;
1643
1644                 if (num_valid_entries) {
1645                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1646                         if (!new_lock_data) {
1647                                 DEBUG(3, ("malloc fail\n"));
1648                                 return False;
1649                         }
1650
1651                         num_valid_entries = 0;
1652                         for (i = 0; i < *pnum_entries; i++) {
1653                                 struct lock_struct *lock_data = &locks[i];
1654                                 if (lock_data->context.smblctx &&
1655                                                 lock_data->context.tid) {
1656                                         /* Valid (nonzero) entry - copy it. */
1657                                         memcpy(&new_lock_data[num_valid_entries],
1658                                                 lock_data, sizeof(struct lock_struct));
1659                                         num_valid_entries++;
1660                                 }
1661                         }
1662                 }
1663
1664                 SAFE_FREE(*pplocks);
1665                 *pplocks = new_lock_data;
1666                 *pnum_entries = num_valid_entries;
1667         }
1668
1669         return True;
1670 }
1671
1672 struct brl_forall_cb {
1673         void (*fn)(struct file_id id, struct server_id pid,
1674                    enum brl_type lock_type,
1675                    enum brl_flavour lock_flav,
1676                    br_off start, br_off size,
1677                    void *private_data);
1678         void *private_data;
1679 };
1680
1681 /****************************************************************************
1682  Traverse the whole database with this function, calling traverse_callback
1683  on each lock.
1684 ****************************************************************************/
1685
1686 static int traverse_fn(struct db_record *rec, void *state)
1687 {
1688         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1689         struct lock_struct *locks;
1690         struct file_id *key;
1691         unsigned int i;
1692         unsigned int num_locks = 0;
1693         unsigned int orig_num_locks = 0;
1694
1695         /* In a traverse function we must make a copy of
1696            the record value before modifying it. */
1697
1698         locks = (struct lock_struct *)memdup(rec->value.dptr,
1699                                              rec->value.dsize);
1700         if (!locks) {
1701                 return -1; /* Terminate traversal. */
1702         }
1703
1704         key = (struct file_id *)rec->key.dptr;
1705         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1706
1707         /* Ensure the lock db is clean of entries from invalid processes. */
1708
1709         if (!validate_lock_entries(&num_locks, &locks)) {
1710                 SAFE_FREE(locks);
1711                 return -1; /* Terminate traversal */
1712         }
1713
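        /* The validation pass removed stale entries - store the
           compacted set back, or delete the record if none remain. */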
1714         if (orig_num_locks != num_locks) {
1715                 if (num_locks) {
1716                         TDB_DATA data;
1717                         data.dptr = (uint8_t *)locks;
1718                         data.dsize = num_locks*sizeof(struct lock_struct);
1719                         rec->store(rec, data, TDB_REPLACE);
1720                 } else {
1721                         rec->delete_rec(rec);
1722                 }
1723         }
1724
1725         if (cb->fn) {
1726                 for (i=0; i<num_locks; i++) {
1727                         cb->fn(*key,
1728                                 locks[i].context.pid,
1729                                 locks[i].lock_type,
1730                                 locks[i].lock_flav,
1731                                 locks[i].start,
1732                                 locks[i].size,
1733                                 cb->private_data);
1734                 }
1735         }
1736
1737         SAFE_FREE(locks);
1738         return 0;
1739 }
1740
1741 /*******************************************************************
1742  Call the specified function on each lock in the database.
1743 ********************************************************************/
1744
1745 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1746                           enum brl_type lock_type,
1747                           enum brl_flavour lock_flav,
1748                           br_off start, br_off size,
1749                           void *private_data),
1750                void *private_data)
1751 {
1752         struct brl_forall_cb cb;
1753
1754         if (!brlock_db) {
1755                 return 0;
1756         }
1757         cb.fn = fn;
1758         cb.private_data = private_data;
1759         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1760 }
1761
1762 /*******************************************************************
1763  Store a potentially modified set of byte range lock data back into
1764  the database.
1765  Unlock the record.
1766 ********************************************************************/
1767
1768 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1769 {
1770         if (br_lck->read_only) {
1771                 SMB_ASSERT(!br_lck->modified);
1772         }
1773
1774         if (!br_lck->modified) {
1775                 goto done;
1776         }
1777
1778         if (br_lck->num_locks == 0) {
1779                 /* No locks - delete this entry. */
1780                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1781                 if (!NT_STATUS_IS_OK(status)) {
1782                         DEBUG(0, ("delete_rec returned %s\n",
1783                                   nt_errstr(status)));
1784                         smb_panic("Could not delete byte range lock entry");
1785                 }
1786         } else {
1787                 TDB_DATA data;
1788                 NTSTATUS status;
1789
1790                 data.dptr = (uint8 *)br_lck->lock_data;
1791                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1792
1793                 status = br_lck->record->store(br_lck->record, data,
1794                                                TDB_REPLACE);
1795                 if (!NT_STATUS_IS_OK(status)) {
1796                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1797                         smb_panic("Could not store byte range lock entry");
1798                 }
1799         }
1800
1801  done:
1802
1803         br_lck->read_only = true;
1804         br_lck->modified = false;
1805
1806         TALLOC_FREE(br_lck->record);
1807 }
1808
1809 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1810 {
1811         byte_range_lock_flush(br_lck);
1812         SAFE_FREE(br_lck->lock_data);
1813         return 0;
1814 }
1815
1816 /*******************************************************************
1817  Fetch a set of byte range lock data from the database.
1818  Leave the record locked.
1819  TALLOC_FREE(brl) will release the lock in the destructor.
1820 ********************************************************************/
1821
1822 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1823                                         files_struct *fsp, bool read_only)
1824 {
1825         TDB_DATA key, data;
1826         struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
1827         bool do_read_only = read_only;
1828
1829         if (br_lck == NULL) {
1830                 return NULL;
1831         }
1832
1833         br_lck->fsp = fsp;
1834         br_lck->num_locks = 0;
1835         br_lck->modified = False;
1836         br_lck->key = fsp->file_id;
1837
1838         key.dptr = (uint8 *)&br_lck->key;
1839         key.dsize = sizeof(struct file_id);
1840
1841         if (!fsp->lockdb_clean) {
1842                 /* We must be read/write to clean
1843                    the dead entries. */
1844                 do_read_only = false;
1845         }
1846
1847         if (do_read_only) {
1848                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) != 0) {
1849                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1850                         TALLOC_FREE(br_lck);
1851                         return NULL;
1852                 }
1853                 br_lck->record = NULL;
1854         } else {
1855                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1856
1857                 if (br_lck->record == NULL) {
1858                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1859                         TALLOC_FREE(br_lck);
1860                         return NULL;
1861                 }
1862
1863                 data = br_lck->record->value;
1864         }
1865
1866         br_lck->read_only = do_read_only;
1867         br_lck->lock_data = NULL;
1868
1869         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1870
1871         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1872
1873         if (br_lck->num_locks != 0) {
1874                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1875                                                      br_lck->num_locks);
1876                 if (br_lck->lock_data == NULL) {
1877                         DEBUG(0, ("malloc failed\n"));
1878                         TALLOC_FREE(br_lck);
1879                         return NULL;
1880                 }
1881
1882                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1883         }
1884
1885         if (!fsp->lockdb_clean) {
1886                 int orig_num_locks = br_lck->num_locks;
1887
1888                 /* This is the first time we've accessed this. */
1889                 /* Go through and ensure all entries exist - remove any that don't. */
1890                 /* Makes the lockdb self cleaning at low cost. */
1891
1892                 if (!validate_lock_entries(&br_lck->num_locks,
1893                                            &br_lck->lock_data)) {
1894                         SAFE_FREE(br_lck->lock_data);
1895                         TALLOC_FREE(br_lck);
1896                         return NULL;
1897                 }
1898
1899                 /* Ensure invalid locks are cleaned up in the destructor. */
1900                 if (orig_num_locks != br_lck->num_locks) {
1901                         br_lck->modified = True;
1902                 }
1903
1904                 /* Mark the lockdb as "clean" as seen from this open file. */
1905                 fsp->lockdb_clean = True;
1906         }
1907
1908         if (DEBUGLEVEL >= 10) {
1909                 unsigned int i;
1910                 struct lock_struct *locks = br_lck->lock_data;
1911                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1912                           br_lck->num_locks,
1913                           file_id_string_tos(&fsp->file_id)));
1914                 for (i = 0; i < br_lck->num_locks; i++) {
1915                         print_lock_struct(i, &locks[i]);
1916                 }
1917         }
1918
1919         if (do_read_only != read_only) {
1920                 /*
1921                  * This stores the record and releases the write
1922                  * lock that was only needed to clean dead entries.
1923                  */
1924                 byte_range_lock_flush(br_lck);
1925         }
1926
1927         return br_lck;
1928 }
1929
1930 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1931                                         files_struct *fsp)
1932 {
1933         return brl_get_locks_internal(mem_ctx, fsp, False);
1934 }
1935
1936 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1937 {
1938         struct byte_range_lock *br_lock;
1939
1940         if (lp_clustering()) {
1941                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1942         }
1943
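        /* The cached brlock record is valid only while the database
           sequence number is unchanged. */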
1944         if ((fsp->brlock_rec != NULL)
1945             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1946                 return fsp->brlock_rec;
1947         }
1948
1949         TALLOC_FREE(fsp->brlock_rec);
1950
1951         br_lock = brl_get_locks_internal(talloc_tos(), fsp, true);
1952         if (br_lock == NULL) {
1953                 return NULL;
1954         }
1955         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1956
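        /* Cache the record on the fsp so later read-only requests can
           reuse it until the seqnum changes. */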
1957         fsp->brlock_rec = talloc_move(fsp, &br_lock);
1958
1959         return fsp->brlock_rec;
1960 }
1961
1962 struct brl_revalidate_state {
1963         ssize_t array_size;
1964         uint32 num_pids;
1965         struct server_id *pids;
1966 };
1967
1968 /*
1969  * Collect PIDs of all processes with pending entries
1970  */
1971
1972 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1973                                    enum brl_type lock_type,
1974                                    enum brl_flavour lock_flav,
1975                                    br_off start, br_off size,
1976                                    void *private_data)
1977 {
1978         struct brl_revalidate_state *state =
1979                 (struct brl_revalidate_state *)private_data;
1980
1981         if (!IS_PENDING_LOCK(lock_type)) {
1982                 return;
1983         }
1984
1985         add_to_large_array(state, sizeof(pid), (void *)&pid,
1986                            &state->pids, &state->num_pids,
1987                            &state->array_size);
1988 }
1989
1990 /*
1991  * qsort callback to sort the processes
1992  */
1993
1994 static int compare_procids(const void *p1, const void *p2)
1995 {
1996         const struct server_id *i1 = (const struct server_id *)p1;
1997         const struct server_id *i2 = (const struct server_id *)p2;
1998
1999         if (i1->pid < i2->pid) return -1;
2000         if (i1->pid > i2->pid) return 1;
2001         return 0;
2002 }
2003
2004 /*
2005  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2006  * locks so that they retry. Mainly used in the cluster code after a node has
2007  * died.
2008  *
2009  * Done in two steps to avoid double-sends: First we collect all entries in an
2010  * array, then qsort that array and only send to non-dupes.
2011  */
2012
2013 static void brl_revalidate(struct messaging_context *msg_ctx,
2014                            void *private_data,
2015                            uint32_t msg_type,
2016                            struct server_id server_id,
2017                            DATA_BLOB *data)
2018 {
2019         struct brl_revalidate_state *state;
2020         uint32 i;
2021         struct server_id last_pid;
2022
2023         if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
2024                 DEBUG(0, ("talloc failed\n"));
2025                 return;
2026         }
2027
2028         brl_forall(brl_revalidate_collect, state);
2029
2030         if (state->array_size == -1) {
2031                 DEBUG(0, ("talloc failed\n"));
2032                 goto done;
2033         }
2034
2035         if (state->num_pids == 0) {
2036                 goto done;
2037         }
2038
2039         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2040
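        /* last_pid starts zeroed, which cannot match a real server_id,
           so the first entry in the sorted array is always sent to. */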
2041         ZERO_STRUCT(last_pid);
2042
2043         for (i=0; i<state->num_pids; i++) {
2044                 if (procid_equal(&last_pid, &state->pids[i])) {
2045                         /*
2046                          * We've seen that one already
2047                          */
2048                         continue;
2049                 }
2050
2051                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2052                                &data_blob_null);
2053                 last_pid = state->pids[i];
2054         }
2055
2056  done:
2057         TALLOC_FREE(state);
2058         return;
2059 }
2060
2061 void brl_register_msgs(struct messaging_context *msg_ctx)
2062 {
2063         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2064                            brl_revalidate);
2065 }