Revert "s3-build: taise tdb version when building against system libtdb library."
[kai/samba-autobuild/.git] / source3 / locking / brlock.c
/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

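/* Implementation note (added; a sketch of the layout, inferred from how
   the routines below treat the data): each open file's byte-range locks
   live in a single brlock.tdb record. The record key is the file's
   unique file_id (its device/inode pair) and the value is a flat array
   of struct lock_struct entries, which is why every routine in this
   module works on lock arrays. */
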
#include "includes.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, struct lock_struct *pls)
{
	DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %s, ",
			i,
			(unsigned int)pls->context.smbpid,
			(unsigned int)pls->context.tid,
			procid_str(talloc_tos(), &pls->context.pid) ));

	DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
		(double)pls->start,
		(double)pls->size,
		pls->fnum,
		lock_type_name(pls->lock_type),
		lock_flav_name(pls->lock_flav) ));
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

bool brl_same_context(const struct lock_context *ctx1,
			     const struct lock_context *ctx2)
{
	return (procid_equal(&ctx1->pid, &ctx2->pid) &&
		(ctx1->smbpid == ctx2->smbpid) &&
		(ctx1->tid == ctx2->tid));
}

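/* Illustrative note: a lock's owner is the triple (server process id,
   SMB pid, tree id). Two locks taken by the same process, SMB pid and
   tree id compare as the "same context" even when they were taken on
   different fnums; callers that also care about the fnum (for example
   the Windows stacking rule below) must check it separately. */
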
/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
			const struct lock_struct *lck2)
{
	/* XXX Remove for Win7 compatibility. */
	/* this extra check is not redundant - it copes with locks
	   that go beyond the end of 64 bit file space */
	if (lck1->size != 0 &&
	    lck1->start == lck2->start &&
	    lck1->size == lck2->size) {
		return True;
	}

	if (lck1->start >= (lck2->start+lck2->size) ||
	    lck2->start >= (lck1->start+lck1->size)) {
		return False;
	}
	return True;
}

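/* Worked example: a lock on bytes [0, 4) (start=0, size=4) and one on
   [4, 8) (start=4, size=4) do not overlap, because 4 >= 0 + 4. The
   equality check above matters for ranges such as
   start=0xFFFFFFFFFFFFFFFF, size=1, where start + size wraps to 0 and
   the arithmetic test alone would wrongly report no overlap between
   two identical locks. */
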
/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A READ lock can stack on top of a WRITE lock if they have the same
	 * context & fnum. */
	if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
	    brl_same_context(&lck1->context, &lck2->context) &&
	    lck1->fnum == lck2->fnum) {
		return False;
	}

	return brl_overlap(lck1, lck2);
}

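/* Example of the stacking rule above: if a client holds a WRITE lock
   on bytes [10, 20) of an fnum, the same context may stack a READ lock
   on [10, 20) of that fnum without conflict, but a READ lock on the
   same range from another fnum or another SMB pid is refused. */
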
/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1,
				const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
	SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
	SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* Locks on the same context don't conflict. Ignore fnum. */
	if (brl_same_context(&lck1->context, &lck2->context)) {
		return False;
	}

	/* One is read, the other write, or the context is different,
	   do they overlap ? */
	return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
static bool brl_conflict1(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	if (brl_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
		return False;
	}

	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
		return True;
	}

	if (lck1->start >= (lck2->start + lck2->size) ||
	    lck2->start >= (lck1->start + lck1->size)) {
		return False;
	}

	return True;
}
#endif
/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
{
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
		return False;

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
		return False;

	/* POSIX flavour locks never conflict here - this is only called
	   in the read/write path. */

	if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
		return False;

	/*
	 * Incoming WRITE locks conflict with existing READ locks even
	 * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
	 */

	if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
		if (brl_same_context(&lck1->context, &lck2->context) &&
					lck1->fnum == lck2->fnum)
			return False;
	}

	return brl_overlap(lck1, lck2);
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
	if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
		return True;
	if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
		return True;
	return False;
}

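/* Worked example: an unlock of [0, 10) overlaps a pending lock
   starting at byte 5 via the first test (0 <= 5 and 0 + 10 > 5); an
   unlock starting at byte 12 overlaps a pending lock on [5, 7) via the
   second test (12 >= 5 and 12 <= 5 + 7). */
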
/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
{
	if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
		/* amazing the little things you learn with a test
		   suite. Locks beyond this offset (as a 64 bit
		   number!) always generate the conflict error code,
		   unless the top bit is set */
		if (!blocking_lock) {
			fsp->last_lock_failure = *lock;
		}
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
			lock->context.tid == fsp->last_lock_failure.context.tid &&
			lock->fnum == fsp->last_lock_failure.fnum &&
			lock->start == fsp->last_lock_failure.start) {
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	if (!blocking_lock) {
		fsp->last_lock_failure = *lock;
	}
	return NT_STATUS_LOCK_NOT_GRANTED;
}

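/* Behavioural sketch: the first failed attempt on a lockable range
   returns NT_STATUS_LOCK_NOT_GRANTED and is remembered in
   fsp->last_lock_failure; an immediate retry of the same failing lock
   (same pid, tid, fnum and start) returns NT_STATUS_FILE_LOCK_CONFLICT
   instead, mimicking the w2k3 behaviour described above. */
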
/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
	int tdb_flags;

	if (brlock_db) {
		return;
	}

	tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;

	if (!lp_clustering()) {
		/*
		 * We can't use the SEQNUM trick to cache brlock
		 * entries in the clustering case because ctdb seqnum
		 * propagation has a delay.
		 */
		tdb_flags |= TDB_SEQNUM;
	}

	brlock_db = db_open(NULL, lock_path("brlock.tdb"),
			    lp_open_files_db_hash_size(), tdb_flags,
			    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
	if (!brlock_db) {
		DEBUG(0,("Failed to open byte range locking database %s\n",
			lock_path("brlock.tdb")));
		return;
	}
}

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
	TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	if (lck1->start != lck2->start) {
		return (lck1->start - lck2->start);
	}
	if (lck2->size != lck1->size) {
		return ((int)lck1->size - (int)lck2->size);
	}
	return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
	unsigned int i;
	files_struct *fsp = br_lck->fsp;
	struct lock_struct *locks = br_lck->lock_data;
	NTSTATUS status;

	SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

	for (i=0; i < br_lck->num_locks; i++) {
		/* Do any Windows or POSIX locks conflict ? */
		if (brl_conflict(&locks[i], plock)) {
			/* Remember who blocked us. */
			plock->context.smbpid = locks[i].context.smbpid;
			return brl_lock_failed(fsp,plock,blocking_lock);
		}
#if ZERO_ZERO
		if (plock->start == 0 && plock->size == 0 &&
				locks[i].size == 0) {
			break;
		}
#endif
	}

	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}

	/* We can get the Windows lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
		int errno_ret;
		if (!set_posix_lock_windows_flavour(fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&plock->context,
				locks,
				br_lck->num_locks,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smbpid = 0xFFFFFFFF;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				/* Use the errno saved by the lower
				   layer; the global errno may have been
				   clobbered since the failing call. */
				status = map_nt_error_from_unix(errno_ret);
				goto fail;
			}
		}
	}

	/* no conflicts - add it to the list of locks */
	locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
	if (!locks) {
		status = NT_STATUS_NO_MEMORY;
		goto fail;
	}

	memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
	br_lck->num_locks += 1;
	br_lck->lock_data = locks;
	br_lck->modified = True;

	return NT_STATUS_OK;
 fail:
	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}
	return status;
}

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,	/* Output array. */
						struct lock_struct *ex,		/* existing lock. */
						struct lock_struct *plock)	/* proposed lock. */
{
	bool lock_types_differ = (ex->lock_type != plock->lock_type);

	/* We can't merge non-conflicting locks on different context - ignore fnum. */

	if (!brl_same_context(&ex->context, &plock->context)) {
		/* Just copy. */
		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

	/* We now know we have the same context. */

	/* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

	if ( (ex->start > (plock->start + plock->size)) ||
		(plock->start > (ex->start + ex->size))) {

		/* No overlap with this lock - copy existing. */

		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start + ex->size <= plock->start + plock->size) ) {

		/* Replace - discard existing lock. */

		return 0;
	}

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if (plock->start + plock->size == ex->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if (ex->start + ex->size == plock->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the existing lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->start = ex->start;
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start <= plock->start + plock->size) &&
		(ex->start + ex->size > plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add remaining existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing start and size. */
			lck_arr[0].start = plock->start + plock->size;
			lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += (ex->start + ex->size) - (plock->start + plock->size);
			return 0;
		}
	}

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if ( (ex->start < plock->start) &&
			(ex->start + ex->size >= plock->start) &&
			(ex->start + ex->size <= plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the truncated old lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing size. */
			lck_arr[0].size = plock->start - ex->start;
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. MUST ADJUST plock SIZE FIRST ! */
			plock->size += (plock->start - ex->start);
			plock->start = ex->start;
			return 0;
		}
	}

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

	if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

		if (lock_types_differ) {

			/* We have to split ex into two locks here. */

			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

			/* Adjust first existing size. */
			lck_arr[0].size = plock->start - ex->start;

			/* Adjust second existing start and size. */
			lck_arr[1].start = plock->start + plock->size;
			lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 2;
		} else {
			/* Just eat the existing locks, merge them into plock. */
			plock->start = ex->start;
			plock->size = ex->size;
			return 0;
		}
	}

	/* Never get here. */
	smb_panic("brlock_posix_split_merge");
	/* Notreached. */

	/* Keep some compilers happy. */
	return 0;
}

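/* Usage sketch: callers feed each existing lock through this routine
   in turn, accumulating results in an output array sized for the worst
   case (one split yields two fragments). A return of 0 means the
   existing lock was consumed - replaced by, or merged into, *plock;
   1 means one lock was written to lck_arr (a copy or a trimmed
   remainder); 2 means the existing lock was split around plock. */
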
/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       struct lock_struct *plock)
{
	unsigned int i, count, posix_count;
	struct lock_struct *locks = br_lck->lock_data;
	struct lock_struct *tp;
	bool signal_pending_read = False;
	bool break_oplocks = false;
	NTSTATUS status;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size < plock->start ||
			plock->start + plock->size < plock->size) {
		return NT_STATUS_INVALID_PARAMETER;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, and add our lock,
	   so we need at most 2 more entries. */

	tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
	if (!tp) {
		return NT_STATUS_NO_MEMORY;
	}

	count = posix_count = 0;

	for (i=0; i < br_lck->num_locks; i++) {
		struct lock_struct *curr_lock = &locks[i];

		/* If we have a pending read lock, a lock downgrade should
		   trigger a lock re-evaluation. */
		if (curr_lock->lock_type == PENDING_READ_LOCK &&
				brl_pending_overlap(plock, curr_lock)) {
			signal_pending_read = True;
		}

		if (curr_lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(curr_lock, plock)) {
				/* No games with error messages. */
				SAFE_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smbpid = curr_lock->context.smbpid;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
			count++;
		} else {
			unsigned int tmp_count = 0;

			/* POSIX conflict semantics are different. */
			if (brl_conflict_posix(curr_lock, plock)) {
				/* Can't block ourselves with POSIX locks. */
				/* No games with error messages. */
				SAFE_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smbpid = curr_lock->context.smbpid;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}

			/* Work out overlaps. */
			tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
			posix_count += tmp_count;
			count += tmp_count;
		}
	}

	/*
	 * Break oplocks while we hold a brl. Since lock() and unlock() calls
	 * are not symmetric with POSIX semantics, we cannot guarantee our
	 * contend_level2_oplocks_begin/end calls will be acquired and
	 * released one-for-one as with Windows semantics. Therefore we only
	 * call contend_level2_oplocks_begin if this is the first POSIX brl on
	 * the file.
	 */
	break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
			 posix_count == 0);
	if (break_oplocks) {
		contend_level2_oplocks_begin(br_lck->fsp,
					     LEVEL2_CONTEND_POSIX_BRL);
	}

	/* Try and add the lock in order, sorted by lock start. */
	for (i=0; i < count; i++) {
		struct lock_struct *curr_lock = &tp[i];

		if (curr_lock->start <= plock->start) {
			continue;
		}
		/* Found the first lock that starts beyond ours -
		   insert in front of it. */
		break;
	}

	if (i < count) {
		memmove(&tp[i+1], &tp[i],
			(count - i)*sizeof(struct lock_struct));
	}
	memcpy(&tp[i], plock, sizeof(struct lock_struct));
	count++;

	/* We can get the POSIX lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
		int errno_ret;

		/* The lower layer just needs to attempt to
		   get the system POSIX lock. We've weeded out
		   any conflicts above. */

		if (!set_posix_lock_posix_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smbpid = 0xFFFFFFFF;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				SAFE_FREE(tp);
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				SAFE_FREE(tp);
				/* Use the errno saved by the lower
				   layer; the global errno may have been
				   clobbered since the failing call. */
				status = map_nt_error_from_unix(errno_ret);
				goto fail;
			}
		}
	}

	/* If we didn't use all the allocated size,
	 * realloc so we don't leak entries per lock call. */
	if (count < br_lck->num_locks + 2) {
		tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
		if (!tp) {
			status = NT_STATUS_NO_MEMORY;
			goto fail;
		}
	}

	br_lck->num_locks = count;
	SAFE_FREE(br_lck->lock_data);
	br_lck->lock_data = tp;
	locks = tp;
	br_lck->modified = True;

	/* A successful downgrade from write to read lock can trigger a lock
	   re-evaluation where waiting readers can now proceed. */

	if (signal_pending_read) {
		/* Send unlock messages to any pending read waiters that overlap. */
		for (i=0; i < br_lck->num_locks; i++) {
			struct lock_struct *pend_lock = &locks[i];

			/* Ignore non-pending locks. */
			if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
				continue;
			}

			if (pend_lock->lock_type == PENDING_READ_LOCK &&
					brl_pending_overlap(plock, pend_lock)) {
				DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
					procid_str_static(&pend_lock->context.pid )));

				messaging_send(msg_ctx, pend_lock->context.pid,
					       MSG_SMB_UNLOCK, &data_blob_null);
			}
		}
	}

	return NT_STATUS_OK;
 fail:
	if (break_oplocks) {
		contend_level2_oplocks_end(br_lck->fsp,
					   LEVEL2_CONTEND_POSIX_BRL);
	}
	return status;
}

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
				       struct byte_range_lock *br_lck,
				       struct lock_struct *plock,
				       bool blocking_lock,
				       struct blocking_lock_record *blr)
{
	VFS_FIND(brl_lock_windows);
	return handle->fns->brl_lock_windows(handle, br_lck, plock,
					     blocking_lock, blr);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
		struct byte_range_lock *br_lck,
		uint32 smbpid,
		struct server_id pid,
		br_off start,
		br_off size,
		enum brl_type lock_type,
		enum brl_flavour lock_flav,
		bool blocking_lock,
		uint32 *psmbpid,
		struct blocking_lock_record *blr)
{
	NTSTATUS ret;
	struct lock_struct lock;

#if !ZERO_ZERO
	if (start == 0 && size == 0) {
		DEBUG(0,("client sent 0/0 lock - please report this\n"));
	}
#endif

#ifdef DEVELOPER
	/* Quieten valgrind on test. */
	memset(&lock, '\0', sizeof(lock));
#endif

	lock.context.smbpid = smbpid;
	lock.context.pid = pid;
	lock.context.tid = br_lck->fsp->conn->cnum;
	lock.start = start;
	lock.size = size;
	lock.fnum = br_lck->fsp->fnum;
	lock.lock_type = lock_type;
	lock.lock_flav = lock_flav;

	if (lock_flav == WINDOWS_LOCK) {
		ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
		    &lock, blocking_lock, blr);
	} else {
		ret = brl_lock_posix(msg_ctx, br_lck, &lock);
	}

#if ZERO_ZERO
	/* sort the lock list */
	qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
#endif

	/* If we're returning an error, return who blocked us. */
	if (!NT_STATUS_IS_OK(ret) && psmbpid) {
		*psmbpid = lock.context.smbpid;
	}
	return ret;
}

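/* Illustrative call (variable names hypothetical): a caller holding
   the byte_range_lock record for an fsp could request a Windows
   flavour write lock on bytes [0, 100) like this:

	status = brl_lock(msg_ctx, br_lck, smbpid, procid_self(),
			  0, 100, WRITE_LOCK, WINDOWS_LOCK,
			  False, &blocker_smbpid, NULL);

   On failure, blocker_smbpid holds the SMB pid of the lock owner that
   blocked us (or 0xFFFFFFFF if unknown). */
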
/****************************************************************************
 Unlock a range of bytes - Windows semantics.
****************************************************************************/

bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       const struct lock_struct *plock)
{
	unsigned int i, j;
	struct lock_struct *locks = br_lck->lock_data;
	enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

	SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
	/* Delete write locks by preference... The lock list
	   is sorted in the zero zero case. */

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->lock_type == WRITE_LOCK &&
		    brl_same_context(&lock->context, &plock->context) &&
		    lock->fnum == plock->fnum &&
		    lock->lock_flav == WINDOWS_LOCK &&
		    lock->start == plock->start &&
		    lock->size == plock->size) {

			/* found it - delete it */
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i != br_lck->num_locks) {
		/* We found it - don't search again. */
		goto unlock_continue;
	}
#endif

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		/* Only remove our own locks that match in start, size, and flavour. */
		if (brl_same_context(&lock->context, &plock->context) &&
					lock->fnum == plock->fnum &&
					lock->lock_flav == WINDOWS_LOCK &&
					lock->start == plock->start &&
					lock->size == plock->size ) {
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i == br_lck->num_locks) {
		/* we didn't find it */
		return False;
	}

#if ZERO_ZERO
  unlock_continue:
#endif

	/* Actually delete the lock. */
	if (i < br_lck->num_locks - 1) {
		memmove(&locks[i], &locks[i+1],
			sizeof(*locks)*((br_lck->num_locks-1) - i));
	}

	br_lck->num_locks -= 1;
	br_lck->modified = True;

	/* Unlock the underlying POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_windows_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				deleted_lock_type,
				&plock->context,
				locks,
				br_lck->num_locks);
	}

	/* Send unlock messages to any pending waiters that overlap. */
	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	return True;
}

/****************************************************************************
 Unlock a range of bytes - POSIX semantics.
****************************************************************************/

static bool brl_unlock_posix(struct messaging_context *msg_ctx,
			     struct byte_range_lock *br_lck,
			     struct lock_struct *plock)
{
	unsigned int i, j, count;
	struct lock_struct *tp;
	struct lock_struct *locks = br_lck->lock_data;
	bool overlap_found = False;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return False;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size < plock->start ||
			plock->start + plock->size < plock->size) {
		DEBUG(10,("brl_unlock_posix: lock wrap\n"));
		return False;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, so we need at most
	   1 more entry. */

	tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
	if (!tp) {
		DEBUG(10,("brl_unlock_posix: malloc fail\n"));
		return False;
	}

	count = 0;
	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];
		unsigned int tmp_count;

		/* Only remove our own locks - ignore fnum. */
		if (IS_PENDING_LOCK(lock->lock_type) ||
				!brl_same_context(&lock->context, &plock->context)) {
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		if (lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(lock, plock)) {
				SAFE_FREE(tp);
				return false;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		/* Work out overlaps. */
		tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

		if (tmp_count == 0) {
			/* plock overlapped the existing lock completely,
			   or replaced it. Don't copy the existing lock. */
			overlap_found = true;
		} else if (tmp_count == 1) {
			/* Either no overlap, (simple copy of existing lock) or
			 * an overlap of an existing lock. */
			/* If the lock changed size, we had an overlap. */
			if (tp[count].size != lock->size) {
				overlap_found = true;
			}
			count += tmp_count;
		} else if (tmp_count == 2) {
			/* We split a lock range in two. */
			overlap_found = true;
			count += tmp_count;

			/* Optimisation... */
			/* We know we're finished here as we can't overlap any
			   more POSIX locks. Copy the rest of the lock array. */

			if (i < br_lck->num_locks - 1) {
				memcpy(&tp[count], &locks[i+1],
					sizeof(*locks)*((br_lck->num_locks-1) - i));
				count += ((br_lck->num_locks-1) - i);
			}
			break;
		}

	}

	if (!overlap_found) {
		/* Just ignore - no change. */
		SAFE_FREE(tp);
		DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
		return True;
	}

	/* Unlock any POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_posix_flavour(br_lck->fsp,
						plock->start,
						plock->size,
						&plock->context,
						tp,
						count);
	}

	/* Realloc so we don't leak entries per unlock call. */
	if (count) {
		tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
		if (!tp) {
			DEBUG(10,("brl_unlock_posix: realloc fail\n"));
			return False;
		}
	} else {
		/* We deleted the last lock. */
		SAFE_FREE(tp);
		tp = NULL;
	}

	contend_level2_oplocks_end(br_lck->fsp,
				   LEVEL2_CONTEND_POSIX_BRL);

	br_lck->num_locks = count;
	SAFE_FREE(br_lck->lock_data);
	locks = tp;
	br_lck->lock_data = tp;
	br_lck->modified = True;

	/* Send unlock messages to any pending waiters that overlap. */

	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	return True;
}

bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
				     struct messaging_context *msg_ctx,
				     struct byte_range_lock *br_lck,
				     const struct lock_struct *plock)
{
	VFS_FIND(brl_unlock_windows);
	return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
}

/****************************************************************************
 Unlock a range of bytes.
****************************************************************************/

bool brl_unlock(struct messaging_context *msg_ctx,
		struct byte_range_lock *br_lck,
		uint32 smbpid,
		struct server_id pid,
		br_off start,
		br_off size,
		enum brl_flavour lock_flav)
{
	struct lock_struct lock;

	lock.context.smbpid = smbpid;
	lock.context.pid = pid;
	lock.context.tid = br_lck->fsp->conn->cnum;
	lock.start = start;
	lock.size = size;
	lock.fnum = br_lck->fsp->fnum;
	lock.lock_type = UNLOCK_LOCK;
	lock.lock_flav = lock_flav;

	if (lock_flav == WINDOWS_LOCK) {
		return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
		    br_lck, &lock);
	} else {
		return brl_unlock_posix(msg_ctx, br_lck, &lock);
	}
}

/****************************************************************************
 Test if we could add a lock if we wanted to.
 Returns True if the region required is currently unlocked, False if locked.
****************************************************************************/

bool brl_locktest(struct byte_range_lock *br_lck,
		uint32 smbpid,
		struct server_id pid,
		br_off start,
		br_off size,
		enum brl_type lock_type,
		enum brl_flavour lock_flav)
{
	bool ret = True;
	unsigned int i;
	struct lock_struct lock;
	const struct lock_struct *locks = br_lck->lock_data;
	files_struct *fsp = br_lck->fsp;

	lock.context.smbpid = smbpid;
	lock.context.pid = pid;
	lock.context.tid = br_lck->fsp->conn->cnum;
	lock.start = start;
	lock.size = size;
	lock.fnum = fsp->fnum;
	lock.lock_type = lock_type;
	lock.lock_flav = lock_flav;

	/* Make sure existing locks don't conflict */
	for (i=0; i < br_lck->num_locks; i++) {
		/*
		 * Our own locks don't conflict.
		 */
		if (brl_conflict_other(&locks[i], &lock)) {
			return False;
		}
	}

	/*
	 * There is no lock held by an SMB daemon, check to
	 * see if there is a POSIX lock from a UNIX or NFS process.
	 * This only conflicts with Windows locks, not POSIX locks.
	 */

	if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
		ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);

		DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
			(double)start, (double)size, ret ? "locked" : "unlocked",
			fsp->fnum, fsp_str_dbg(fsp)));

		/* We need to return the inverse of is_posix_locked. */
		ret = !ret;
	}

	/* no conflicts - we could have added it */
	return ret;
}

1312
1313 /****************************************************************************
1314  Query for existing locks.
1315 ****************************************************************************/
1316
1317 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1318                 uint32 *psmbpid,
1319                 struct server_id pid,
1320                 br_off *pstart,
1321                 br_off *psize, 
1322                 enum brl_type *plock_type,
1323                 enum brl_flavour lock_flav)
1324 {
1325         unsigned int i;
1326         struct lock_struct lock;
1327         const struct lock_struct *locks = br_lck->lock_data;
1328         files_struct *fsp = br_lck->fsp;
1329
1330         lock.context.smbpid = *psmbpid;
1331         lock.context.pid = pid;
1332         lock.context.tid = br_lck->fsp->conn->cnum;
1333         lock.start = *pstart;
1334         lock.size = *psize;
1335         lock.fnum = fsp->fnum;
1336         lock.lock_type = *plock_type;
1337         lock.lock_flav = lock_flav;
1338
1339         /* Make sure existing locks don't conflict */
1340         for (i=0; i < br_lck->num_locks; i++) {
1341                 const struct lock_struct *exlock = &locks[i];
1342                 bool conflict = False;
1343
1344                 if (exlock->lock_flav == WINDOWS_LOCK) {
1345                         conflict = brl_conflict(exlock, &lock);
1346                 } else {        
1347                         conflict = brl_conflict_posix(exlock, &lock);
1348                 }
1349
1350                 if (conflict) {
1351                         *psmbpid = exlock->context.smbpid;
1352                         *pstart = exlock->start;
1353                         *psize = exlock->size;
1354                         *plock_type = exlock->lock_type;
1355                         return NT_STATUS_LOCK_NOT_GRANTED;
1356                 }
1357         }
1358
1359         /*
1360          * There is no lock held by an SMB daemon, check to
1361          * see if there is a POSIX lock from a UNIX or NFS process.
1362          */
1363
1364         if(lp_posix_locking(fsp->conn->params)) {
1365                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1366
1367                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1368                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1369                         fsp->fnum, fsp_str_dbg(fsp)));
1370
1371                 if (ret) {
1372                         /* Hmmm. No clue what to set smbpid to - use -1. */
1373                         *psmbpid = 0xFFFF;
1374                         return NT_STATUS_LOCK_NOT_GRANTED;
1375                 }
1376         }
1377
1378         return NT_STATUS_OK;
1379 }
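
/* Usage sketch (illustrative only - "my_br_lck" is hypothetical caller
   state, not a name from this module): testing whether the first 100
   bytes of a file are locked by someone else.

        uint32 smbpid = 0;      (the caller's real SMB PID in practice)
        br_off start = 0, size = 100;
        enum brl_type lock_type = READ_LOCK;
        NTSTATUS status = brl_lockquery(my_br_lck, &smbpid,
                                        procid_self(), &start, &size,
                                        &lock_type, WINDOWS_LOCK);

   On NT_STATUS_LOCK_NOT_GRANTED the in/out parameters have been
   overwritten with the conflicting lock's smbpid, range and type. */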
1380
1381
1382 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1383                                      struct byte_range_lock *br_lck,
1384                                      struct lock_struct *plock,
1385                                      struct blocking_lock_record *blr)
1386 {
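        /* VFS_FIND advances handle along the module stack until it
           reaches a module that implements brl_cancel_windows. */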
1387         VFS_FIND(brl_cancel_windows);
1388         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1389 }
1390
1391 /****************************************************************************
1392  Remove a particular pending lock.
1393 ****************************************************************************/
1394 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1395                 uint32 smbpid,
1396                 struct server_id pid,
1397                 br_off start,
1398                 br_off size,
1399                 enum brl_flavour lock_flav,
1400                 struct blocking_lock_record *blr)
1401 {
1402         bool ret;
1403         struct lock_struct lock;
1404
1405         lock.context.smbpid = smbpid;
1406         lock.context.pid = pid;
1407         lock.context.tid = br_lck->fsp->conn->cnum;
1408         lock.start = start;
1409         lock.size = size;
1410         lock.fnum = br_lck->fsp->fnum;
1411         lock.lock_flav = lock_flav;
1412         /* lock.lock_type doesn't matter - a cancel matches on context, fnum, flavour and range; the type only has to be pending. */
1413
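        /* Windows-flavour cancels go through the VFS so that stackable
           modules can intercept them; other flavours use the default
           implementation directly. */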
1414         if (lock_flav == WINDOWS_LOCK) {
1415                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1416                     &lock, blr);
1417         } else {
1418                 ret = brl_lock_cancel_default(br_lck, &lock);
1419         }
1420
1421         return ret;
1422 }
1423
1424 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1425                 struct lock_struct *plock)
1426 {
1427         unsigned int i;
1428         struct lock_struct *locks = br_lck->lock_data;
1429
1430         SMB_ASSERT(plock);
1431
1432         for (i = 0; i < br_lck->num_locks; i++) {
1433                 struct lock_struct *lock = &locks[i];
1434
1435                 /* For pending locks we *always* care about the fnum. */
1436                 if (brl_same_context(&lock->context, &plock->context) &&
1437                                 lock->fnum == plock->fnum &&
1438                                 IS_PENDING_LOCK(lock->lock_type) &&
1439                                 lock->lock_flav == plock->lock_flav &&
1440                                 lock->start == plock->start &&
1441                                 lock->size == plock->size) {
1442                         break;
1443                 }
1444         }
1445
1446         if (i == br_lck->num_locks) {
1447                 /* Didn't find it. */
1448                 return False;
1449         }
1450
1451         if (i < br_lck->num_locks - 1) {
1452                 /* Found it mid-array - shift later entries down over it. */
1453                 memmove(&locks[i], &locks[i+1],
1454                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1455         }
1456
1457         br_lck->num_locks -= 1;
1458         br_lck->modified = True;
1459         return True;
1460 }
1461
1462 /****************************************************************************
1463  Remove any locks associated with an open file.
1464  If this process holds locks on other fnums sharing this dev/ino pair,
1465  each lock is released individually to keep the system POSIX locks correct.
1466 ****************************************************************************/
1467
1468 void brl_close_fnum(struct messaging_context *msg_ctx,
1469                     struct byte_range_lock *br_lck)
1470 {
1471         files_struct *fsp = br_lck->fsp;
1472         uint16 tid = fsp->conn->cnum;
1473         int fnum = fsp->fnum;
1474         unsigned int i, j, dcount=0;
1475         int num_deleted_windows_locks = 0;
1476         struct lock_struct *locks = br_lck->lock_data;
1477         struct server_id pid = procid_self();
1478         bool unlock_individually = False;
1479         bool posix_level2_contention_ended = false;
1480
1481         if (lp_posix_locking(fsp->conn->params)) {
1482
1483                 /* Check if there are any Windows locks associated with this dev/ino
1484                    pair that are not this fnum. If so we need to call unlock on each
1485                    one in order to release the system POSIX locks correctly. */
1486
1487                 for (i=0; i < br_lck->num_locks; i++) {
1488                         struct lock_struct *lock = &locks[i];
1489
1490                         if (!procid_equal(&lock->context.pid, &pid)) {
1491                                 continue;
1492                         }
1493
1494                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1495                                 continue; /* Ignore pending. */
1496                         }
1497
1498                         if (lock->context.tid != tid || lock->fnum != fnum) {
1499                                 unlock_individually = True;
1500                                 break;
1501                         }
1502                 }
1503
1504                 if (unlock_individually) {
1505                         struct lock_struct *locks_copy;
1506                         unsigned int num_locks_copy;
1507
1508                         /* Copy the current lock array. */
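                        /* brl_unlock modifies br_lck->lock_data in
                           place, so we must iterate over a private
                           copy. */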
1509                         if (br_lck->num_locks) {
1510                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1511                                 if (!locks_copy) {
1512                                         smb_panic("brl_close_fnum: talloc failed");
1513                                 }
1514                         } else {        
1515                                 locks_copy = NULL;
1516                         }
1517
1518                         num_locks_copy = br_lck->num_locks;
1519
1520                         for (i=0; i < num_locks_copy; i++) {
1521                                 struct lock_struct *lock = &locks_copy[i];
1522
1523                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1524                                                 (lock->fnum == fnum)) {
1525                                         brl_unlock(msg_ctx,
1526                                                 br_lck,
1527                                                 lock->context.smbpid,
1528                                                 pid,
1529                                                 lock->start,
1530                                                 lock->size,
1531                                                 lock->lock_flav);
1532                                 }
1533                         }
1534                         return;
1535                 }
1536         }
1537
1538         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1539
1540         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1541
1542         for (i=0; i < br_lck->num_locks; i++) {
1543                 struct lock_struct *lock = &locks[i];
1544                 bool del_this_lock = False;
1545
1546                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1547                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1548                                 del_this_lock = True;
1549                                 num_deleted_windows_locks++;
1550                                 contend_level2_oplocks_end(br_lck->fsp,
1551                                     LEVEL2_CONTEND_WINDOWS_BRL);
1552                         } else if (lock->lock_flav == POSIX_LOCK) {
1553                                 del_this_lock = True;
1554
1555                                 /* Only end level2 contention once for posix */
1556                                 if (!posix_level2_contention_ended) {
1557                                         posix_level2_contention_ended = true;
1558                                         contend_level2_oplocks_end(br_lck->fsp,
1559                                             LEVEL2_CONTEND_POSIX_BRL);
1560                                 }
1561                         }
1562                 }
1563
1564                 if (del_this_lock) {
1565                         /* Send unlock messages to any pending waiters that overlap. */
1566                         for (j=0; j < br_lck->num_locks; j++) {
1567                                 struct lock_struct *pend_lock = &locks[j];
1568
1569                                 /* Ignore our own or non-pending locks. */
1570                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1571                                         continue;
1572                                 }
1573
1574                                 /* Optimisation - don't send to this fnum as we're
1575                                    closing it. */
1576                                 if (pend_lock->context.tid == tid &&
1577                                     procid_equal(&pend_lock->context.pid, &pid) &&
1578                                     pend_lock->fnum == fnum) {
1579                                         continue;
1580                                 }
1581
1582                                 /* We could send specific lock info here... */
1583                                 if (brl_pending_overlap(lock, pend_lock)) {
1584                                         messaging_send(msg_ctx, pend_lock->context.pid,
1585                                                        MSG_SMB_UNLOCK, &data_blob_null);
1586                                 }
1587                         }
1588
1589                         /* found it - delete it */
1590                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1591                                 memmove(&locks[i], &locks[i+1], 
1592                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1593                         }
1594                         br_lck->num_locks--;
1595                         br_lck->modified = True;
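                        /* Step back one so the entry shifted into
                           slot i is examined on the next pass. */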
1596                         i--;
1597                         dcount++;
1598                 }
1599         }
1600
1601         if (lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1602                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1603                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1604         }
1605 }
1606
1607 /****************************************************************************
1608  Ensure this set of lock entries is valid.
1609 ****************************************************************************/
1610 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1611 {
1612         unsigned int i;
1613         unsigned int num_valid_entries = 0;
1614         struct lock_struct *locks = *pplocks;
1615
1616         for (i = 0; i < *pnum_entries; i++) {
1617                 struct lock_struct *lock_data = &locks[i];
1618                 if (!process_exists(lock_data->context.pid)) {
1619                         /* This process no longer exists - mark this
1620                            entry as invalid by zeroing it. */
1621                         ZERO_STRUCTP(lock_data);
1622                 } else {
1623                         num_valid_entries++;
1624                 }
1625         }
1626
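        /* If anything was zeroed above, compact the surviving entries
           into a freshly allocated array. A zeroed entry is recognised
           below by its null smbpid/tid pair. */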
1627         if (num_valid_entries != *pnum_entries) {
1628                 struct lock_struct *new_lock_data = NULL;
1629
1630                 if (num_valid_entries) {
1631                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1632                         if (!new_lock_data) {
1633                                 DEBUG(3, ("malloc fail\n"));
1634                                 return False;
1635                         }
1636
1637                         num_valid_entries = 0;
1638                         for (i = 0; i < *pnum_entries; i++) {
1639                                 struct lock_struct *lock_data = &locks[i];
1640                                 if (lock_data->context.smbpid &&
1641                                                 lock_data->context.tid) {
1642                                         /* Valid (nonzero) entry - copy it. */
1643                                         memcpy(&new_lock_data[num_valid_entries],
1644                                                 lock_data, sizeof(struct lock_struct));
1645                                         num_valid_entries++;
1646                                 }
1647                         }
1648                 }
1649
1650                 SAFE_FREE(*pplocks);
1651                 *pplocks = new_lock_data;
1652                 *pnum_entries = num_valid_entries;
1653         }
1654
1655         return True;
1656 }
1657
1658 struct brl_forall_cb {
1659         void (*fn)(struct file_id id, struct server_id pid,
1660                    enum brl_type lock_type,
1661                    enum brl_flavour lock_flav,
1662                    br_off start, br_off size,
1663                    void *private_data);
1664         void *private_data;
1665 };
1666
1667 /****************************************************************************
1668  Traverse the whole database with this function, calling traverse_callback
1669  on each lock.
1670 ****************************************************************************/
1671
1672 static int traverse_fn(struct db_record *rec, void *state)
1673 {
1674         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1675         struct lock_struct *locks;
1676         struct file_id *key;
1677         unsigned int i;
1678         unsigned int num_locks = 0;
1679         unsigned int orig_num_locks = 0;
1680
1681         /* In a traverse function we must make a copy of the
1682            record data before modifying it. */
1683
1684         locks = (struct lock_struct *)memdup(rec->value.dptr,
1685                                              rec->value.dsize);
1686         if (!locks) {
1687                 return -1; /* Terminate traversal. */
1688         }
1689
1690         key = (struct file_id *)rec->key.dptr;
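        /* The record value is a flat array of lock_struct entries, so
           the lock count falls straight out of the data size. */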
1691         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1692
1693         /* Ensure the lock db is clean of entries from invalid processes. */
1694
1695         if (!validate_lock_entries(&num_locks, &locks)) {
1696                 SAFE_FREE(locks);
1697                 return -1; /* Terminate traversal */
1698         }
1699
1700         if (orig_num_locks != num_locks) {
1701                 if (num_locks) {
1702                         TDB_DATA data;
1703                         data.dptr = (uint8_t *)locks;
1704                         data.dsize = num_locks*sizeof(struct lock_struct);
1705                         rec->store(rec, data, TDB_REPLACE);
1706                 } else {
1707                         rec->delete_rec(rec);
1708                 }
1709         }
1710
1711         if (cb->fn) {
1712                 for (i=0; i<num_locks; i++) {
1713                         cb->fn(*key,
1714                                 locks[i].context.pid,
1715                                 locks[i].lock_type,
1716                                 locks[i].lock_flav,
1717                                 locks[i].start,
1718                                 locks[i].size,
1719                                 cb->private_data);
1720                 }
1721         }
1722
1723         SAFE_FREE(locks);
1724         return 0;
1725 }
1726
1727 /*******************************************************************
1728  Call the specified function on each lock in the database.
1729 ********************************************************************/
1730
1731 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1732                           enum brl_type lock_type,
1733                           enum brl_flavour lock_flav,
1734                           br_off start, br_off size,
1735                           void *private_data),
1736                void *private_data)
1737 {
1738         struct brl_forall_cb cb;
1739
1740         if (!brlock_db) {
1741                 return 0;
1742         }
1743         cb.fn = fn;
1744         cb.private_data = private_data;
1745         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1746 }
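
/* Usage sketch (illustrative only - count_cb and n are hypothetical
   names, not part of this module): counting every byte range lock in
   the database. Given a callback matching the fn signature above,

        static void count_cb(struct file_id id, struct server_id pid,
                             enum brl_type lock_type,
                             enum brl_flavour lock_flav,
                             br_off start, br_off size,
                             void *private_data)
        {
                unsigned int *n = (unsigned int *)private_data;
                (*n)++;
        }

   a caller might then do:

        unsigned int n = 0;
        brl_forall(count_cb, &n);

   brl_forall returns the number of records traversed, or 0 if the
   brlock database has not been opened yet. */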
1747
1748 /*******************************************************************
1749  Store a potentially modified set of byte range lock data back into
1750  the database.
1751  Unlock the record.
1752 ********************************************************************/
1753
1754 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1755 {
1756         if (br_lck->read_only) {
1757                 SMB_ASSERT(!br_lck->modified);
1758         }
1759
1760         if (!br_lck->modified) {
1761                 goto done;
1762         }
1763
1764         if (br_lck->num_locks == 0) {
1765                 /* No locks - delete this entry. */
1766                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1767                 if (!NT_STATUS_IS_OK(status)) {
1768                         DEBUG(0, ("delete_rec returned %s\n",
1769                                   nt_errstr(status)));
1770                         smb_panic("Could not delete byte range lock entry");
1771                 }
1772         } else {
1773                 TDB_DATA data;
1774                 NTSTATUS status;
1775
1776                 data.dptr = (uint8 *)br_lck->lock_data;
1777                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1778
1779                 status = br_lck->record->store(br_lck->record, data,
1780                                                TDB_REPLACE);
1781                 if (!NT_STATUS_IS_OK(status)) {
1782                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1783                         smb_panic("Could not store byte range mode entry");
1784                 }
1785         }
1786
1787  done:
1788
1789         SAFE_FREE(br_lck->lock_data);
1790         TALLOC_FREE(br_lck->record);
1791         return 0;
1792 }
1793
1794 /*******************************************************************
1795  Fetch a set of byte range lock data from the database.
1796  Leave the record locked.
1797  TALLOC_FREE(brl) will release the lock in the destructor.
1798 ********************************************************************/
1799
1800 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1801                                         files_struct *fsp, bool read_only)
1802 {
1803         TDB_DATA key, data;
1804         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1805
1806         if (br_lck == NULL) {
1807                 return NULL;
1808         }
1809
1810         br_lck->fsp = fsp;
1811         br_lck->num_locks = 0;
1812         br_lck->modified = False;
1813         br_lck->key = fsp->file_id;
1814
1815         key.dptr = (uint8 *)&br_lck->key;
1816         key.dsize = sizeof(struct file_id);
1817
1818         if (!fsp->lockdb_clean) {
1819                 /* We must be read/write to clean
1820                    the dead entries. */
1821                 read_only = False;
1822         }
1823
1824         if (read_only) {
1825                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1826                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1827                         TALLOC_FREE(br_lck);
1828                         return NULL;
1829                 }
1830                 br_lck->record = NULL;
1831         }
1832         else {
1833                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1834
1835                 if (br_lck->record == NULL) {
1836                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1837                         TALLOC_FREE(br_lck);
1838                         return NULL;
1839                 }
1840
1841                 data = br_lck->record->value;
1842         }
1843
1844         br_lck->read_only = read_only;
1845         br_lck->lock_data = NULL;
1846
1847         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1848
1849         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1850
1851         if (br_lck->num_locks != 0) {
1852                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1853                                                      br_lck->num_locks);
1854                 if (br_lck->lock_data == NULL) {
1855                         DEBUG(0, ("malloc failed\n"));
1856                         TALLOC_FREE(br_lck);
1857                         return NULL;
1858                 }
1859
1860                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1861         }
1862         
1863         if (!fsp->lockdb_clean) {
1864                 int orig_num_locks = br_lck->num_locks;
1865
1866                 /* This is the first time we've accessed this. */
1867                 /* Go through and ensure all entries exist - remove any that don't. */
1868                 /* Makes the lockdb self cleaning at low cost. */
1869
1870                 if (!validate_lock_entries(&br_lck->num_locks,
1871                                            &br_lck->lock_data)) {
1872                         SAFE_FREE(br_lck->lock_data);
1873                         TALLOC_FREE(br_lck);
1874                         return NULL;
1875                 }
1876
1877                 /* Ensure invalid locks are cleaned up in the destructor. */
1878                 if (orig_num_locks != br_lck->num_locks) {
1879                         br_lck->modified = True;
1880                 }
1881
1882                 /* Mark the lockdb as "clean" as seen from this open file. */
1883                 fsp->lockdb_clean = True;
1884         }
1885
1886         if (DEBUGLEVEL >= 10) {
1887                 unsigned int i;
1888                 struct lock_struct *locks = br_lck->lock_data;
1889                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1890                         br_lck->num_locks,
1891                 file_id_string_tos(&fsp->file_id)));
1892                 for( i = 0; i < br_lck->num_locks; i++) {
1893                         print_lock_struct(i, &locks[i]);
1894                 }
1895         }
1896         return br_lck;
1897 }
1898
1899 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1900                                         files_struct *fsp)
1901 {
1902         return brl_get_locks_internal(mem_ctx, fsp, False);
1903 }
1904
1905 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1906 {
1907         struct byte_range_lock *br_lock;
1908
1909         if (lp_clustering()) {
1910                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1911         }
1912
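        /* Non-clustered case: cache the last read-only lock record on
           the fsp and revalidate it via the tdb sequence number, which
           changes whenever brlock.tdb is modified. */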
1913         if ((fsp->brlock_rec != NULL)
1914             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1915                 return fsp->brlock_rec;
1916         }
1917
1918         TALLOC_FREE(fsp->brlock_rec);
1919
1920         br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
1921         if (br_lock == NULL) {
1922                 return NULL;
1923         }
1924         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1925
1926         fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
1927         if (fsp->brlock_rec == NULL) {
1928                 goto fail;
1929         }
1930         fsp->brlock_rec->fsp = fsp;
1931         fsp->brlock_rec->num_locks = br_lock->num_locks;
1932         fsp->brlock_rec->read_only = true;
1933         fsp->brlock_rec->key = br_lock->key;
1934
1935         fsp->brlock_rec->lock_data = (struct lock_struct *)
1936                 talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
1937                               sizeof(struct lock_struct) * br_lock->num_locks);
1938         if (fsp->brlock_rec->lock_data == NULL) {
1939                 goto fail;
1940         }
1941
1942         TALLOC_FREE(br_lock);
1943         return fsp->brlock_rec;
1944 fail:
1945         TALLOC_FREE(br_lock);
1946         TALLOC_FREE(fsp->brlock_rec);
1947         return NULL;
1948 }
1949
1950 struct brl_revalidate_state {
1951         ssize_t array_size;
1952         uint32 num_pids;
1953         struct server_id *pids;
1954 };
1955
1956 /*
1957  * Collect PIDs of all processes with pending entries
1958  */
1959
1960 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1961                                    enum brl_type lock_type,
1962                                    enum brl_flavour lock_flav,
1963                                    br_off start, br_off size,
1964                                    void *private_data)
1965 {
1966         struct brl_revalidate_state *state =
1967                 (struct brl_revalidate_state *)private_data;
1968
1969         if (!IS_PENDING_LOCK(lock_type)) {
1970                 return;
1971         }
1972
1973         add_to_large_array(state, sizeof(pid), (void *)&pid,
1974                            &state->pids, &state->num_pids,
1975                            &state->array_size);
1976 }
1977
1978 /*
1979  * qsort callback to sort the processes
1980  */
1981
1982 static int compare_procids(const void *p1, const void *p2)
1983 {
1984         const struct server_id *i1 = (const struct server_id *)p1;
1985         const struct server_id *i2 = (const struct server_id *)p2;
1986
1987         if (i1->pid < i2->pid) return -1;
1988         if (i1->pid > i2->pid) return 1;
1989         return 0;
1990 }
1991
1992 /*
1993  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1994  * locks so that they retry. Mainly used in the cluster code after a node has
1995  * died.
1996  *
1997  * Done in two steps to avoid double-sends: First we collect all entries in an
1998  * array, then qsort that array and only send to non-dupes.
1999  */
2000
2001 static void brl_revalidate(struct messaging_context *msg_ctx,
2002                            void *private_data,
2003                            uint32_t msg_type,
2004                            struct server_id server_id,
2005                            DATA_BLOB *data)
2006 {
2007         struct brl_revalidate_state *state;
2008         uint32 i;
2009         struct server_id last_pid;
2010
2011         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2012                 DEBUG(0, ("talloc failed\n"));
2013                 return;
2014         }
2015
2016         brl_forall(brl_revalidate_collect, state);
2017
2018         if (state->array_size == -1) {
2019                 DEBUG(0, ("talloc failed\n"));
2020                 goto done;
2021         }
2022
2023         if (state->num_pids == 0) {
2024                 goto done;
2025         }
2026
2027         qsort(state->pids, state->num_pids, sizeof(state->pids[0]),
2028               compare_procids);
2029
2030         ZERO_STRUCT(last_pid);
2031
2032         for (i=0; i<state->num_pids; i++) {
2033                 if (procid_equal(&last_pid, &state->pids[i])) {
2034                         /*
2035                          * We've seen that one already
2036                          */
2037                         continue;
2038                 }
2039
2040                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2041                                &data_blob_null);
2042                 last_pid = state->pids[i];
2043         }
2044
2045  done:
2046         TALLOC_FREE(state);
2047         return;
2048 }
2049
2050 void brl_register_msgs(struct messaging_context *msg_ctx)
2051 {
2052         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2053                            brl_revalidate);
2054 }